全国服务热线:18888889999
在线报名
欧陆注册 CURRICULUM
欧陆资讯 NEWS CENTER
联系我们 CONTACT US
手机:
18888889999
电话:
0898-66889888
邮箱:
admin@youweb.com
地址:
海南省海口市玉沙路58号
欧陆资讯
你的位置: 首页 > 欧陆资讯
爬取和分析NBA球员排名及各项数据
2024-09-09 12:50:32 点击量:

  1 #提取数据
  2 import requests
  3 from bs4 import BeautifulSoup
  4 import json
  5 #
  6 url='https://china.nba.cn/stats2/league/playerstats.json?conference=All&country=All&individual=All&locale=zh_CN&pageIndex=0&position=All&qualified=false&season=2021&seasonType=2&split=All+Team&statType=points&team=All&total=perGame'
  7 #
  8 def getHTMLText(url,timeout=30):
  9     try:
 10         r=requests.get(url,timeout=30) # 
 11         r.raise_for_status()
 12         r.encoding=r.apparent_encoding
 13         return r.text
 14     except:
 15         return'产生异常'
 16 
 17 #html.parser表示用BeautifulSoup库解析网页
 18 html=getHTMLText(url)
 19 soup=BeautifulSoup(html,'html.parser')
 20 print(soup.prettify())
 21 
 22 #创建空表
 23 pointsPg_list=[]
 24 assistsPg_list=[]
 25 rebsPg_list=[]
 26 name_list=[]
 27 data1=[]
 28 tppct=[]
 29 ftpct=[]
 30 fgpct=[]
 31 stealsPg_list=[]
 32 blocksPg_list=[]
 33 offRebsPg_list=[]
 34 defRebsPg_list=[]
 35 rank=[]
 36 html=getHTMLText(url)
 37 data=json.loads(html)
 38 a=data['payload']['players']
 39 data1.append(name_list)
 40 data1.append(assistsPg_list)
 41 b=1
 42 for i in a:   
 43     rank.append(b)
 44     name_list.append(i['playerProfile']['displayName'])
 45     pointsPg_list.append(i['statAverage'][ 'pointsPg'])
 46     rebsPg_list.append(i['statAverage'][ 'rebsPg'])
 47     assistsPg_list.append(i['statAverage'][ 'assistsPg'])
 48     stealsPg_list.append(i['statAverage'][ 'stealsPg'])
 49     blocksPg_list.append(i['statAverage'][ 'blocksPg'])
 50     offRebsPg_list.append(i['statAverage'][ 'offRebsPg'])
 51     defRebsPg_list.append(i['statAverage'][ 'defRebsPg'])
 52     tppct.append(i['statAverage']['tppct'])
 53     ftpct.append(i['statAverage']['ftpct'])
 54     fgpct.append(i['statAverage']['fgpct'])
 55     b=b+1
 56 list_1=['排名']
 57 
 58 #导出球员的各项数据
 59 import pandas as pd
 60 df=pd.DataFrame(columns=list_1)
 61 df['排名']=rank
 62 df['NAME']=name_list
 63 df['场均得分']=pointsPg_list
 64 df['场均篮板']=rebsPg_list
 65 df['场均助攻']=assistsPg_list
 66 df['投篮命中率']=fgpct
 67 df['三分命中率']=tppct
 68 df['罚球命中率']=ftpct
 69 df['进攻效率']=offRebsPg_list
 70 df['防守效率']=defRebsPg_list
 71 df['场均抢断']=stealsPg_list
 72 df['场均盖帽']=blocksPg_list
 73 df
 74 
 75 #将dataframe写入csv
 76 df.to_csv('D:/Python/NBA数据.csv',index=False)
 77 df.to_csv('D:/Python/NBA.csv',index=False)
 78 
 79 #检查并显示重复值
 80 print(df.duplicated())
 81 
 82  #删除重复值
 83 df= df.drop_duplicates()
 84 df.head()
 85 
 86 #异常值处理
 87 df.describe()
 88 
 89 #检查是否有空值
 90 print(df['排名'].isnull().value_counts())
 91 
 92 #查看统计信息
 93 print(df.describe())
 94 df
 95 
 96 #求取回归系数
 97 from sklearn.linear_model import LinearRegression
 98 X=df.drop('NAME',axis=1)
 99 predict_model=LinearRegression()
100 predict_model.fit(X,df['排名'])
101 print('回归系数为:',predict_model.coef_)
102 
# Regression plots of rank against the '进攻效率' and '防守效率' columns
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['SimHei']  # font used to render the Chinese labels
# Pass x and y by keyword: seaborn 0.12+ no longer accepts them positionally.
sns.regplot(x=df['排名'],y=df['进攻效率'])
sns.regplot(x=df['排名'],y=df['防守效率'])
plt.title('排名与进攻、防守效率图')
# Finish this figure here; without it the next chart is drawn onto the same
# axes and replaces the title.
plt.show()
111 
# Bar chart: '投篮命中率' for each rank
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams.update({'font.sans-serif':['SimHei']})
bar_positions=df['排名']
bar_heights=df['投篮命中率']
plt.bar(bar_positions,bar_heights,color='b')
plt.xlabel("排名")
plt.ylabel("投篮命中率")
plt.title('排名与投篮命中率柱状图')
plt.show()
122 
# Scatter plot: '场均得分' for each rank
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False   # lets minus signs display correctly
size=30
# Marker size is passed by name so it cannot be mistaken for another argument
plt.scatter(df['排名'],df['场均得分'],s=size,color='b',alpha=0.6,marker='o')
plt.xlabel("排名")
plt.ylabel("场均得分")
# The title previously called this scatter plot a bar chart
plt.title('排名与场均得分散点图')
plt.show()
135 
# Stacked-area chart: '罚球命中率' for each rank
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams.update({
    'font.sans-serif':['SimHei'],
    'axes.unicode_minus':False,   # lets minus signs display correctly
})
area_x=df['排名']
area_y=df['罚球命中率']
plt.stackplot(area_x,area_y,color=['b',])
plt.xlabel("排名")
plt.ylabel("罚球命中率")
plt.title('排名与罚球命中率堆叠图')
plt.show()
147 
# Line chart: '三分命中率' for each rank
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams.update({
    'font.sans-serif':['SimHei'],
    'axes.unicode_minus':False,
})
line_x=df['排名']
line_y=df['三分命中率']
plt.plot(line_x,line_y,color='b')
plt.xlabel("排名")
plt.ylabel("三分命中率")
plt.title('排名与三分命中率折线图')
plt.show()
159 
# Least-squares straight-line fit of '场均得分' against '进攻效率'
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import scipy.optimize as opt
import csv
x0=df['进攻效率']
y0=df['场均得分']
def func(x,c):
    """Evaluate the line k*x+a, where c is the pair (k, a)."""
    k,a=c
    return k*x+a
def errfc(c,x,y):
    """Return the residuals y-func(x,c) that leastsq minimises."""
    return y-func(x,c)
c0=(100,20)  # starting guess for (k, a)
# Run the fit once and reuse the result for the printout and the plot
fit_result=opt.leastsq(errfc,c0,args=(x0,y0))
print(fit_result)
# Font for the legend. Raw string so the backslashes of the Windows path are
# taken literally instead of being read as (invalid) escape sequences.
chinese=matplotlib.font_manager.FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')

plt.plot(x0,y0,"o",label=u"进攻效率")

plt.plot(x0,func(x0,fit_result[0]),label=u"场均得分")
plt.title('进攻效率与场均得分拟合曲线图')
plt.legend(loc=3,prop=chinese)

plt.show()
186 
# Least-squares straight-line fit of '场均抢断' against '防守效率'
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import scipy.optimize as opt
import csv
x0=df['防守效率']
y0=df['场均抢断']
def func(x,c):
    """Evaluate the line k*x+a, where c is the pair (k, a)."""
    k,a=c
    return k*x+a
def errfc(c,x,y):
    """Return the residuals y-func(x,c) that leastsq minimises."""
    return y-func(x,c)
c0=(100,20)  # starting guess for (k, a)
# Run the fit once and reuse the result for the printout and the plot
fit_result=opt.leastsq(errfc,c0,args=(x0,y0))
print(fit_result)
# Font for the legend. Raw string so the backslashes of the Windows path are
# taken literally instead of being read as (invalid) escape sequences.
chinese=matplotlib.font_manager.FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')

plt.plot(x0,y0,"o",label=u"防守效率")

plt.plot(x0,func(x0,fit_result[0]),label=u"场均抢断")
plt.title('防守效率与场均抢断拟合曲线图')
plt.legend(loc=3,prop=chinese)

plt.show()
214 
# Persist the table: fix the column order, then save it as a GBK-encoded CSV
persist_columns=[
    '排名','NAME','场均得分','场均篮板','场均助攻','投篮命中率',
    '罚球命中率','三分命中率','进攻效率','防守效率','场均抢断','场均盖帽',
]
df=pd.DataFrame(df,columns=persist_columns)
df.to_csv('NBA.csv',encoding='gbk')