欧陆资讯 NEWS CENTER
联系我们 CONTACT US
- 手机:
- 18888889999
- 电话:
- 0898-66889888
- 邮箱:
- admin@youweb.com
- 地址:
- 海南省海口市玉沙路58号
爬取和分析NBA球员排名及各项数据
2024-09-09 12:50:32 点击量:
# Scrape per-game NBA player statistics from china.nba.cn, export them to CSV,
# and explore how each statistic relates to the scoring rank (regression,
# bar/scatter/stack/line plots and two least-squares line fits).
#
# Third-party packages that are only needed for one step (requests, bs4,
# scikit-learn, seaborn, matplotlib) are imported inside the function that
# uses them, mirroring the original "import where used" style and keeping the
# pure helpers importable without the network/plotting stack.
import csv
import json
from pathlib import Path

import numpy as np
import pandas as pd
import scipy.optimize as opt

url='https://china.nba.cn/stats2/league/playerstats.json?conference=All&country=All&individual=All&locale=zh_CN&pageIndex=0&position=All&qualified=false&season=2021&seasonType=2&split=All+Team&statType=points&team=All&total=perGame'

# Sentinel returned by getHTMLText when the download fails.
FETCH_ERROR = '产生异常'

# (CSV/plot column label, key inside each player's 'statAverage' dict).
# NOTE(review): '进攻效率'/'防守效率' literally mean offensive/defensive
# efficiency, but the values stored are offRebsPg/defRebsPg (rebounds per
# game). The labels are kept so existing CSV consumers keep working.
STAT_COLUMNS = (
    ('场均得分', 'pointsPg'),
    ('场均篮板', 'rebsPg'),
    ('场均助攻', 'assistsPg'),
    ('投篮命中率', 'fgpct'),
    ('三分命中率', 'tppct'),
    ('罚球命中率', 'ftpct'),
    ('进攻效率', 'offRebsPg'),
    ('防守效率', 'defRebsPg'),
    ('场均抢断', 'stealsPg'),
    ('场均盖帽', 'blocksPg'),
)
COLUMN_ORDER = ['排名', 'NAME'] + [label for label, _ in STAT_COLUMNS]

# Raw string: the original non-raw literal relied on invalid escape
# sequences (\W, \F, \s) happening to be kept verbatim.
SIMSUN_FONT_PATH = r'C:\Windows\Fonts\simsun.ttc'


def getHTMLText(url, timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server (previously ignored in
            favour of a hard-coded 30).

    Returns:
        The decoded response text, or the sentinel '产生异常' when the
        request fails (connection error, timeout, non-2xx status).
    """
    import requests

    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return FETCH_ERROR


def build_dataframe(players):
    """Turn the API's player list into a DataFrame, one row per player.

    Rank ('排名') is the 1-based position of the player in *players*.
    An empty list yields an empty frame that still has every column.
    """
    rows = []
    for position, player in enumerate(players, start=1):
        averages = player['statAverage']
        row = {'排名': position, 'NAME': player['playerProfile']['displayName']}
        row.update((label, averages[key]) for label, key in STAT_COLUMNS)
        rows.append(row)
    return pd.DataFrame(rows, columns=COLUMN_ORDER)


def feature_matrix(df):
    """Return the predictor columns used to regress rank.

    Both the name and the target ('排名') are excluded; the original left
    the target among the predictors, which makes the fit trivial.
    """
    return df.drop(columns=['NAME', '排名'])


def func(x, c):
    """Evaluate the straight line k*x + a for parameters c = (k, a)."""
    k, a = c
    return k * x + a


def errfc(c, x, y):
    """Residuals y - func(x, c), in the form scipy.optimize.leastsq expects."""
    return y - func(x, c)


def fit_line(x, y, c0=(100, 20)):
    """Least-squares fit of func to (x, y); returns leastsq's (params, ier)."""
    return opt.leastsq(errfc, c0, args=(x, y))


def _legend_font():
    """Return SimSun font properties, or None when the font file is absent."""
    from matplotlib import font_manager

    if Path(SIMSUN_FONT_PATH).is_file():
        return font_manager.FontProperties(fname=SIMSUN_FONT_PATH)
    # Non-Windows machines: fall back to the rcParams font instead of
    # failing at render time on a missing file.
    return None


def _print_rank_regression(df):
    """Fit rank against the numeric statistics and print the coefficients."""
    from sklearn.linear_model import LinearRegression

    predict_model = LinearRegression()
    predict_model.fit(feature_matrix(df), df['排名'])
    print('回归系数为:', predict_model.coef_)


def _plot_linear_fit(plt, x0, y0, point_label, line_label, title):
    """Scatter (x0, y0) together with its fitted line and show the figure."""
    result = fit_line(x0, y0)  # fit once; the original ran leastsq twice
    print(result)
    plt.plot(x0, y0, "o", label=point_label)
    plt.plot(x0, func(x0, result[0]), label=line_label)
    plt.title(title)
    plt.legend(loc=3, prop=_legend_font())
    plt.show()


def _draw_plots(df):
    """Render every chart of the analysis, one figure at a time."""
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly

    # Regression plots. seaborn >= 0.12 only accepts x/y as keywords.
    sns.regplot(x=df['排名'], y=df['进攻效率'])
    sns.regplot(x=df['排名'], y=df['防守效率'])
    plt.title('排名与进攻、防守效率图')
    plt.show()  # was missing, so the bar chart below drew onto this figure

    # Bar chart.
    plt.bar(df.排名, df.投篮命中率, color='b')
    plt.xlabel("排名")
    plt.ylabel("投篮命中率")
    plt.title('排名与投篮命中率柱状图')
    plt.show()

    # Scatter plot (title previously said "bar chart").
    plt.scatter(df.排名, df.场均得分, s=30, color='b', alpha=0.6, marker='o')
    plt.xlabel("排名")
    plt.ylabel("场均得分")
    plt.title('排名与场均得分散点图')
    plt.show()

    # Stacked area plot; 'colors' is stackplot's documented parameter.
    plt.stackplot(df.排名, df.罚球命中率, colors=['b'])
    plt.xlabel("排名")
    plt.ylabel("罚球命中率")
    plt.title('排名与罚球命中率堆叠图')
    plt.show()

    # Line plot.
    plt.plot(df.排名, df.三分命中率, color='b')
    plt.xlabel("排名")
    plt.ylabel("三分命中率")
    plt.title('排名与三分命中率折线图')
    plt.show()

    # Least-squares line fits.
    _plot_linear_fit(plt, df['进攻效率'], df['场均得分'],
                     u"进攻效率", u"场均得分", '进攻效率与场均得分拟合曲线图')
    _plot_linear_fit(plt, df['防守效率'], df['场均抢断'],
                     u"防守效率", u"场均抢断", '防守效率与场均抢断拟合曲线图')


def main():
    """Run the whole pipeline: fetch, tabulate, export, analyse, plot."""
    html = getHTMLText(url)
    if html == FETCH_ERROR:
        # Stop here instead of feeding the sentinel to json.loads.
        raise SystemExit('Failed to download player statistics from ' + url)

    # Preview of the raw response, as in the original script.
    from bs4 import BeautifulSoup
    print(BeautifulSoup(html, 'html.parser').prettify())

    # Reuse the text fetched above rather than downloading it a second time.
    try:
        players = json.loads(html)['payload']['players']
    except (json.JSONDecodeError, KeyError, TypeError) as exc:
        raise SystemExit('Unexpected response format: {!r}'.format(exc)) from exc

    df = build_dataframe(players)
    if df.empty:
        raise SystemExit('The API returned no players; nothing to analyse.')

    # Write the raw table to CSV.
    df.to_csv('D:/Python/NBA数据.csv', index=False)
    df.to_csv('D:/Python/NBA.csv', index=False)

    # Show, then drop, duplicated rows.
    print(df.duplicated())
    df = df.drop_duplicates()

    # Missing-value check and summary statistics.
    print(df['排名'].isnull().value_counts())
    print(df.describe())

    _print_rank_regression(df)
    _draw_plots(df)

    # Persist the final table (note: 罚球命中率 precedes 三分命中率 here).
    df = pd.DataFrame(df, columns=['排名','NAME','场均得分','场均篮板','场均助攻','投篮命中率','罚球命中率','三分命中率','进攻效率','防守效率','场均抢断','场均盖帽'])
    df.to_csv('NBA.csv', encoding='gbk')


if __name__ == '__main__':
    main()