Monday, November 3, 2014

Exploring the NBA

I've been trying to do some more data-sciencey stuff with NBA data, so I created a Python script that scrapes data from www.basketball-reference.com/. Here's one that counts, for every team, the wins and losses in the 2nd game of back-to-backs during the 2013-2014 NBA season. Below is the graph and after the jump is the code.




from bs4 import BeautifulSoup
from datetime import timedelta, datetime
import numpy as np
import matplotlib.pyplot as plt
import requests

# Site root; the hrefs scraped below are appended to it directly.
base_url = 'http://www.basketball-reference.com'
# 2013-2014 season summary page, used here as the index of team links.
url = 'http://www.basketball-reference.com/leagues/NBA_2014.html'
r = requests.get(url, timeout=30)
# Surface a failed download as an HTTP error instead of an AttributeError
# later, when the expected table is missing from an error page.
r.raise_for_status()
# Name the parser explicitly: without it BeautifulSoup picks whichever
# parser happens to be installed, so the tree (and a warning) varies by
# machine.
soup = BeautifulSoup(r.text, 'html.parser')
# Every link inside the element with id "team" becomes a schedule URL:
# drop the last five characters of the href (presumably ".html" -- confirm
# against the live page) and append "_games.html".
team_list = [
    base_url + link.get('href')[:-5] + '_games.html'
    for link in soup.find(id="team").tbody.find_all('a')
]


# Maps the team identifier taken from the URL to its {'W': n, 'L': n}
# tally for games played exactly one day after the team's previous game.
team_dict = {}
# Loop-invariant gap that defines a back-to-back.
day = timedelta(days=1)
for team in team_list:
    # Fifth '/'-separated piece of the schedule URL built from base_url + href.
    team_name = team.split('/')[4]
    r = requests.get(team, timeout=30)
    # Fail loudly on an HTTP error rather than on a missing table below.
    r.raise_for_status()
    # Explicit parser so the child indexing below does not depend on which
    # parser libraries are installed.
    soup = BeautifulSoup(r.text, 'html.parser')
    wl_dict = {'W': 0, 'L': 0}
    # None means "no earlier game seen yet", so the first game of the
    # schedule can never be counted as the second leg of a back-to-back.
    prev_date = None
    for link in soup.find(id="teams_games").tbody.find_all('tr'):
        # Rows without a <td> are skipped (presumably repeated header rows).
        if not link.td:
            continue
        # NOTE(review): positional child lookups -- child 3 carries the
        # sortable date in its 'csk' attribute and child 15 the W/L text;
        # confirm against the page markup.
        date = datetime.strptime(link.contents[3]['csk'], "%Y-%m-%d")
        if prev_date is not None and (date - day) == prev_date:
            result = link.contents[15].text
            # Ignore anything that is not a plain 'W' or 'L' (e.g. an empty
            # cell) instead of raising KeyError.
            if result in wl_dict:
                wl_dict[result] += 1
        prev_date = date
    team_dict[team_name] = wl_dict

# Flatten team_dict into parallel lists, one entry per team, in dict order.
N = len(team_dict)
team_name_list = list(team_dict)
win_bars = [team_dict[team]['W'] for team in team_name_list]
lose_bars = [team_dict[team]['L'] for team in team_name_list]
ind = np.arange(N)  # left edge of each team's win bar
width = .35         # width of a single bar

fig, ax = plt.subplots()
# autofmt_xdate is used here only for its label rotation/alignment.
fig.autofmt_xdate()
fig.set_size_inches(18.5, 10.5)
# align='edge' pins each bar's left edge to its x value. The tick at
# ind+width is then the shared edge between a team's win and loss bars on
# every matplotlib version (the default alignment changed to 'center' in
# matplotlib 2.0, which would put the tick under the loss bar instead).
rects1 = ax.bar(ind, win_bars, width, color='g', align='edge')
rects2 = ax.bar(ind+width, lose_bars, width, color='r', align='edge')

ax.set_ylabel('Win/Loss Count')
ax.set_xlabel('Team')
ax.set_title('Win/Loss Count of the 2nd Game in a Back to Back 2013-2014')
ax.set_xticks(ind+width)
ax.set_xticklabels(team_name_list)

# One legend entry per series, keyed off the first bar of each.
ax.legend((rects1[0], rects2[0]), ('Win', 'Loss'))

def autolabel(rects):
    """Label every bar in *rects* with its height, printed as an integer.

    The text is drawn on the module-level ``ax``, horizontally centred on
    the bar and anchored by its bottom edge at 1.05 times the bar height.
    """
    for bar in rects:
        bar_height = bar.get_height()
        centre_x = bar.get_x() + bar.get_width() / 2.
        label_y = 1.05 * bar_height
        ax.text(centre_x, label_y, '%d' % int(bar_height),
                ha='center', va='bottom')

# Write the count above every bar, wins first and then losses.
for bar_group in (rects1, rects2):
    autolabel(bar_group)
# Fixed y-range of 0..25 for the chart.
plt.ylim((0, 25))
plt.show()


No comments:

Post a Comment