2025-01-22 21:34:06 -05:00
|
|
|
import json
import re
import sys

from bs4 import BeautifulSoup
from dateutil.parser import parse
|
2025-01-22 21:34:06 -05:00
|
|
|
|
2025-01-23 13:36:35 -05:00
|
|
|
def calcPredictedScore(sF,sU,S,O):
    """Return the final score implied by a spread and an over/under.

    Solves the pair of equations ``sF + sU = O`` and ``sF - sU = S`` for the
    two scores, giving ``sU = (O - S) / 2`` and ``sF = O - sU``.

    Args:
        sF: Unused; kept so existing callers do not have to change.
        sU: Unused; kept so existing callers do not have to change.
        S: Spread (predicted margin) as a number or numeric string.
        O: Over/under (predicted total) as a number or numeric string.

    Returns:
        A dict with keys ``"sU"`` and ``"sF"`` (in that order) holding the
        predicted scores as floats.

    Raises:
        ValueError: If ``S`` or ``O`` is a string that is not numeric.
    """
    # float() rather than int(): half-point lines such as "3.5" are valid
    # input, and int() would either reject them (strings) or truncate them
    # (floats). Whole-number input produces the same result as before.
    spread = float(S)
    total = float(O)

    underdog_score = (total - spread) / 2
    return {
        "sU": underdog_score,
        "sF": total - underdog_score,
    }
|
2025-01-22 21:34:06 -05:00
|
|
|
|
2025-01-23 13:36:35 -05:00
|
|
|
# Root of the JSON document printed at the end: one entry per season.
fullObj = {
    "years": []
}

start_year = 1952
end_year = 2025  # exclusive upper bound of the seasons to read

# Compiled once instead of being re-looked-up for every table row.
# Each pattern expects one arbitrary character, one whitespace character,
# then the numeric part.
_SPREAD_CELL_RE = re.compile(r'.\s-?\d')
_SCORE_RE = re.compile(r'.\s(\d+)-(\d+)')
# NOTE(review): the capture group keeps only the leading run of digits, so a
# half-point line such as "3.5" is stored as "3".
_LINE_VALUE_RE = re.compile(r'.\s-?(\d+)')


def _line_value(text):
    """Return the digits of a betting line found in *text*, or -1 if none."""
    found = _LINE_VALUE_RE.search(text)
    return found.group(1) if found else -1


for year in range(start_year, end_year):
    with open("data/"+str(year)+".html") as fp:
        soup = BeautifulSoup(fp, 'html.parser')

    # Text node that marks the start of the season's game tables; every
    # "soh1" element after it is scanned.
    start_pos = soup.find(string=str(year)+" Regular Season - Week 1")

    gamesObj = {
        "year": year,
        "games": []
    }

    try:
        for table in start_pos.find_all_next(class_="soh1"):
            for child in table.find_all("tbody"):
                for row in child.find_all("tr"):
                    cells = row.find_all("td")
                    game = {}

                    # A three-character string in the second cell means the
                    # row has one extra leading column, so every index below
                    # is shifted by one (presumably a day abbreviation such
                    # as "Sun" -- confirm against the HTML).
                    cursor = 0
                    if len(cells[1].string) == 3:
                        cursor = cursor+1

                    spreadText = cells[cursor+6].string
                    # Rows whose spread cell has no numeric line are skipped.
                    if _SPREAD_CELL_RE.match(spreadText):
                        game["date"] = cells[cursor+1].string
                        game["at"] = cells[cursor+3].string
                        game["fav"] = cells[cursor+4].string
                        game["und"] = cells[cursor+8].string

                        actualScore = _SCORE_RE.search(cells[cursor+5].string)
                        game["sF"] = actualScore.group(1)
                        game["sU"] = actualScore.group(2)

                        spreadVal = _line_value(spreadText)
                        # The over/under is read from the cell after the
                        # underdog. Reading cursor+8 here would parse the
                        # underdog's team name, which only "matches" when
                        # the name happens to contain a number.
                        # NOTE(review): assumes the column order
                        # ... spread, (at), underdog, over/under -- confirm
                        # against data/*.html.
                        ouVal = _line_value(cells[cursor+9].string)

                        game["spread"] = spreadVal
                        game["ou"] = ouVal

                        if spreadVal != -1 and ouVal != -1:
                            game["pScore"] = calcPredictedScore(game["sF"],game["sU"],spreadVal,ouVal)

                        gamesObj["games"].append(game)
    except Exception as exc:
        # Best effort: keep whatever was parsed for this year and move on.
        # The message goes to stderr because stdout carries the JSON
        # document; Exception (not a bare except) lets Ctrl-C and
        # SystemExit propagate.
        print("Error parsing " + str(year) + ": " + repr(exc), file=sys.stderr)

    fullObj["years"].append(gamesObj)

objson = json.dumps(fullObj)

print(objson)
|
|
|
|
|
|
|
|
'''
Derivation of the predicted score used by calcPredictedScore.

sF and sU are the two scores, O is the over/under (their sum) and
S is the spread (their difference).

if
    sF + sU = O
and
    sF - sU = S
then
    sF = O - sU
    sF = S + sU

    O - sU = S + sU

    O = S + 2sU

    2sU = O - S

    sU = (O - S)/2
    sF = O - ((O-S)/2)
'''
|
2025-01-22 21:34:06 -05:00
|
|
|
|