NFLOddsVis/parse_odds.py

108 lines
2.0 KiB
Python

import json
import re
import sys

from bs4 import BeautifulSoup
from dateutil.parser import parse
def calcPredictedScore(sF, sU, S, O):
    """Return the score split implied by a spread and an over/under total.

    Solves the pair of equations ``sF + sU = O`` and ``sF - sU = S`` for the
    two scores.

    Args:
        sF: Accepted for call compatibility; not used in the calculation.
        sU: Accepted for call compatibility; not used in the calculation.
        S: Spread, as a number or numeric string. Decimal values such as
            ``"3.5"`` are accepted and are not truncated.
        O: Over/under total, in the same accepted forms as ``S``.

    Returns:
        A dict with float values under ``"sU"`` (``(O - S) / 2``) and
        ``"sF"`` (``O - sU``), inserted in that order.

    Raises:
        ValueError: If ``S`` or ``O`` is a string that is not numeric.
        TypeError: If ``S`` or ``O`` is not a number or string (e.g. None).
    """
    # float() rather than int(): int() rejects "3.5" and truncates 3.5 to 3,
    # which would not satisfy the two equations above.
    total = float(O)
    spread = float(S)
    predictedSU = (total - spread) / 2
    return {"sU": predictedSU, "sF": total - predictedSU}
# Root of the JSON document printed at the end: one entry per season.
fullObj = {
    "years": []
}
start_year = 1952

# Both patterns are applied to every table row, so compile them once.
# "<one char><whitespace>[-]<digits>": captures only the run of digits.
# NOTE(review): a sign or fractional part in the cell text is not captured --
# confirm that dropping it is intended.
_LINE_VALUE_RE = re.compile(r'.\s-?(\d+)')
# "<one char><whitespace><digits>-<digits>": captures the two score numbers.
_SCORE_RE = re.compile(r'.\s(\d+)-(\d+)')


def _parseRow(cells):
    """Build a game dict from one table row's ``<td>`` cells.

    Args:
        cells: The row's ``<td>`` elements, in document order.

    Returns:
        A dict with keys "date", "at", "fav", "und", "sF", "sU", "spread",
        "ou" and, when an over/under value was found, "pScore"; or None when
        the spread cell does not start with a line value.

    Raises:
        IndexError, TypeError, AttributeError: When the row lacks the expected
            cells, a required cell has no single text string, or the score
            cell does not contain a "<digits>-<digits>" score.
    """
    # When the second cell's text is exactly three characters long, every
    # column offset below shifts right by one (presumably such rows carry an
    # extra leading column -- verify against the saved HTML).
    cursor = 1 if len(cells[1].string) == 3 else 0

    spreadMatch = _LINE_VALUE_RE.match(cells[cursor + 6].string)
    if spreadMatch is None:
        return None

    game = {
        "date": cells[cursor + 1].string,
        "at": cells[cursor + 3].string,
        "fav": cells[cursor + 4].string,
        "und": cells[cursor + 8].string,
    }
    actualScore = _SCORE_RE.search(cells[cursor + 5].string)
    game["sF"] = actualScore.group(1)
    game["sU"] = actualScore.group(2)

    # The anchored match above already proves the spread digits exist, so no
    # -1 fallback is needed for the spread.
    spreadVal = spreadMatch.group(1)

    # NOTE(review): this used to parse cells[cursor + 8], the same cell that
    # is stored as the underdog name above, so a real over/under could not be
    # found there. It now reads the following column on the assumption that
    # the over/under sits directly after the underdog -- confirm against
    # data/*.html. A missing or empty cell yields the -1 sentinel.
    ouVal = -1
    if len(cells) > cursor + 9:
        ouValSearch = _LINE_VALUE_RE.search(cells[cursor + 9].string or "")
        if ouValSearch:
            ouVal = ouValSearch.group(1)

    game["spread"] = spreadVal
    game["ou"] = ouVal
    if ouVal != -1:
        game["pScore"] = calcPredictedScore(game["sF"], game["sU"], spreadVal, ouVal)
    return game


for year in range(start_year, 2025):
    # BeautifulSoup reads the whole file while constructing the tree, so the
    # handle can be closed before the tree is searched.
    with open("data/" + str(year) + ".html") as fp:
        soup = BeautifulSoup(fp, 'html.parser')
    gamesObj = {
        "year": year,
        "games": []
    }
    start_pos = soup.find(string=str(year) + " Regular Season - Week 1")
    try:
        for table in start_pos.find_all_next(class_="soh1"):
            for child in table.find_all("tbody"):
                for row in child.find_all("tr"):
                    game = _parseRow(row.find_all("td"))
                    if game is not None:
                        gamesObj["games"].append(game)
    except Exception as err:
        # Best effort: the first failure ends parsing for this season, but
        # the games collected before it are kept. The diagnostic goes to
        # stderr so that stdout carries nothing but the JSON document.
        print("Error parsing " + str(year) + ": " + repr(err), file=sys.stderr)
    fullObj["years"].append(gamesObj)

objson = json.dumps(fullObj)
print(objson)
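'''
Derivation of the formulas in calcPredictedScore.

O is the over/under value and S the spread value passed in by the caller;
sF and sU are the two predicted scores (presumably favorite and underdog).

if
    sF + sU = O
and
    sF - sU = S
then
    sF = O - sU
    sF = S + sU
so, equating the two expressions for sF,
    O - sU = S + sU
    O = S + 2sU
    2sU = O - S
    sU = (O - S)/2
and substituting back,
    sF = O - sU = O - ((O-S)/2)
'''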