NFLOddsVis/parse_odds.py

120 lines
2.3 KiB
Python
Raw Normal View History

2025-01-22 21:34:06 -05:00
from bs4 import BeautifulSoup
from dateutil.parser import parse
import json
2025-01-28 11:54:31 -05:00
from tqdm import tqdm
import re
2025-01-22 21:34:06 -05:00
def calcPredictedScore(S, O):
    """Split an over/under total into the two scores implied by the spread.

    Solves the pair of equations ``sF + sU = O`` and ``sF - sU = S``.

    Args:
        S: The spread, i.e. the margin ``sF - sU``. Anything ``float()``
            accepts (number or numeric string).
        O: The over/under total ``sF + sU``. Anything ``float()`` accepts.

    Returns:
        A dict with keys ``"sU"`` and ``"sF"`` (in that order) holding the
        two implied scores as floats.

    Raises:
        ValueError: If ``S`` or ``O`` is a string that is not a valid float.
        TypeError: If ``S`` or ``O`` is of a type ``float()`` rejects.
    """
    # Convert once at the edge; callers may pass strings.
    total = float(O)
    spread = float(S)

    lower_score = (total - spread) / 2
    scoreObj = {}
    scoreObj["sU"] = lower_score
    scoreObj["sF"] = total - lower_score
    return scoreObj
2025-01-22 21:34:06 -05:00
2025-01-28 11:54:31 -05:00
# Every parsed game (one dict per table row that carries a spread) is
# collected here and dumped to JSON at the end of the script.
fullObj = []

start_year = 1952
end_year = 2025  # exclusive upper bound, as passed to range()

# Compiled once: these patterns are applied to every table row.
_WEEK_ONE_RE = re.compile(".*Regular Season - Week 1")
_SPREAD_ROW_RE = re.compile(r'.\s-?(\d|PK)')
_SCORE_RE = re.compile(r'.\s(\d+)-(\d+)')
_SPREAD_VALUE_RE = re.compile(r'.\s-?(\d*\.*\d|PK)')


def _cell_text(tag):
    """Return ``tag.string`` as a plain ``str``, or None if it has none.

    ``Tag.string`` is None for empty tags and for tags with several
    children. Converting to ``str`` matters: a NavigableString keeps a
    reference to its whole parse tree, so storing it in ``fullObj`` would
    keep every year's document in memory until the script ends.
    """
    text = tag.string
    return None if text is None else str(text)


def _first_float(text):
    """Return the first whitespace-separated token of *text* that parses
    as a float, or None when *text* is None/empty or has no such token."""
    for token in (text or "").split():
        try:
            return float(token)
        except ValueError:
            continue
    return None


def _parse_game_row(cells, week):
    """Build one game dict from the ``<td>`` cells of a table row.

    Args:
        cells: The row's ``<td>`` tags, in document order.
        week: Heading text stored under the ``"week"`` key.

    Returns:
        The game dict, or None when the row's spread cell does not look
        like a spread (such rows are skipped, not treated as errors).

    Raises:
        IndexError: If the row has fewer cells than the layout requires.
        ValueError: If the score cell holds no ``<n>-<n>`` result, or the
            spread text cannot be converted to a number.
    """
    # A three-character second cell shifts every later column right by one
    # (presumably a day-of-week abbreviation column -- confirm against the
    # scraped pages).
    second_cell = _cell_text(cells[1])
    offset = 1 if second_cell is not None and len(second_cell) == 3 else 0

    spread_text = _cell_text(cells[offset + 6])
    if spread_text is None or not _SPREAD_ROW_RE.match(spread_text):
        return None

    score = _SCORE_RE.search(_cell_text(cells[offset + 5]) or "")
    if score is None:
        raise ValueError("no final score found in row")

    spread_match = _SPREAD_VALUE_RE.search(spread_text)
    if spread_match is None:
        # -1 marks "no usable spread"; kept for output compatibility.
        spread = -1
    else:
        # The captured group excludes the sign, so this is the magnitude.
        spread = spread_match.group(1)
        if spread == "PK":
            spread = 0

    ou = _first_float(_cell_text(cells[offset + 9]))

    if spread != -1 and ou:
        predicted = calcPredictedScore(spread, ou)
    else:
        predicted = {"sF": None, "sU": None}

    return {
        "date": _cell_text(cells[offset + 1]),
        "at": _cell_text(cells[offset + 3]),
        "fav": _cell_text(cells[offset + 4]),
        "und": _cell_text(cells[offset + 8]),
        "sF": score.group(1),
        "sU": score.group(2),
        "spread": spread,
        "ou": ou,
        "pScore": predicted,
        "week": week,
    }


for year in tqdm(range(start_year, end_year)):
    # Binary mode lets BeautifulSoup detect the document's own encoding
    # instead of relying on the platform's default text encoding.
    with open("src/data/" + str(year) + ".html", "rb") as fp:
        soup = BeautifulSoup(fp, 'html.parser')

    start_pos = soup.find(string=_WEEK_ONE_RE)
    if start_pos is None:
        print("'Regular Season - Week 1' marker not found")
        print("Error parsing " + str(year))
        continue

    for table in start_pos.find_all_next(class_="soh1"):
        heading = table.find_previous("h3")
        week = _cell_text(heading) if heading is not None else None
        for body in table.find_all("tbody"):
            for row in body.find_all("tr"):
                # Isolate failures per row so one malformed row does not
                # discard the rest of the season.
                try:
                    game = _parse_game_row(row.find_all("td"), week)
                except (AttributeError, IndexError, TypeError, ValueError) as e:
                    print(e)
                    print("Error parsing " + str(year))
                    continue
                if game is not None:
                    fullObj.append(game)

objson = json.dumps(fullObj, indent=4)
with open("src/data/odds_data.json", "w") as odds_file:
    odds_file.write(objson)
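'''
Derivation of the formulas in calcPredictedScore, where
sF = favorite's score, sU = underdog's score,
O = over/under total, S = spread (the margin sF - sU).

if
sF + sU = O
and
sF - sU = S
then
sF = O - sU
sF = S + sU
O - sU = S + sU
O = S + 2sU
2sU = O - S
sU = (O - S)/2
sF = O - ((O-S)/2)
'''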
2025-01-22 21:34:06 -05:00