120 lines
2.3 KiB
Python
120 lines
2.3 KiB
Python
from bs4 import BeautifulSoup
|
|
from dateutil.parser import parse
|
|
import json
|
|
from tqdm import tqdm
|
|
import re
|
|
|
|
def calcPredictedScore(S, O):
    """Split a total ``O`` into two scores that differ by the spread ``S``.

    Solves ``sF + sU = O`` and ``sF - sU = S`` for the two scores.

    Args:
        S: The spread; any value ``float()`` accepts (number or numeric string).
        O: The over/under total; any value ``float()`` accepts.

    Returns:
        A dict with ``"sU"`` set to ``(O - S) / 2`` and ``"sF"`` set to
        ``O - (O - S) / 2``, both as floats.

    Raises:
        ValueError: if ``S`` or ``O`` is a string that is not a valid float.
    """
    total = float(O)
    margin = float(S)
    # Half of what is left of the total once the margin is removed.
    lower_score = (total - margin) / 2
    return {"sU": lower_score, "sF": total - lower_score}
|
|
|
|
# One dict per parsed game, accumulated across every season file.
fullObj = []

# First season to read; the loop below covers start_year through 2024.
start_year = 1952

# Compiled once up front instead of on every table row.
_week_one_re = re.compile(".*Regular Season - Week 1")
_spread_cell_re = re.compile(r'.\s-?(\d|PK)')
_score_re = re.compile(r'.\s(\d+)-(\d+)')
_spread_value_re = re.compile(r'.\s-?(\d*\.*\d|PK)')


def _parse_game_row(cells, week):
    """Build the game dict for one table row, or return None to skip it.

    Args:
        cells: The row's ``td`` tags, in document order.
        week: Text of the ``h3`` heading that precedes the row's table.

    Returns:
        A dict with the keys date, at, fav, und, sF, sU, spread, ou, pScore
        and week (in that order), or None when the spread cell does not start
        with a character, whitespace, optional minus and then a digit or "PK".

    Raises:
        IndexError: if the row has fewer cells than the columns read here.
        TypeError: if a needed cell's ``.string`` is None.
        AttributeError: if the score cell holds no "N-N" score, or the
            over/under cell's ``.string`` is None.
    """
    # A three-character second cell (presumably a day abbreviation - confirm
    # against the saved pages) means every later column sits one to the right.
    cursor = 1 if len(cells[1].string) == 3 else 0

    spreadText = cells[cursor+6].string
    if not _spread_cell_re.match(spreadText):
        return None

    game = {}
    game["date"] = cells[cursor+1].string
    game["at"] = cells[cursor+3].string
    game["fav"] = cells[cursor+4].string
    game["und"] = cells[cursor+8].string

    # First number goes to "sF", second to "sU"; both stay strings.
    actualScore = _score_re.search(cells[cursor+5].string)
    game["sF"] = actualScore.group(1)
    game["sU"] = actualScore.group(2)

    # The sign is dropped; "PK" is stored as 0, no match at all as -1.
    spreadValSearch = _spread_value_re.search(spreadText)
    if spreadValSearch:
        spreadVal = spreadValSearch.group(1)
        if spreadVal == "PK":
            spreadVal = 0
    else:
        spreadVal = -1

    # The over/under is the first whitespace-separated token that is a float.
    ou = []
    for t in cells[cursor+9].string.split():
        try:
            ou.append(float(t))
        except ValueError:
            pass
    ouVal = ou[0] if ou else None

    game["spread"] = spreadVal
    game["ou"] = ouVal

    # A missing (or zero) over/under leaves the predicted score empty.
    if spreadVal != -1 and ouVal:
        game["pScore"] = calcPredictedScore(spreadVal, ouVal)
    else:
        game["pScore"] = {}
        game["pScore"]["sF"] = None
        game["pScore"]["sU"] = None

    game["week"] = week
    return game


for year in tqdm(range(start_year, 2025)):

    with open("src/data/"+str(year)+".html") as fp:
        soup = BeautifulSoup(fp, 'html.parser')

    start_pos = soup.find(string=_week_one_re)

    try:
        for table in start_pos.find_all_next(class_="soh1"):
            week = table.find_previous("h3").string
            for child in table.find_all("tbody"):
                for row in child.find_all("tr"):
                    # Errors are isolated per row: a single malformed row is
                    # reported and skipped so the remaining games of the
                    # season are still collected.
                    try:
                        game = _parse_game_row(row.find_all("td"), week)
                    except Exception as e:
                        print(e)
                        print("Error parsing " + str(year) + " (" + str(week) + "): row skipped")
                        continue
                    if game is not None:
                        fullObj.append(game)
    except Exception as e:
        # Season-level failure, e.g. the Week 1 heading or an h3 was not found.
        print(e)
        print("Error parsing " + str(year))

objson = json.dumps(fullObj, indent=4)

with open("src/data/odds_data.json", "w") as odds_file:
    odds_file.write(
        objson
    )
|
|
|
|
'''
Derivation of the formulas used in calcPredictedScore.

sF and sU are the predicted scores of the favorite and the underdog,
O is the over/under total and S is the spread.

If

sF + sU = O

and

sF - sU = S

then

sF = O - sU
sF = S + sU

O - sU = S + sU

O = S + 2sU

2sU = O - S

sU = (O - S)/2
sF = O - ((O-S)/2)

'''
|
|
|