"""Build ``src/data/odds_data.json`` from saved per-season HTML odds pages.

For every season from ``start_year`` up to (not including) ``end_year`` the
script reads ``src/data/<year>.html``, walks every element with class
``soh1`` that follows the "Regular Season - Week 1" text, and turns each
table row that carries a spread into one game record.
"""
import json
import re

from bs4 import BeautifulSoup
from dateutil.parser import parse  # not used below; kept in case other code relies on it
from tqdm import tqdm

# Compiled once: these patterns are applied to every row of every season.
WEEK_ONE_RE = re.compile(".*Regular Season - Week 1")
SPREAD_ROW_RE = re.compile(r'.\s-?(\d|PK)')
SPREAD_VALUE_RE = re.compile(r'.\s-?(\d*\.*\d|PK)')
SCORE_RE = re.compile(r'.\s(\d+)-(\d+)')


def calcPredictedScore(S, O):
    """Return the scores implied by a spread ``S`` and an over/under ``O``.

    Both arguments are passed through ``float()``, so numeric strings are
    accepted.  The result is a dict with keys ``"sU"`` (underdog) and
    ``"sF"`` (favorite).

    Derivation:
        if sF + sU = O and sF - sU = S
        then sF = O - sU
             sF = S + sU
             O - sU = S + sU
             O = S + 2sU
             2sU = O - S
             sU = (O - S)/2
             sF = O - ((O - S)/2)
    """
    total = float(O)
    margin = float(S)
    scoreObj = {}
    scoreObj["sU"] = (total - margin) / 2
    scoreObj["sF"] = total - ((total - margin) / 2)
    return scoreObj


def _first_number(text):
    """Return the first whitespace-separated token of ``text`` that parses as a float, else None."""
    for token in text.split():
        try:
            return float(token)
        except ValueError:
            continue
    return None


def _parse_game_row(cells, week):
    """Turn one row's ``<td>`` cells into a game dict.

    Returns None when the spread cell does not look like a spread (the row
    is simply not a game line).  Raises IndexError / TypeError /
    AttributeError / ValueError when the row is malformed: too few cells, a
    needed cell whose ``.string`` is None, or a score cell that does not
    contain ``<digits>-<digits>``.
    """
    # Rows whose second cell holds a 3-character string have every later
    # column shifted right by one (presumably a day abbreviation -- TODO confirm).
    cursor = 1 if len(cells[1].string) == 3 else 0

    spreadText = cells[cursor + 6].string
    if not SPREAD_ROW_RE.match(spreadText):
        return None

    scoreText = cells[cursor + 5].string
    actualScore = SCORE_RE.search(scoreText)
    if actualScore is None:
        raise ValueError("no final score found in " + repr(scoreText))

    game = {}
    game["date"] = cells[cursor + 1].string
    game["at"] = cells[cursor + 3].string
    game["fav"] = cells[cursor + 4].string
    game["und"] = cells[cursor + 8].string
    # Scores are kept as the matched text (strings), not converted to numbers.
    game["sF"] = actualScore.group(1)
    game["sU"] = actualScore.group(2)

    spreadValSearch = SPREAD_VALUE_RE.search(spreadText)
    if spreadValSearch:
        spreadVal = spreadValSearch.group(1)
        if spreadVal == "PK":
            # "PK" is recorded as a spread of 0.
            spreadVal = 0
    else:
        # Sentinel meaning "no spread value could be extracted".
        spreadVal = -1

    ouVal = _first_number(cells[cursor + 9].string)

    game["spread"] = spreadVal
    game["ou"] = ouVal
    # A missing or zero over/under is falsy and yields no prediction.
    if spreadVal != -1 and ouVal:
        game["pScore"] = calcPredictedScore(spreadVal, ouVal)
    else:
        game["pScore"] = {}
        game["pScore"]["sF"] = None
        game["pScore"]["sU"] = None
    game["week"] = week
    return game


def _parse_season(soup, year):
    """Return the list of game dicts found in one season's parsed page.

    A malformed row is reported and skipped so it cannot discard the rest of
    the season.  Any other failure (for example the week-1 marker is missing)
    is reported and ends that season early, keeping the games collected so far.
    """
    games = []
    start_pos = soup.find(string=WEEK_ONE_RE)
    try:
        for table in start_pos.find_all_next(class_="soh1"):
            week = table.find_previous("h3").string
            for child in table.find_all("tbody"):
                for row in child.find_all("tr"):
                    try:
                        game = _parse_game_row(row.find_all("td"), week)
                    except (IndexError, TypeError, AttributeError, ValueError) as e:
                        print(e)
                        print("Error parsing " + str(year) + " (" + str(week) + ")")
                        continue
                    if game is not None:
                        games.append(game)
    except Exception as e:
        # Season-level backstop: report and move on to the next year.
        print(e)
        print("Error parsing " + str(year))
    return games


fullObj = []
start_year = 1952
end_year = 2025  # exclusive upper bound of the season range
for year in tqdm(range(start_year, end_year)):
    with open("src/data/" + str(year) + ".html") as fp:
        soup = BeautifulSoup(fp, 'html.parser')
    fullObj.extend(_parse_season(soup, year))

objson = json.dumps(fullObj, indent=4)
with open("src/data/odds_data.json", "w") as odds_file:
    odds_file.write(objson)