"""Kevin's NFL Forecast Tool v3.1.

Streamlit app that trains two RandomForest models (home-win and home-cover)
on historical nfl_data_py schedules, pulls live DraftKings-style lines from
The Odds API, and exposes a single-game predictor plus an upcoming-week dump.
"""

import streamlit as st
import pandas as pd
import numpy as np
import nfl_data_py as nfl
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import joblib
import warnings
from math import radians, sin, cos, sqrt, atan2
import requests
import datetime
import os
import time
from io import BytesIO  # hoisted from the Excel-export branch; top-level per PEP 8

# --- PAGE CONFIG ---
st.set_page_config(page_title="Kevin's NFL Forecast Tool v3.1", page_icon="🏈")

# --- CENTER LOGO ---
logo_col1, logo_col2, logo_col3 = st.columns([1, 2, 1])
with logo_col2:
    st.image("Airmanv.png", width=220)

# --- SINGLE-LINE CENTERED TITLE ---
st.markdown(
    """

🏈 Kevin's NFL Forecast Tool v3.1

""",
    unsafe_allow_html=True
)

# --- API SETUP ---
ODDS_API_KEY = os.getenv("ODDS_API_KEY")
ODDS_API_URL = "https://api.the-odds-api.com/v4/sports/americanfootball_nfl/odds"
if not ODDS_API_KEY:
    st.error("⚠️ API Key not found. Please set ODDS_API_KEY in Settings > Secrets.")

# --- SHARED CONSTANTS ---
# Identity map; kept because its keys double as the canonical team list for the UI.
TEAM_ABBR_MAP = {
    "ARI": "ARI", "ATL": "ATL", "BAL": "BAL", "BUF": "BUF", "CAR": "CAR",
    "CHI": "CHI", "CIN": "CIN", "CLE": "CLE", "DAL": "DAL", "DEN": "DEN",
    "DET": "DET", "GB": "GB", "HOU": "HOU", "IND": "IND", "JAX": "JAX",
    "KC": "KC", "LV": "LV", "LAC": "LAC", "LAR": "LAR", "MIA": "MIA",
    "MIN": "MIN", "NE": "NE", "NO": "NO", "NYG": "NYG", "NYJ": "NYJ",
    "PHI": "PHI", "PIT": "PIT", "SEA": "SEA", "SF": "SF", "TB": "TB",
    "TEN": "TEN", "WAS": "WAS",
}

# Abbreviation -> full name as The Odds API spells it.
# FIX: the rest of the app (schedules, stadiums, TEAM_ABBR_MAP) uses "LAR" for
# the Rams, but this table only had "LA", so every Rams odds lookup silently
# returned None. "LAR" is added; "LA" is retained for backward compatibility.
ODDS_API_TEAM_NAMES = {
    "ARI": "Arizona Cardinals", "ATL": "Atlanta Falcons", "BAL": "Baltimore Ravens",
    "BUF": "Buffalo Bills", "CAR": "Carolina Panthers", "CHI": "Chicago Bears",
    "CIN": "Cincinnati Bengals", "CLE": "Cleveland Browns", "DAL": "Dallas Cowboys",
    "DEN": "Denver Broncos", "DET": "Detroit Lions", "GB": "Green Bay Packers",
    "HOU": "Houston Texans", "IND": "Indianapolis Colts", "JAX": "Jacksonville Jaguars",
    "KC": "Kansas City Chiefs", "LA": "Los Angeles Rams", "LAR": "Los Angeles Rams",
    "LAC": "Los Angeles Chargers", "LV": "Las Vegas Raiders", "MIA": "Miami Dolphins",
    "MIN": "Minnesota Vikings", "NE": "New England Patriots", "NO": "New Orleans Saints",
    "NYG": "New York Giants", "NYJ": "New York Jets", "PHI": "Philadelphia Eagles",
    "PIT": "Pittsburgh Steelers", "SEA": "Seattle Seahawks", "SF": "San Francisco 49ers",
    "TB": "Tampa Bay Buccaneers", "TEN": "Tennessee Titans", "WAS": "Washington Commanders",
}

# Stadium coordinates, index-aligned across the three columns (32 teams).
stadiums = pd.DataFrame({
    "team_abbr": ["ARI", "ATL", "BAL", "BUF", "CAR", "CHI", "CIN", "CLE",
                  "DAL", "DEN", "DET", "GB", "HOU", "IND", "JAX", "KC",
                  "LV", "LAC", "LAR", "MIA", "MIN", "NE", "NO", "NYG",
                  "NYJ", "PHI", "PIT", "SEA", "SF", "TB", "TEN", "WAS"],
    "latitude": [33.5275, 33.7550, 39.2787, 42.7737, 35.2258, 41.8625, 39.0954, 41.5061,
                 32.7473, 39.7439, 42.3400, 44.5013, 29.6847, 39.7601, 30.3240, 39.0490,
                 36.0909, 33.9535, 34.0140, 25.9580, 44.9740, 42.0909, 29.9511, 40.8128,
                 40.8135, 39.9008, 40.4469, 47.5952, 37.4030, 27.9759, 36.1663, 38.9078],
    "longitude": [-112.2626, -84.3915, -76.6227, -78.7868, -80.8528, -87.6167, -84.5160, -81.6995,
                  -97.0945, -105.0201, -83.0458, -88.0622, -95.4107, -86.1637, -81.6377, -94.4839,
                  -115.1833, -118.3391, -118.2879, -80.2389, -93.2577, -71.2643, -90.0812, -74.0743,
                  -74.0743, -75.1675, -80.0158, -122.3316, -122.0829, -82.5034, -86.7713, -77.0074],
})
team_coords = stadiums.set_index("team_abbr")[["latitude", "longitude"]].to_dict("index")

# Per-session memo of odds lookups keyed by (source, home, away).
_odds_cache = {}


# --- HELPER: HAVERSINE ---
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in miles between two lat/lon points."""
    R = 3958.8  # Earth radius, miles
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    return 2 * R * atan2(sqrt(a), sqrt(1 - a))


# --- ODDS FUNCTIONS (PORTED FROM LOCAL SCRIPT) ---
def fetch_oddsapi_draftkings_odds(home_team_abbr, away_team_abbr):
    """Fetch odds from The Odds API (DraftKings-style), similar to local script.

    Returns a dict with keys ``spread_line`` (home spread), ``over_under_line``,
    ``ml_home`` and ``ml_away`` (American moneylines), or None when the game or
    team names cannot be resolved or the HTTP call fails. Results are memoized
    in ``_odds_cache`` for the session.
    """
    home_abbr = home_team_abbr.upper()
    away_abbr = away_team_abbr.upper()
    cache_key = ("ODDS_API_DK", home_abbr, away_abbr)
    if cache_key in _odds_cache:
        return _odds_cache[cache_key]

    home_full = ODDS_API_TEAM_NAMES.get(home_abbr)
    away_full = ODDS_API_TEAM_NAMES.get(away_abbr)
    if not home_full or not away_full:
        return None

    params = {
        "apiKey": ODDS_API_KEY,
        "regions": "us",
        "markets": "h2h,spreads,totals",
        "oddsFormat": "american",
    }
    try:
        resp = requests.get(ODDS_API_URL, params=params, timeout=10)
        if resp.status_code != 200:
            return None
        data = resp.json()
    except Exception:
        # Best-effort: the UI falls back to default lines when odds are missing.
        return None

    game_obj = None
    for g in data:
        if g.get("home_team") == home_full and g.get("away_team") == away_full:
            game_obj = g
            break
    if game_obj is None:
        return None

    # Prefer DraftKings; otherwise take the first listed bookmaker.
    bookmakers = game_obj.get("bookmakers", [])
    dk = None
    for b in bookmakers:
        title = (b.get("title") or "").lower()
        if b.get("key") == "draftkings" or title.startswith("draftkings"):
            dk = b
            break
    if dk is None and bookmakers:
        dk = bookmakers[0]

    spread_line = None
    over_under_line = None
    ml_home = None
    ml_away = None
    if dk:
        for m in dk.get("markets", []):
            mkey = m.get("key")
            outcomes = m.get("outcomes", [])
            if mkey == "spreads":
                for o in outcomes:
                    if o.get("name") == home_full:
                        try:
                            spread_line = float(o.get("point"))
                        except Exception:
                            pass
            elif mkey == "totals":
                for o in outcomes:
                    name = (o.get("name") or "").lower()
                    if name.startswith("over"):
                        try:
                            over_under_line = float(o.get("point"))
                        except Exception:
                            pass
            elif mkey == "h2h":
                for o in outcomes:
                    if o.get("name") == home_full:
                        ml_home = o.get("price")
                    elif o.get("name") == away_full:
                        ml_away = o.get("price")

    result = {
        "spread_line": spread_line,
        "over_under_line": over_under_line,
        "ml_home": ml_home,
        "ml_away": ml_away,
    }
    _odds_cache[cache_key] = result
    return result


def fetch_combined_odds(home_team_abbr, away_team_abbr):
    """Single odds entry point; currently only The Odds API source is wired in."""
    odds = fetch_oddsapi_draftkings_odds(home_team_abbr, away_team_abbr)
    if odds is not None:
        return odds
    return None


# --- DATA LOAD FUNCTION ---
@st.cache_data
def load_nfl_data():
    """Download schedules and build the engineered training frame.

    Returns (current-season schedule, historical feature frame, season year).
    Features added per game: rest days, travel distance, pre-game rolling
    win%/point-diff for both teams, and the ``home_win`` / ``home_cover`` targets.
    """
    today_dt = datetime.date.today()
    # NFL seasons start in September; before that, "current" is last year's season.
    current_season = today_dt.year if today_dt.month >= 9 else today_dt.year - 1
    games = nfl.import_schedules([current_season])
    historical_games = nfl.import_schedules(list(range(2010, current_season + 1)))

    # Column name differs across nfl_data_py versions.
    date_col = "game_date" if "game_date" in historical_games.columns else "gameday"
    cols_to_keep = [
        "game_id", "season", "week", "home_team", "away_team",
        "home_score", "away_score", date_col
    ]
    for col in ["spread_line", "over_under_line", "over_under_line_close"]:
        if col in historical_games.columns:
            cols_to_keep.append(col)

    df = historical_games[cols_to_keep].copy()
    df = df.dropna(subset=["home_score", "away_score"])  # completed games only
    df["game_date"] = pd.to_datetime(df[date_col])
    df["home_win"] = (df["home_score"] > df["away_score"]).astype(int)
    df["point_diff"] = df["home_score"] - df["away_score"]
    if "spread_line" not in df.columns:
        df["spread_line"] = 0.0
    if "over_under_line" not in df.columns:
        df["over_under_line"] = df["point_diff"].abs().mean()

    # Rest days. Sort chronologically (game_date breaks ties within a week so
    # the running "last game" bookkeeping is correct).
    df = df.sort_values(["season", "week", "game_date"])
    rest_records = []
    team_last_game = {}
    for _, row in df.iterrows():
        ht, at = row["home_team"], row["away_team"]
        gd = row["game_date"]
        # Default 7 days of rest for a team's first observed game.
        home_rest = (gd - team_last_game[ht]).days if ht in team_last_game else 7
        away_rest = (gd - team_last_game[at]).days if at in team_last_game else 7
        team_last_game[ht] = gd
        team_last_game[at] = gd
        rest_records.append({
            "season": row["season"], "week": row["week"],
            "home_team": ht, "away_team": at,
            "home_rest_days": home_rest, "away_rest_days": away_rest
        })
    rest_df = pd.DataFrame(rest_records)
    # FIX: merge includes "week" — joining on (season, home, away) alone
    # duplicates rows when the same pairing recurs in a season (e.g. playoffs).
    df = pd.merge(df, rest_df, on=["season", "week", "home_team", "away_team"], how="left")

    # Travel distance (away stadium -> home stadium, straight-line miles).
    df["travel_distance"] = [
        haversine(
            team_coords[row["home_team"]]["latitude"],
            team_coords[row["home_team"]]["longitude"],
            team_coords[row["away_team"]]["latitude"],
            team_coords[row["away_team"]]["longitude"],
        ) if row["home_team"] in team_coords and row["away_team"] in team_coords
        else np.nan
        for _, row in df.iterrows()
    ]

    # Cover target. NOTE(review): this treats a positive spread_line as the
    # home side laying points only after negation; confirm against the
    # nflverse spread_line sign convention before trusting cover outputs.
    df["home_cover"] = ((df["home_score"] - df["away_score"]) > (-df["spread_line"])).astype(int)

    # Rolling team stats, snapshotted BEFORE each game.
    # FIX (target leakage): the original updated team_stats with the current
    # game's result and then recorded the feature row, so every training row's
    # win%/point-diff included its own outcome. Inference
    # (compute_matchup_features) only sees prior games, so training must match.
    records = []
    for season in df["season"].unique():
        season_df = df[df["season"] == season]
        team_stats = {}
        for _, row in season_df.iterrows():
            ht, at = row["home_team"], row["away_team"]
            hs, as_ = row["home_score"], row["away_score"]
            for t in (ht, at):
                if t not in team_stats:
                    team_stats[t] = {"wins": 0, "games": 0, "point_diff": 0}

            def _win_pct(t):
                # 0.0 before a team's first game of the season (matches
                # derive_features in compute_matchup_features).
                s = team_stats[t]
                return s["wins"] / s["games"] if s["games"] else 0.0

            records.append({
                "season": season, "week": row["week"],
                "home_team": ht, "away_team": at,
                "home_win_pct": _win_pct(ht),
                "away_win_pct": _win_pct(at),
                "home_point_diff": team_stats[ht]["point_diff"],
                "away_point_diff": team_stats[at]["point_diff"],
            })

            # Now fold the current game into the running totals.
            team_stats[ht]["games"] += 1
            team_stats[at]["games"] += 1
            team_stats[ht]["point_diff"] += (hs - as_)
            team_stats[at]["point_diff"] += (as_ - hs)
            if hs > as_:
                team_stats[ht]["wins"] += 1
            else:
                # Ties count as away wins, preserving the original behavior.
                team_stats[at]["wins"] += 1

    df_features = pd.DataFrame(records)
    df = pd.merge(df, df_features, on=["season", "week", "home_team", "away_team"], how="left")
    return games, df, current_season


with st.spinner("Downloading NFL Data..."):
    games, df, current_season = load_nfl_data()


# --- MODEL TRAINING - MATCH LOCAL SCRIPT (TRAIN/TEST SPLIT) ---
@st.cache_resource
def load_models(df):
    """Train the win and cover RandomForest pipelines; return them with test accuracies."""
    features = [
        "home_win_pct", "away_win_pct", "home_point_diff", "away_point_diff",
        "home_rest_days", "away_rest_days", "travel_distance",
        "spread_line", "over_under_line"
    ]
    df_model = df.dropna(subset=features + ["home_win", "home_cover"])
    X = df_model[features]
    y_win = df_model["home_win"]
    y_cover = df_model["home_cover"]

    # Same random_state so both targets share an identical row split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_win, test_size=0.25, random_state=42
    )
    Xc_train, Xc_test, yc_train, yc_test = train_test_split(
        X, y_cover, test_size=0.25, random_state=42
    )

    pipe_win = Pipeline([
        ("scaler", StandardScaler()),
        ("rf", RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1))
    ])
    pipe_cover = Pipeline([
        ("scaler", StandardScaler()),
        ("rf", RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1))
    ])
    pipe_win.fit(X_train, y_train)
    pipe_cover.fit(Xc_train, yc_train)

    win_acc = pipe_win.score(X_test, y_test)
    cover_acc = pipe_cover.score(Xc_test, yc_test)
    return pipe_win, pipe_cover, win_acc, cover_acc


with st.spinner("Training models..."):
    model_win, model_cover, win_acc, cover_acc = load_models(df)


# --- FEATURE COMPUTATION FOR A MATCHUP ---
def compute_matchup_features(home_team, away_team):
    """Build pre-game model features for one matchup.

    Looks up the game in the current schedule (falling back to the most recent
    historical meeting), then replays every earlier game of that season to
    derive each team's win%, cumulative point diff, and rest days.

    Raises ValueError when the matchup appears in neither source.
    """
    home_team = home_team.upper()
    away_team = away_team.upper()

    mask = (games["home_team"] == home_team) & (games["away_team"] == away_team)
    if not mask.any():
        mask_hist = (df["home_team"] == home_team) & (df["away_team"] == away_team)
        if mask_hist.any():
            game_row = df[mask_hist].iloc[-1]
        else:
            raise ValueError(f"No game found for {away_team} at {home_team}.")
    else:
        game_row = games[mask].iloc[-1]

    season = int(game_row["season"])
    d_col = "game_date" if "game_date" in game_row.index else "gameday"
    game_date = pd.to_datetime(game_row[d_col])

    # Strictly-prior games of the same season (no leakage of the target game).
    past_games = df[(df["season"] == season) & (df["game_date"] < game_date)].copy()
    past_games = past_games.sort_values("game_date")

    team_stats = {}

    def init_team(t):
        if t not in team_stats:
            team_stats[t] = {"wins": 0, "games": 0, "point_diff": 0, "last_game": None}

    for _, row in past_games.iterrows():
        ht, at = row["home_team"], row["away_team"]
        hs, as_ = row["home_score"], row["away_score"]
        gd = row["game_date"]
        for t in (ht, at):
            init_team(t)
            team_stats[t]["games"] += 1
            team_stats[t]["last_game"] = gd
        team_stats[ht]["point_diff"] += (hs - as_)
        team_stats[at]["point_diff"] += (as_ - hs)
        if hs > as_:
            team_stats[ht]["wins"] += 1
        else:
            team_stats[at]["wins"] += 1

    def derive_features(team):
        # Neutral defaults (0.0 win%, 7 days rest) for a team with no prior games.
        if team not in team_stats or team_stats[team]["games"] == 0:
            return {"win_pct": 0.0, "point_diff": 0.0, "rest_days": 7.0}
        s = team_stats[team]
        win_pct = s["wins"] / s["games"]
        point_diff = s["point_diff"]
        rest_days = 7.0 if s["last_game"] is None else float(
            (game_date.normalize() - s["last_game"].normalize()).days
        )
        return {"win_pct": win_pct, "point_diff": point_diff, "rest_days": rest_days}

    hs = derive_features(home_team)
    as_ = derive_features(away_team)

    td = 0.0
    if home_team in team_coords and away_team in team_coords:
        td = haversine(
            team_coords[home_team]["latitude"], team_coords[home_team]["longitude"],
            team_coords[away_team]["latitude"], team_coords[away_team]["longitude"]
        )

    return {
        "home_win_pct": hs["win_pct"],
        "away_win_pct": as_["win_pct"],
        "home_point_diff": hs["point_diff"],
        "away_point_diff": as_["point_diff"],
        "home_rest_days": hs["rest_days"],
        "away_rest_days": as_["rest_days"],
        "travel_distance": td
    }


def get_single_game_stats(ht, at):
    """None-on-failure wrapper around compute_matchup_features for the UI."""
    try:
        f = compute_matchup_features(ht, at)
    except Exception:
        return None
    return f


# ------------------------
# UI LAYOUT
# ------------------------
tab1, tab2 = st.tabs(["Single Game Prediction", "Upcoming Week Dump"])

# TAB 1 - SINGLE GAME PREDICTION
with tab1:
    st.subheader("Predict Single Game")
    col1, col2 = st.columns(2)
    with col1:
        home_team = st.selectbox("Home Team", sorted(TEAM_ABBR_MAP.keys()))
    with col2:
        away_team = st.selectbox("Away Team", sorted(TEAM_ABBR_MAP.keys()), index=1)

    if st.button("Analyze Matchup"):
        with st.spinner("Crunching numbers..."):
            stats = get_single_game_stats(home_team, away_team)
        if not stats:
            st.error("No schedule or historical game found for this matchup.")
        else:
            s_col1, s_col2 = st.columns(2)
            s_col1.info(
                f"**HOME ({home_team})**\n\n"
                f"Win%: {stats['home_win_pct']:.3f}\n\n"
                f"Diff: {stats['home_point_diff']:.1f}\n\n"
                f"Rest: {stats['home_rest_days']:.1f}"
            )
            s_col2.info(
                f"**AWAY ({away_team})**\n\n"
                f"Win%: {stats['away_win_pct']:.3f}\n\n"
                f"Diff: {stats['away_point_diff']:.1f}\n\n"
                f"Rest: {stats['away_rest_days']:.1f}"
            )

            odds = fetch_combined_odds(home_team, away_team)
            sp_val = odds.get("spread_line") if odds and odds.get("spread_line") is not None else 0.0
            ou_val = odds.get("over_under_line") if odds and odds.get("over_under_line") is not None else 45.0

            st.write("---")
            st.subheader("Betting Lines")
            # NOTE(review): these number_inputs live inside the button branch,
            # so editing them triggers a rerun that collapses the branch before
            # the edited value can feed a prediction — consider session_state.
            c1, c2 = st.columns(2)
            spread_input = c1.number_input("Spread (Home)", value=float(sp_val))
            total_input = c2.number_input("Total (O/U)", value=float(ou_val))

            # Column order matches the training feature list exactly.
            row = pd.DataFrame([stats])
            row["spread_line"] = spread_input
            row["over_under_line"] = total_input
            win_prob = model_win.predict_proba(row)[0][1]
            cover_prob = model_cover.predict_proba(row)[0][1]
            st.success(f"**Win Probability:** {win_prob:.1%}")
            st.success(f"**Cover Probability:** {cover_prob:.1%}")

# TAB 2 - UPCOMING WEEK DUMP WITH TERMINAL STYLE LOGS
with tab2:
    st.subheader("Upcoming Week Predictions")
    log_box = st.empty()
    log_lines = []

    def log(line, delay=0.35):
        """Append a line to the fake-terminal log box, with a dramatic pause."""
        log_lines.append(line)
        log_box.text("\n".join(log_lines))
        time.sleep(delay)

    if st.button("Generate Report"):
        log("Environment check passed.")
        log("Downloading NFL schedule and historical data...")
        log("Calculating rest days...")
        log("Adding travel distances...")
        log("Building historical team records for training...")
        log("Training Random Forest models...")
        log(f"Models trained. Win Acc: {win_acc:.2f}, Cover Acc: {cover_acc:.2f}")
        log("")
        log("--- NFL 2.8 Menu ---")
        log("1. Single Game Prediction")
        log("2. Batch Prediction (Excel)")
        log("3. Dump Upcoming Week Data (Excel)")
        log("Q. Quit")
        log("Select: 3")
        log("")
        log("--- Option 3: Upcoming Week Data Dump & Prediction ---")

        d_col = "game_date" if "game_date" in games.columns else "gameday"
        games[d_col] = pd.to_datetime(games[d_col])
        upcoming = games[games[d_col].dt.date >= datetime.date.today()].copy().sort_values(d_col)

        if upcoming.empty:
            st.warning("No upcoming games found.")
            log("No upcoming games found.")
        else:
            next_week = upcoming["week"].min()
            week_games = upcoming[upcoming["week"] == next_week]
            log(f"Processing {len(week_games)} games for Week {next_week}...")

            dump_data = []
            progress_bar = st.progress(0)
            for i, (idx, row_g) in enumerate(week_games.iterrows()):
                ht = row_g["home_team"]
                at = row_g["away_team"]
                try:
                    feats = compute_matchup_features(ht, at)
                except Exception:
                    # Keep the row with NaN features so the dump stays complete.
                    feats = {
                        "home_win_pct": np.nan, "away_win_pct": np.nan,
                        "home_point_diff": np.nan, "away_point_diff": np.nan,
                        "home_rest_days": np.nan, "away_rest_days": np.nan,
                        "travel_distance": np.nan
                    }

                odds = fetch_combined_odds(ht, at) or {}
                sp_val = odds.get("spread_line") if odds.get("spread_line") is not None else 0.0
                ou_val = odds.get("over_under_line") if odds.get("over_under_line") is not None else 45.0

                input_row = pd.DataFrame([{
                    "home_win_pct": feats["home_win_pct"],
                    "away_win_pct": feats["away_win_pct"],
                    "home_point_diff": feats["home_point_diff"],
                    "away_point_diff": feats["away_point_diff"],
                    "home_rest_days": feats["home_rest_days"],
                    "away_rest_days": feats["away_rest_days"],
                    "travel_distance": feats["travel_distance"],
                    "spread_line": sp_val,
                    "over_under_line": ou_val
                }]).fillna(0.0)

                win_prob = model_win.predict_proba(input_row)[0][1] * 100
                cover_prob = model_cover.predict_proba(input_row)[0][1] * 100

                dump_data.append({
                    "Season": current_season,
                    "Week": next_week,
                    "Date": str(row_g[d_col].date()),
                    "Home": ht,
                    "Away": at,
                    "Home Win Pct": feats["home_win_pct"],
                    "Away Win Pct": feats["away_win_pct"],
                    "Home Pt Diff": feats["home_point_diff"],
                    "Away Pt Diff": feats["away_point_diff"],
                    "Home Rest Days": feats["home_rest_days"],
                    "Away Rest Days": feats["away_rest_days"],
                    "Travel Distance": feats["travel_distance"],
                    "Spread (Home)": odds.get("spread_line"),
                    "Total (O/U)": odds.get("over_under_line"),
                    "Home ML": odds.get("ml_home"),
                    "Away ML": odds.get("ml_away"),
                    "Model Win %": round(win_prob, 2),
                    "Model Cover %": round(cover_prob, 2),
                })
                progress_bar.progress((i + 1) / len(week_games))

            result_df = pd.DataFrame(dump_data)
            st.write("### Upcoming Week Data Dump")
            st.dataframe(result_df)

            excel_buffer = BytesIO()
            result_df.to_excel(excel_buffer, index=False, sheet_name=f"Week_{next_week}")
            excel_buffer.seek(0)
            st.download_button(
                label=f"Download Week {next_week} Data Dump (Excel)",
                data=excel_buffer,
                file_name=f"NFL_Week_{next_week}_Data_Dump.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )

            csv_data = result_df.to_csv(index=False).encode("utf-8")
            st.download_button(
                label="Download Predictions CSV",
                data=csv_data,
                file_name="nfl_predictions.csv",
                mime="text/csv",
            )