# NFL3.1 / app.py — Streamlit app (Hugging Face Space "Airmanv")
# Last update: commit 8fd1fbd (verified)
import streamlit as st
import pandas as pd
import numpy as np
import nfl_data_py as nfl
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import joblib
import warnings
from math import radians, sin, cos, sqrt, atan2
import requests
import datetime
import os
import time
# --- PAGE CONFIG ---
# Must be the first Streamlit call in the script.
# NOTE(review): the page_icon string looks mojibake-mangled (likely a 🏈
# emoji whose UTF-8 bytes were re-decoded) — confirm source encoding.
st.set_page_config(page_title="Kevin's NFL Forecast Tool v3.1", page_icon="๐Ÿˆ")
# --- CENTER LOGO ---
# Three-column trick: the image goes in the wider middle column to center it.
logo_col1, logo_col2, logo_col3 = st.columns([1, 2, 1])
with logo_col2:
    st.image("Airmanv.png", width=220)
# --- SINGLE-LINE CENTERED TITLE ---
st.markdown(
    """
    <h1 style='text-align: center; color: white; font-size: 40px; margin-top: -10px;'>
    ๐Ÿˆ Kevin's NFL Forecast Tool v3.1
    </h1>
    """,
    unsafe_allow_html=True
)
# --- API SETUP ---
# The Odds API key comes from the environment (Hugging Face Spaces secrets).
ODDS_API_KEY = os.getenv("ODDS_API_KEY")
ODDS_API_URL = "https://api.the-odds-api.com/v4/sports/americanfootball_nfl/odds"
if not ODDS_API_KEY:
    # Warn but keep running: odds lookups will simply return None downstream.
    st.error("โš ๏ธ API Key not found. Please set ODDS_API_KEY in Settings > Secrets.")
# --- SHARED CONSTANTS ---
TEAM_ABBR_MAP = {
"ARI": "ARI", "ATL": "ATL", "BAL": "BAL", "BUF": "BUF", "CAR": "CAR", "CHI": "CHI",
"CIN": "CIN", "CLE": "CLE", "DAL": "DAL", "DEN": "DEN", "DET": "DET", "GB": "GB",
"HOU": "HOU", "IND": "IND", "JAX": "JAX", "KC": "KC", "LV": "LV", "LAC": "LAC",
"LAR": "LAR", "MIA": "MIA", "MIN": "MIN", "NE": "NE", "NO": "NO", "NYG": "NYG",
"NYJ": "NYJ", "PHI": "PHI", "PIT": "PIT", "SEA": "SEA", "SF": "SF", "TB": "TB",
"TEN": "TEN", "WAS": "WAS",
}
ODDS_API_TEAM_NAMES = {
"ARI": "Arizona Cardinals", "ATL": "Atlanta Falcons", "BAL": "Baltimore Ravens",
"BUF": "Buffalo Bills", "CAR": "Carolina Panthers", "CHI": "Chicago Bears",
"CIN": "Cincinnati Bengals", "CLE": "Cleveland Browns", "DAL": "Dallas Cowboys",
"DEN": "Denver Broncos", "DET": "Detroit Lions", "GB": "Green Bay Packers",
"HOU": "Houston Texans", "IND": "Indianapolis Colts", "JAX": "Jacksonville Jaguars",
"KC": "Kansas City Chiefs", "LA": "Los Angeles Rams", "LAC": "Los Angeles Chargers",
"LV": "Las Vegas Raiders", "MIA": "Miami Dolphins", "MIN": "Minnesota Vikings",
"NE": "New England Patriots", "NO": "New Orleans Saints", "NYG": "New York Giants",
"NYJ": "New York Jets", "PHI": "Philadelphia Eagles", "PIT": "Pittsburgh Steelers",
"SEA": "Seattle Seahawks", "SF": "San Francisco 49ers", "TB": "Tampa Bay Buccaneers",
"TEN": "Tennessee Titans", "WAS": "Washington Commanders",
}
stadiums = pd.DataFrame({
"team_abbr": ["ARI","ATL","BAL","BUF","CAR","CHI","CIN","CLE","DAL","DEN","DET","GB","HOU","IND","JAX","KC","LV","LAC","LAR","MIA","MIN","NE","NO","NYG","NYJ","PHI","PIT","SEA","SF","TB","TEN","WAS"],
"latitude": [33.5275,33.7550,39.2787,42.7737,35.2258,41.8625,39.0954,41.5061,32.7473,39.7439,42.3400,44.5013,29.6847,39.7601,30.3240,39.0490,36.0909,33.9535,34.0140,25.9580,44.9740,42.0909,29.9511,40.8128,40.8135,39.9008,40.4469,47.5952,37.4030,27.9759,36.1663,38.9078],
"longitude": [-112.2626,-84.3915,-76.6227,-78.7868,-80.8528,-87.6167,-84.5160,-81.6995,-97.0945,-105.0201,-83.0458,-88.0622,-95.4107,-86.1637,-81.6377,-94.4839,-115.1833,-118.3391,-118.2879,-80.2389,-93.2577,-71.2643,-90.0812,-74.0743,-74.0743,-75.1675,-80.0158,-122.3316,-122.0829,-82.5034,-86.7713,-77.0074]
})
team_coords = stadiums.set_index("team_abbr")[["latitude", "longitude"]].to_dict("index")
_odds_cache = {}
# --- HELPER: HAVERSINE ---
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in statute miles between two lat/lon points."""
    earth_radius_mi = 3958.8
    phi1, phi2 = radians(lat1), radians(lat2)
    half_dlat = radians(lat2 - lat1) / 2.0
    half_dlon = radians(lon2 - lon1) / 2.0
    # Haversine formula: h is the squared half-chord length.
    h = sin(half_dlat) ** 2 + cos(phi1) * cos(phi2) * sin(half_dlon) ** 2
    return 2.0 * earth_radius_mi * atan2(sqrt(h), sqrt(1.0 - h))
# --- ODDS FUNCTIONS (PORTED FROM LOCAL SCRIPT) ---
def fetch_oddsapi_draftkings_odds(home_team_abbr, away_team_abbr):
    """Fetch DraftKings-style lines for one matchup from The Odds API.

    Returns a dict with keys spread_line / over_under_line / ml_home /
    ml_away (any value may be None), or None when the request fails or the
    matchup is not in the feed. Successful results are memoized in the
    module-level _odds_cache.
    """
    home_abbr = home_team_abbr.upper()
    away_abbr = away_team_abbr.upper()

    cache_key = ("ODDS_API_DK", home_abbr, away_abbr)
    cached = _odds_cache.get(cache_key)
    if cached is not None:  # only successful dicts are ever cached
        return cached

    home_full = ODDS_API_TEAM_NAMES.get(home_abbr)
    away_full = ODDS_API_TEAM_NAMES.get(away_abbr)
    if home_full is None or away_full is None:
        return None

    request_params = {
        "apiKey": ODDS_API_KEY,
        "regions": "us",
        "markets": "h2h,spreads,totals",
        "oddsFormat": "american",
    }
    try:
        response = requests.get(ODDS_API_URL, params=request_params, timeout=10)
        if response.status_code != 200:
            return None
        payload = response.json()
    except Exception:
        # Network/JSON failures degrade to "no odds available".
        return None

    # Locate this exact matchup in the league-wide feed.
    matches = [
        g for g in payload
        if g.get("home_team") == home_full and g.get("away_team") == away_full
    ]
    if not matches:
        return None
    game_obj = matches[0]

    # Prefer DraftKings; fall back to whichever book is listed first.
    books = game_obj.get("bookmakers", [])
    book = next(
        (
            b for b in books
            if b.get("key") == "draftkings"
            or (b.get("title") or "").lower().startswith("draftkings")
        ),
        books[0] if books else None,
    )

    spread_line = over_under_line = ml_home = ml_away = None
    if book:
        for market in book.get("markets", []):
            market_key = market.get("key")
            for outcome in market.get("outcomes", []):
                if market_key == "spreads" and outcome.get("name") == home_full:
                    try:
                        spread_line = float(outcome.get("point"))
                    except Exception:
                        pass
                elif market_key == "totals" and (outcome.get("name") or "").lower().startswith("over"):
                    try:
                        over_under_line = float(outcome.get("point"))
                    except Exception:
                        pass
                elif market_key == "h2h":
                    if outcome.get("name") == home_full:
                        ml_home = outcome.get("price")
                    elif outcome.get("name") == away_full:
                        ml_away = outcome.get("price")

    result = {
        "spread_line": spread_line,
        "over_under_line": over_under_line,
        "ml_home": ml_home,
        "ml_away": ml_away,
    }
    _odds_cache[cache_key] = result
    return result
def fetch_combined_odds(home_team_abbr, away_team_abbr):
    """Single entry point for odds lookups.

    Currently delegates straight to The Odds API fetcher (which already
    returns None on failure); kept as a seam for adding more sources.
    """
    return fetch_oddsapi_draftkings_odds(home_team_abbr, away_team_abbr)
# --- DATA LOAD FUNCTION ---
# --- DATA LOAD FUNCTION ---
@st.cache_data
def load_nfl_data():
    """Download schedules and build the engineered historical frame.

    Returns (current_season_schedule, historical_df, current_season).
    historical_df has one row per completed game since 2010 with rest-day,
    travel, pre-game team-strength, and betting-line columns plus the
    home_win / home_cover targets.
    """
    today_dt = datetime.date.today()
    # The NFL "season year" rolls over in September.
    current_season = today_dt.year if today_dt.month >= 9 else today_dt.year - 1
    games = nfl.import_schedules([current_season])
    historical_games = nfl.import_schedules(range(2010, current_season + 1))
    # Date column name differs across nfl_data_py versions.
    date_col = "game_date" if "game_date" in historical_games.columns else "gameday"
    cols_to_keep = [
        "game_id", "season", "week", "home_team", "away_team",
        "home_score", "away_score", date_col
    ]
    for col in ["spread_line", "over_under_line", "over_under_line_close"]:
        if col in historical_games.columns:
            cols_to_keep.append(col)
    df = historical_games[cols_to_keep].copy()
    # Keep completed games only.
    df = df.dropna(subset=["home_score", "away_score"])
    df["game_date"] = pd.to_datetime(df[date_col])
    df["home_win"] = (df["home_score"] > df["away_score"]).astype(int)
    df["point_diff"] = df["home_score"] - df["away_score"]
    if "spread_line" not in df.columns:
        df["spread_line"] = 0.0
    if "over_under_line" not in df.columns:
        df["over_under_line"] = df["point_diff"].abs().mean()
    # --- Rest days ---
    # BUG FIX: the previous code merged a records frame back on
    # (season, home_team, away_team); those keys are NOT unique — teams meet
    # twice per season in every division — so the merge duplicated rows.
    # Assign positionally in row order instead.
    df = df.sort_values(["season", "week"])
    home_rest_list, away_rest_list = [], []
    team_last_game = {}
    for _, row in df.iterrows():
        ht, at = row["home_team"], row["away_team"]
        gd = row["game_date"]
        # Default 7 days of rest for a team's first appearance.
        home_rest_list.append((gd - team_last_game[ht]).days if ht in team_last_game else 7)
        away_rest_list.append((gd - team_last_game[at]).days if at in team_last_game else 7)
        team_last_game[ht] = gd
        team_last_game[at] = gd
    df["home_rest_days"] = home_rest_list
    df["away_rest_days"] = away_rest_list
    # --- Travel distance (away stadium to home stadium, miles) ---
    df["travel_distance"] = [
        haversine(
            team_coords[row["home_team"]]["latitude"],
            team_coords[row["home_team"]]["longitude"],
            team_coords[row["away_team"]]["latitude"],
            team_coords[row["away_team"]]["longitude"],
        )
        if row["home_team"] in team_coords and row["away_team"] in team_coords
        else np.nan
        for _, row in df.iterrows()
    ]
    # --- Cover target ---
    # NOTE(review): assumes spread_line is quoted from the home team's
    # perspective (home covers when the margin beats -spread) — confirm
    # against the data source's convention.
    df["home_cover"] = ((df["home_score"] - df["away_score"]) > (-df["spread_line"])).astype(int)
    # --- Pre-game team strength (per season) ---
    # BUG FIX (target leakage): stats are recorded BEFORE folding in the
    # current game's result, matching what compute_matchup_features produces
    # at prediction time. The old code recorded them after the update, so
    # each row's features already contained that row's outcome. Columns are
    # assigned positionally for the same duplicate-merge reason as rest days.
    home_wp, away_wp, home_pd, away_pd = [], [], [], []
    season_stats = {}
    for _, row in df.iterrows():
        stats = season_stats.setdefault(row["season"], {})
        ht, at = row["home_team"], row["away_team"]
        hs, as_ = row["home_score"], row["away_score"]
        for t in (ht, at):
            stats.setdefault(t, {"wins": 0, "games": 0, "point_diff": 0})
        h_rec, a_rec = stats[ht], stats[at]
        # Snapshot the entering-the-game numbers (0.0 before a team's first game).
        home_wp.append(h_rec["wins"] / h_rec["games"] if h_rec["games"] else 0.0)
        away_wp.append(a_rec["wins"] / a_rec["games"] if a_rec["games"] else 0.0)
        home_pd.append(h_rec["point_diff"])
        away_pd.append(a_rec["point_diff"])
        # Now fold the current game into the running season totals.
        h_rec["games"] += 1
        a_rec["games"] += 1
        h_rec["point_diff"] += hs - as_
        a_rec["point_diff"] += as_ - hs
        if hs > as_:
            h_rec["wins"] += 1
        else:
            # Ties (rare) are credited to the away side, as before.
            a_rec["wins"] += 1
    df["home_win_pct"] = home_wp
    df["away_win_pct"] = away_wp
    df["home_point_diff"] = home_pd
    df["away_point_diff"] = away_pd
    return games, df, current_season
# Load the (cached) schedule + engineered historical frame once per session.
with st.spinner("Downloading NFL Data..."):
    games, df, current_season = load_nfl_data()
# --- MODEL TRAINING - MATCH LOCAL SCRIPT (TRAIN/TEST SPLIT) ---
# --- MODEL TRAINING - MATCH LOCAL SCRIPT (TRAIN/TEST SPLIT) ---
@st.cache_resource
def load_models(df):
    """Train the home-win and home-cover classifiers.

    Args:
        df: historical frame from load_nfl_data with the feature columns
            plus the home_win / home_cover targets.

    Returns:
        (win_pipeline, cover_pipeline, win_test_accuracy, cover_test_accuracy).
        Cached by Streamlit for the lifetime of the session.
    """
    features = [
        "home_win_pct", "away_win_pct",
        "home_point_diff", "away_point_diff",
        "home_rest_days", "away_rest_days",
        "travel_distance", "spread_line", "over_under_line"
    ]
    df_model = df.dropna(subset=features + ["home_win", "home_cover"])
    X = df_model[features]
    y_win = df_model["home_win"]
    y_cover = df_model["home_cover"]
    # One split carrying both targets keeps the win/cover train and test rows
    # aligned by construction. The original code split twice; the rows only
    # lined up by the accident of a shared random_state.
    (X_train, X_test,
     y_win_train, y_win_test,
     y_cover_train, y_cover_test) = train_test_split(
        X, y_win, y_cover, test_size=0.25, random_state=42
    )

    def _make_pipeline():
        # StandardScaler is a no-op for tree-based splits but is kept so the
        # pipeline matches the local script's artifact layout.
        return Pipeline([
            ("scaler", StandardScaler()),
            ("rf", RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1))
        ])

    pipe_win = _make_pipeline()
    pipe_cover = _make_pipeline()
    pipe_win.fit(X_train, y_win_train)
    pipe_cover.fit(X_train, y_cover_train)
    win_acc = pipe_win.score(X_test, y_win_test)
    cover_acc = pipe_cover.score(X_test, y_cover_test)
    return pipe_win, pipe_cover, win_acc, cover_acc
# Train the (cached) models once per session; accuracies feed the tab-2 log.
with st.spinner("Training models..."):
    model_win, model_cover, win_acc, cover_acc = load_models(df)
# --- FEATURE COMPUTATION FOR A MATCHUP ---
def compute_matchup_features(home_team, away_team):
    """Build the model's feature dict for one matchup.

    Locates the matchup (current-season schedule first, then history) and
    derives each team's win%, cumulative point differential, and rest days
    using only games played earlier in that season, plus the stadium-to-
    stadium travel distance.

    Raises:
        ValueError: if the matchup appears in neither frame.
    """
    home_team, away_team = home_team.upper(), away_team.upper()

    sched_mask = (games["home_team"] == home_team) & (games["away_team"] == away_team)
    if sched_mask.any():
        game_row = games[sched_mask].iloc[-1]
    else:
        hist_mask = (df["home_team"] == home_team) & (df["away_team"] == away_team)
        if not hist_mask.any():
            raise ValueError(f"No game found for {away_team} at {home_team}.")
        game_row = df[hist_mask].iloc[-1]

    season = int(game_row["season"])
    d_col = "game_date" if "game_date" in game_row.index else "gameday"
    game_date = pd.to_datetime(game_row[d_col])

    # Only games strictly before the matchup date count toward the features.
    prior = df[(df["season"] == season) & (df["game_date"] < game_date)].copy()
    prior = prior.sort_values("game_date")

    team_stats = {}
    for _, g in prior.iterrows():
        ht, at = g["home_team"], g["away_team"]
        hs, as_ = g["home_score"], g["away_score"]
        for t in (ht, at):
            rec = team_stats.setdefault(
                t, {"wins": 0, "games": 0, "point_diff": 0, "last_game": None}
            )
            rec["games"] += 1
            rec["last_game"] = g["game_date"]
        team_stats[ht]["point_diff"] += hs - as_
        team_stats[at]["point_diff"] += as_ - hs
        winner = ht if hs > as_ else at
        team_stats[winner]["wins"] += 1

    def summarize(team):
        # Neutral defaults for a team with no prior games this season.
        rec = team_stats.get(team)
        if not rec or rec["games"] == 0:
            return {"win_pct": 0.0, "point_diff": 0.0, "rest_days": 7.0}
        rest = 7.0
        if rec["last_game"] is not None:
            rest = float((game_date.normalize() - rec["last_game"].normalize()).days)
        return {
            "win_pct": rec["wins"] / rec["games"],
            "point_diff": rec["point_diff"],
            "rest_days": rest,
        }

    home_side = summarize(home_team)
    away_side = summarize(away_team)

    travel = 0.0
    if home_team in team_coords and away_team in team_coords:
        h_loc, a_loc = team_coords[home_team], team_coords[away_team]
        travel = haversine(
            h_loc["latitude"], h_loc["longitude"],
            a_loc["latitude"], a_loc["longitude"],
        )

    return {
        "home_win_pct": home_side["win_pct"], "away_win_pct": away_side["win_pct"],
        "home_point_diff": home_side["point_diff"], "away_point_diff": away_side["point_diff"],
        "home_rest_days": home_side["rest_days"], "away_rest_days": away_side["rest_days"],
        "travel_distance": travel
    }
def get_single_game_stats(ht, at):
    """Best-effort wrapper: feature dict for the matchup, or None on any error."""
    try:
        return compute_matchup_features(ht, at)
    except Exception:
        return None
# ------------------------
# UI LAYOUT
# ------------------------
tab1, tab2 = st.tabs(["Single Game Prediction", "Upcoming Week Dump"])
# TAB 1 - SINGLE GAME PREDICTION
with tab1:
    st.subheader("Predict Single Game")
    col1, col2 = st.columns(2)
    with col1:
        home_team = st.selectbox("Home Team", sorted(TEAM_ABBR_MAP.keys()))
    with col2:
        away_team = st.selectbox("Away Team", sorted(TEAM_ABBR_MAP.keys()), index=1)
    # BUG FIX: remember the analyzed matchup in session_state. Editing the
    # spread/total number inputs triggers a Streamlit rerun in which
    # st.button() returns False, so previously the results vanished and the
    # edited lines never produced a new prediction.
    if st.button("Analyze Matchup"):
        st.session_state["analyzed_matchup"] = (home_team, away_team)
    if st.session_state.get("analyzed_matchup") == (home_team, away_team):
        if home_team == away_team:
            # Robustness: both selectboxes can point at the same team.
            st.error("Home and away teams must be different.")
        else:
            with st.spinner("Crunching numbers..."):
                stats = get_single_game_stats(home_team, away_team)
            if not stats:
                st.error("No schedule or historical game found for this matchup.")
            else:
                s_col1, s_col2 = st.columns(2)
                s_col1.info(
                    f"**HOME ({home_team})**\n\n"
                    f"Win%: {stats['home_win_pct']:.3f}\n\n"
                    f"Diff: {stats['home_point_diff']:.1f}\n\n"
                    f"Rest: {stats['home_rest_days']:.1f}"
                )
                s_col2.info(
                    f"**AWAY ({away_team})**\n\n"
                    f"Win%: {stats['away_win_pct']:.3f}\n\n"
                    f"Diff: {stats['away_point_diff']:.1f}\n\n"
                    f"Rest: {stats['away_rest_days']:.1f}"
                )
                # Prefill the editable betting lines from live odds when
                # available; fall back to pick'em and a 45.0 total.
                odds = fetch_combined_odds(home_team, away_team)
                sp_val = odds.get("spread_line") if odds and odds.get("spread_line") is not None else 0.0
                ou_val = odds.get("over_under_line") if odds and odds.get("over_under_line") is not None else 45.0
                st.write("---")
                st.subheader("Betting Lines")
                c1, c2 = st.columns(2)
                spread_input = c1.number_input("Spread (Home)", value=float(sp_val))
                total_input = c2.number_input("Total (O/U)", value=float(ou_val))
                row = pd.DataFrame([stats])
                row["spread_line"] = spread_input
                row["over_under_line"] = total_input
                # predict_proba column 1 = probability of the positive class
                # (home win / home cover).
                win_prob = model_win.predict_proba(row)[0][1]
                cover_prob = model_cover.predict_proba(row)[0][1]
                st.success(f"**Win Probability:** {win_prob:.1%}")
                st.success(f"**Cover Probability:** {cover_prob:.1%}")
# TAB 2 - UPCOMING WEEK DUMP WITH TERMINAL STYLE LOGS
with tab2:
    st.subheader("Upcoming Week Predictions")
    # Single placeholder redrawn with the accumulated buffer each time,
    # emulating a scrolling terminal.
    log_box = st.empty()
    log_lines = []
    def log(line, delay=0.35):
        # Append a line, redraw the whole log, and pause briefly so the
        # output appears to stream.
        log_lines.append(line)
        log_box.text("\n".join(log_lines))
        time.sleep(delay)
    if st.button("Generate Report"):
        # Cosmetic replay of the local CLI script's banner — the data
        # download and model training already happened at app startup.
        log("Environment check passed.")
        log("Downloading NFL schedule and historical data...")
        log("Calculating rest days...")
        log("Adding travel distances...")
        log("Building historical team records for training...")
        log("Training Random Forest models...")
        log(f"Models trained. Win Acc: {win_acc:.2f}, Cover Acc: {cover_acc:.2f}")
        log("")
        log("--- NFL 2.8 Menu ---")
        log("1. Single Game Prediction")
        log("2. Batch Prediction (Excel)")
        log("3. Dump Upcoming Week Data (Excel)")
        log("Q. Quit")
        log("Select: 3")
        log("")
        log("--- Option 3: Upcoming Week Data Dump & Prediction ---")
        # Schedule frames may name the date column either way.
        d_col = "game_date" if "game_date" in games.columns else "gameday"
        # NOTE(review): mutates `games` in place — relies on st.cache_data
        # handing each rerun its own copy of the cached value; confirm.
        games[d_col] = pd.to_datetime(games[d_col])
        upcoming = games[games[d_col].dt.date >= datetime.date.today()].copy().sort_values(d_col)
        if upcoming.empty:
            st.warning("No upcoming games found.")
            log("No upcoming games found.")
        else:
            # Smallest week number among future games = the next slate.
            next_week = upcoming["week"].min()
            week_games = upcoming[upcoming["week"] == next_week]
            log(f"Processing {len(week_games)} games for Week {next_week}...")
            dump_data = []
            progress_bar = st.progress(0)
            for i, (idx, row_g) in enumerate(week_games.iterrows()):
                ht = row_g["home_team"]
                at = row_g["away_team"]
                try:
                    feats = compute_matchup_features(ht, at)
                except Exception:
                    # Best effort: unfeaturizable games still appear in the
                    # dump with NaNs (zero-filled before scoring below).
                    feats = {
                        "home_win_pct": np.nan, "away_win_pct": np.nan,
                        "home_point_diff": np.nan, "away_point_diff": np.nan,
                        "home_rest_days": np.nan, "away_rest_days": np.nan,
                        "travel_distance": np.nan
                    }
                odds = fetch_combined_odds(ht, at) or {}
                # Fallback lines when odds are unavailable: pick'em spread
                # and a 45.0 total.
                sp_val = odds.get("spread_line") if odds.get("spread_line") is not None else 0.0
                ou_val = odds.get("over_under_line") if odds.get("over_under_line") is not None else 45.0
                input_row = pd.DataFrame([{
                    "home_win_pct": feats["home_win_pct"],
                    "away_win_pct": feats["away_win_pct"],
                    "home_point_diff": feats["home_point_diff"],
                    "away_point_diff": feats["away_point_diff"],
                    "home_rest_days": feats["home_rest_days"],
                    "away_rest_days": feats["away_rest_days"],
                    "travel_distance": feats["travel_distance"],
                    "spread_line": sp_val,
                    "over_under_line": ou_val
                }]).fillna(0.0)
                # predict_proba column 1 = positive class (home win / cover),
                # expressed as a percentage for the report.
                win_prob = model_win.predict_proba(input_row)[0][1] * 100
                cover_prob = model_cover.predict_proba(input_row)[0][1] * 100
                dump_data.append({
                    "Season": current_season,
                    "Week": next_week,
                    "Date": str(row_g[d_col].date()),
                    "Home": ht,
                    "Away": at,
                    "Home Win Pct": feats["home_win_pct"],
                    "Away Win Pct": feats["away_win_pct"],
                    "Home Pt Diff": feats["home_point_diff"],
                    "Away Pt Diff": feats["away_point_diff"],
                    "Home Rest Days": feats["home_rest_days"],
                    "Away Rest Days": feats["away_rest_days"],
                    "Travel Distance": feats["travel_distance"],
                    # Raw odds (None when no book had the game) — kept
                    # distinct from the zero-filled model inputs above.
                    "Spread (Home)": odds.get("spread_line"),
                    "Total (O/U)": odds.get("over_under_line"),
                    "Home ML": odds.get("ml_home"),
                    "Away ML": odds.get("ml_away"),
                    "Model Win %": round(win_prob, 2),
                    "Model Cover %": round(cover_prob, 2),
                })
                progress_bar.progress((i + 1) / len(week_games))
            result_df = pd.DataFrame(dump_data)
            st.write("### Upcoming Week Data Dump")
            st.dataframe(result_df)
            # Local import kept next to its single use.
            from io import BytesIO
            excel_buffer = BytesIO()
            # NOTE(review): to_excel needs an Excel writer engine (e.g.
            # openpyxl) installed at runtime — confirm it is in requirements.
            result_df.to_excel(excel_buffer, index=False, sheet_name=f"Week_{next_week}")
            excel_buffer.seek(0)
            st.download_button(
                label=f"Download Week {next_week} Data Dump (Excel)",
                data=excel_buffer,
                file_name=f"NFL_Week_{next_week}_Data_Dump.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )
            csv_data = result_df.to_csv(index=False).encode("utf-8")
            st.download_button(
                label="Download Predictions CSV",
                data=csv_data,
                file_name="nfl_predictions.csv",
                mime="text/csv",
            )