# Hugging Face Spaces — status: Running
"""Gradio app for Pipeline 1: Current Season Race Predictor.

Loads XGBoost + LightGBM ensemble model + lightweight parquet lookups from HF Hub.
No SQLite dependency — everything comes from the model repo.
"""
| import json | |
| import os | |
| import tempfile | |
| from pathlib import Path | |
| import gradio as gr | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| from huggingface_hub import hf_hub_download | |
# --- Config ---
# Hub repository that every hf_hub_download call in this file reads from.
MODEL_REPO = "datamatters24/f1-race-predictor-model"

# Optional access token. A blank or whitespace-padded value is normalised
# (stripped, and turned into None when empty) so an empty string is never
# forwarded to the Hub client as if it were a real token.
HF_TOKEN = (os.environ.get("HF_TOKEN") or "").strip() or None

# Download cache under the system temp directory; created at import time.
CACHE_DIR = Path(tempfile.gettempdir()) / "racetel_cache"
CACHE_DIR.mkdir(exist_ok=True)

# F1 points system (top 10): finishing position -> points awarded.
POINTS_SYSTEM = {1: 25, 2: 18, 3: 15, 4: 12, 5: 10, 6: 8, 7: 6, 8: 4, 9: 2, 10: 1}
def dl(filename):
    """Download ``filename`` from MODEL_REPO and return what hf_hub_download returns.

    The request goes through CACHE_DIR and is authenticated with HF_TOKEN.
    Callers in this file pass the result straight to ``open``,
    ``joblib.load`` or ``pd.read_parquet``.
    """
    return hf_hub_download(
        repo_id=MODEL_REPO,
        filename=filename,
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
    )
# --- Load everything at startup ---
print("Loading model and data...")

# Try ensemble first (XGB + LGBM blend), fall back to single XGBoost.
# NOTE(review): joblib.load unpickles the downloaded files, and unpickling can
# execute arbitrary code — MODEL_REPO must only point at a trusted repository.
ENSEMBLE_CONFIG = None
XGB_MODEL = None
LGBM_MODEL = None
try:
    ENSEMBLE_CONFIG = joblib.load(dl("ensemble_config.joblib"))
    XGB_MODEL = joblib.load(dl("race_winner_xgb_tuned.joblib"))
    LGBM_MODEL = joblib.load(dl("race_winner_lgbm_tuned.joblib"))
    print(f"Loaded ensemble: XGB weight={ENSEMBLE_CONFIG['xgb_weight']:.2f}, "
          f"LGBM weight={ENSEMBLE_CONFIG['lgbm_weight']:.2f}")
except Exception as e:
    print(f"Ensemble not available ({e}), falling back to single XGBoost")
    # Drop any partially loaded ensemble state. Otherwise a config that loaded
    # before the failure would still drive FEATURE_COLS below (and, if only
    # its weight keys were missing, the blend in _get_probs) even though a
    # single XGBoost model is what actually serves predictions.
    ENSEMBLE_CONFIG = None
    LGBM_MODEL = None
    if XGB_MODEL is None:
        try:
            XGB_MODEL = joblib.load(dl("race_winner_xgb_tuned.joblib"))
        except Exception:
            XGB_MODEL = joblib.load(dl("race_winner_xgb.joblib"))

with open(dl("feature_metadata.json"), encoding="utf-8") as f:
    META = json.load(f)

# Use ensemble feature columns if available, else metadata
if ENSEMBLE_CONFIG and "feature_columns" in ENSEMBLE_CONFIG:
    FEATURE_COLS = ENSEMBLE_CONFIG["feature_columns"]
else:
    FEATURE_COLS = META["feature_columns"]

RACES = pd.read_parquet(dl("data/races_lookup.parquet"))
DRIVERS = pd.read_parquet(dl("data/drivers_lookup.parquet"))
WINNERS = pd.read_parquet(dl("data/actual_winners.parquet"))
TRAINING = pd.read_parquet(dl("data/training_dataset.parquet"))

# The constructors lookup is optional: without it the constructor tab reports
# that its data is unavailable instead of the app failing to start.
try:
    CONSTRUCTORS = pd.read_parquet(dl("data/constructors_lookup.parquet"))
    CONSTRUCTOR_NAMES = dict(zip(CONSTRUCTORS["constructorId"], CONSTRUCTORS["name"]))
except Exception as e:
    print(f"Constructor lookup not available ({e})")
    CONSTRUCTORS = pd.DataFrame()
    CONSTRUCTOR_NAMES = {}

DRIVER_NAMES = dict(zip(DRIVERS["driverId"], DRIVERS["name"]))
WINNER_MAP = dict(zip(WINNERS["raceId"], WINNERS["winner_code"]))

# Build race dropdown choices: display label -> raceId
RACE_CHOICES = []
RACE_IDS = {}
for year, rnd, race_name, country, race_id in zip(
    RACES["year"], RACES["round"], RACES["race_name"], RACES["country"], RACES["raceId"]
):
    label = f"{year} R{rnd:02d} — {race_name} ({country})"
    RACE_CHOICES.append(label)
    RACE_IDS[label] = race_id

# Build year choices for constructor tab (most recent season first)
YEAR_CHOICES = sorted(RACES["year"].unique().tolist(), reverse=True)

print(f"Loaded: {len(RACE_CHOICES)} races, {len(DRIVER_NAMES)} drivers, "
      f"{len(CONSTRUCTOR_NAMES)} constructors, {len(TRAINING)} feature rows")
def _get_probs(X):
    """Return column 1 of ``predict_proba`` (the win probability) per row of ``X``.

    When the ensemble config and both models are loaded, the XGBoost and
    LightGBM probabilities are combined with the configured weights;
    otherwise the XGBoost model is used on its own.
    """
    ensemble_ready = ENSEMBLE_CONFIG and XGB_MODEL and LGBM_MODEL
    if not ensemble_ready:
        return XGB_MODEL.predict_proba(X)[:, 1]

    from_xgb = XGB_MODEL.predict_proba(X)[:, 1]
    from_lgbm = LGBM_MODEL.predict_proba(X)[:, 1]
    weighted_xgb = ENSEMBLE_CONFIG["xgb_weight"] * from_xgb
    weighted_lgbm = ENSEMBLE_CONFIG["lgbm_weight"] * from_lgbm
    return weighted_xgb + weighted_lgbm
def predict_race(race_choice):
    """Generate predictions for the selected race.

    Args:
        race_choice: A label from RACE_CHOICES (the dropdown value).

    Returns:
        ``(figure, table)``: a horizontal bar chart of the ten highest win
        probabilities and a table of up to twenty drivers. When no valid race
        is selected, or the race has no feature rows, the figure is ``None``
        and the table holds a single ``Info`` message.
    """
    if not race_choice or race_choice not in RACE_IDS:
        return None, pd.DataFrame({"Info": ["Select a race to see predictions"]})

    race_id = RACE_IDS[race_choice]
    race_data = TRAINING[TRAINING["raceId"] == race_id].copy()
    if race_data.empty:
        return None, pd.DataFrame({"Info": ["No feature data for this race"]})

    # Predict
    race_data["win_prob"] = _get_probs(race_data[FEATURE_COLS])
    # A driver missing from the lookup would otherwise become a NaN label on
    # the chart and in the table; fall back to the raw driverId instead.
    names = race_data["driverId"].map(DRIVER_NAMES)
    race_data["driver"] = names.where(names.notna(), race_data["driverId"].astype(str))
    race_data = race_data.sort_values("win_prob", ascending=False).reset_index(drop=True)

    # Normalize to sum to 100%
    total = race_data["win_prob"].sum()
    if total > 0:
        race_data["win_pct"] = race_data["win_prob"] / total * 100
    else:
        race_data["win_pct"] = 0.0

    # Actual winner
    actual = WINNER_MAP.get(race_id)
    actual_str = f"Winner: {actual}" if actual else "Not yet raced"

    # Chart
    top_n = min(10, len(race_data))
    plot_df = race_data.head(top_n)

    # One driverId -> code mapping instead of re-filtering DRIVERS per bar.
    # keep="first" mirrors the original "first matching row" lookup.
    unique_drivers = DRIVERS.drop_duplicates("driverId", keep="first")
    code_by_driver = dict(zip(unique_drivers["driverId"], unique_drivers["code"]))

    colors = []
    for rank, driver_id in enumerate(plot_df["driverId"]):
        code = code_by_driver.get(driver_id, "")
        if actual and code == actual:
            colors.append("#00d200")  # green for actual winner
        elif rank == 0:
            colors.append("#e10600")  # red for predicted favorite
        else:
            colors.append("#1e1e1e")

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=plot_df["win_pct"].values,
        y=plot_df["driver"].values,
        orientation="h",
        marker_color=colors,
        text=[f"{p:.1f}%" for p in plot_df["win_pct"]],
        textposition="outside",
        textfont=dict(size=14),
    ))
    fig.update_layout(
        title=f"Win Probability | {actual_str}",
        xaxis_title="Win Probability (%)",
        yaxis=dict(autorange="reversed"),
        height=max(400, top_n * 50),
        margin=dict(l=10, r=80, t=50, b=40),
        plot_bgcolor="white",
        font=dict(family="Arial"),
    )

    # Table
    tbl = race_data[["driver", "grid_position", "win_pct"]].head(20).copy()
    tbl.columns = ["Driver", "Grid", "Win %"]
    grid = pd.to_numeric(tbl["Grid"], errors="coerce")
    if grid.isna().any():
        # astype(int) raises on NaN; show "-" for unknown grid slots, matching
        # how the live table renders a missing gap.
        tbl["Grid"] = ["-" if pd.isna(g) else int(g) for g in grid]
    else:
        tbl["Grid"] = grid.astype(int)
    tbl["Win %"] = tbl["Win %"].apply(lambda x: f"{x:.1f}%")
    tbl.index = range(1, len(tbl) + 1)
    return fig, tbl
def constructor_standings(year_choice):
    """Compute constructor championship projections for a season.

    For every race of the season the entrants are ranked by predicted win
    probability and POINTS_SYSTEM points are awarded by that rank. For races
    that have an entry in WINNER_MAP, points are also summed from the
    ``finish`` column (when the training data has one) as the actual tally.

    Args:
        year_choice: Season as a string or int (the dropdown passes a string).

    Returns:
        ``(figure, table)``: a grouped bar chart and a standings table sorted
        by predicted points. When constructor data is missing, no season is
        selected, or the season has no rows, the figure is ``None`` and the
        table holds a single ``Info`` message.
    """
    if not CONSTRUCTOR_NAMES:
        return None, pd.DataFrame({"Info": ["Constructor data not available"]})

    # The dropdown value is None when no seasons are available; int(None)
    # would raise instead of showing a message.
    try:
        year = int(year_choice)
    except (TypeError, ValueError):
        return None, pd.DataFrame({"Info": ["Select a season to see projections"]})

    in_season = (TRAINING["year"] == year) & (TRAINING["constructorId"].isin(CONSTRUCTOR_NAMES))
    season_data = TRAINING[in_season].copy()
    if season_data.empty:
        return None, pd.DataFrame({"Info": [f"No data for {year}"]})

    race_ids = sorted(season_data["raceId"].unique())

    # For each race, predict and compute points per constructor
    constructor_points = dict.fromkeys(CONSTRUCTOR_NAMES, 0.0)
    actual_points = dict.fromkeys(CONSTRUCTOR_NAMES, 0.0)
    races_with_results = 0
    has_finish = "finish" in season_data.columns

    for race_id in race_ids:
        race_data = season_data[season_data["raceId"] == race_id].copy()
        race_data["win_prob"] = _get_probs(race_data[FEATURE_COLS])

        # Rank by predicted probability
        race_data = race_data.sort_values("win_prob", ascending=False).reset_index(drop=True)

        # Award points by predicted finishing order; ranks outside
        # POINTS_SYSTEM score nothing.
        for pos, cid in enumerate(race_data["constructorId"], start=1):
            if cid in constructor_points:
                constructor_points[cid] += POINTS_SYSTEM.get(pos, 0)

        # Actual points from results
        if race_id not in WINNER_MAP:
            continue
        races_with_results += 1
        if not has_finish:
            continue
        for cid, finish in zip(race_data["constructorId"], race_data["finish"]):
            # pd.isna covers None, NaN and pd.NA; np.isnan raises on
            # non-float values.
            if cid not in actual_points or pd.isna(finish):
                continue
            try:
                place = int(finish)
            except (TypeError, ValueError):
                continue  # non-numeric finish marker: no points
            actual_points[cid] += POINTS_SYSTEM.get(place, 0)

    # Build results
    rows = [
        {
            "Constructor": name,
            "Predicted Pts": round(constructor_points.get(cid, 0), 1),
            "Actual Pts": round(actual_points.get(cid, 0), 1),
        }
        for cid, name in CONSTRUCTOR_NAMES.items()
    ]
    df = pd.DataFrame(rows).sort_values("Predicted Pts", ascending=False).reset_index(drop=True)

    # Chart
    fig = go.Figure()
    fig.add_trace(go.Bar(
        name="Predicted",
        x=df["Constructor"],
        y=df["Predicted Pts"],
        marker_color="#e10600",
        text=[f"{p:.0f}" for p in df["Predicted Pts"]],
        textposition="outside",
    ))
    # Only show the "Actual" series once at least one race has a recorded winner.
    if races_with_results > 0:
        fig.add_trace(go.Bar(
            name="Actual",
            x=df["Constructor"],
            y=df["Actual Pts"],
            marker_color="#00d200",
            text=[f"{p:.0f}" for p in df["Actual Pts"]],
            textposition="outside",
        ))
    fig.update_layout(
        title=f"{year} Constructor Championship — {len(race_ids)} races",
        yaxis_title="Points",
        barmode="group",
        height=450,
        margin=dict(t=60, b=40),
        plot_bgcolor="white",
        font=dict(family="Arial"),
    )

    tbl = df.copy()
    tbl.index = range(1, len(tbl) + 1)
    return fig, tbl
def load_live_predictions():
    """Load live race predictions from HF Hub.

    Returns:
        The decoded contents of ``data/live_predictions.json``, or ``None``
        when the file cannot be downloaded or parsed.
    """
    try:
        # force_download=True re-fetches the file on every call so a refresh
        # during a live race always sees the latest upload.
        live_path = hf_hub_download(
            MODEL_REPO, "data/live_predictions.json",
            cache_dir=CACHE_DIR, token=HF_TOKEN,
            force_download=True,
        )
        with open(live_path, encoding="utf-8") as f:
            return json.load(f)
    except Exception as exc:
        # Best-effort: the file may not exist yet (no race has been tracked).
        # Report the reason so auth, network or JSON errors are not
        # indistinguishable from "no race".
        print(f"Live predictions unavailable ({type(exc).__name__}: {exc})")
        return None
def live_race_display():
    """Show live race predictions.

    Returns:
        ``(figure, table, status_markdown)``. The figure charts the first ten
        entries of the payload's ``predictions`` list; the table lists every
        entry. When there is no usable payload or no predictions yet, the
        figure is ``None`` and the table holds a single ``Info`` message.
    """
    data = load_live_predictions()
    # Covers both "nothing loaded" (None) and a payload that is not a JSON
    # object, which would otherwise fail on .get below.
    if not isinstance(data, dict):
        return (None,
                pd.DataFrame({"Info": ["No live data available. Check back during a race weekend."]}),
                "No live race data available.")

    predictions = data.get("predictions", [])
    if not predictions:
        return (None,
                pd.DataFrame({"Info": ["Waiting for race to start..."]}),
                f"Race: {data.get('race', '?')} | Status: waiting")

    race = data.get("race", "?")
    lap = data.get("current_lap", 0)
    total = data.get("total_laps", "?")
    # JSON null arrives as None even when the key exists, so .get defaults do
    # not help; coerce before calling str methods.
    status = str(data.get("status") or "unknown")
    sc = " | SAFETY CAR" if data.get("safety_car") else ""
    ts = str(data.get("timestamp") or "")[:19].replace("T", " ")

    status_text = (f"**{race}** | Lap {lap}/{total}{sc} | "
                   f"Status: {status.upper()} | Updated: {ts} UTC")

    # Chart
    top = predictions[:10]
    drivers = [p["driver"] for p in top]
    probs = [p["win_prob"] for p in top]
    colors = []
    for rank, p in enumerate(top):
        if p.get("status") == "DNF":
            colors.append("#555555")  # DNFs in grey, even in first place
        elif rank == 0:
            colors.append("#e10600")
        else:
            colors.append("#1e1e1e")

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=probs, y=drivers, orientation="h",
        marker_color=colors,
        text=[f"{p:.1f}%" for p in probs],
        textposition="outside",
        textfont=dict(size=14),
    ))
    fig.update_layout(
        title=f"Live Win Probability | Lap {lap}/{total}",
        xaxis_title="Win Probability (%)",
        yaxis=dict(autorange="reversed"),
        height=max(400, len(drivers) * 50),
        margin=dict(l=10, r=80, t=50, b=40),
        plot_bgcolor="white",
        font=dict(family="Arial"),
    )

    # Table
    rows = [
        {
            "Driver": p["driver"],
            "Pos": p["position"],
            "Win %": f"{p['win_prob']:.1f}%",
            # A falsy gap (missing, None or 0) is shown as "-".
            "Gap": f"+{p['gap']:.1f}s" if p.get("gap") else "-",
            "Tyre": f"{p.get('compound', '?')} L{p.get('tyre_age', '?')}",
            "Pits": p.get("pit_stops", 0),
            "Status": p.get("status", ""),
        }
        for p in predictions
    ]
    tbl = pd.DataFrame(rows)
    tbl.index = range(1, len(tbl) + 1)
    return fig, tbl, status_text
# --- UI ---
# Three tabs: live race state, per-race win prediction, and season-long
# constructor projection. Each tab wires its button (and dropdown, where
# present) to the matching handler defined above.
with gr.Blocks(
    title="F1 Race Predictor",
    theme=gr.themes.Base(primary_hue="red", neutral_hue="slate"),
) as app:
    gr.Markdown("# F1 Race Predictor")
    gr.Markdown(
        "XGBoost + LightGBM ensemble trained on 2014-2023 F1 data | 470K+ telemetry laps | "
        "21 features | Optuna-tuned (200 trials) | "
        "[telemetrychaos.space](https://telemetrychaos.space)"
    )
    with gr.Tabs():
        with gr.Tab("Live Race"):
            live_status = gr.Markdown("Loading live data...")
            live_chart = gr.Plot(label="Live Win Probabilities")
            live_table = gr.Dataframe(label="Race State", wrap=True)
            live_btn = gr.Button("Refresh", variant="primary")
            live_btn.click(live_race_display, outputs=[live_chart, live_table, live_status])
        with gr.Tab("Race Winner"):
            with gr.Row():
                race_dd = gr.Dropdown(choices=RACE_CHOICES, label="Select Race",
                                      value=RACE_CHOICES[0] if RACE_CHOICES else None)
                race_btn = gr.Button("Predict", variant="primary", scale=0)
            race_chart = gr.Plot(label="Win Probabilities")
            race_table = gr.Dataframe(label="Full Predictions", wrap=True)
            race_btn.click(predict_race, inputs=[race_dd], outputs=[race_chart, race_table])
            race_dd.change(predict_race, inputs=[race_dd], outputs=[race_chart, race_table])
        with gr.Tab("Constructor Championship"):
            with gr.Row():
                year_dd = gr.Dropdown(choices=[str(y) for y in YEAR_CHOICES],
                                      label="Season",
                                      value=str(YEAR_CHOICES[0]) if YEAR_CHOICES else None)
                con_btn = gr.Button("Project", variant="primary", scale=0)
            con_chart = gr.Plot(label="Constructor Points")
            con_table = gr.Dataframe(label="Standings", wrap=True)
            con_btn.click(constructor_standings, inputs=[year_dd], outputs=[con_chart, con_table])
            year_dd.change(constructor_standings, inputs=[year_dd], outputs=[con_chart, con_table])
    # Populate the initially selected race on page load.
    app.load(predict_race, inputs=[race_dd], outputs=[race_chart, race_table])
    # The Live Race tab opens with "Loading live data..."; fetch the data on
    # page load so that placeholder is replaced without pressing Refresh.
    app.load(live_race_display, outputs=[live_chart, live_table, live_status])

if __name__ == "__main__":
    app.launch()