Spaces:
Running
Running
"""Gradio app for Virtual Race Simulator — cross-era F1 what-if races.

Pick drivers from any era, choose a circuit, and run Monte Carlo simulations
to see who would win in a head-to-head battle across time.
"""
| import os | |
| from pathlib import Path | |
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| import yaml | |
| from huggingface_hub import hf_hub_download | |
# ── Download data from HF Hub ──────────────────────────────────────────
# Optional Hugging Face access token; an empty string means "anonymous".
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Dataset repository that holds the driver profiles and track definitions.
DATA_REPO = "datamatters24/f1-race-data"
# Local directory where downloaded files are kept between calls.
CACHE_DIR = Path("/tmp/virtual-race-cache")
# parents=True so start-up does not fail if the parent directory is missing.
CACHE_DIR.mkdir(parents=True, exist_ok=True)
def download_file(repo_id, filename, repo_type="dataset"):
    """Return the local path of ``filename``, downloading it if not cached.

    Args:
        repo_id: Hugging Face Hub repository identifier.
        filename: Path of the file inside the repository.
        repo_type: Repository type passed to ``hf_hub_download``.

    Returns:
        ``CACHE_DIR / filename``. The Hub is only contacted when that path
        does not exist yet.
    """
    cached_path = CACHE_DIR / filename
    if cached_path.exists():
        return cached_path

    # An empty token is passed as None so the request is made anonymously.
    auth_token = HF_TOKEN or None
    hf_hub_download(
        repo_id,
        filename,
        repo_type=repo_type,
        local_dir=str(CACHE_DIR),
        token=auth_token,
    )
    return cached_path
# ── Inline simulation engine (self-contained for HF Space) ─────────────
# Tyre compounds.
# Per compound:
#   pace_offset - added to every lap time set on this compound
#   deg_rate    - wear added per lap of tyre age (scaled by the track)
#   max_life    - tyre age at which a pit stop is forced
_COMPOUND_FIELDS = ("pace_offset", "deg_rate", "max_life")
COMPOUND_DATA = {
    compound: dict(zip(_COMPOUND_FIELDS, values))
    for compound, values in (
        ("SOFT", (-0.8, 0.08, 25)),
        ("MEDIUM", (0.0, 0.04, 35)),
        ("HARD", (0.5, 0.02, 50)),
        ("INTERMEDIATE", (3.0, 0.03, 40)),
        ("WET", (8.0, 0.02, 50)),
    )
}

# Base lap time used for every car while the safety car is deployed.
SAFETY_CAR_LAP_TIME = 110.0
def tyre_degradation(compound, tyre_age, track_deg):
    """Return the lap-time penalty caused by tyre wear.

    The penalty grows linearly with ``tyre_age`` (compound ``deg_rate`` times
    ``track_deg``). Once the tyre is older than 80% of the compound's
    ``max_life`` a quadratic "cliff" term is added on top; it equals 2.0 at
    exactly ``max_life``.

    Args:
        compound: Compound name; unknown names use the MEDIUM figures.
        tyre_age: Laps completed on the current set.
        track_deg: Track-specific degradation multiplier.
    """
    spec = COMPOUND_DATA.get(compound, COMPOUND_DATA["MEDIUM"])
    life = spec["max_life"]
    wear = spec["deg_rate"] * track_deg * tyre_age

    cliff_start = life * 0.8
    if tyre_age > cliff_start:
        overrun = (tyre_age - cliff_start) / (life * 0.2)
        wear += overrun ** 2 * 2.0
    return wear
def compound_pace_offset(compound, is_raining):
    """Return the lap-time offset for running ``compound`` in the current weather.

    Starts from the compound's ``pace_offset`` and adds a penalty when the
    tyre does not suit the conditions: +8.0 for a dry compound in the rain,
    +15.0 for INTERMEDIATE/WET on a dry track. Unknown compound names use the
    MEDIUM offset and never receive a mismatch penalty.
    """
    base = COMPOUND_DATA.get(compound, COMPOUND_DATA["MEDIUM"])["pace_offset"]
    on_dry_tyres = compound in ("SOFT", "MEDIUM", "HARD")
    on_rain_tyres = compound in ("INTERMEDIATE", "WET")

    if is_raining:
        return base + 8.0 if on_dry_tyres else base
    return base + 15.0 if on_rain_tyres else base
def should_pit(tyre_age, compound, lap, n_laps, track_deg, is_raining, pit_loss, rng):
    """Decide whether a car pits at the start of ``lap``.

    Rules, in order:
      1. Never pit with 5 or fewer laps remaining.
      2. Never pit before lap 5 on a dry track.
      3. On the wrong tyre for the weather, pit at random: 60% chance for
         dry tyres in the rain, 70% for rain tyres on a dry track.
      4. Pit when the extra wear over the next five laps, compared with a
         fresh MEDIUM set, exceeds 80% of ``pit_loss``.
      5. Pit when the tyre has reached the compound's ``max_life``.

    ``rng`` only needs a ``random()`` method; it is consulted solely in rule 3.
    """
    if n_laps - lap <= 5:
        return False
    if not is_raining and lap < 5:
        return False

    if is_raining:
        if compound in ("SOFT", "MEDIUM", "HARD"):
            return rng.random() < 0.6
    elif compound in ("INTERMEDIATE", "WET"):
        return rng.random() < 0.7

    # Compare staying out for five more laps against five laps on new mediums.
    staying_out = sum(
        tyre_degradation(compound, tyre_age + ahead, track_deg) for ahead in range(1, 6)
    )
    fresh_set = sum(
        tyre_degradation("MEDIUM", ahead, track_deg) for ahead in range(1, 6)
    )
    if staying_out - fresh_set > pit_loss * 0.8:
        return True

    spec = COMPOUND_DATA.get(compound, COMPOUND_DATA["MEDIUM"])
    return tyre_age >= spec["max_life"]
def choose_compound(lap, n_laps, is_raining, used):
    """Pick the compound to fit at a pit stop.

    In the rain the answer is always INTERMEDIATE. On a dry track the
    preferred compound depends on the laps remaining (<=20: SOFT, <=35:
    MEDIUM, otherwise HARD); if the preferred compound already appears in
    ``used`` the listed alternative is fitted instead.

    Args:
        lap: Current lap number.
        n_laps: Race distance in laps.
        is_raining: Whether it is currently raining.
        used: Compounds this car has already run in the race.
    """
    if is_raining:
        return "INTERMEDIATE"

    remaining = n_laps - lap
    if remaining <= 20:
        preferred, alternative = "SOFT", "MEDIUM"
    elif remaining <= 35:
        preferred, alternative = "MEDIUM", "HARD"
    else:
        preferred, alternative = "HARD", "MEDIUM"

    return alternative if preferred in used else preferred
def _driver_skill(driver_row, column, default=0.5):
    """Return the rating in ``column``, or ``default`` when it is missing or NaN."""
    value = driver_row.get(column, default)
    return default if pd.isna(value) else value


def _simulate_single_race(profiles, track, sc_prob, srng):
    """Simulate one race and return ``(order, retired)``.

    ``order`` lists driver indices in final track order with every retired
    car placed after the cars still running; ``retired[i]`` tells whether
    driver ``i`` failed to finish.
    """
    n = len(profiles)
    n_laps = track["laps"]
    quali_weight = track["quali_importance"]
    rain_per_lap = track["rain_prob"] / n_laps * 3

    # Qualifying: blend of one-lap and race pace plus noise, best score first.
    quali_scores = []
    for i, p in enumerate(profiles):
        score = p["quali"] * quali_weight + p["pace"] * (1 - quali_weight)
        score += srng.normal(0, 0.08)
        quali_scores.append((i, score))
    quali_scores.sort(key=lambda x: -x[1])
    positions = [idx for idx, _ in quali_scores]

    # Per-driver race state; everybody starts on fresh mediums.
    compounds = ["MEDIUM"] * n
    tyre_ages = [0] * n
    retired = [False] * n
    compounds_used = [["MEDIUM"] for _ in range(n)]
    sc_active = False
    sc_remaining = 0
    is_raining = False

    for lap in range(1, n_laps + 1):
        # Safety car: count down an active period, otherwise maybe start one.
        if sc_active:
            sc_remaining -= 1
            if sc_remaining <= 0:
                sc_active = False
        elif lap > 1 and srng.random() < sc_prob:
            sc_active = True
            sc_remaining = srng.integers(2, 6)

        # Rain can start on a dry lap or stop (20% chance) on a wet one.
        if not is_raining and srng.random() < rain_per_lap:
            is_raining = True
        elif is_raining and srng.random() < 0.2:
            is_raining = False

        lap_times = {}
        for driver_idx in positions:
            if retired[driver_idx]:
                continue
            p = profiles[driver_idx]

            # Retirement check: less consistent drivers retire more often,
            # and the risk is 50% higher in the second half of the race.
            reliability = 0.7 + p["consistency"] * 0.3
            dnf_prob = 0.0005 + (1 - reliability) * 0.003
            if lap > n_laps * 0.5:
                dnf_prob *= 1.5
            if srng.random() < dnf_prob:
                retired[driver_idx] = True
                continue

            # Pit stop: fit a new compound and reset the tyre age.
            pitted = should_pit(
                tyre_ages[driver_idx], compounds[driver_idx], lap,
                n_laps, track["tire_deg"], is_raining,
                track["pit_loss"], srng,
            )
            if pitted:
                new_c = choose_compound(lap, n_laps, is_raining, compounds_used[driver_idx])
                compounds[driver_idx] = new_c
                compounds_used[driver_idx].append(new_c)
                tyre_ages[driver_idx] = 0

            # Lap time.
            if sc_active:
                lt = SAFETY_CAR_LAP_TIME + srng.normal(0, 0.1)
            else:
                lt = 90.0 + (1 - p["pace"]) * 4.0
                lt += compound_pace_offset(compounds[driver_idx], is_raining)
                lt += tyre_degradation(compounds[driver_idx], tyre_ages[driver_idx], track["tire_deg"])
                if is_raining:
                    lt += 5.0 * (1 - p["wet"] * 0.6)
                noise = 0.3 + (1 - p["consistency"]) * 0.7
                lt += srng.normal(0, noise)
                lt = max(lt, 60.0)
            if pitted:
                lt += track["pit_loss"]
            lap_times[driver_idx] = lt
            tyre_ages[driver_idx] += 1

        # Every car still running has a lap time for this lap.
        running = [d for d in positions if not retired[d]]

        # Overtaking (simplified): from the back of the field forwards, a car
        # that lapped faster than the one directly ahead may swap places.
        if not sc_active:
            for i in range(len(running) - 1, 0, -1):
                att, defn = running[i], running[i - 1]
                delta = lap_times[defn] - lap_times[att]
                if delta > 0:
                    prob = (1 - np.exp(-delta * 1.5)) * (1 - track["overtaking_diff"] * 0.8)
                    if is_raining:
                        prob *= 1.3
                    prob *= 0.8 + profiles[att]["overtaking"] * 0.4
                    if srng.random() < min(prob, 0.95):
                        running[i - 1], running[i] = running[i], running[i - 1]

        # Move retired cars to the back on EVERY lap, safety car or not, so a
        # retirement under a late safety car cannot hold a finishing position.
        positions = running + [d for d in positions if retired[d]]

    return positions, retired


def simulate_race_batch(drivers_df, track, n_sims=1000, seed=42):
    """Run a Monte Carlo batch of races and summarise the outcomes.

    Args:
        drivers_df: One row per driver with a ``name`` column and, optionally,
            ``quali_dominance``, ``race_pace``, ``consistency``,
            ``wet_mastery`` and ``overtaking`` ratings. Missing or NaN
            ratings are treated as 0.5.
        track: Mapping with ``laps``, ``safety_car_prob``, ``rain_prob``,
            ``quali_importance``, ``tire_deg``, ``pit_loss`` and
            ``overtaking_diff``.
        n_sims: Number of races to simulate (at least 1).
        seed: Seed for the master random generator; each race gets its own
            generator seeded from it.

    Returns:
        DataFrame with columns ``Driver``, ``Win %``, ``Podium %``,
        ``Avg Finish`` and ``DNF %``, sorted by ``Win %`` descending.
        ``Avg Finish`` is averaged over finished races only; a driver who
        never finished gets the grid size.

    Raises:
        ValueError: If ``n_sims`` is smaller than 1.
    """
    if n_sims < 1:
        raise ValueError("n_sims must be at least 1")

    rng = np.random.default_rng(seed)
    n = len(drivers_df)
    names = drivers_df["name"].tolist()
    win_counts = np.zeros(n)
    podium_counts = np.zeros(n)
    pos_sums = np.zeros(n)
    finish_counts = np.zeros(n)
    dnf_counts = np.zeros(n)

    # Pre-extract profiles so the hot loop works on plain dicts.
    profiles = [
        {
            "quali": _driver_skill(d, "quali_dominance"),
            "pace": _driver_skill(d, "race_pace"),
            "consistency": _driver_skill(d, "consistency"),
            "wet": _driver_skill(d, "wet_mastery"),
            "overtaking": _driver_skill(d, "overtaking"),
        }
        for _, d in drivers_df.iterrows()
    ]

    sc_prob = track["safety_car_prob"] / track["laps"]

    for _ in range(n_sims):
        srng = np.random.default_rng(rng.integers(0, 2**32))
        order, retired = _simulate_single_race(profiles, track, sc_prob, srng)

        # Record: finishers occupy the leading slots of ``order``.
        for pos, idx in enumerate(order):
            if retired[idx]:
                dnf_counts[idx] += 1
                continue
            fp = pos + 1
            pos_sums[idx] += fp
            finish_counts[idx] += 1
            if fp == 1:
                win_counts[idx] += 1
            if fp <= 3:
                podium_counts[idx] += 1

    # Divide only where a driver finished at least once; this avoids the 0/0
    # warning and leaves the grid size as the value for never-finishers.
    avg_finish = np.divide(
        pos_sums,
        finish_counts,
        out=np.full(n, float(n)),
        where=finish_counts > 0,
    ).round(1)

    results = pd.DataFrame({
        "Driver": names,
        "Win %": (win_counts / n_sims * 100).round(1),
        "Podium %": (podium_counts / n_sims * 100).round(1),
        "Avg Finish": avg_finish,
        "DNF %": (dnf_counts / n_sims * 100).round(1),
    }).sort_values("Win %", ascending=False).reset_index(drop=True)
    return results
# ── Load data ──────────────────────────────────────────────────────────
# Driver profiles: prefer the Hub copy, fall back to the repository checkout.
# The broad ``except`` is a deliberate best-effort; if the local fallback
# also fails, Python reports both errors in the traceback.
try:
    profiles_path = download_file(DATA_REPO, "driver_profiles.parquet")
    ALL_PROFILES = pd.read_parquet(profiles_path)
except Exception:
    ALL_PROFILES = pd.read_parquet("data/features/driver_profiles.parquet")

# Build driver list for dropdown, best overall rating first.
DRIVER_CHOICES = ALL_PROFILES.sort_values("overall_rating", ascending=False)["name"].tolist()

# Load tracks (same Hub-then-local strategy). The encoding is explicit so
# non-ASCII circuit names do not depend on the host locale.
try:
    tracks_path = download_file(DATA_REPO, "tracks.yaml")
    with open(tracks_path, encoding="utf-8") as f:
        TRACKS_RAW = yaml.safe_load(f)["tracks"]
except Exception:
    with open("config/tracks.yaml", encoding="utf-8") as f:
        TRACKS_RAW = yaml.safe_load(f)["tracks"]

# Display name -> key in TRACKS_RAW.
TRACK_CHOICES = {v["name"]: k for k, v in TRACKS_RAW.items()}
def parse_track(track_name):
    """Return the simulator's track dict for the circuit shown as ``track_name``.

    Looks the display name up in ``TRACK_CHOICES`` and renames the raw YAML
    fields to the short keys the simulation reads.

    Raises:
        KeyError: If ``track_name`` is not a known circuit or the raw entry
            lacks one of the expected fields.
    """
    raw = TRACKS_RAW[TRACK_CHOICES[track_name]]
    # Simulator key -> field name in the raw track definition.
    field_map = {
        "name": "name",
        "laps": "laps",
        "overtaking_diff": "overtaking_difficulty",
        "pit_loss": "pit_time_loss_seconds",
        "safety_car_prob": "safety_car_probability",
        "tire_deg": "tire_degradation_multiplier",
        "rain_prob": "rain_probability",
        "quali_importance": "qualifying_importance",
    }
    return {sim_key: raw[raw_key] for sim_key, raw_key in field_map.items()}
def find_drivers(names):
    """Return a DataFrame with the first matching profile for each name.

    Names without an exact match in ``ALL_PROFILES["name"]`` are skipped, so
    the result may have fewer rows than ``names``. Rows keep the order of
    ``names``.
    """
    rows = []
    for wanted in names:
        hits = ALL_PROFILES.loc[ALL_PROFILES["name"] == wanted]
        if hits.empty:
            continue
        rows.append(hits.iloc[0])
    return pd.DataFrame(rows)
# ── Gradio interface ───────────────────────────────────────────────────
def run_simulation(driver_names, track_name, n_sims):
    """Gradio callback: simulate the selected grid and build the outputs.

    Args:
        driver_names: Driver names chosen in the dropdown (may be ``None``
            or empty when nothing is selected).
        track_name: Display name of the chosen circuit.
        n_sims: Number of simulations; converted with ``int``.

    Returns:
        ``(figure, results, summary)`` where ``figure`` is a Plotly bar
        chart of win percentages, ``results`` the DataFrame from
        ``simulate_race_batch`` and ``summary`` a Markdown table. When the
        input is unusable the first two items are ``None`` and ``summary``
        carries the message to show.
    """
    # ``not driver_names`` also covers None from an emptied dropdown.
    if not driver_names or len(driver_names) < 2:
        return None, None, "Select at least 2 drivers."
    if track_name not in TRACK_CHOICES:
        return None, None, "Select a circuit."

    n_sims = int(n_sims)
    drivers = find_drivers(driver_names)
    if len(drivers) < 2:
        return None, None, "Could not find enough valid drivers."

    track = parse_track(track_name)
    # A fresh seed per click so repeated runs give different samples.
    results = simulate_race_batch(drivers, track, n_sims=n_sims, seed=np.random.randint(0, 100000))

    # Win probability bar chart; the first (highest Win %) bar is highlighted.
    fig = go.Figure()
    colors = ["#e10600" if i == 0 else "#333" for i in range(len(results))]
    fig.add_trace(go.Bar(
        x=results["Driver"],
        y=results["Win %"],
        marker_color=colors,
        text=[f"{v:.1f}%" for v in results["Win %"]],
        textposition="outside",
    ))
    fig.update_layout(
        title=f"Win Probability — {track['name']} ({n_sims:,} simulations)",
        yaxis_title="Win %",
        template="plotly_dark",
        height=400,
        margin=dict(t=60, b=40),
        font=dict(family="monospace"),
    )

    # Format summary text as a Markdown table.
    lines = [
        f"**Virtual Race at {track['name']}** ({n_sims:,} simulations)\n",
        "| Driver | Win % | Podium % | Avg Finish | DNF % |",
        "|--------|-------|----------|------------|-------|",
    ]
    lines.extend(
        f"| {row['Driver']} | {row['Win %']:.1f}% | {row['Podium %']:.1f}% "
        f"| {row['Avg Finish']:.1f} | {row['DNF %']:.1f}% |"
        for _, row in results.iterrows()
    )
    summary = "\n".join(lines) + "\n"
    return fig, results, summary
# ── Build app ──────────────────────────────────────────────────────────
# Default legendary grid pre-selected in the driver dropdown.
DEFAULT_DRIVERS = [
    "Max Verstappen", "Lewis Hamilton", "Michael Schumacher",
    "Ayrton Senna", "Alain Prost", "Sebastian Vettel",
    "Fernando Alonso", "Mika Häkkinen", "Nigel Mansell", "Juan Fangio",
]
# Filter to drivers that exist in our profiles, so the dropdown never
# starts with a value that is not among its choices.
DEFAULT_DRIVERS = [d for d in DEFAULT_DRIVERS if d in DRIVER_CHOICES]
# Pre-select Monza when it is available; otherwise use the first loaded
# circuit so the dropdown never starts on a value outside its choices.
_TRACK_NAMES = list(TRACK_CHOICES.keys())
if "Monza" in TRACK_CHOICES:
    _DEFAULT_TRACK = "Monza"
else:
    _DEFAULT_TRACK = _TRACK_NAMES[0] if _TRACK_NAMES else None

with gr.Blocks(
    title="F1 Virtual Race Simulator",
    theme=gr.themes.Base(primary_hue="red", neutral_hue="gray"),
    css=".gradio-container { max-width: 960px !important; }"
) as app:
    gr.Markdown(
        "# F1 Virtual Race Simulator\n"
        "Pick drivers from **any era** (1950-2025), choose a circuit, and run "
        "Monte Carlo simulations to see who would win. Driver abilities are "
        "normalized across eras using relative performance metrics."
    )
    with gr.Row():
        driver_select = gr.Dropdown(
            choices=DRIVER_CHOICES,
            value=DEFAULT_DRIVERS,
            multiselect=True,
            # Enforce the upper bound promised by the label; the lower bound
            # is checked in run_simulation.
            max_choices=20,
            label="Select Drivers (2-20)",
            info="Search by name — all F1 drivers with 10+ races available",
        )
    with gr.Row():
        track_select = gr.Dropdown(
            choices=_TRACK_NAMES,
            value=_DEFAULT_TRACK,
            label="Circuit",
        )
        sim_count = gr.Slider(
            minimum=100, maximum=10000, value=1000, step=100,
            label="Number of Simulations",
        )
    run_btn = gr.Button("Run Simulation", variant="primary")
    chart = gr.Plot(label="Win Probability")
    table = gr.DataFrame(label="Full Results")
    summary = gr.Markdown()

    # One click runs the batch and fills the chart, table and summary.
    run_btn.click(
        fn=run_simulation,
        inputs=[driver_select, track_select, sim_count],
        outputs=[chart, table, summary],
    )

if __name__ == "__main__":
    app.launch()