# f1-virtual-race / app.py
# datamatters24's picture
# Upload app.py with huggingface_hub
# 2333224 verified
"""Gradio app for Virtual Race Simulator — cross-era F1 what-if races.
Pick drivers from any era, choose a circuit, and run Monte Carlo simulations
to see who would win in a head-to-head battle across time.
"""
import os
from pathlib import Path
import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import yaml
from huggingface_hub import hf_hub_download
# ── Download data from HF Hub ──────────────────────────────────────────
# Hugging Face access token; an unset or empty variable means anonymous access.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Hub repository that holds the driver profiles and the track configuration.
DATA_REPO = "datamatters24/f1-race-data"
# Local directory where downloaded files are kept and reused.
CACHE_DIR = Path("/tmp/virtual-race-cache")
# parents=True: do not crash at import time when the parent directory is
# missing (e.g. local runs on a platform without /tmp).
CACHE_DIR.mkdir(parents=True, exist_ok=True)
def download_file(repo_id, filename, repo_type="dataset"):
    """Return the local path of ``filename``, downloading it on first use.

    The file is fetched from the Hub repository ``repo_id`` (of kind
    ``repo_type``) into ``CACHE_DIR`` only when no file of that name exists
    there yet; an existing copy is returned as-is and never refreshed.
    """
    target = CACHE_DIR / filename
    if target.exists():
        return target
    hf_hub_download(
        repo_id,
        filename,
        repo_type=repo_type,
        local_dir=str(CACHE_DIR),
        token=HF_TOKEN or None,  # empty token string -> pass None instead
    )
    return target
# ── Inline simulation engine (self-contained for HF Space) ─────────────
# Tyre compounds
# Per-compound tyre model, keyed by compound name:
#   pace_offset - base amount compound_pace_offset() adds to a lap time
#                 (negative = faster); presumably seconds - TODO confirm
#   deg_rate    - wear coefficient; tyre_degradation() multiplies it by the
#                 track degradation factor and the tyre age
#   max_life    - tyre age at or beyond which should_pit() asks for a stop;
#                 the extra "cliff" term in tyre_degradation() starts at 80% of it
COMPOUND_DATA = {
    "SOFT": {"pace_offset": -0.8, "deg_rate": 0.08, "max_life": 25},
    "MEDIUM": {"pace_offset": 0.0, "deg_rate": 0.04, "max_life": 35},
    "HARD": {"pace_offset": 0.5, "deg_rate": 0.02, "max_life": 50},
    "INTERMEDIATE": {"pace_offset": 3.0, "deg_rate": 0.03, "max_life": 40},
    "WET": {"pace_offset": 8.0, "deg_rate": 0.02, "max_life": 50},
}
# Base lap time given to every running car while the safety car is out
# (simulate_race_batch adds a small random jitter); presumably seconds.
SAFETY_CAR_LAP_TIME = 110.0
def tyre_degradation(compound, tyre_age, track_deg):
    """Return the lap-time penalty caused by tyre wear.

    The penalty grows linearly with ``tyre_age`` (scaled by the compound's
    ``deg_rate`` and the track factor ``track_deg``). Once the tyre is past
    80% of the compound's ``max_life`` a quadratic "cliff" term is added on
    top. Unknown compounds are treated as MEDIUM.
    """
    spec = COMPOUND_DATA.get(compound, COMPOUND_DATA["MEDIUM"])
    wear = spec["deg_rate"] * track_deg * tyre_age
    life = spec["max_life"]
    cliff_start = life * 0.8
    if tyre_age <= cliff_start:
        return wear
    # Fraction of the final 20% of tyre life already used up (can exceed 1).
    overshoot = (tyre_age - cliff_start) / (life * 0.2)
    return wear + overshoot ** 2 * 2.0
def compound_pace_offset(compound, is_raining):
    """Return the lap-time offset of ``compound`` in the current weather.

    Starts from the compound's ``pace_offset`` and adds a mismatch penalty:
    8.0 for a dry compound in the rain, 15.0 for INTERMEDIATE/WET on a dry
    track. Unknown compounds use the MEDIUM base offset and get no penalty.
    """
    base = COMPOUND_DATA.get(compound, COMPOUND_DATA["MEDIUM"])["pace_offset"]
    on_slicks = compound in ("SOFT", "MEDIUM", "HARD")
    on_wets = compound in ("INTERMEDIATE", "WET")
    if is_raining and on_slicks:
        mismatch = 8.0
    elif not is_raining and on_wets:
        mismatch = 15.0
    else:
        return base
    return base + mismatch
def should_pit(tyre_age, compound, lap, n_laps, track_deg, is_raining, pit_loss, rng):
    """Decide whether a car pits on this lap.

    Rules, checked in order:
      1. never with 5 or fewer laps remaining;
      2. never before lap 5 on a dry track;
      3. wrong tyre for the weather: pit at random (60% for a dry compound in
         the rain, 70% for INTERMEDIATE/WET in the dry) - this is the only
         place ``rng`` is consumed, with exactly one draw;
      4. pit when the wear cost of the next five laps on the current tyre
         exceeds that of a fresh MEDIUM by more than 80% of ``pit_loss``;
      5. pit when the tyre has reached the compound's ``max_life``.
    """
    if n_laps - lap <= 5:
        return False
    if lap < 5 and not is_raining:
        return False

    on_slicks = compound in ("SOFT", "MEDIUM", "HARD")
    on_wets = compound in ("INTERMEDIATE", "WET")
    if is_raining:
        if on_slicks:
            return rng.random() < 0.6
    elif on_wets:
        return rng.random() < 0.7

    lookahead = range(1, 6)
    stay_out_cost = sum(
        tyre_degradation(compound, tyre_age + step, track_deg) for step in lookahead
    )
    fresh_set_cost = sum(
        tyre_degradation("MEDIUM", step, track_deg) for step in lookahead
    )
    if stay_out_cost - fresh_set_cost > pit_loss * 0.8:
        return True

    spec = COMPOUND_DATA.get(compound, COMPOUND_DATA["MEDIUM"])
    return bool(tyre_age >= spec["max_life"])
def choose_compound(lap, n_laps, is_raining, used):
    """Pick the compound to fit at a pit stop.

    In the rain the answer is always INTERMEDIATE. Otherwise the stint length
    left decides the preferred dry compound (<= 20 laps: SOFT, <= 35 laps:
    MEDIUM, longer: HARD); if that compound already appears in ``used`` the
    alternative for that bracket is returned instead.
    """
    if is_raining:
        return "INTERMEDIATE"

    slicks_run = [tyre for tyre in used if tyre in ("SOFT", "MEDIUM", "HARD")]
    remaining = n_laps - lap
    if remaining <= 20:
        preferred, alternative = "SOFT", "MEDIUM"
    elif remaining <= 35:
        preferred, alternative = "MEDIUM", "HARD"
    else:
        preferred, alternative = "HARD", "MEDIUM"
    return alternative if preferred in slicks_run else preferred
def _profile_value(row, key, default=0.5):
    """Return ``row[key]``, or ``default`` when the key is absent or the value is NaN."""
    value = row.get(key, default)
    return default if pd.isna(value) else value


def simulate_race_batch(drivers_df, track, n_sims=1000, seed=42):
    """Run a Monte Carlo batch of races and aggregate the outcomes.

    Args:
        drivers_df: One row per driver. Must have a ``name`` column; the trait
            columns ``quali_dominance``, ``race_pace``, ``consistency``,
            ``wet_mastery`` and ``overtaking`` are optional and default to 0.5
            when missing or NaN (traits are presumably on a 0-1 scale -
            TODO confirm against the profile builder).
        track: Mapping with the keys ``laps``, ``safety_car_prob``,
            ``rain_prob``, ``quali_importance``, ``tire_deg``, ``pit_loss``
            and ``overtaking_diff``.
        n_sims: Number of independent races to simulate.
        seed: Seed of the master generator; every race gets its own child
            generator derived from it, so a batch is reproducible.

    Returns:
        DataFrame sorted by ``Win %`` (descending) with the columns
        ``Driver``, ``Win %``, ``Podium %``, ``Avg Finish`` and ``DNF %``.
        ``Avg Finish`` averages over the races a driver finished and is the
        grid size for a driver who never finished.

    Note:
        The classification is the on-track running order after the last lap
        (grid order from qualifying, changed only by overtakes and
        retirements); lap times feed the overtaking model but are not summed
        into a race time.
    """
    rng = np.random.default_rng(seed)
    n = len(drivers_df)
    names = drivers_df["name"].tolist()

    win_counts = np.zeros(n)
    podium_counts = np.zeros(n)
    pos_sums = np.zeros(n)
    finish_counts = np.zeros(n)
    dnf_counts = np.zeros(n)

    # Pre-extract profiles
    profiles = [
        {
            "quali": _profile_value(d, "quali_dominance"),
            "pace": _profile_value(d, "race_pace"),
            "consistency": _profile_value(d, "consistency"),
            "wet": _profile_value(d, "wet_mastery"),
            "overtaking": _profile_value(d, "overtaking"),
        }
        for _, d in drivers_df.iterrows()
    ]

    # Per-driver quantities that never change during a batch.
    base_lap = [90.0 + (1 - p["pace"]) * 4.0 for p in profiles]
    lap_noise = [0.3 + (1 - p["consistency"]) * 0.7 for p in profiles]
    wet_penalty = [5.0 * (1 - p["wet"] * 0.6) for p in profiles]
    pass_skill = [0.8 + p["overtaking"] * 0.4 for p in profiles]
    base_dnf_prob = [
        0.0005 + (1 - (0.7 + p["consistency"] * 0.3)) * 0.003 for p in profiles
    ]

    # Per-track quantities that never change during a batch.
    n_laps = track["laps"]
    tire_deg = track["tire_deg"]
    pit_loss = track["pit_loss"]
    quali_weight = track["quali_importance"]
    pass_ease = 1 - track["overtaking_diff"] * 0.8
    sc_prob = track["safety_car_prob"] / n_laps
    rain_per_lap = track["rain_prob"] / n_laps * 3

    for _ in range(n_sims):
        srng = np.random.default_rng(rng.integers(0, 2**32))

        # Qualifying: blend of one-lap and race pace plus noise, best score first
        scores = [
            p["quali"] * quali_weight + p["pace"] * (1 - quali_weight)
            + srng.normal(0, 0.08)
            for p in profiles
        ]
        positions = sorted(range(n), key=lambda i: -scores[i])

        # State (every car starts on MEDIUM)
        compounds = ["MEDIUM"] * n
        tyre_ages = [0] * n
        dnf = [False] * n
        compounds_used = [["MEDIUM"] for _ in range(n)]
        pit_stops_count = [0] * n
        sc_active = False
        sc_remaining = 0
        is_raining = False

        for lap in range(1, n_laps + 1):
            # Safety car: count down an active one, otherwise maybe deploy
            if sc_active:
                sc_remaining -= 1
                if sc_remaining <= 0:
                    sc_active = False
            elif lap > 1 and srng.random() < sc_prob:
                sc_active = True
                sc_remaining = srng.integers(2, 6)  # upper bound is exclusive

            # Rain: exactly one draw per lap, to start or to stop
            if not is_raining and srng.random() < rain_per_lap:
                is_raining = True
            elif is_raining and srng.random() < 0.2:
                is_raining = False

            lap_times = {}
            for driver_idx in positions:
                if dnf[driver_idx]:
                    continue

                # DNF check (more likely in the second half of the race)
                dnf_prob = base_dnf_prob[driver_idx]
                if lap > n_laps * 0.5:
                    dnf_prob *= 1.5
                if srng.random() < dnf_prob:
                    dnf[driver_idx] = True
                    continue

                # Pit stop
                if should_pit(tyre_ages[driver_idx], compounds[driver_idx], lap,
                              n_laps, tire_deg, is_raining, pit_loss, srng):
                    new_c = choose_compound(lap, n_laps, is_raining,
                                            compounds_used[driver_idx])
                    compounds[driver_idx] = new_c
                    compounds_used[driver_idx].append(new_c)
                    tyre_ages[driver_idx] = 0
                    pit_stops_count[driver_idx] += 1

                # Lap time
                if sc_active:
                    lt = SAFETY_CAR_LAP_TIME + srng.normal(0, 0.1)
                else:
                    lt = base_lap[driver_idx]
                    lt += compound_pace_offset(compounds[driver_idx], is_raining)
                    lt += tyre_degradation(compounds[driver_idx],
                                           tyre_ages[driver_idx], tire_deg)
                    if is_raining:
                        lt += wet_penalty[driver_idx]
                    lt += srng.normal(0, lap_noise[driver_idx])
                    lt = max(lt, 60.0)
                # Tyre age 0 after at least one stop means the car pitted this lap
                if pit_stops_count[driver_idx] > 0 and tyre_ages[driver_idx] == 0:
                    lt += pit_loss
                lap_times[driver_idx] = lt
                tyre_ages[driver_idx] += 1

            # Running order of the cars still in the race. Rebuilt on every
            # lap, including safety-car laps, so a car that retires under the
            # safety car never stays ahead of a car that is still running.
            running = [d for d in positions if not dnf[d]]

            # Overtaking (simplified): walk from the back, each car may pass
            # the one directly ahead if it was faster this lap
            if not sc_active:
                for i in range(len(running) - 1, 0, -1):
                    att, defn = running[i], running[i - 1]
                    delta = lap_times[defn] - lap_times[att]
                    if delta > 0:
                        prob = (1 - np.exp(-delta * 1.5)) * pass_ease
                        if is_raining:
                            prob *= 1.3
                        prob *= pass_skill[att]
                        if srng.random() < min(prob, 0.95):
                            running[i - 1], running[i] = att, defn

            positions = running + [d for d in positions if dnf[d]]

        # Record: places are counted among finishers only
        finishers = [d for d in positions if not dnf[d]]
        for place, idx in enumerate(finishers, start=1):
            pos_sums[idx] += place
            finish_counts[idx] += 1
            if place == 1:
                win_counts[idx] += 1
            if place <= 3:
                podium_counts[idx] += 1
        for idx, retired in enumerate(dnf):
            if retired:
                dnf_counts[idx] += 1

    # Average only where a driver finished at least once; the rest keep the
    # grid size, without triggering a divide-by-zero warning.
    finished_once = finish_counts > 0
    avg_finish = np.full(n, float(n))
    np.divide(pos_sums, finish_counts, out=avg_finish, where=finished_once)

    results = pd.DataFrame({
        "Driver": names,
        "Win %": (win_counts / n_sims * 100).round(1),
        "Podium %": (podium_counts / n_sims * 100).round(1),
        "Avg Finish": np.where(finished_once, avg_finish.round(1), n),
        "DNF %": (dnf_counts / n_sims * 100).round(1),
    }).sort_values("Win %", ascending=False).reset_index(drop=True)
    return results
# ── Load data ───────────────────────────────────────────────────────────
# Prefer the profiles published on the Hub; fall back to the file in the
# working tree when the download or the read fails.
try:
    profiles_path = download_file(DATA_REPO, "driver_profiles.parquet")
    ALL_PROFILES = pd.read_parquet(profiles_path)
except Exception as exc:
    # Deliberate best-effort fallback, but report why it was taken so a
    # broken token or repository does not go unnoticed.
    print(f"Could not load driver profiles from {DATA_REPO} ({exc!r}); using local copy.")
    ALL_PROFILES = pd.read_parquet("data/features/driver_profiles.parquet")

# Build driver list for dropdown, highest overall rating first
DRIVER_CHOICES = ALL_PROFILES.sort_values("overall_rating", ascending=False)["name"].tolist()
# Load tracks: Hub copy first, file in the working tree as fallback.
# The YAML is opened as UTF-8 explicitly so names with non-ASCII characters
# decode the same way regardless of the platform's default encoding.
try:
    tracks_path = download_file(DATA_REPO, "tracks.yaml")
    with open(tracks_path, encoding="utf-8") as f:
        TRACKS_RAW = yaml.safe_load(f)["tracks"]
except Exception as exc:
    # Deliberate best-effort fallback, but report why it was taken.
    print(f"Could not load tracks from {DATA_REPO} ({exc!r}); using local copy.")
    with open("config/tracks.yaml", encoding="utf-8") as f:
        TRACKS_RAW = yaml.safe_load(f)["tracks"]

# Display name -> key in TRACKS_RAW
TRACK_CHOICES = {v["name"]: k for k, v in TRACKS_RAW.items()}
def parse_track(track_name):
    """Return the simulation parameters of the circuit shown as ``track_name``.

    Looks the display name up in ``TRACK_CHOICES`` and copies the fields the
    simulator needs from ``TRACKS_RAW`` under the short keys it expects.
    Raises ``KeyError`` for an unknown name or a missing field.
    """
    raw = TRACKS_RAW[TRACK_CHOICES[track_name]]
    # (key used by the simulator, key in the track file)
    field_map = (
        ("name", "name"),
        ("laps", "laps"),
        ("overtaking_diff", "overtaking_difficulty"),
        ("pit_loss", "pit_time_loss_seconds"),
        ("safety_car_prob", "safety_car_probability"),
        ("tire_deg", "tire_degradation_multiplier"),
        ("rain_prob", "rain_probability"),
        ("quali_importance", "qualifying_importance"),
    )
    return {sim_key: raw[file_key] for sim_key, file_key in field_map}
def find_drivers(names):
    """Return the profile rows for ``names`` as a DataFrame, in request order.

    Each name is matched exactly against the ``name`` column of
    ``ALL_PROFILES``; only the first matching row is taken and names without
    a match are skipped.
    """
    rows = []
    for wanted in names:
        hits = ALL_PROFILES.loc[ALL_PROFILES["name"] == wanted]
        if len(hits) > 0:
            rows.append(hits.iloc[0])
    return pd.DataFrame(rows)
# ── Gradio interface ────────────────────────────────────────────────────
def run_simulation(driver_names, track_name, n_sims):
    """Gradio callback: simulate the selected grid and build the outputs.

    Args:
        driver_names: Names picked in the driver dropdown (may be ``None`` or
            empty when nothing is selected).
        track_name: Display name of the circuit.
        n_sims: Number of races to simulate (converted with ``int``).

    Returns:
        ``(figure, results, summary)`` - a Plotly bar chart of win
        probabilities, the results DataFrame and a Markdown table. When the
        input is unusable the result is ``(None, None, message)``.
    """
    # An empty selection can arrive as None; treat it like an empty list.
    if not driver_names or len(driver_names) < 2:
        return None, None, "Select at least 2 drivers."

    n_sims = int(n_sims)
    drivers = find_drivers(driver_names)
    if len(drivers) < 2:
        return None, None, "Could not find enough valid drivers."

    # A cleared or unknown circuit would otherwise surface as a KeyError.
    if track_name not in TRACK_CHOICES:
        return None, None, "Select a valid circuit."
    track = parse_track(track_name)

    # Fresh seed per click so repeated runs differ
    results = simulate_race_batch(
        drivers, track, n_sims=n_sims, seed=np.random.randint(0, 100000)
    )

    # Win probability bar chart (the top row, i.e. the favourite, in red)
    fig = go.Figure()
    colors = ["#e10600" if i == 0 else "#333" for i in range(len(results))]
    fig.add_trace(go.Bar(
        x=results["Driver"],
        y=results["Win %"],
        marker_color=colors,
        text=[f"{v:.1f}%" for v in results["Win %"]],
        textposition="outside",
    ))
    fig.update_layout(
        title=f"Win Probability — {track['name']} ({n_sims:,} simulations)",
        yaxis_title="Win %",
        template="plotly_dark",
        height=400,
        margin=dict(t=60, b=40),
        font=dict(family="monospace"),
    )

    # Format summary text as a Markdown table
    lines = [
        f"**Virtual Race at {track['name']}** ({n_sims:,} simulations)\n",
        "| Driver | Win % | Podium % | Avg Finish | DNF % |",
        "|--------|-------|----------|------------|-------|",
    ]
    lines.extend(
        f"| {row['Driver']} | {row['Win %']:.1f}% | {row['Podium %']:.1f}% "
        f"| {row['Avg Finish']:.1f} | {row['DNF %']:.1f}% |"
        for _, row in results.iterrows()
    )
    summary = "\n".join(lines) + "\n"
    return fig, results, summary
# ── Build app ───────────────────────────────────────────────────────────
# Default legendary grid
DEFAULT_DRIVERS = [
    "Max Verstappen", "Lewis Hamilton", "Michael Schumacher",
    "Ayrton Senna", "Alain Prost", "Sebastian Vettel",
    # "Häkkinen" was stored mis-encoded, which could never match a profile name
    "Fernando Alonso", "Mika Häkkinen", "Nigel Mansell", "Juan Fangio",
]
# Filter to drivers that exist in our profiles
DEFAULT_DRIVERS = [d for d in DEFAULT_DRIVERS if d in DRIVER_CHOICES]
# Page layout: intro text, driver picker, circuit + simulation count, run
# button, then the three outputs filled by run_simulation.
with gr.Blocks(
    title="F1 Virtual Race Simulator",
    theme=gr.themes.Base(primary_hue="red", neutral_hue="gray"),
    css=".gradio-container { max-width: 960px !important; }"
) as app:
    gr.Markdown(
        "# F1 Virtual Race Simulator\n"
        "Pick drivers from **any era** (1950-2025), choose a circuit, and run "
        "Monte Carlo simulations to see who would win. Driver abilities are "
        "normalized across eras using relative performance metrics."
    )
    with gr.Row():
        driver_select = gr.Dropdown(
            choices=DRIVER_CHOICES,
            value=DEFAULT_DRIVERS,
            multiselect=True,
            label="Select Drivers (2-20)",
            info="Search by name — all F1 drivers with 10+ races available",
        )
    with gr.Row():
        track_select = gr.Dropdown(
            choices=list(TRACK_CHOICES.keys()),
            # Use "Monza" when the track file has it, else the first circuit,
            # so the default is always one of the offered choices.
            value="Monza" if "Monza" in TRACK_CHOICES else next(iter(TRACK_CHOICES), None),
            label="Circuit",
        )
        sim_count = gr.Slider(
            minimum=100, maximum=10000, value=1000, step=100,
            label="Number of Simulations",
        )
    run_btn = gr.Button("Run Simulation", variant="primary")
    chart = gr.Plot(label="Win Probability")
    table = gr.DataFrame(label="Full Results")
    summary = gr.Markdown()
    run_btn.click(
        fn=run_simulation,
        inputs=[driver_select, track_select, sim_count],
        outputs=[chart, table, summary],
    )

# Start the server only when this file is executed as a script
if __name__ == "__main__":
    app.launch()