Spaces:

datamatters24
/

f1-virtual-race

Running

App Files Files Community

f1-virtual-race / app.py

datamatters24

Upload app.py with huggingface_hub

2333224 verified about 1 month ago

raw

history blame contribute delete

14.7 kB

	"""Gradio app for Virtual Race Simulator — cross-era F1 what-if races.

	Pick drivers from any era, choose a circuit, and run Monte Carlo simulations
	to see who would win in a head-to-head battle across time.
	"""

	import os
	from pathlib import Path

	import gradio as gr
	import numpy as np
	import pandas as pd
	import plotly.graph_objects as go
	import yaml
	from huggingface_hub import hf_hub_download

	# ── Download data from HF Hub ──────────────────────────────────────────
	# Optional access token read from the environment. An empty string means
	# "no token"; download_file() converts it to None before using it.
	HF_TOKEN = os.environ.get("HF_TOKEN", "")
	# Repository id handed to download_file() for the profile and track files.
	DATA_REPO = "datamatters24/f1-race-data"

	# Local directory that downloaded files are written to and re-used from.
	CACHE_DIR = Path("/tmp/virtual-race-cache")
	# exist_ok=True so an already-existing cache directory does not raise.
	CACHE_DIR.mkdir(exist_ok=True)


	def download_file(repo_id, filename, repo_type="dataset"):
	    """Return the local cache path for *filename*, downloading it if missing.

	    The file is looked up under ``CACHE_DIR``.  Only when it is not there yet
	    is it fetched from the Hub repository ``repo_id`` into that directory.
	    An empty ``HF_TOKEN`` is passed to the Hub client as ``None``.
	    """
	    cached_path = CACHE_DIR / filename
	    if cached_path.exists():
	        # Already downloaded by an earlier call: nothing to fetch.
	        return cached_path

	    hf_hub_download(
	        repo_id,
	        filename,
	        repo_type=repo_type,
	        local_dir=str(CACHE_DIR),
	        token=HF_TOKEN or None,
	    )
	    return cached_path


	# ── Inline simulation engine (self-contained for HF Space) ─────────────
	# Tyre compounds
	# Per-compound parameters read by the simulation helpers below:
	#   pace_offset - added to the lap time by compound_pace_offset()
	#   deg_rate    - per-lap-of-age penalty, scaled by the track's degradation
	#                 multiplier in tyre_degradation()
	#   max_life    - tyre age at which should_pit() forces a stop; the extra
	#                 "cliff" penalty in tyre_degradation() starts at 80% of it
	COMPOUND_DATA = {
	    "SOFT": {"pace_offset": -0.8, "deg_rate": 0.08, "max_life": 25},
	    "MEDIUM": {"pace_offset": 0.0, "deg_rate": 0.04, "max_life": 35},
	    "HARD": {"pace_offset": 0.5, "deg_rate": 0.02, "max_life": 50},
	    "INTERMEDIATE": {"pace_offset": 3.0, "deg_rate": 0.03, "max_life": 40},
	    "WET": {"pace_offset": 8.0, "deg_rate": 0.02, "max_life": 50},
	}

	# Mean lap time used for every car while the safety car is deployed
	# (same unit as the other lap times; presumably seconds - TODO confirm).
	SAFETY_CAR_LAP_TIME = 110.0


	def tyre_degradation(compound, tyre_age, track_deg):
	    """Return the lap-time penalty for a tyre of the given age.

	    The penalty grows linearly with ``tyre_age``, scaled by the compound's
	    ``deg_rate`` and by ``track_deg``.  Once the tyre is older than 80% of
	    the compound's ``max_life`` a quadratic "cliff" term is added on top.
	    Unknown compounds fall back to the MEDIUM parameters.
	    """
	    spec = COMPOUND_DATA.get(compound, COMPOUND_DATA["MEDIUM"])
	    life = spec["max_life"]
	    cliff_start = life * 0.8

	    penalty = spec["deg_rate"] * track_deg * tyre_age
	    if tyre_age > cliff_start:
	        # Fraction of the final 20% of tyre life already used up.
	        overshoot = (tyre_age - cliff_start) / (life * 0.2)
	        penalty += overshoot ** 2 * 2.0
	    return penalty


	def compound_pace_offset(compound, is_raining):
	    """Return the lap-time offset for running *compound* in the current weather.

	    Starts from the compound's ``pace_offset`` (MEDIUM data for unknown
	    compounds) and adds a mismatch penalty: 8.0 for a dry compound in the
	    rain, 15.0 for INTERMEDIATE/WET on a dry track.
	    """
	    base = COMPOUND_DATA.get(compound, COMPOUND_DATA["MEDIUM"])["pace_offset"]

	    if is_raining:
	        if compound in ("SOFT", "MEDIUM", "HARD"):
	            return base + 8.0
	    elif compound in ("INTERMEDIATE", "WET"):
	        return base + 15.0
	    return base


	def should_pit(tyre_age, compound, lap, n_laps, track_deg, is_raining, pit_loss, rng):
	    """Decide whether a car pits on this lap.

	    Rules, checked in order:
	      1. Never pit with 5 or fewer laps remaining.
	      2. Never pit before lap 5 unless it is raining.
	      3. On the wrong tyre for the weather, pit at random: probability 0.6
	         for a dry compound in the rain, 0.7 for INTERMEDIATE/WET in the dry.
	      4. Otherwise pit when the degradation of the current tyre over the
	         next five laps exceeds that of a fresh MEDIUM by more than 80% of
	         ``pit_loss``, or when the tyre has reached its ``max_life``.

	    ``rng`` is only drawn from in case 3.
	    """
	    if n_laps - lap <= 5:
	        return False
	    if not is_raining and lap < 5:
	        return False

	    # Wrong tyre for the conditions: a random, weather-driven stop.
	    if is_raining:
	        if compound in ("SOFT", "MEDIUM", "HARD"):
	            return rng.random() < 0.6
	    elif compound in ("INTERMEDIATE", "WET"):
	        return rng.random() < 0.7

	    lookahead = range(1, 6)
	    worn_cost = sum(
	        tyre_degradation(compound, tyre_age + k, track_deg) for k in lookahead
	    )
	    fresh_cost = sum(tyre_degradation("MEDIUM", k, track_deg) for k in lookahead)
	    max_life = COMPOUND_DATA.get(compound, COMPOUND_DATA["MEDIUM"])["max_life"]

	    return bool(worn_cost - fresh_cost > pit_loss * 0.8 or tyre_age >= max_life)


	def choose_compound(lap, n_laps, is_raining, used):
	    """Pick the compound to fit at a pit stop.

	    In the rain the answer is always INTERMEDIATE.  In the dry, the number of
	    laps remaining selects a preferred compound (SOFT for 20 or fewer, MEDIUM
	    for 35 or fewer, HARD otherwise); if that compound already appears in
	    ``used`` the fallback for that window is returned instead.
	    """
	    if is_raining:
	        return "INTERMEDIATE"

	    remaining = n_laps - lap
	    if remaining <= 20:
	        preferred, fallback = "SOFT", "MEDIUM"
	    elif remaining <= 35:
	        preferred, fallback = "MEDIUM", "HARD"
	    else:
	        preferred, fallback = "HARD", "MEDIUM"

	    # ``preferred`` is always a dry compound, so wet entries in ``used``
	    # can never match it and need no separate filtering.
	    return fallback if preferred in used else preferred


	def simulate_race_batch(drivers_df, track, n_sims=1000, seed=42):
	    """Run a Monte Carlo batch of races and return aggregated results.

	    Every simulated race draws a qualifying order and is then run lap by lap
	    with random safety cars, rain, retirements, pit stops and position swaps.

	    Args:
	        drivers_df: DataFrame with one row per driver.  Needs a ``name``
	            column; ``quali_dominance``, ``race_pace``, ``consistency``,
	            ``wet_mastery`` and ``overtaking`` are read per row and default
	            to 0.5 when absent.
	        track: Mapping with the keys ``laps``, ``safety_car_prob``,
	            ``rain_prob``, ``quali_importance``, ``tire_deg``, ``pit_loss``
	            and ``overtaking_diff``.
	        n_sims: Number of races to simulate.
	        seed: Seed of the master random generator; each race gets its own
	            generator seeded from it.

	    Returns:
	        DataFrame with the columns ``Driver``, ``Win %``, ``Podium %``,
	        ``Avg Finish`` and ``DNF %``, sorted by ``Win %`` descending.
	        ``Avg Finish`` is averaged over the races the driver finished and
	        equals the grid size for a driver who never finished.
	    """
	    rng = np.random.default_rng(seed)
	    n = len(drivers_df)
	    names = drivers_df["name"].tolist()

	    win_counts = np.zeros(n)
	    podium_counts = np.zeros(n)
	    pos_sums = np.zeros(n)
	    finish_counts = np.zeros(n)
	    dnf_counts = np.zeros(n)

	    # Pre-extract profiles so the hot loops work on plain dicts.
	    profiles = [
	        {
	            "quali": d.get("quali_dominance", 0.5),
	            "pace": d.get("race_pace", 0.5),
	            "consistency": d.get("consistency", 0.5),
	            "wet": d.get("wet_mastery", 0.5),
	            "overtaking": d.get("overtaking", 0.5),
	        }
	        for _, d in drivers_df.iterrows()
	    ]

	    # Track values that do not change between laps or races.
	    n_laps = track["laps"]
	    quali_weight = track["quali_importance"]
	    sc_prob = track["safety_car_prob"] / n_laps
	    rain_per_lap = track["rain_prob"] / n_laps * 3

	    for _ in range(n_sims):
	        # Independent generator per race, seeded from the master generator.
	        srng = np.random.default_rng(rng.integers(0, 2**32))

	        # Qualifying: blend of one-lap and race pace plus noise, best first.
	        quali_scores = []
	        for i, p in enumerate(profiles):
	            score = p["quali"] * quali_weight + p["pace"] * (1 - quali_weight)
	            score += srng.normal(0, 0.08)
	            quali_scores.append((i, score))
	        quali_scores.sort(key=lambda x: -x[1])
	        positions = [idx for idx, _ in quali_scores]

	        # Per-race state; everybody starts on fresh MEDIUM tyres.
	        compounds = ["MEDIUM"] * n
	        tyre_ages = [0] * n
	        total_times = [0.0] * n
	        dnf = [False] * n
	        compounds_used = [["MEDIUM"] for _ in range(n)]
	        pit_stops_count = [0] * n

	        sc_active = False
	        sc_remaining = 0
	        is_raining = False

	        for lap in range(1, n_laps + 1):
	            # Safety car: count down an active one, otherwise maybe deploy
	            # a new one (never on lap 1).
	            if sc_active:
	                sc_remaining -= 1
	                if sc_remaining <= 0:
	                    sc_active = False
	            elif lap > 1 and srng.random() < sc_prob:
	                sc_active = True
	                sc_remaining = srng.integers(2, 6)

	            # Rain: may start on a dry lap, stops with probability 0.2 per
	            # wet lap.
	            if not is_raining and srng.random() < rain_per_lap:
	                is_raining = True
	            elif is_raining and srng.random() < 0.2:
	                is_raining = False

	            lap_times = {}
	            for driver_idx in positions:
	                if dnf[driver_idx]:
	                    continue

	                # DNF check: less consistent drivers retire more often and
	                # the risk is 1.5x in the second half of the race.
	                reliability = 0.7 + profiles[driver_idx]["consistency"] * 0.3
	                dnf_prob = 0.0005 + (1 - reliability) * 0.003
	                if lap > n_laps * 0.5:
	                    dnf_prob *= 1.5
	                if srng.random() < dnf_prob:
	                    dnf[driver_idx] = True
	                    continue

	                # Pit stop
	                if should_pit(tyre_ages[driver_idx], compounds[driver_idx], lap,
	                              n_laps, track["tire_deg"], is_raining,
	                              track["pit_loss"], srng):
	                    new_c = choose_compound(lap, n_laps, is_raining,
	                                            compounds_used[driver_idx])
	                    compounds[driver_idx] = new_c
	                    compounds_used[driver_idx].append(new_c)
	                    tyre_ages[driver_idx] = 0
	                    pit_stops_count[driver_idx] += 1

	                # Lap time
	                if sc_active:
	                    lt = SAFETY_CAR_LAP_TIME + srng.normal(0, 0.1)
	                else:
	                    p = profiles[driver_idx]
	                    lt = 90.0 + (1 - p["pace"]) * 4.0
	                    lt += compound_pace_offset(compounds[driver_idx], is_raining)
	                    lt += tyre_degradation(compounds[driver_idx],
	                                           tyre_ages[driver_idx], track["tire_deg"])
	                    if is_raining:
	                        lt += 5.0 * (1 - p["wet"] * 0.6)
	                    noise = 0.3 + (1 - p["consistency"]) * 0.7
	                    lt += srng.normal(0, noise)
	                    lt = max(lt, 60.0)

	                # Tyre age 0 after at least one stop means the car pitted on
	                # this lap, so charge the pit-lane time loss.
	                if pit_stops_count[driver_idx] > 0 and tyre_ages[driver_idx] == 0:
	                    lt += track["pit_loss"]

	                lap_times[driver_idx] = lt
	                total_times[driver_idx] += lt
	                tyre_ages[driver_idx] += 1

	            # Running order of the cars still in the race.  It is rebuilt on
	            # every lap, safety-car laps included, so retired cars always sit
	            # behind the finishers when the race result is recorded.
	            active = [p for p in positions if not dnf[p]]

	            # Overtaking (simplified): walk from the back of the field and
	            # let each faster car try to pass the car directly ahead.
	            if not sc_active:
	                for i in range(len(active) - 1, 0, -1):
	                    att, defn = active[i], active[i - 1]
	                    if att in lap_times and defn in lap_times:
	                        delta = lap_times[defn] - lap_times[att]
	                        if delta > 0:
	                            prob = (1 - np.exp(-delta * 1.5)) * (1 - track["overtaking_diff"] * 0.8)
	                            if is_raining:
	                                prob *= 1.3
	                            # Attacker skill scales the chance by 0.8x-1.2x
	                            # for an overtaking rating between 0 and 1.
	                            prob *= 0.8 + profiles[att]["overtaking"] * 0.4
	                            if srng.random() < min(prob, 0.95):
	                                active[i - 1], active[i] = active[i], active[i - 1]

	            positions = active + [p for p in positions if dnf[p]]

	        # Record
	        for pos, idx in enumerate(positions):
	            if not dnf[idx]:
	                fp = pos + 1
	                pos_sums[idx] += fp
	                finish_counts[idx] += 1
	                if fp == 1:
	                    win_counts[idx] += 1
	                if fp <= 3:
	                    podium_counts[idx] += 1
	            else:
	                dnf_counts[idx] += 1

	    # Divide only where the driver finished at least once; the other slots
	    # keep the pre-filled grid size (this avoids a 0/0 RuntimeWarning).
	    avg_finish = np.divide(
	        pos_sums,
	        finish_counts,
	        out=np.full(n, float(n)),
	        where=finish_counts > 0,
	    ).round(1)

	    results = pd.DataFrame({
	        "Driver": names,
	        "Win %": (win_counts / n_sims * 100).round(1),
	        "Podium %": (podium_counts / n_sims * 100).round(1),
	        "Avg Finish": avg_finish,
	        "DNF %": (dnf_counts / n_sims * 100).round(1),
	    }).sort_values("Win %", ascending=False).reset_index(drop=True)

	    return results


	# ── Load data ───────────────────────────────────────────────────────────
	# Driver profiles: try the file fetched through download_file() first; if
	# the download or the read raises, fall back to a parquet file at a
	# relative path.
	# NOTE(review): the broad `except Exception` hides why the first attempt
	# failed - consider logging the error before falling back.
	try:
	    profiles_path = download_file(DATA_REPO, "driver_profiles.parquet")
	    ALL_PROFILES = pd.read_parquet(profiles_path)
	except Exception:
	    ALL_PROFILES = pd.read_parquet("data/features/driver_profiles.parquet")

	# Build driver list for dropdown (names ordered by overall_rating, best first)
	DRIVER_CHOICES = ALL_PROFILES.sort_values("overall_rating", ascending=False)["name"].tolist()

	# Load tracks: same download-then-fallback pattern as the profiles above.
	# Both sources are YAML documents with a top-level "tracks" mapping.
	try:
	    tracks_path = download_file(DATA_REPO, "tracks.yaml")
	    with open(tracks_path) as f:
	        TRACKS_RAW = yaml.safe_load(f)["tracks"]
	except Exception:
	    with open("config/tracks.yaml") as f:
	        TRACKS_RAW = yaml.safe_load(f)["tracks"]

	# Display name -> key in TRACKS_RAW, used by parse_track() and the dropdown.
	TRACK_CHOICES = {v["name"]: k for k, v in TRACKS_RAW.items()}


	def parse_track(track_name):
	    """Build the track dict used by the simulator from a display name.

	    ``track_name`` is resolved to its config key through ``TRACK_CHOICES``
	    and the matching ``TRACKS_RAW`` entry is copied into a new dict under
	    the short key names the simulation code reads.  A missing name or a
	    missing config field raises ``KeyError``.
	    """
	    raw = TRACKS_RAW[TRACK_CHOICES[track_name]]

	    # simulator key -> key in the tracks config
	    source_keys = {
	        "name": "name",
	        "laps": "laps",
	        "overtaking_diff": "overtaking_difficulty",
	        "pit_loss": "pit_time_loss_seconds",
	        "safety_car_prob": "safety_car_probability",
	        "tire_deg": "tire_degradation_multiplier",
	        "rain_prob": "rain_probability",
	        "quali_importance": "qualifying_importance",
	    }
	    return {sim_key: raw[cfg_key] for sim_key, cfg_key in source_keys.items()}


	def find_drivers(names):
	    """Return a DataFrame holding the profile row of each requested driver.

	    Rows keep the order of ``names``.  A name with no exact match in
	    ``ALL_PROFILES["name"]`` is skipped; when a name matches several rows
	    only the first one is taken.
	    """
	    candidates = (ALL_PROFILES[ALL_PROFILES["name"] == name] for name in names)
	    rows = [hits.iloc[0] for hits in candidates if not hits.empty]
	    return pd.DataFrame(rows)


	# ── Gradio interface ────────────────────────────────────────────────────
	def run_simulation(driver_names, track_name, n_sims):
	    """Gradio callback: simulate the selected grid and build the outputs.

	    Args:
	        driver_names: Driver names picked in the dropdown; ``None`` or an
	            empty selection is treated like "fewer than two drivers".
	        track_name: Display name of the circuit (a key of ``TRACK_CHOICES``).
	        n_sims: Number of simulations; converted with ``int()``.

	    Returns:
	        A ``(figure, results, summary)`` tuple: a Plotly bar chart of win
	        probabilities, the results DataFrame and a Markdown summary table.
	        When fewer than two valid drivers are available, the first two
	        items are ``None`` and the third is an explanatory message.
	    """
	    if not driver_names or len(driver_names) < 2:
	        return None, None, "Select at least 2 drivers."

	    n_sims = int(n_sims)
	    drivers = find_drivers(driver_names)
	    if len(drivers) < 2:
	        return None, None, "Could not find enough valid drivers."

	    track = parse_track(track_name)
	    # A fresh random seed per click, so repeated runs give different samples.
	    results = simulate_race_batch(
	        drivers, track, n_sims=n_sims, seed=np.random.randint(0, 100000)
	    )

	    # Win probability bar chart; the first (highest win %) bar is highlighted.
	    fig = go.Figure()
	    colors = ["#e10600" if i == 0 else "#333" for i in range(len(results))]
	    fig.add_trace(go.Bar(
	        x=results["Driver"],
	        y=results["Win %"],
	        marker_color=colors,
	        text=[f"{v:.1f}%" for v in results["Win %"]],
	        textposition="outside",
	    ))
	    fig.update_layout(
	        title=f"Win Probability — {track['name']} ({n_sims:,} simulations)",
	        yaxis_title="Win %",
	        template="plotly_dark",
	        height=400,
	        margin=dict(t=60, b=40),
	        font=dict(family="monospace"),
	    )

	    # Markdown summary.  Column separators are plain "|" characters: a
	    # backslash in front of them is not a valid Python escape and would end
	    # up as a literal backslash in the rendered table.
	    lines = [
	        f"Virtual Race at {track['name']} ({n_sims:,} simulations)",
	        "",
	        "| Driver | Win % | Podium % | Avg Finish | DNF % |",
	        "|--------|-------|----------|------------|-------|",
	    ]
	    lines.extend(
	        f"| {row['Driver']} | {row['Win %']:.1f}% | {row['Podium %']:.1f}% "
	        f"| {row['Avg Finish']:.1f} | {row['DNF %']:.1f}% |"
	        for _, row in results.iterrows()
	    )
	    summary = "\n".join(lines) + "\n"

	    return fig, results, summary


	# ── Build app ───────────────────────────────────────────────────────────
	# Default legendary grid: the drivers pre-selected in the dropdown.
	DEFAULT_DRIVERS = [
	    "Max Verstappen", "Lewis Hamilton", "Michael Schumacher",
	    "Ayrton Senna", "Alain Prost", "Sebastian Vettel",
	    "Fernando Alonso", "Mika Häkkinen", "Nigel Mansell", "Juan Fangio",
	]
	# Filter to drivers that exist in our profiles: names are compared exactly,
	# so an entry spelled differently from the profile data is silently dropped.
	DEFAULT_DRIVERS = [d for d in DEFAULT_DRIVERS if d in DRIVER_CHOICES]

	# Page layout: intro text, driver picker, circuit + simulation-count row,
	# run button, then the three outputs filled in by run_simulation().
	with gr.Blocks(
	    title="F1 Virtual Race Simulator",
	    theme=gr.themes.Base(primary_hue="red", neutral_hue="gray"),
	    css=".gradio-container { max-width: 960px !important; }"
	) as app:
	    gr.Markdown(
	        "# F1 Virtual Race Simulator\n"
	        "Pick drivers from any era (1950-2025), choose a circuit, and run "
	        "Monte Carlo simulations to see who would win. Driver abilities are "
	        "normalized across eras using relative performance metrics."
	    )

	    with gr.Row():
	        # Multi-select over every driver name in the profile data.
	        driver_select = gr.Dropdown(
	            choices=DRIVER_CHOICES,
	            value=DEFAULT_DRIVERS,
	            multiselect=True,
	            label="Select Drivers (2-20)",
	            info="Search by name — all F1 drivers with 10+ races available",
	        )

	    with gr.Row():
	        # NOTE(review): the default "Monza" is assumed to be one of the
	        # track display names in TRACK_CHOICES - confirm against the config.
	        track_select = gr.Dropdown(
	            choices=list(TRACK_CHOICES.keys()),
	            value="Monza",
	            label="Circuit",
	        )
	        sim_count = gr.Slider(
	            minimum=100, maximum=10000, value=1000, step=100,
	            label="Number of Simulations",
	        )

	    run_btn = gr.Button("Run Simulation", variant="primary")

	    # Output components, in the order run_simulation() returns its values.
	    chart = gr.Plot(label="Win Probability")
	    table = gr.DataFrame(label="Full Results")
	    summary = gr.Markdown()

	    # Wire the button: the three inputs map to run_simulation's parameters,
	    # its three return values map to the three outputs.
	    run_btn.click(
	        fn=run_simulation,
	        inputs=[driver_select, track_select, sim_count],
	        outputs=[chart, table, summary],
	    )

	# Start the Gradio server only when the file is executed as a script.
	if __name__ == "__main__":
	    app.launch()