""" Hyrox Race Time Predictor — Gradio app for Hugging Face Spaces. Loads the demo regressor + demo classifier (trained without cluster features, so a real prospective athlete can fill out the form without having raced before) and predicts: 1. Expected total finish time in minutes 2. Probability of finishing under 90 minutes Required files in the Space repo: - app.py (this file) - requirements.txt - hyrox_demo_regressor.pkl (from notebook Part 6.5) - hyrox_demo_classifier.pkl (from notebook Part 6.5) - hyrox_demo_scaler.pkl (from notebook Part 6.5) - hyrox_demo_features.pkl (from notebook Part 6.5) - hyrox_demo_options.pkl (from notebook Part 6.5) """ import pickle from pathlib import Path import gradio as gr import numpy as np import pandas as pd # -------------------------------------------------------------------------- # Load model artifacts at startup # -------------------------------------------------------------------------- HERE = Path(__file__).parent with open(HERE / "hyrox_demo_regressor.pkl", "rb") as f: regressor = pickle.load(f) with open(HERE / "hyrox_demo_classifier.pkl", "rb") as f: classifier = pickle.load(f) with open(HERE / "hyrox_demo_scaler.pkl", "rb") as f: scaler = pickle.load(f) with open(HERE / "hyrox_demo_features.pkl", "rb") as f: feature_columns = pickle.load(f) with open(HERE / "hyrox_demo_options.pkl", "rb") as f: options = pickle.load(f) # Recreate the helpers the notebook used so a single-row prediction lines up def age_midpoint(s): try: parts = str(s).replace("+", "").split("-") if len(parts) == 2: return (int(parts[0]) + int(parts[1])) / 2 if len(parts) == 1: return float(parts[0]) except Exception: return np.nan return np.nan def format_minutes(total_minutes: float) -> str: """Convert a float number of minutes into 'Xh YYm ZZs'.""" total_seconds = int(round(total_minutes * 60)) h = total_seconds // 3600 m = (total_seconds % 3600) // 60 s = total_seconds % 60 if h: return f"{h}h {m:02d}m {s:02d}s" return f"{m}m {s:02d}s" # -------------------------------------------------------------------------- # Prediction function # -------------------------------------------------------------------------- def predict(gender, age_group, division, region, year, event_size): """Run a single prediction and return a Markdown summary card.""" # Build a one-row DataFrame matching what X_demo looked like before one-hot row = pd.DataFrame([{ "age_numeric": age_midpoint(age_group), "is_male": int(gender == "male"), "year": float(year), "event_size": float(event_size), "gender": gender, "age_group": age_group, "division": division, "region": region, }]) # One-hot encode and reindex to the exact training column set row_numeric = row[["age_numeric", "is_male", "year", "event_size"]] row_categorical = pd.get_dummies(row[["gender", "age_group", "division", "region"]], drop_first=True) X_one = pd.concat([row_numeric, row_categorical], axis=1) X_one = X_one.reindex(columns=feature_columns, fill_value=0) # Regression: predicted total time in seconds → minutes pred_seconds = float(regressor.predict(X_one)[0]) pred_minutes = pred_seconds / 60 # Classification: probability of finishing under 90 minutes X_one_scaled = scaler.transform(X_one) prob_under_90 = float(classifier.predict_proba(X_one_scaled)[0, 1]) # Headline verdict if pred_minutes < 90: verdict = "🏃 On pace to break 90 minutes" verdict_color = "#16a34a" # green elif pred_minutes < 100: verdict = "⏱️ Borderline — within striking distance of 90" verdict_color = "#ca8a04" # amber else: verdict = "🐢 Likely above 90 minutes" verdict_color = "#dc2626" # red delta = pred_minutes - 90 delta_str = f"+{delta:.1f} min over 90" if delta >= 0 else f"{delta:.1f} min under 90" # Build the output card as Markdown card = f""" ### Prediction