| """ |
| Hyrox Race Time Predictor — Gradio app for Hugging Face Spaces. |
| |
| Loads the demo regressor + demo classifier (trained without cluster features, |
| so a real prospective athlete can fill out the form without having raced before) |
| and predicts: |
| 1. Expected total finish time in minutes |
| 2. Probability of finishing under 90 minutes |
| |
| Required files in the Space repo: |
| - app.py (this file) |
| - requirements.txt |
| - hyrox_demo_regressor.pkl (from notebook Part 6.5) |
| - hyrox_demo_classifier.pkl (from notebook Part 6.5) |
| - hyrox_demo_scaler.pkl (from notebook Part 6.5) |
| - hyrox_demo_features.pkl (from notebook Part 6.5) |
| - hyrox_demo_options.pkl (from notebook Part 6.5) |
| """ |
|
|
| import pickle |
| from pathlib import Path |
|
|
| import gradio as gr |
| import numpy as np |
| import pandas as pd |
|
|
| |
| |
| |
# Directory containing this script; every pickled artifact is read from here.
HERE = Path(__file__).parent


def _load_artifact(filename):
    """Unpickle and return the object stored in ``HERE / filename``.

    NOTE: unpickling executes arbitrary code embedded in the file, so only
    artifacts produced by the project's own notebook should be placed here.
    """
    with open(HERE / filename, "rb") as handle:
        return pickle.load(handle)


# Model that predicts the total finish time (its output is treated as seconds).
regressor = _load_artifact("hyrox_demo_regressor.pkl")
# Model whose predict_proba output is shown as the under-90-minutes probability.
classifier = _load_artifact("hyrox_demo_classifier.pkl")
# Transformer applied to the features before they are passed to the classifier.
scaler = _load_artifact("hyrox_demo_scaler.pkl")
# Column labels the encoded input row is re-indexed to before prediction.
feature_columns = _load_artifact("hyrox_demo_features.pkl")
# Dropdown choices and slider bounds/defaults used to build the form.
options = _load_artifact("hyrox_demo_options.pkl")
|
|
| |
def age_midpoint(s) -> float:
    """Convert an age-group label into a single representative age.

    The label is converted with ``str()``, every ``"+"`` is removed, and the
    remainder is split on ``"-"``:

    * two parts (``"25-29"``) -> mean of the two integer bounds (``27.0``)
    * one part (``"85+"``, ``"30"``) -> that number as a float (``85.0``)
    * anything else, or parts that are not numeric -> ``nan``

    Args:
        s: Age-group label; any object is accepted and stringified.

    Returns:
        The representative age as a float, or ``numpy.nan`` when the label
        cannot be parsed.
    """
    parts = str(s).replace("+", "").split("-")
    try:
        if len(parts) == 2:
            return (int(parts[0]) + int(parts[1])) / 2
        if len(parts) == 1:
            return float(parts[0])
    except ValueError:
        # Only the numeric conversions can fail here; a non-numeric label is
        # reported as "unknown age" instead of raising.
        return np.nan
    # Three or more dash-separated parts: not a recognised label shape.
    return np.nan
|
|
|
|
def format_minutes(total_minutes: float) -> str:
    """Format a duration given in minutes as a human-readable string.

    The value is rounded to the nearest whole second first. Durations of one
    hour or more are rendered as ``'Xh YYm ZZs'``; shorter ones as ``'Ym ZZs'``.
    Negative durations are rendered with a single leading ``'-'``.

    Args:
        total_minutes: Duration in minutes (may be fractional).

    Returns:
        The formatted duration.
    """
    total_seconds = int(round(total_minutes * 60))
    # Split the magnitude, not the signed value: floor division and modulo on
    # a negative number would yield e.g. "-1h 58m 30s" for -1.5 minutes.
    sign = "-" if total_seconds < 0 else ""
    minutes, seconds = divmod(abs(total_seconds), 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        return f"{sign}{hours}h {minutes:02d}m {seconds:02d}s"
    return f"{sign}{minutes}m {seconds:02d}s"
|
|
|
|
| |
| |
| |
def _encode_inputs(gender, age_group, division, region, year, event_size):
    """Build the one-row feature frame, aligned to ``feature_columns``."""
    row = pd.DataFrame([{
        "age_numeric": age_midpoint(age_group),
        "is_male": int(gender == "male"),
        "year": float(year),
        "event_size": float(event_size),
        "gender": gender,
        "age_group": age_group,
        "division": division,
        "region": region,
    }])

    row_numeric = row[["age_numeric", "is_male", "year", "event_size"]]
    # drop_first must be False here. With a single row each categorical column
    # has exactly one level, so drop_first=True removes that level and yields
    # no dummy columns at all; every one-hot feature would then be filled with
    # 0 by the reindex below and the selected categories would be ignored.
    # Encoding all levels and re-indexing to the saved column list keeps the
    # dummies the model knows and discards any it does not.
    # NOTE(review): assumes feature_columns uses pandas' default
    # "<column>_<value>" dummy names - confirm against the training notebook.
    row_categorical = pd.get_dummies(
        row[["gender", "age_group", "division", "region"]],
        drop_first=False,
    )
    encoded = pd.concat([row_numeric, row_categorical], axis=1)
    return encoded.reindex(columns=feature_columns, fill_value=0)


def predict(gender, age_group, division, region, year, event_size):
    """Run a single prediction and return a Markdown summary card.

    Args:
        gender: Selected gender; ``"male"`` sets the ``is_male`` feature to 1.
        age_group: Age-group label, also converted to a numeric midpoint.
        division: Selected division.
        region: Selected region.
        year: Race year (converted to float).
        event_size: Number of athletes at the event (converted to float).

    Returns:
        A Markdown/HTML string showing the predicted finish time, the
        probability of finishing under 90 minutes, the margin relative to
        90 minutes and a colour-coded verdict.
    """
    X_one = _encode_inputs(gender, age_group, division, region, year, event_size)

    # The regressor's output is treated as seconds and converted to minutes.
    pred_seconds = float(regressor.predict(X_one)[0])
    pred_minutes = pred_seconds / 60

    # The classifier receives the scaled features; column 1 of predict_proba
    # is displayed as the probability of finishing under 90 minutes.
    X_one_scaled = scaler.transform(X_one)
    prob_under_90 = float(classifier.predict_proba(X_one_scaled)[0, 1])

    # NOTE(review): the verdict emoji were mis-encoded in the source. The
    # stopwatch and turtle are recoverable from the remaining bytes; the
    # first one is not, so the flag below is a best guess - confirm the glyph.
    if pred_minutes < 90:
        verdict = "🏁 On pace to break 90 minutes"
        verdict_color = "#16a34a"
    elif pred_minutes < 100:
        verdict = "⏱️ Borderline — within striking distance of 90"
        verdict_color = "#ca8a04"
    else:
        verdict = "🐢 Likely above 90 minutes"
        verdict_color = "#dc2626"

    delta = pred_minutes - 90
    delta_str = f"+{delta:.1f} min over 90" if delta >= 0 else f"{delta:.1f} min under 90"

    # The card text stays flush-left: indented lines would be rendered by
    # Markdown as a code block.
    card = f"""
### Prediction

<div style="padding:14px 18px; border-radius:10px; background:#F9FAFB; border:1px solid #E5E7EB;">

**Predicted finish time:** <span style="font-size:1.4em;"><b>{format_minutes(pred_minutes)}</b></span> ({pred_minutes:.1f} min)

**Probability of finishing under 90 minutes:** <span style="font-size:1.4em;"><b>{prob_under_90 * 100:.0f}%</b></span>

**Margin vs. 90 min target:** {delta_str}

<div style="margin-top:10px; padding:10px 14px; border-radius:8px; background:white; border-left:5px solid {verdict_color}; font-size:1.05em;">
<b>{verdict}</b>
</div>

</div>

<sub>Predictions come from a Gradient Boosting regressor and a Logistic Regression classifier trained on ~92,000 Hyrox results. The demo models use demographics + event metadata only (no race-split features), so anyone can use this — but the production assignment regressor in the linked notebook is more accurate because it also uses athlete-archetype clusters derived from past race splits.</sub>
"""
    return card
|
|
|
|
| |
| |
| |
# Caps the width of the app and centres it on the page.
custom_css = """
.gradio-container {max-width: 920px; margin: auto;}
"""

# Markdown copy is kept at module level and flush-left so the rendered text
# never depends on how deeply the UI code below is indented.
# NOTE(review): the title emoji were mis-encoded in the source. The first
# sequence decodes to the weightlifter; the trailing one could not be
# recovered, so the runner is a best guess - confirm the intended glyph.
_INTRO_MD = """
# Hyrox Race Time Predictor 🏋️‍♂️🏃
Will you finish a Hyrox race in under 90 minutes? Fill in the form below — the model
will predict your expected total time and the probability you'll cross the line under
the 90-minute mark.
"""

_HOW_IT_WORKS_MD = """
---
### How this works
1. Your inputs are encoded the same way the training data was (one-hot + numeric).
2. A **Gradient Boosting regressor** estimates total race time in seconds.
3. A **Logistic Regression classifier**, trained directly on the binary "did this athlete finish under 90 min?" label, estimates the probability shown.
4. The two models were trained on ~92,000 cleaned Hyrox results from [jgug05/hyrox-results](https://www.kaggle.com/datasets/jgug05/hyrox-results).

### Caveats
- The probability calibration is only as good as the training data — Hyrox times have changed event-to-event and year-to-year.
- Cluster-based features (which capture athlete archetype from past race splits) were intentionally removed for this demo so the form stays usable.
"""


with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Hyrox Race Time Predictor") as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        # Left column: the input form.
        with gr.Column():
            gender = gr.Dropdown(
                choices=options["gender"],
                # Prefer "male" when it is offered, otherwise the first choice.
                value="male" if "male" in options["gender"] else options["gender"][0],
                label="Gender",
            )
            age_group = gr.Dropdown(
                choices=options["age_group"],
                # Default to the entry in the middle of the list.
                value=options["age_group"][len(options["age_group"]) // 2],
                label="Age group",
            )
            division = gr.Dropdown(
                choices=options["division"],
                value=options["division"][0],
                label="Division",
                info="Open / Pro / Doubles / Relay etc.",
            )
            region = gr.Dropdown(
                choices=options["region"],
                # Prefer "Europe" when it is offered, otherwise the first choice.
                value="Europe" if "Europe" in options["region"] else options["region"][0],
                label="Region",
            )
            year = gr.Slider(
                minimum=options["year_min"],
                maximum=options["year_max"],
                value=options["year_max"],
                step=1,
                label="Race year",
            )
            event_size = gr.Slider(
                minimum=100,
                maximum=5000,
                value=options["event_size_median"],
                step=50,
                label="Event size (number of athletes)",
                info="Bigger flagship events tend to have slightly different field profiles.",
            )
            predict_btn = gr.Button("Predict my finish time", variant="primary")

        # Right column: the rendered prediction card.
        with gr.Column():
            output = gr.Markdown()

    # The inputs are passed positionally, in the order predict() declares them.
    predict_btn.click(
        fn=predict,
        inputs=[gender, age_group, division, region, year, event_size],
        outputs=output,
    )

    gr.Markdown(_HOW_IT_WORKS_MD)


if __name__ == "__main__":
    demo.launch()
|
|