# app.py — uploaded by Finish-him with huggingface_hub (commit 2e84ab9, verified)
"""
Dota 2 Edge โ€” Multi-Model Ensemble Prediction API v3
=====================================================
HuggingFace Space serving V1, V2, and V3 (temporal CV) models.
V3 = production-grade, leak-free temporal cross-validation.
"""
import gradio as gr
import pickle
import numpy as np
import json
import os
from pathlib import Path
# ── Model directories ──
# One directory per model generation; bundles that are missing on disk are
# simply skipped at load time (see the loading loops below).
MODEL_DIR_V1 = Path("models")
MODEL_DIR_V2 = Path("models_v2")
MODEL_DIR_V3 = Path("models_v3")
# ── V3 Moments (production) ──
# Moment key (used in the pickle filename) -> human-readable label.
V3_MOMENTS = {
    'm5': '@5 minutes',
    'm8': '@8 minutes',
    'm10': '@10 minutes',
    'm15': '@15 minutes',
    'm20': '@20 minutes',
    'm25': '@25 minutes',
    'm30': '@30 minutes',
}
# ── V3 Feature columns ──
# The 19 temporal features the V3 models consume. NOTE(review): the *_delta
# names are presumably Radiant-minus-Dire differentials — confirm against the
# training pipeline; at prediction time the order actually used comes from
# each bundle's 'feature_cols', not this list.
V3_FEATURES = [
    'gold_delta', 'xp_delta', 'lh_delta', 'dn_delta',
    'rad_gpm', 'dire_gpm', 'gpm_delta',
    'rad_xpm', 'dire_xpm', 'xpm_delta',
    'carry_gap', 'support_gap',
    'cs_eff_delta', 'deny_delta',
    'gold_log_ratio', 'xp_log_ratio', 'gold_lead_pct',
    'gold_momentum', 'xp_momentum',
]
# ── V1 Moments (legacy) ──
# Note the inconsistent key scheme ('m1' = 10 minutes, 'm2' = 20, ...), kept
# for compatibility with the legacy pickle filenames.
V1_MOMENTS = {
    'm0': 'Draft (pre-game)',
    'm1': '@10 minutes',
    'm15': '@15 minutes',
    'm2': '@20 minutes',
    'm3': '@30 minutes',
}
def _load_bundles(model_dir: Path, filename_template: str, moment_keys) -> dict:
    """Load pickled model bundles from *model_dir*, skipping missing files.

    Args:
        model_dir: Directory containing the ``.pkl`` bundle files.
        filename_template: ``str.format`` template with one ``{}`` slot for
            the moment key, e.g. ``"temporal_v3_{}.pkl"``.
        moment_keys: Iterable of moment keys (e.g. ``'m10'``) to look for.

    Returns:
        Mapping of moment key -> unpickled bundle for every file that exists.
    """
    bundles = {}
    for mk in moment_keys:
        path = model_dir / filename_template.format(mk)
        if path.exists():
            # NOTE(review): pickle.load can execute arbitrary code — only
            # acceptable because these files ship with the Space itself and
            # never come from user input.
            with open(path, 'rb') as f:
                bundles[mk] = pickle.load(f)
    return bundles

# ── Load V3 bundles (production) ──
v3_bundles = _load_bundles(MODEL_DIR_V3, "temporal_v3_{}.pkl", V3_MOMENTS)
# ── Load V1 bundles (legacy) ──
v1_bundles = _load_bundles(MODEL_DIR_V1, "ensemble_{}.pkl", V1_MOMENTS)
# ── Load V2 bundles ──
V2_MOMENTS = {'m0_5': '@5min', 'm1_8': '@8min', 'm1': '@10min', 'm15': '@15min',
              'm2': '@20min', 'm25': '@25min', 'm3': '@30min'}
v2_bundles = _load_bundles(MODEL_DIR_V2, "ensemble_{}.pkl", V2_MOMENTS)
# Load evaluation summaries (optional; versions with no summary file are skipped).
summaries = {}
for name, path in [('v1', MODEL_DIR_V1 / 'ensemble_summary.json'),
                   ('v2', MODEL_DIR_V2 / 'ensemble_v2_summary.json'),
                   ('v3', MODEL_DIR_V3 / 'ensemble_v3_summary.json')]:
    if path.exists():
        with open(path) as f:
            summaries[name] = json.load(f)
def predict_v3(minute_key: str, features_json: str) -> dict:
    """V3 production prediction using the temporal-CV model bundles.

    Args:
        minute_key: Moment key such as ``'m10'`` (see ``V3_MOMENTS``).
        features_json: JSON object string (or an already-parsed dict) mapping
            feature names to values; missing features default to 0.0.

    Returns:
        Dict with per-model probabilities, the stacking-ensemble output, and
        the recommended (LogReg) Radiant win probability — or an
        ``{"error": ...}`` dict for an unknown moment / malformed JSON.
    """
    if minute_key not in v3_bundles:
        return {"error": f"Moment '{minute_key}' not loaded. Available: {list(v3_bundles.keys())}"}
    bundle = v3_bundles[minute_key]
    feature_cols = bundle['feature_cols']
    try:
        features = json.loads(features_json) if isinstance(features_json, str) else features_json
    except json.JSONDecodeError as e:
        return {"error": f"Invalid JSON: {e}"}
    # Build the single-row feature matrix in the bundle's training order;
    # absent features fall back to 0.0.
    X = np.array([[features.get(col, 0.0) for col in feature_cols]])
    X_scaled = bundle['scaler'].transform(X)
    # Per-model probabilities. Fix: keep full precision for the meta-learner
    # and round only for display — previously the rounded values were fed to
    # the stacker, discarding precision it was not trained with.
    raw_probs = {}
    for name, model in bundle['models'].items():
        # LogReg/MLP were fit on scaled features; the tree models on raw ones.
        X_in = X_scaled if name in ('logreg', 'mlp') else X
        raw_probs[name] = float(model.predict_proba(X_in)[:, 1][0])
    probs = {name: round(p, 4) for name, p in raw_probs.items()}
    # Stacking: the meta-learner consumes the base-model probabilities in
    # the bundle's model order.
    stack_input = np.array([[raw_probs[n] for n in bundle['models']]])
    stacking = round(float(bundle['meta_learner'].predict_proba(stack_input)[:, 1][0]), 4)
    # Best individual = LogReg (proven by temporal CV); fall back to stacking
    # if a bundle lacks a logreg model.
    logreg_prob = probs.get('logreg', stacking)
    return {
        "version": "v3_temporal_cv",
        "moment": minute_key,
        "moment_name": V3_MOMENTS[minute_key],
        "n_features": len(feature_cols),
        "individual_models": probs,
        "stacking_ensemble": stacking,
        "logreg_prob": logreg_prob,
        "recommended_prob": logreg_prob,  # LogReg is best with temporal CV
        "prediction": "Radiant" if logreg_prob > 0.5 else "Dire",
        "confidence": round(abs(logreg_prob - 0.5) * 2, 4),
        "holdout_auc": bundle['results'].get('logreg', {}).get('auc', 'N/A'),
    }
def predict_v1(moment: str, features_json: str) -> dict:
    """V1 legacy prediction.

    Args:
        moment: Legacy moment key (see ``V1_MOMENTS``).
        features_json: JSON object string (or an already-parsed dict) mapping
            feature names to values; missing features default to 0.0.

    Returns:
        Dict with per-model probabilities and the stacking-ensemble verdict,
        or an ``{"error": ...}`` dict for an unknown moment / malformed JSON.
    """
    if moment not in v1_bundles:
        return {"error": f"V1 moment '{moment}' not loaded. Available: {list(v1_bundles.keys())}"}
    bundle = v1_bundles[moment]
    feature_cols = bundle['feature_cols']
    try:
        features = json.loads(features_json) if isinstance(features_json, str) else features_json
    except json.JSONDecodeError as e:
        return {"error": f"Invalid JSON: {e}"}
    X = np.array([[features.get(col, 0.0) for col in feature_cols]])
    X_scaled = bundle['scaler'].transform(X)
    # Fix: feed unrounded base probabilities to the meta-learner; round only
    # for display (previously the stacker received 4-decimal-rounded inputs).
    raw_probs = {}
    for name in bundle['base_model_names']:
        model = bundle['models'][name]
        # LogReg/MLP expect scaled features; the remaining models raw ones.
        X_in = X_scaled if name in ('logreg', 'mlp') else X
        raw_probs[name] = float(model.predict_proba(X_in)[:, 1][0])
    probs = {name: round(p, 4) for name, p in raw_probs.items()}
    stack_input = np.array([[raw_probs[n] for n in bundle['base_model_names']]])
    stacking = round(float(bundle['meta_learner'].predict_proba(stack_input)[:, 1][0]), 4)
    return {
        "version": "v1",
        "moment": moment,
        "moment_name": V1_MOMENTS.get(moment, moment),
        "individual_models": probs,
        "stacking_ensemble": stacking,
        "prediction": "Radiant" if stacking > 0.5 else "Dire",
        "confidence": round(abs(stacking - 0.5) * 2, 4),
    }
def get_model_info() -> dict:
    """Collect metadata for every loaded model bundle plus the JSON summaries.

    Returns:
        Dict with ``v3_models`` / ``v2_models`` / ``v1_models`` (keyed by
        moment) and the raw ``summaries`` blob.
    """
    info = {"v3_models": {}, "v2_models": {}, "v1_models": {}, "summaries": summaries}
    for mk, bundle in v3_bundles.items():
        info["v3_models"][mk] = {
            "name": V3_MOMENTS[mk],
            "n_features": len(bundle['feature_cols']),
            "n_train": bundle.get('n_train', 'N/A'),
            "n_test": bundle.get('n_test', 'N/A'),
            "feature_cols": bundle['feature_cols'],
            "results": bundle['results'],
            "cutoff_date": bundle.get('cutoff_date', 'N/A'),
            "top5_features": bundle.get('top5_features', []),
        }
    # Bug fix: v2 bundles were loaded at startup but never reported, so the
    # "v2_models" section was always empty. The exact V2 bundle schema is not
    # visible here, so every lookup uses .get() — TODO confirm against the V2
    # training script.
    for mk, bundle in v2_bundles.items():
        info["v2_models"][mk] = {
            "name": V2_MOMENTS.get(mk, mk),
            "n_features": len(bundle.get('feature_cols', [])),
            "results": bundle.get('results', {}),
        }
    for mk, bundle in v1_bundles.items():
        info["v1_models"][mk] = {
            "name": V1_MOMENTS.get(mk, mk),
            "n_features": bundle.get('n_features', len(bundle['feature_cols'])),
            "results": bundle['results'],
        }
    return info
# ── AUC Reference Table ──
# Markdown shown under the V3 tab. Values are hard-coded from offline
# evaluation — presumably mirrors the summary JSONs; TODO confirm they stay
# in sync when models are retrained.
AUC_TABLE = """
| Minute | V3 Holdout AUC | V2 K-Fold AUC | Confidence Level |
|--------|---------------|---------------|-----------------|
| @5min | 0.652 | 0.661 | Low |
| @8min | 0.685 | 0.697 | Low-Medium |
| @10min | 0.720 | 0.718 | Medium |
| @15min | 0.775 | 0.770 | Medium-High |
| @20min | 0.830 | 0.836 | High |
| @25min | 0.872 | 0.893 | Very High |
| @30min | 0.906 | 0.905 | Very High |
"""
# ================================================================
# GRADIO INTERFACE
# ================================================================
def gradio_predict_v3(moment, features_text):
    """UI callback: run the V3 predictor and render the result (or any
    failure) as pretty-printed JSON for the output textbox."""
    try:
        payload = predict_v3(moment, features_text)
    except Exception as exc:  # surface errors in the UI instead of crashing
        payload = {"error": str(exc)}
    return json.dumps(payload, indent=2)
def gradio_predict_v1(moment, features_text):
    """UI callback: run the legacy V1 predictor and render the result (or any
    failure) as pretty-printed JSON for the output textbox."""
    try:
        payload = predict_v1(moment, features_text)
    except Exception as exc:  # surface errors in the UI instead of crashing
        payload = {"error": str(exc)}
    return json.dumps(payload, indent=2)
def gradio_info():
    """UI callback: serialize the full model inventory for the Info tab.

    ``default=str`` deliberately stringifies non-JSON values (e.g. numpy
    scalars or dates inside the bundles) rather than raising.
    """
    info = get_model_info()
    return json.dumps(info, indent=2, default=str)
# Example V3 features: a Radiant-favored mid-game snapshot used to pre-fill
# the V3 tab's JSON textbox (covers all 19 features in V3_FEATURES).
example_v3 = {
    "gold_delta": 3500, "xp_delta": 2200, "lh_delta": 35, "dn_delta": 5,
    "rad_gpm": 2200, "dire_gpm": 1850, "gpm_delta": 350,
    "rad_xpm": 2800, "dire_xpm": 2500, "xpm_delta": 300,
    "carry_gap": 800, "support_gap": -200,
    "cs_eff_delta": 0.05, "deny_delta": 3,
    "gold_log_ratio": 0.15, "xp_log_ratio": 0.08, "gold_lead_pct": 0.12,
    "gold_momentum": 100, "xp_momentum": 50,
}
# Build the Gradio UI: prediction tabs for V3 (production) and V1 (legacy),
# plus model-info and API-documentation tabs. Fix: user-visible strings
# contained mojibake ("โ€”") where em dashes ("—") were intended.
with gr.Blocks(
    title="Dota 2 Edge — Ensemble Predictor v3",
    theme=gr.themes.Base(primary_hue="red", secondary_hue="green"),
) as demo:
    gr.Markdown("""
# Dota 2 Edge — Ensemble Predictor v3
Predict professional Dota 2 match outcomes using temporally-validated models.
**V3 (Production):** Trained with expanding-window temporal CV on 4,068 pro matches.
No data leakage. LogReg = best individual model.
**Holdout AUC:** @10min=0.720 | @15min=0.775 | @20min=0.830 | @25min=0.872 | @30min=0.906
""")
    with gr.Tab("V3 Predict (Production)"):
        with gr.Row():
            v3_moment = gr.Dropdown(
                choices=list(V3_MOMENTS.keys()),
                value="m10",
                label="Minute",
                info="Select game minute",
            )
            v3_features = gr.Textbox(
                label="Features (JSON — 19 temporal features)",
                value=json.dumps(example_v3, indent=2),
                lines=10,
            )
        v3_btn = gr.Button("Predict (V3)", variant="primary")
        v3_output = gr.Textbox(label="Result", lines=15)
        v3_btn.click(gradio_predict_v3, inputs=[v3_moment, v3_features], outputs=v3_output)
        gr.Markdown(AUC_TABLE)
    with gr.Tab("V1 Predict (Legacy)"):
        with gr.Row():
            v1_moment = gr.Dropdown(
                choices=list(V1_MOMENTS.keys()),
                value="m1",
                label="Prediction Moment",
            )
            v1_features = gr.Textbox(
                label="Features (JSON)",
                value=json.dumps({"gold_delta_10": 2500, "xp_delta_10": 1800}, indent=2),
                lines=10,
            )
        v1_btn = gr.Button("Predict (V1)", variant="secondary")
        v1_output = gr.Textbox(label="Result", lines=15)
        v1_btn.click(gradio_predict_v1, inputs=[v1_moment, v1_features], outputs=v1_output)
    with gr.Tab("Model Info"):
        info_btn = gr.Button("Load Model Info")
        info_output = gr.Textbox(label="Model Details", lines=30)
        info_btn.click(gradio_info, outputs=info_output)
    with gr.Tab("API"):
        gr.Markdown("""
## API Usage
```python
from gradio_client import Client
client = Client("Finish-him/dota2-edge-ensemble")
# V3 Production prediction
result = client.predict(
    moment="m10",
    features_text='{"gold_delta": 3500, "xp_delta": 2200, "gpm_delta": 350, ...}',
    api_name="/predict_v3"
)
print(result)
# V1 Legacy prediction
result = client.predict(
    moment="m1",
    features_text='{"gold_delta_10": 2500}',
    api_name="/predict_v1"
)
```
### V3 Feature List (19 features)
`gold_delta`, `xp_delta`, `lh_delta`, `dn_delta`, `rad_gpm`, `dire_gpm`,
`gpm_delta`, `rad_xpm`, `dire_xpm`, `xpm_delta`, `carry_gap`, `support_gap`,
`cs_eff_delta`, `deny_delta`, `gold_log_ratio`, `xp_log_ratio`, `gold_lead_pct`,
`gold_momentum`, `xp_momentum`
""")
if __name__ == "__main__":
    # Start the Gradio server when this file is executed as a script.
    demo.launch()