"""
Dota 2 Edge — Multi-Model Ensemble Prediction API v3
=====================================================
HuggingFace Space serving V1, V2, and V3 (temporal CV) models.
V3 = production-grade, leak-free temporal cross-validation.
"""

import gradio as gr
import pickle
import numpy as np
import json
import os
from pathlib import Path

# ── Model directories ──
MODEL_DIR_V1 = Path("models")
MODEL_DIR_V2 = Path("models_v2")
MODEL_DIR_V3 = Path("models_v3")

# ── V3 Moments (production) ──
V3_MOMENTS = {
    'm5': '@5 minutes',
    'm8': '@8 minutes',
    'm10': '@10 minutes',
    'm15': '@15 minutes',
    'm20': '@20 minutes',
    'm25': '@25 minutes',
    'm30': '@30 minutes',
}

# ── V3 Feature columns ──
V3_FEATURES = [
    'gold_delta', 'xp_delta', 'lh_delta', 'dn_delta',
    'rad_gpm', 'dire_gpm', 'gpm_delta',
    'rad_xpm', 'dire_xpm', 'xpm_delta',
    'carry_gap', 'support_gap', 'cs_eff_delta', 'deny_delta',
    'gold_log_ratio', 'xp_log_ratio', 'gold_lead_pct',
    'gold_momentum', 'xp_momentum',
]

# ── V1 Moments (legacy) ──
V1_MOMENTS = {
    'm0': 'Draft (pre-game)',
    'm1': '@10 minutes',
    'm15': '@15 minutes',
    'm2': '@20 minutes',
    'm3': '@30 minutes',
}

# ── V2 Moments ──
V2_MOMENTS = {
    'm0_5': '@5min',
    'm1_8': '@8min',
    'm1': '@10min',
    'm15': '@15min',
    'm2': '@20min',
    'm25': '@25min',
    'm3': '@30min',
}

# Base models that are fed the scaler-transformed feature matrix; every other
# base model receives the raw (unscaled) features.
_SCALED_MODELS = ('logreg', 'mlp')


def _load_bundles(model_dir: Path, filename_template: str, moment_keys) -> dict:
    """Load every pickled model bundle that exists on disk.

    Args:
        model_dir: Directory holding the bundle files.
        filename_template: File name pattern with a ``{mk}`` placeholder for
            the moment key (e.g. ``"ensemble_{mk}.pkl"``).
        moment_keys: Iterable of moment keys to look for.

    Returns:
        Dict mapping moment key -> unpickled bundle. Keys whose file does not
        exist are silently skipped.
    """
    bundles = {}
    for key in moment_keys:
        bundle_path = model_dir / filename_template.format(mk=key)
        if bundle_path.exists():
            with open(bundle_path, 'rb') as fh:
                # SECURITY: pickle.load executes arbitrary code from the file.
                # Only acceptable because these files ship with the app itself;
                # never point these directories at user-supplied content.
                bundles[key] = pickle.load(fh)
    return bundles


# ── Load V3 bundles (production) ──
v3_bundles = _load_bundles(MODEL_DIR_V3, "temporal_v3_{mk}.pkl", V3_MOMENTS)

# ── Load V1 bundles (legacy) ──
v1_bundles = _load_bundles(MODEL_DIR_V1, "ensemble_{mk}.pkl", V1_MOMENTS)

# ── Load V2 bundles ──
# NOTE(review): V2 bundles are loaded but no predict function or info section
# in this file reads them — confirm whether a V2 endpoint is still planned.
v2_bundles = _load_bundles(MODEL_DIR_V2, "ensemble_{mk}.pkl", V2_MOMENTS)

# Load summaries
summaries = {}
for _summary_name, _summary_path in [
    ('v1', MODEL_DIR_V1 / 'ensemble_summary.json'),
    ('v2', MODEL_DIR_V2 / 'ensemble_v2_summary.json'),
    ('v3', MODEL_DIR_V3 / 'ensemble_v3_summary.json'),
]:
    if _summary_path.exists():
        with open(_summary_path, encoding="utf-8") as _fh:
            summaries[_summary_name] = json.load(_fh)


def _parse_features(features_json):
    """Decode the user-supplied feature payload.

    Args:
        features_json: JSON text, or an already-decoded object.

    Returns:
        ``(features, None)`` on success, where ``features`` is a dict, or
        ``(None, error_dict)`` when the payload is not valid JSON or does not
        decode to a JSON object.
    """
    try:
        features = json.loads(features_json) if isinstance(features_json, str) else features_json
    except json.JSONDecodeError as e:
        return None, {"error": f"Invalid JSON: {e}"}
    if not isinstance(features, dict):
        # Lists, numbers, null, ... are valid JSON but have no feature names.
        return None, {
            "error": "Features must be a JSON object mapping feature names to values, "
                     f"got {type(features).__name__}"
        }
    return features, None


def _run_ensemble(bundle: dict, features: dict, model_names) -> tuple:
    """Score one feature dict with a bundle's base models and meta-learner.

    Args:
        bundle: Loaded model bundle; reads ``feature_cols``, ``scaler``,
            ``models`` and ``meta_learner``.
        features: Feature name -> value. Features missing from the dict
            default to 0.0.
        model_names: Base model names, in the order the meta-learner expects
            its input columns.

    Returns:
        ``(probs, stacking)`` where ``probs`` maps each base model name to its
        positive-class probability rounded to 4 decimals, and ``stacking`` is
        the meta-learner's positive-class probability rounded to 4 decimals.
    """
    model_names = list(model_names)
    feature_cols = bundle['feature_cols']
    X = np.array([[features.get(col, 0.0) for col in feature_cols]])
    X_scaled = bundle['scaler'].transform(X)

    raw_probs = {}
    for name in model_names:
        model = bundle['models'][name]
        model_input = X_scaled if name in _SCALED_MODELS else X
        raw_probs[name] = float(model.predict_proba(model_input)[:, 1][0])

    # Feed the meta-learner the unrounded probabilities; rounding is only for
    # presentation and would otherwise perturb the stacked prediction.
    stack_input = np.array([[raw_probs[n] for n in model_names]])
    stacking = round(float(bundle['meta_learner'].predict_proba(stack_input)[:, 1][0]), 4)

    probs = {name: round(p, 4) for name, p in raw_probs.items()}
    return probs, stacking


def predict_v3(minute_key: str, features_json: str) -> dict:
    """V3 production prediction using temporal CV models.

    Args:
        minute_key: Key into ``V3_MOMENTS`` / ``v3_bundles`` (e.g. ``"m10"``).
        features_json: JSON object text (or an already-decoded dict) mapping
            feature names to values; missing features default to 0.0.

    Returns:
        A result dict with per-model probabilities, the stacking probability
        and the recommended (LogReg) probability, or ``{"error": ...}`` when
        the moment is not loaded or the payload is invalid.
    """
    if minute_key not in v3_bundles:
        return {"error": f"Moment '{minute_key}' not loaded. Available: {list(v3_bundles.keys())}"}

    bundle = v3_bundles[minute_key]

    features, error = _parse_features(features_json)
    if error is not None:
        return error

    probs, stacking = _run_ensemble(bundle, features, bundle['models'])

    # Best individual = LogReg (proven by temporal CV); fall back to the
    # stacking output if the bundle has no LogReg model.
    logreg_prob = probs.get('logreg', stacking)

    return {
        "version": "v3_temporal_cv",
        "moment": minute_key,
        "moment_name": V3_MOMENTS[minute_key],
        "n_features": len(bundle['feature_cols']),
        "individual_models": probs,
        "stacking_ensemble": stacking,
        "logreg_prob": logreg_prob,
        "recommended_prob": logreg_prob,  # LogReg is best with temporal CV
        "prediction": "Radiant" if logreg_prob > 0.5 else "Dire",
        "confidence": round(abs(logreg_prob - 0.5) * 2, 4),
        "holdout_auc": bundle['results'].get('logreg', {}).get('auc', 'N/A'),
    }


def predict_v1(moment: str, features_json: str) -> dict:
    """V1 legacy prediction.

    Args:
        moment: Key into ``V1_MOMENTS`` / ``v1_bundles`` (e.g. ``"m1"``).
        features_json: JSON object text (or an already-decoded dict) mapping
            feature names to values; missing features default to 0.0.

    Returns:
        A result dict whose prediction is driven by the stacking ensemble, or
        ``{"error": ...}`` when the moment is not loaded or the payload is
        invalid.
    """
    if moment not in v1_bundles:
        return {"error": f"V1 moment '{moment}' not loaded. Available: {list(v1_bundles.keys())}"}

    bundle = v1_bundles[moment]

    features, error = _parse_features(features_json)
    if error is not None:
        return error

    probs, stacking = _run_ensemble(bundle, features, bundle['base_model_names'])

    return {
        "version": "v1",
        "moment": moment,
        "moment_name": V1_MOMENTS.get(moment, moment),
        "individual_models": probs,
        "stacking_ensemble": stacking,
        "prediction": "Radiant" if stacking > 0.5 else "Dire",
        "confidence": round(abs(stacking - 0.5) * 2, 4),
    }


def get_model_info() -> dict:
    """Get info about all loaded models.

    Returns:
        Dict with ``v3_models`` and ``v1_models`` keyed by moment, the loaded
        ``summaries``, and a ``v2_models`` section that is currently always
        empty.
    """
    info = {"v3_models": {}, "v2_models": {}, "v1_models": {}, "summaries": summaries}

    for mk, bundle in v3_bundles.items():
        info["v3_models"][mk] = {
            "name": V3_MOMENTS[mk],
            "n_features": len(bundle['feature_cols']),
            "n_train": bundle.get('n_train', 'N/A'),
            "n_test": bundle.get('n_test', 'N/A'),
            "feature_cols": bundle['feature_cols'],
            "results": bundle['results'],
            "cutoff_date": bundle.get('cutoff_date', 'N/A'),
            "top5_features": bundle.get('top5_features', []),
        }

    for mk, bundle in v1_bundles.items():
        info["v1_models"][mk] = {
            "name": V1_MOMENTS.get(mk, mk),
            "n_features": bundle.get('n_features', len(bundle['feature_cols'])),
            "results": bundle['results'],
        }

    return info


# ── AUC Reference Table ──
AUC_TABLE = """
| Minute | V3 Holdout AUC | V2 K-Fold AUC | Confidence Level |
|--------|---------------|---------------|-----------------|
| @5min | 0.652 | 0.661 | Low |
| @8min | 0.685 | 0.697 | Low-Medium |
| @10min | 0.720 | 0.718 | Medium |
| @15min | 0.775 | 0.770 | Medium-High |
| @20min | 0.830 | 0.836 | High |
| @25min | 0.872 | 0.893 | Very High |
| @30min | 0.906 | 0.905 | Very High |
"""

# ── Static Markdown shown in the UI ──
HEADER_MD = """
# Dota 2 Edge — Ensemble Predictor v3
Predict professional Dota 2 match outcomes using temporally-validated models.

**V3 (Production):** Trained with expanding-window temporal CV on 4,068 pro matches. No data leakage. LogReg = best individual model.

**Holdout AUC:** @10min=0.720 | @15min=0.775 | @20min=0.830 | @25min=0.872 | @30min=0.906
"""

API_MD = """
## API Usage

```python
from gradio_client import Client

client = Client("Finish-him/dota2-edge-ensemble")

# V3 Production prediction
result = client.predict(
    moment="m10",
    features_text='{"gold_delta": 3500, "xp_delta": 2200, "gpm_delta": 350, ...}',
    api_name="/predict_v3"
)
print(result)

# V1 Legacy prediction
result = client.predict(
    moment="m1",
    features_text='{"gold_delta_10": 2500}',
    api_name="/predict_v1"
)
```

### V3 Feature List (19 features)
`gold_delta`, `xp_delta`, `lh_delta`, `dn_delta`, `rad_gpm`, `dire_gpm`, `gpm_delta`, `rad_xpm`, `dire_xpm`, `xpm_delta`, `carry_gap`, `support_gap`, `cs_eff_delta`, `deny_delta`, `gold_log_ratio`, `xp_log_ratio`, `gold_lead_pct`, `gold_momentum`, `xp_momentum`
"""


# ================================================================
# GRADIO INTERFACE
# ================================================================

def gradio_predict_v3(moment, features_text):
    """UI/API wrapper around ``predict_v3`` that always returns JSON text."""
    try:
        result = predict_v3(moment, features_text)
        return json.dumps(result, indent=2)
    except Exception as e:
        # Top-level UI boundary: report any failure as a JSON error payload
        # instead of letting the request crash.
        return json.dumps({"error": str(e)}, indent=2)


def gradio_predict_v1(moment, features_text):
    """UI/API wrapper around ``predict_v1`` that always returns JSON text."""
    try:
        result = predict_v1(moment, features_text)
        return json.dumps(result, indent=2)
    except Exception as e:
        # Top-level UI boundary: report any failure as a JSON error payload
        # instead of letting the request crash.
        return json.dumps({"error": str(e)}, indent=2)


def gradio_info():
    """Return ``get_model_info()`` as JSON text.

    ``default=str`` stringifies any value in the bundles that ``json`` cannot
    serialize natively.
    """
    return json.dumps(get_model_info(), indent=2, default=str)


# Example V3 features
example_v3 = {
    "gold_delta": 3500,
    "xp_delta": 2200,
    "lh_delta": 35,
    "dn_delta": 5,
    "rad_gpm": 2200,
    "dire_gpm": 1850,
    "gpm_delta": 350,
    "rad_xpm": 2800,
    "dire_xpm": 2500,
    "xpm_delta": 300,
    "carry_gap": 800,
    "support_gap": -200,
    "cs_eff_delta": 0.05,
    "deny_delta": 3,
    "gold_log_ratio": 0.15,
    "xp_log_ratio": 0.08,
    "gold_lead_pct": 0.12,
    "gold_momentum": 100,
    "xp_momentum": 50,
}

with gr.Blocks(
    title="Dota 2 Edge — Ensemble Predictor v3",
    theme=gr.themes.Base(primary_hue="red", secondary_hue="green"),
) as demo:
    gr.Markdown(HEADER_MD)

    with gr.Tab("V3 Predict (Production)"):
        with gr.Row():
            v3_moment = gr.Dropdown(
                choices=list(V3_MOMENTS.keys()),
                value="m10",
                label="Minute",
                info="Select game minute",
            )
            v3_features = gr.Textbox(
                label="Features (JSON — 19 temporal features)",
                value=json.dumps(example_v3, indent=2),
                lines=10,
            )
        v3_btn = gr.Button("Predict (V3)", variant="primary")
        v3_output = gr.Textbox(label="Result", lines=15)
        # api_name pins the endpoint to the "/predict_v3" name documented in
        # the API tab; without it the endpoint would not carry that name.
        v3_btn.click(
            gradio_predict_v3,
            inputs=[v3_moment, v3_features],
            outputs=v3_output,
            api_name="predict_v3",
        )
        gr.Markdown(AUC_TABLE)

    with gr.Tab("V1 Predict (Legacy)"):
        with gr.Row():
            v1_moment = gr.Dropdown(
                choices=list(V1_MOMENTS.keys()),
                value="m1",
                label="Prediction Moment",
            )
            v1_features = gr.Textbox(
                label="Features (JSON)",
                value=json.dumps({"gold_delta_10": 2500, "xp_delta_10": 1800}, indent=2),
                lines=10,
            )
        v1_btn = gr.Button("Predict (V1)", variant="secondary")
        v1_output = gr.Textbox(label="Result", lines=15)
        # api_name pins the endpoint to the documented "/predict_v1" name.
        v1_btn.click(
            gradio_predict_v1,
            inputs=[v1_moment, v1_features],
            outputs=v1_output,
            api_name="predict_v1",
        )

    with gr.Tab("Model Info"):
        info_btn = gr.Button("Load Model Info")
        info_output = gr.Textbox(label="Model Details", lines=30)
        info_btn.click(gradio_info, outputs=info_output)

    with gr.Tab("API"):
        gr.Markdown(API_MD)


if __name__ == "__main__":
    demo.launch()