# app.py — uploaded by Finish-him with huggingface_hub (commit 2e84ab9, verified)
"""
Dota 2 Edge โ€” Multi-Model Ensemble Prediction API v3
=====================================================
HuggingFace Space serving V1, V2, and V3 (temporal CV) models.
V3 = production-grade, leak-free temporal cross-validation.
"""
import gradio as gr
import pickle
import numpy as np
import json
import os
from pathlib import Path
# ── Model directories ──
# One directory per model generation; bundles that are missing on disk are
# simply skipped at load time (see the loading loops below).
MODEL_DIR_V1 = Path("models")
MODEL_DIR_V2 = Path("models_v2")
MODEL_DIR_V3 = Path("models_v3")
# ── V3 Moments (production) ──
# Moment key (used in the pickle filename) -> human-readable label.
V3_MOMENTS = {
    'm5': '@5 minutes',
    'm8': '@8 minutes',
    'm10': '@10 minutes',
    'm15': '@15 minutes',
    'm20': '@20 minutes',
    'm25': '@25 minutes',
    'm30': '@30 minutes',
}
# ── V3 Feature columns ──
# The 19 temporal features the V3 models consume. NOTE(review): the *_delta
# names are presumably Radiant-minus-Dire differentials — confirm against the
# training pipeline; at prediction time the order actually used comes from
# each bundle's 'feature_cols', not this list.
V3_FEATURES = [
    'gold_delta', 'xp_delta', 'lh_delta', 'dn_delta',
    'rad_gpm', 'dire_gpm', 'gpm_delta',
    'rad_xpm', 'dire_xpm', 'xpm_delta',
    'carry_gap', 'support_gap',
    'cs_eff_delta', 'deny_delta',
    'gold_log_ratio', 'xp_log_ratio', 'gold_lead_pct',
    'gold_momentum', 'xp_momentum',
]
# ── V1 Moments (legacy) ──
# Note the inconsistent key scheme ('m1' = 10 minutes, 'm2' = 20, ...), kept
# for compatibility with the legacy pickle filenames.
V1_MOMENTS = {
    'm0': 'Draft (pre-game)',
    'm1': '@10 minutes',
    'm15': '@15 minutes',
    'm2': '@20 minutes',
    'm3': '@30 minutes',
}
def _load_bundles(model_dir: Path, filename_template: str, moment_keys) -> dict:
    """Load pickled model bundles from *model_dir*, skipping missing files.

    Args:
        model_dir: Directory containing the ``.pkl`` bundle files.
        filename_template: ``str.format`` template with one ``{}`` slot for
            the moment key, e.g. ``"temporal_v3_{}.pkl"``.
        moment_keys: Iterable of moment keys (e.g. ``'m10'``) to look for.

    Returns:
        Mapping of moment key -> unpickled bundle for every file that exists.
    """
    bundles = {}
    for mk in moment_keys:
        path = model_dir / filename_template.format(mk)
        if path.exists():
            # NOTE(review): pickle.load can execute arbitrary code — only
            # acceptable because these files ship with the Space itself and
            # never come from user input.
            with open(path, 'rb') as f:
                bundles[mk] = pickle.load(f)
    return bundles

# ── Load V3 bundles (production) ──
v3_bundles = _load_bundles(MODEL_DIR_V3, "temporal_v3_{}.pkl", V3_MOMENTS)
# ── Load V1 bundles (legacy) ──
v1_bundles = _load_bundles(MODEL_DIR_V1, "ensemble_{}.pkl", V1_MOMENTS)
# ── Load V2 bundles ──
V2_MOMENTS = {'m0_5': '@5min', 'm1_8': '@8min', 'm1': '@10min', 'm15': '@15min',
              'm2': '@20min', 'm25': '@25min', 'm3': '@30min'}
v2_bundles = _load_bundles(MODEL_DIR_V2, "ensemble_{}.pkl", V2_MOMENTS)
# Load evaluation summaries (optional; versions with no summary file are skipped).
summaries = {}
for name, path in [('v1', MODEL_DIR_V1 / 'ensemble_summary.json'),
                   ('v2', MODEL_DIR_V2 / 'ensemble_v2_summary.json'),
                   ('v3', MODEL_DIR_V3 / 'ensemble_v3_summary.json')]:
    if path.exists():
        with open(path) as f:
            summaries[name] = json.load(f)
def predict_v3(minute_key: str, features_json: str) -> dict:
    """V3 production prediction using the temporal-CV model bundles.

    Args:
        minute_key: Moment key such as ``'m10'`` (see ``V3_MOMENTS``).
        features_json: JSON object string (or an already-parsed dict) mapping
            feature names to values; missing features default to 0.0.

    Returns:
        Dict with per-model probabilities, the stacking-ensemble output, and
        the recommended (LogReg) Radiant win probability — or an
        ``{"error": ...}`` dict for an unknown moment / malformed JSON.
    """
    if minute_key not in v3_bundles:
        return {"error": f"Moment '{minute_key}' not loaded. Available: {list(v3_bundles.keys())}"}
    bundle = v3_bundles[minute_key]
    feature_cols = bundle['feature_cols']
    try:
        features = json.loads(features_json) if isinstance(features_json, str) else features_json
    except json.JSONDecodeError as e:
        return {"error": f"Invalid JSON: {e}"}
    # Build the single-row feature matrix in the bundle's training order;
    # absent features fall back to 0.0.
    X = np.array([[features.get(col, 0.0) for col in feature_cols]])
    X_scaled = bundle['scaler'].transform(X)
    # Per-model probabilities. Fix: keep full precision for the meta-learner
    # and round only for display — previously the rounded values were fed to
    # the stacker, discarding precision it was not trained with.
    raw_probs = {}
    for name, model in bundle['models'].items():
        # LogReg/MLP were fit on scaled features; the tree models on raw ones.
        X_in = X_scaled if name in ('logreg', 'mlp') else X
        raw_probs[name] = float(model.predict_proba(X_in)[:, 1][0])
    probs = {name: round(p, 4) for name, p in raw_probs.items()}
    # Stacking: the meta-learner consumes the base-model probabilities in
    # the bundle's model order.
    stack_input = np.array([[raw_probs[n] for n in bundle['models']]])
    stacking = round(float(bundle['meta_learner'].predict_proba(stack_input)[:, 1][0]), 4)
    # Best individual = LogReg (proven by temporal CV); fall back to stacking
    # if a bundle lacks a logreg model.
    logreg_prob = probs.get('logreg', stacking)
    return {
        "version": "v3_temporal_cv",
        "moment": minute_key,
        "moment_name": V3_MOMENTS[minute_key],
        "n_features": len(feature_cols),
        "individual_models": probs,
        "stacking_ensemble": stacking,
        "logreg_prob": logreg_prob,
        "recommended_prob": logreg_prob,  # LogReg is best with temporal CV
        "prediction": "Radiant" if logreg_prob > 0.5 else "Dire",
        "confidence": round(abs(logreg_prob - 0.5) * 2, 4),
        "holdout_auc": bundle['results'].get('logreg', {}).get('auc', 'N/A'),
    }
def predict_v1(moment: str, features_json: str) -> dict:
    """V1 legacy prediction.

    Args:
        moment: Legacy moment key (see ``V1_MOMENTS``).
        features_json: JSON object string (or an already-parsed dict) mapping
            feature names to values; missing features default to 0.0.

    Returns:
        Dict with per-model probabilities and the stacking-ensemble verdict,
        or an ``{"error": ...}`` dict for an unknown moment / malformed JSON.
    """
    if moment not in v1_bundles:
        return {"error": f"V1 moment '{moment}' not loaded. Available: {list(v1_bundles.keys())}"}
    bundle = v1_bundles[moment]
    feature_cols = bundle['feature_cols']
    try:
        features = json.loads(features_json) if isinstance(features_json, str) else features_json
    except json.JSONDecodeError as e:
        return {"error": f"Invalid JSON: {e}"}
    X = np.array([[features.get(col, 0.0) for col in feature_cols]])
    X_scaled = bundle['scaler'].transform(X)
    # Fix: feed unrounded base probabilities to the meta-learner; round only
    # for display (previously the stacker received 4-decimal-rounded inputs).
    raw_probs = {}
    for name in bundle['base_model_names']:
        model = bundle['models'][name]
        # LogReg/MLP expect scaled features; the remaining models raw ones.
        X_in = X_scaled if name in ('logreg', 'mlp') else X
        raw_probs[name] = float(model.predict_proba(X_in)[:, 1][0])
    probs = {name: round(p, 4) for name, p in raw_probs.items()}
    stack_input = np.array([[raw_probs[n] for n in bundle['base_model_names']]])
    stacking = round(float(bundle['meta_learner'].predict_proba(stack_input)[:, 1][0]), 4)
    return {
        "version": "v1",
        "moment": moment,
        "moment_name": V1_MOMENTS.get(moment, moment),
        "individual_models": probs,
        "stacking_ensemble": stacking,
        "prediction": "Radiant" if stacking > 0.5 else "Dire",
        "confidence": round(abs(stacking - 0.5) * 2, 4),
    }
def get_model_info() -> dict:
    """Collect metadata for every loaded model bundle plus the JSON summaries.

    Returns:
        Dict with ``v3_models`` / ``v2_models`` / ``v1_models`` (keyed by
        moment) and the raw ``summaries`` blob.
    """
    info = {"v3_models": {}, "v2_models": {}, "v1_models": {}, "summaries": summaries}
    for mk, bundle in v3_bundles.items():
        info["v3_models"][mk] = {
            "name": V3_MOMENTS[mk],
            "n_features": len(bundle['feature_cols']),
            "n_train": bundle.get('n_train', 'N/A'),
            "n_test": bundle.get('n_test', 'N/A'),
            "feature_cols": bundle['feature_cols'],
            "results": bundle['results'],
            "cutoff_date": bundle.get('cutoff_date', 'N/A'),
            "top5_features": bundle.get('top5_features', []),
        }
    # Bug fix: v2 bundles were loaded at startup but never reported, so the
    # "v2_models" section was always empty. The exact V2 bundle schema is not
    # visible here, so every lookup uses .get() — TODO confirm against the V2
    # training script.
    for mk, bundle in v2_bundles.items():
        info["v2_models"][mk] = {
            "name": V2_MOMENTS.get(mk, mk),
            "n_features": len(bundle.get('feature_cols', [])),
            "results": bundle.get('results', {}),
        }
    for mk, bundle in v1_bundles.items():
        info["v1_models"][mk] = {
            "name": V1_MOMENTS.get(mk, mk),
            "n_features": bundle.get('n_features', len(bundle['feature_cols'])),
            "results": bundle['results'],
        }
    return info
# ── AUC Reference Table ──
# Markdown shown under the V3 tab. Values are hard-coded from offline
# evaluation — presumably mirrors the summary JSONs; TODO confirm they stay
# in sync when models are retrained.
AUC_TABLE = """
| Minute | V3 Holdout AUC | V2 K-Fold AUC | Confidence Level |
|--------|---------------|---------------|-----------------|
| @5min | 0.652 | 0.661 | Low |
| @8min | 0.685 | 0.697 | Low-Medium |
| @10min | 0.720 | 0.718 | Medium |
| @15min | 0.775 | 0.770 | Medium-High |
| @20min | 0.830 | 0.836 | High |
| @25min | 0.872 | 0.893 | Very High |
| @30min | 0.906 | 0.905 | Very High |
"""
# ================================================================
# GRADIO INTERFACE
# ================================================================
def gradio_predict_v3(moment, features_text):
    """UI callback: run the V3 predictor and render the result (or any
    failure) as pretty-printed JSON for the output textbox."""
    try:
        payload = predict_v3(moment, features_text)
    except Exception as exc:  # surface errors in the UI instead of crashing
        payload = {"error": str(exc)}
    return json.dumps(payload, indent=2)
def gradio_predict_v1(moment, features_text):
    """UI callback: run the legacy V1 predictor and render the result (or any
    failure) as pretty-printed JSON for the output textbox."""
    try:
        payload = predict_v1(moment, features_text)
    except Exception as exc:  # surface errors in the UI instead of crashing
        payload = {"error": str(exc)}
    return json.dumps(payload, indent=2)
def gradio_info():
    """UI callback: serialize the full model inventory for the Info tab.

    ``default=str`` deliberately stringifies non-JSON values (e.g. numpy
    scalars or dates inside the bundles) rather than raising.
    """
    info = get_model_info()
    return json.dumps(info, indent=2, default=str)
# Example V3 features: a Radiant-favored mid-game snapshot used to pre-fill
# the V3 tab's JSON textbox (covers all 19 features in V3_FEATURES).
example_v3 = {
    "gold_delta": 3500, "xp_delta": 2200, "lh_delta": 35, "dn_delta": 5,
    "rad_gpm": 2200, "dire_gpm": 1850, "gpm_delta": 350,
    "rad_xpm": 2800, "dire_xpm": 2500, "xpm_delta": 300,
    "carry_gap": 800, "support_gap": -200,
    "cs_eff_delta": 0.05, "deny_delta": 3,
    "gold_log_ratio": 0.15, "xp_log_ratio": 0.08, "gold_lead_pct": 0.12,
    "gold_momentum": 100, "xp_momentum": 50,
}
# Build the Gradio UI: prediction tabs for V3 (production) and V1 (legacy),
# plus model-info and API-documentation tabs. Fix: user-visible strings
# contained mojibake ("โ€”") where em dashes ("—") were intended.
with gr.Blocks(
    title="Dota 2 Edge — Ensemble Predictor v3",
    theme=gr.themes.Base(primary_hue="red", secondary_hue="green"),
) as demo:
    gr.Markdown("""
# Dota 2 Edge — Ensemble Predictor v3
Predict professional Dota 2 match outcomes using temporally-validated models.
**V3 (Production):** Trained with expanding-window temporal CV on 4,068 pro matches.
No data leakage. LogReg = best individual model.
**Holdout AUC:** @10min=0.720 | @15min=0.775 | @20min=0.830 | @25min=0.872 | @30min=0.906
""")
    with gr.Tab("V3 Predict (Production)"):
        with gr.Row():
            v3_moment = gr.Dropdown(
                choices=list(V3_MOMENTS.keys()),
                value="m10",
                label="Minute",
                info="Select game minute",
            )
            v3_features = gr.Textbox(
                label="Features (JSON — 19 temporal features)",
                value=json.dumps(example_v3, indent=2),
                lines=10,
            )
        v3_btn = gr.Button("Predict (V3)", variant="primary")
        v3_output = gr.Textbox(label="Result", lines=15)
        v3_btn.click(gradio_predict_v3, inputs=[v3_moment, v3_features], outputs=v3_output)
        gr.Markdown(AUC_TABLE)
    with gr.Tab("V1 Predict (Legacy)"):
        with gr.Row():
            v1_moment = gr.Dropdown(
                choices=list(V1_MOMENTS.keys()),
                value="m1",
                label="Prediction Moment",
            )
            v1_features = gr.Textbox(
                label="Features (JSON)",
                value=json.dumps({"gold_delta_10": 2500, "xp_delta_10": 1800}, indent=2),
                lines=10,
            )
        v1_btn = gr.Button("Predict (V1)", variant="secondary")
        v1_output = gr.Textbox(label="Result", lines=15)
        v1_btn.click(gradio_predict_v1, inputs=[v1_moment, v1_features], outputs=v1_output)
    with gr.Tab("Model Info"):
        info_btn = gr.Button("Load Model Info")
        info_output = gr.Textbox(label="Model Details", lines=30)
        info_btn.click(gradio_info, outputs=info_output)
    with gr.Tab("API"):
        gr.Markdown("""
## API Usage
```python
from gradio_client import Client
client = Client("Finish-him/dota2-edge-ensemble")
# V3 Production prediction
result = client.predict(
    moment="m10",
    features_text='{"gold_delta": 3500, "xp_delta": 2200, "gpm_delta": 350, ...}',
    api_name="/predict_v3"
)
print(result)
# V1 Legacy prediction
result = client.predict(
    moment="m1",
    features_text='{"gold_delta_10": 2500}',
    api_name="/predict_v1"
)
```
### V3 Feature List (19 features)
`gold_delta`, `xp_delta`, `lh_delta`, `dn_delta`, `rad_gpm`, `dire_gpm`,
`gpm_delta`, `rad_xpm`, `dire_xpm`, `xpm_delta`, `carry_gap`, `support_gap`,
`cs_eff_delta`, `deny_delta`, `gold_log_ratio`, `xp_log_ratio`, `gold_lead_pct`,
`gold_momentum`, `xp_momentum`
""")
if __name__ == "__main__":
    # Start the Gradio server when this file is executed as a script.
    demo.launch()