Spaces:

nananie143
/

footypredict-pro

Runtime error

footypredict-pro/src/models/ensemble.py

NetBoss

V3.0 Ultimate Enhancement - Complete production system

6f7e932 about 1 month ago

10.9 kB

	"""
	Model Ensemble - Combines multiple ML models for robust predictions

	Weighted averaging of predictions from:
	- Podos Transformer (30%)
	- XGBoost (35%)
	- FootballerModel (20%)
	- LSTM Form (15%)

	Includes confidence calibration and model disagreement detection.
	"""

	import numpy as np
	from typing import Dict, List, Optional, Any, Tuple
	from dataclasses import dataclass, asdict
	from pathlib import Path
	import json
	import logging

	# Module-level logger used by the ensemble classes in this file.
	logger = logging.getLogger(__name__)


	@dataclass
	class EnsemblePrediction:
	    """Combined result of an ensemble prediction for one fixture."""

	    # Probabilities for the three possible results.
	    home_win_prob: float
	    draw_prob: float
	    away_win_prob: float
	    # Label of the result the ensemble considers most likely.
	    predicted_outcome: str
	    # Confidence score attached to ``predicted_outcome``.
	    confidence: float
	    # Share of contributing models that pick the same outcome.
	    model_agreement: float
	    # Per-model breakdown, keyed by model name.
	    individual_predictions: Dict[str, Dict]
	    # Flag marking the confidence as calibrated; defaults to True.
	    calibrated: bool = True

	    def to_dict(self) -> Dict:
	        """Return every field as a plain dictionary (``dataclasses.asdict``)."""
	        as_mapping = asdict(self)
	        return as_mapping


	class ModelEnsemble:
	"""
	Ensemble predictor combining multiple ML models.

	Uses weighted averaging with confidence calibration.
	"""

	# Default model weights (sum to 1.0)
	DEFAULT_WEIGHTS = {
	'podos': 0.30,
	'xgboost': 0.35,
	'footballer': 0.20,
	'lstm': 0.15
	}

	def __init__(self, weights: Optional[Dict[str, float]] = None):
	"""
	Initialize ensemble.

	Args:
	weights: Custom model weights. Default weights used if not provided.
	"""
	self.weights = weights or self.DEFAULT_WEIGHTS.copy()
	self.models: Dict[str, Any] = {}
	self._calibration_params: Dict[str, float] = {}
	self._load_calibration()

	def _load_calibration(self):
	"""Load calibration parameters if available"""
	config_path = Path(__file__).parent.parent.parent / "models" / "config" / "calibration.json"
	if config_path.exists():
	with open(config_path, 'r') as f:
	self._calibration_params = json.load(f)

	def register_model(self, name: str, model: Any, weight: Optional[float] = None):
	"""
	Register a model with the ensemble.

	Args:
	name: Model identifier
	model: Model object with predict() method
	weight: Model weight (optional, uses default if not specified)
	"""
	self.models[name] = model
	if weight is not None:
	self.weights[name] = weight
	self._normalize_weights()
	logger.info(f"Registered model: {name} (weight: {self.weights.get(name, 0)})")

	def _normalize_weights(self):
	"""Ensure weights sum to 1.0"""
	# Only consider weights for registered models
	active_weights = {k: v for k, v in self.weights.items() if k in self.models}
	total = sum(active_weights.values())
	if total > 0:
	for k in active_weights:
	self.weights[k] = active_weights[k] / total

	def predict(self, home_team: str, away_team: str, **features) -> EnsemblePrediction:
	"""
	Get ensemble prediction from all models.

	Args:
	home_team: Home team name
	away_team: Away team name
	**features: Additional features (form, odds, etc.)

	Returns:
	EnsemblePrediction with combined probabilities
	"""
	if not self.models:
	raise ValueError("No models registered in ensemble")

	individual_preds = {}
	weighted_probs = np.array([0.0, 0.0, 0.0]) # home, draw, away
	total_weight = 0.0

	# Get prediction from each model
	for name, model in self.models.items():
	try:
	pred = model.predict(home_team, away_team, **features)

	# Extract probabilities
	if hasattr(pred, 'home_win_prob'):
	probs = np.array([
	pred.home_win_prob,
	pred.draw_prob,
	pred.away_win_prob
	])
	elif isinstance(pred, dict):
	probs = np.array([
	pred.get('home_win_prob', 0.33),
	pred.get('draw_prob', 0.33),
	pred.get('away_win_prob', 0.34)
	])
	else:
	continue

	# Store individual prediction
	individual_preds[name] = {
	'home_win_prob': float(probs[0]),
	'draw_prob': float(probs[1]),
	'away_win_prob': float(probs[2]),
	'confidence': getattr(pred, 'confidence', 0.5)
	}

	# Add weighted contribution
	weight = self.weights.get(name, 0.1)
	weighted_probs += probs * weight
	total_weight += weight

	except Exception as e:
	logger.warning(f"Model {name} prediction failed: {e}")
	continue

	if total_weight == 0:
	raise ValueError("All model predictions failed")

	# Normalize weighted probabilities
	final_probs = weighted_probs / total_weight

	# Ensure probabilities sum to 1
	final_probs = final_probs / final_probs.sum()

	# Calculate model agreement (measure of consensus)
	agreement = self._calculate_agreement(individual_preds)

	# Determine predicted outcome
	outcome_idx = np.argmax(final_probs)
	outcomes = ['Home Win', 'Draw', 'Away Win']
	predicted_outcome = outcomes[outcome_idx]

	# Calculate confidence (base + agreement bonus)
	base_confidence = float(final_probs[outcome_idx])
	confidence = self._calibrate_confidence(base_confidence, agreement)

	return EnsemblePrediction(
	home_win_prob=float(final_probs[0]),
	draw_prob=float(final_probs[1]),
	away_win_prob=float(final_probs[2]),
	predicted_outcome=predicted_outcome,
	confidence=confidence,
	model_agreement=agreement,
	individual_predictions=individual_preds,
	calibrated=True
	)

	def _calculate_agreement(self, predictions: Dict[str, Dict]) -> float:
	"""
	Calculate how much models agree on the outcome.

	Returns:
	Agreement score from 0 (complete disagreement) to 1 (full agreement)
	"""
	if len(predictions) < 2:
	return 1.0

	# Get predicted outcome from each model
	outcomes = []
	for pred in predictions.values():
	probs = [pred['home_win_prob'], pred['draw_prob'], pred['away_win_prob']]
	outcomes.append(np.argmax(probs))

	# Calculate agreement as percentage of models agreeing with majority
	from collections import Counter
	outcome_counts = Counter(outcomes)
	most_common_count = outcome_counts.most_common(1)[0][1]

	return most_common_count / len(outcomes)

	def _calibrate_confidence(self, raw_confidence: float, agreement: float) -> float:
	"""
	Calibrate confidence based on model agreement.

	High agreement → boost confidence
	Low agreement → reduce confidence
	"""
	# Agreement multiplier
	if agreement >= 0.8:
	multiplier = 1.1 # Boost for high agreement
	elif agreement >= 0.6:
	multiplier = 1.0 # No change
	elif agreement >= 0.4:
	multiplier = 0.9 # Slight reduction
	else:
	multiplier = 0.8 # Significant reduction for disagreement

	calibrated = raw_confidence * multiplier

	# Clamp to valid range
	return max(0.3, min(0.95, calibrated))

	def get_model_contributions(self, home_team: str, away_team: str,
	**features) -> Dict[str, Dict]:
	"""
	Get detailed breakdown of each model's contribution.

	Useful for debugging and understanding predictions.
	"""
	contributions = {}

	for name, model in self.models.items():
	try:
	pred = model.predict(home_team, away_team, **features)
	weight = self.weights.get(name, 0)

	contributions[name] = {
	'weight': weight,
	'prediction': pred.to_dict() if hasattr(pred, 'to_dict') else pred,
	'weighted_contribution': {
	'home': getattr(pred, 'home_win_prob', 0.33) * weight,
	'draw': getattr(pred, 'draw_prob', 0.33) * weight,
	'away': getattr(pred, 'away_win_prob', 0.34) * weight
	}
	}
	except Exception as e:
	contributions[name] = {'error': str(e)}

	return contributions

	def save_weights(self, path: Optional[Path] = None):
	"""Save current weights to config file"""
	path = path or Path(__file__).parent.parent.parent / "models" / "config" / "ensemble_weights.json"
	path.parent.mkdir(parents=True, exist_ok=True)

	with open(path, 'w') as f:
	json.dump(self.weights, f, indent=2)

	logger.info(f"Saved weights to {path}")

	def load_weights(self, path: Optional[Path] = None):
	"""Load weights from config file"""
	path = path or Path(__file__).parent.parent.parent / "models" / "config" / "ensemble_weights.json"

	if path.exists():
	with open(path, 'r') as f:
	self.weights = json.load(f)
	logger.info(f"Loaded weights from {path}")
	else:
	logger.warning(f"No weights file found at {path}, using defaults")


	class SimpleVotingEnsemble:
	    """
	    Simple voting ensemble - each model gets one vote.
	    Good for when you want equal weighting.

	    Models must expose ``predict(home_team, away_team, **features)`` and
	    return either an object with ``home_win_prob`` / ``draw_prob`` /
	    ``away_win_prob`` attributes or a dict with those keys.
	    """

	    def __init__(self):
	        """Create an ensemble with no registered models."""
	        self.models: Dict[str, Any] = {}

	    def register_model(self, name: str, model: Any):
	        """Register ``model`` under ``name``, replacing any previous entry."""
	        self.models[name] = model

	    def predict(self, home_team: str, away_team: str, **features) -> str:
	        """
	        Get majority vote for outcome.

	        Args:
	            home_team: Home team name
	            away_team: Away team name
	            **features: Additional features forwarded to every model

	        Returns:
	            'Home Win', 'Draw' or 'Away Win'. Ties go to the earlier entry
	            in that order.

	        Raises:
	            ValueError: No models are registered (previously this silently
	                returned 'Home Win' from an all-zero tally).
	        """
	        if not self.models:
	            raise ValueError("No models registered in ensemble")

	        vote_keys = ('home', 'draw', 'away')
	        votes = {key: 0 for key in vote_keys}

	        for model in self.models.values():
	            pred = model.predict(home_team, away_team, **features)
	            if isinstance(pred, dict):
	                # Accept dict predictions, with the same key names and
	                # defaults that ModelEnsemble uses.
	                probs = [
	                    pred.get('home_win_prob', 0.33),
	                    pred.get('draw_prob', 0.33),
	                    pred.get('away_win_prob', 0.34)
	                ]
	            else:
	                probs = [pred.home_win_prob, pred.draw_prob, pred.away_win_prob]
	            votes[vote_keys[int(np.argmax(probs))]] += 1

	        # max() returns the first key with the highest count, which gives
	        # the home > draw > away tie-break.
	        winner = max(votes, key=votes.get)
	        return {'home': 'Home Win', 'draw': 'Draw', 'away': 'Away Win'}[winner]