Spaces:
Runtime error
Runtime error
| """ | |
| Trained Model Loader | |
| Loads models trained on Kaggle and exported to models/trained/ | |
| Supports: XGBoost, LightGBM, CatBoost, PyTorch, ONNX | |
| """ | |
| import os | |
| import json | |
| import pickle | |
| import logging | |
| from pathlib import Path | |
| from typing import Dict, Optional, Any | |
| import numpy as np | |
# Module-level logger for load/predict diagnostics.
logger = logging.getLogger(__name__)
# Repository layout: assumes this file lives two levels below the repo root,
# next to a top-level "models/" directory -- TODO confirm against the repo.
MODELS_DIR = Path(__file__).parent.parent.parent / "models"
TRAINED_DIR = MODELS_DIR / "trained"  # serialized model files exported from Kaggle
CONFIG_DIR = MODELS_DIR / "config"    # encoders, Elo ratings, training metadata
class TrainedModelLoader:
    """Load and serve models trained on Kaggle.

    Model weights are read from ``models/trained/`` and supporting
    artifacts (encoders, Elo ratings, metadata) from ``models/config/``.
    Every artifact is optional: a missing file or an uninstalled library
    is logged and skipped, and prediction runs on whatever subset of the
    ensemble is available.
    """

    def __init__(self):
        # Loaded model objects keyed by short name: 'xgb', 'lgb', 'cat', 'nn', 'onnx'.
        self.models: Dict[str, Any] = {}
        # Pickled preprocessing bundle (label encoders, scaler) from encoders.pkl.
        self.encoders: Dict[str, Any] = {}
        # Team name -> Elo rating, from elo_ratings.json.
        self.elo_ratings: Dict[str, float] = {}
        # Training metadata (ensemble weights, class ordering, ...).
        self.metadata: Dict[str, Any] = {}
        # Feature scaler used by the neural net, if present in the encoder bundle.
        self.scaler = None
        self._loaded = False

    def load_all(self) -> bool:
        """Load config artifacts plus every available trained model.

        Returns:
            True when at least one model was loaded, False otherwise.
        """
        try:
            self._load_config()
            self._load_xgboost()
            self._load_lightgbm()
            self._load_catboost()
            self._load_neural_net()
            self._load_onnx()
            self._loaded = len(self.models) > 0
            logger.info("Loaded %d trained models", len(self.models))
            return self._loaded
        except Exception as e:
            logger.error("Error loading models: %s", e)
            return False

    def _load_config(self):
        """Load encoders, Elo ratings, and model metadata if present."""
        # Encoders + scaler. NOTE: pickle is only acceptable because these
        # artifacts come from our own training pipeline; never unpickle
        # untrusted files.
        enc_path = CONFIG_DIR / "encoders.pkl"
        if enc_path.exists():
            with open(enc_path, 'rb') as f:
                data = pickle.load(f)
            self.encoders = data
            self.scaler = data.get('scaler')
            logger.info("Loaded encoders")
        # Elo ratings
        elo_path = CONFIG_DIR / "elo_ratings.json"
        if elo_path.exists():
            with open(elo_path, 'r') as f:
                self.elo_ratings = json.load(f)
            logger.info(f"Loaded {len(self.elo_ratings)} team Elo ratings")
        # Metadata
        meta_path = CONFIG_DIR / "model_meta.json"
        if meta_path.exists():
            with open(meta_path, 'r') as f:
                self.metadata = json.load(f)
            logger.info("Loaded model metadata")

    def _load_xgboost(self):
        """Load the XGBoost classifier from its native JSON export."""
        path = TRAINED_DIR / "xgb_football.json"
        if not path.exists():
            return
        try:
            from xgboost import XGBClassifier
            model = XGBClassifier()
            model.load_model(str(path))
            self.models['xgb'] = model
            logger.info("Loaded XGBoost model")
        except ImportError:
            logger.warning("XGBoost not installed")
        except Exception as e:
            # A corrupt/incompatible file must not abort the other loaders.
            logger.error("Failed to load XGBoost model: %s", e)

    def _load_lightgbm(self):
        """Load the LightGBM booster from its text dump."""
        path = TRAINED_DIR / "lgb_football.txt"
        if not path.exists():
            return
        try:
            import lightgbm as lgb
            self.models['lgb'] = lgb.Booster(model_file=str(path))
            logger.info("Loaded LightGBM model")
        except ImportError:
            logger.warning("LightGBM not installed")
        except Exception as e:
            logger.error("Failed to load LightGBM model: %s", e)

    def _load_catboost(self):
        """Load the CatBoost classifier from its .cbm binary."""
        path = TRAINED_DIR / "cat_football.cbm"
        if not path.exists():
            return
        try:
            from catboost import CatBoostClassifier
            model = CatBoostClassifier()
            model.load_model(str(path))
            self.models['cat'] = model
            logger.info("Loaded CatBoost model")
        except ImportError:
            logger.warning("CatBoost not installed")
        except Exception as e:
            logger.error("Failed to load CatBoost model: %s", e)

    def _load_neural_net(self):
        """Load the PyTorch MLP from a saved state dict."""
        path = TRAINED_DIR / "nn_football.pt"
        if not path.exists():
            return
        try:
            import torch
            import torch.nn as nn

            class FootballNet(nn.Module):
                # Architecture must match the training-time definition exactly,
                # otherwise load_state_dict() fails: 8 -> 128 -> 64 -> 3.
                def __init__(self, input_dim=8, hidden=128):
                    super().__init__()
                    self.net = nn.Sequential(
                        nn.Linear(input_dim, hidden),
                        nn.ReLU(), nn.Dropout(0.3),
                        nn.Linear(hidden, 64),
                        nn.ReLU(), nn.Dropout(0.2),
                        nn.Linear(64, 3)
                    )

                def forward(self, x):
                    return self.net(x)

            model = FootballNet()
            # map_location='cpu' so GPU-trained weights load on CPU-only hosts.
            model.load_state_dict(torch.load(path, map_location='cpu'))
            model.eval()  # disable dropout for inference
            self.models['nn'] = model
            logger.info("Loaded PyTorch neural network")
        except ImportError:
            logger.warning("PyTorch not installed")
        except Exception as e:
            logger.error("Failed to load PyTorch model: %s", e)

    def _load_onnx(self):
        """Load the ONNX transformer session for fast inference."""
        path = TRAINED_DIR / "football_transformer.onnx"
        if not path.exists():
            return
        try:
            import onnxruntime as ort
            self.models['onnx'] = ort.InferenceSession(str(path))
            logger.info("Loaded ONNX transformer")
        except ImportError:
            logger.warning("ONNX Runtime not installed")
        except Exception as e:
            logger.error("Failed to load ONNX model: %s", e)

    def get_elo(self, team: str) -> float:
        """Return the Elo rating for *team*, or 1500.0 if unknown.

        Falls back to a case-insensitive substring match in either
        direction so that e.g. 'Arsenal FC' still resolves to 'Arsenal'.
        """
        if team in self.elo_ratings:
            return self.elo_ratings[team]
        team_lower = team.lower()
        for name, elo in self.elo_ratings.items():
            name_lower = name.lower()
            if name_lower in team_lower or team_lower in name_lower:
                return elo
        return 1500.0  # conventional Elo baseline for an unrated team

    def build_features(self, home_team: str, away_team: str, league: str = 'premier_league') -> np.ndarray:
        """Build the feature vector for a fixture.

        Tries the comprehensive 153-feature builder first; on any failure
        falls back to a basic 8-feature vector (encoded teams, Elo ratings,
        Elo difference, date parts). Always returns shape (1, n_features).
        """
        try:
            from .comprehensive_features import build_match_features
            features = build_match_features(home_team, away_team, league)
            logger.debug(f"Built {features.shape[1]} features for {home_team} vs {away_team}")
            return features
        except Exception as e:
            logger.warning(f"Comprehensive features failed, using fallback: {e}")
        # Fallback: basic features.
        home_elo = self.get_elo(home_team)
        away_elo = self.get_elo(away_team)
        home_enc = away_enc = 0
        team_enc = self.encoders.get('team_enc')
        if team_enc is not None:
            try:
                home_enc = team_enc.transform([home_team])[0]
                away_enc = team_enc.transform([away_team])[0]
            except Exception:
                # Team unseen at training time -- use the 0 sentinel.
                home_enc = away_enc = 0
        import datetime
        now = datetime.datetime.now()
        features = np.array([
            home_enc, away_enc,
            home_elo, away_elo,
            home_elo - away_elo,
            now.year, now.month, now.weekday()
        ], dtype=np.float32)
        return features.reshape(1, -1)

    def predict(self, home_team: str, away_team: str) -> Dict:
        """Weighted-ensemble prediction for a single fixture.

        Returns a dict with per-outcome probabilities, the predicted
        outcome, its confidence, and the models that were loaded -- or
        ``{'error': ...}`` when no model could produce a prediction.
        """
        if not self._loaded:
            self.load_all()
        if not self.models:
            return {'error': 'No trained models available'}
        features = self.build_features(home_team, away_team)
        # Per-model ensemble weights; metadata may override the defaults.
        weights = self.metadata.get('ensemble_weights', {
            'xgb': 0.3, 'lgb': 0.3, 'cat': 0.25, 'nn': 0.15
        })
        probs = np.zeros(3)
        total_weight = 0.0
        # XGBoost (skip on feature-count mismatch, like the other models)
        if 'xgb' in self.models:
            try:
                probs += weights.get('xgb', 0.3) * self.models['xgb'].predict_proba(features)[0]
                total_weight += weights.get('xgb', 0.3)
            except Exception as e:
                logger.debug(f"XGBoost skipped: {e}")
        # LightGBM Booster.predict returns raw probabilities; shape varies.
        if 'lgb' in self.models:
            try:
                lgb_probs = np.asarray(self.models['lgb'].predict(features)).ravel()
                if lgb_probs.size < 3:
                    raise ValueError(f"unexpected LightGBM output size {lgb_probs.size}")
                lgb_probs = lgb_probs[:3]
                lgb_probs = lgb_probs / lgb_probs.sum()
                probs += weights.get('lgb', 0.3) * lgb_probs
                total_weight += weights.get('lgb', 0.3)
            except Exception as e:
                # Feature mismatch or bad output shape -- skip this model.
                logger.debug(f"LightGBM skipped: {e}")
        # CatBoost
        if 'cat' in self.models:
            try:
                cat_probs = self.models['cat'].predict_proba(features)[0]
                probs += weights.get('cat', 0.25) * cat_probs
                total_weight += weights.get('cat', 0.25)
            except Exception as e:
                logger.debug(f"CatBoost skipped: {e}")
        # Neural net (features are scaled first when a scaler is available)
        if 'nn' in self.models:
            try:
                import torch
                scaled = self.scaler.transform(features) if self.scaler else features
                with torch.no_grad():
                    nn_out = torch.softmax(self.models['nn'](torch.FloatTensor(scaled)), dim=1).numpy()[0]
                probs += weights.get('nn', 0.15) * nn_out
                total_weight += weights.get('nn', 0.15)
            except Exception as e:
                logger.debug(f"Neural Net skipped: {e}")
        if total_weight == 0:
            # Every model was skipped: avoid a 0/0 division that would
            # silently return NaN probabilities.
            return {'error': 'All trained models failed to produce predictions'}
        probs = probs / total_weight
        probs = probs / probs.sum()  # renormalize against float drift
        # Class order as produced at training time; default is alphabetical.
        classes = self.metadata.get('classes', ['A', 'D', 'H'])
        pred_idx = int(probs.argmax())
        outcome_names = {'H': 'Home Win', 'D': 'Draw', 'A': 'Away Win'}
        return {
            'home_team': home_team,
            'away_team': away_team,
            'home_win_prob': float(probs[classes.index('H')] if 'H' in classes else probs[0]),
            'draw_prob': float(probs[classes.index('D')] if 'D' in classes else probs[1]),
            'away_win_prob': float(probs[classes.index('A')] if 'A' in classes else probs[2]),
            'predicted_outcome': outcome_names.get(classes[pred_idx], classes[pred_idx]),
            'confidence': float(probs[pred_idx]),
            'models_used': list(self.models.keys())
        }
# Lazily-created module-level singleton loader.
_loader: Optional[TrainedModelLoader] = None


def get_trained_loader() -> TrainedModelLoader:
    """Return the shared loader, creating and loading it on first use."""
    global _loader
    if _loader is None:
        instance = TrainedModelLoader()
        instance.load_all()
        _loader = instance
    return _loader
def predict_with_trained(home: str, away: str) -> Dict:
    """Convenience wrapper: run an ensemble prediction via the shared loader."""
    loader = get_trained_loader()
    return loader.predict(home, away)