Spaces:
Runtime error
Runtime error
File size: 11,266 Bytes
"""
Trained Model Loader
Loads models trained on Kaggle and exported to models/trained/
Supports: XGBoost, LightGBM, CatBoost, PyTorch, ONNX
"""
import os
import json
import pickle
import logging
from pathlib import Path
from typing import Dict, Optional, Any
import numpy as np
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Artefact locations. MODELS_DIR is the "models" folder two levels above the
# directory that contains this file; trained models and their supporting
# configuration live in sub-folders of it.
MODELS_DIR = Path(__file__).parent.parent.parent.joinpath("models")
TRAINED_DIR = MODELS_DIR.joinpath("trained")
CONFIG_DIR = MODELS_DIR.joinpath("config")
class TrainedModelLoader:
    """Load the Kaggle-trained models and combine them into one prediction.

    Model files are looked up under ``TRAINED_DIR`` and supporting data
    (encoders, Elo ratings, metadata) under ``CONFIG_DIR``. Every file is
    optional; whatever is found is stored in ``self.models`` under a short
    key (``'xgb'``, ``'lgb'``, ``'cat'``, ``'nn'``, ``'onnx'``).
    """

    # Rating returned by get_elo() when no team matches.
    _DEFAULT_ELO = 1500.0
    # Display labels for the single-letter outcome classes.
    _OUTCOME_LABELS = {'H': 'Home Win', 'D': 'Draw', 'A': 'Away Win'}

    def __init__(self):
        """Create an empty loader; nothing is read from disk until load_all()."""
        self.models: Dict[str, Any] = {}
        self.encoders: Dict[str, Any] = {}
        self.elo_ratings: Dict[str, float] = {}
        self.metadata: Dict[str, Any] = {}
        self.scaler = None
        self._loaded = False

    def load_all(self) -> bool:
        """Load every available artefact.

        Each loading step runs independently, so a corrupt or incompatible
        file is logged and skipped instead of preventing the remaining
        models from being loaded.

        Returns:
            True when at least one model was loaded, False otherwise.
        """
        steps = (
            self._load_config,
            self._load_xgboost,
            self._load_lightgbm,
            self._load_catboost,
            self._load_neural_net,
            self._load_onnx,
        )
        for step in steps:
            try:
                step()
            except Exception as e:
                logger.error(f"Error loading models: {e}")
        self._loaded = len(self.models) > 0
        logger.info(f"Loaded {len(self.models)} trained models")
        return self._loaded

    def _load_config(self):
        """Load encoders (and the scaler they carry), Elo ratings and metadata."""
        # Encoders
        enc_path = CONFIG_DIR / "encoders.pkl"
        if enc_path.exists():
            # NOTE(security): pickle executes arbitrary code on load. This is
            # only acceptable because the file is a project-owned artefact;
            # never point CONFIG_DIR at untrusted data.
            with open(enc_path, 'rb') as f:
                data = pickle.load(f)
            self.encoders = data
            self.scaler = data.get('scaler')
            logger.info("Loaded encoders")

        # Elo ratings
        elo_path = CONFIG_DIR / "elo_ratings.json"
        if elo_path.exists():
            # Explicit UTF-8 so non-ASCII team names load the same on every OS.
            with open(elo_path, 'r', encoding='utf-8') as f:
                self.elo_ratings = json.load(f)
            logger.info(f"Loaded {len(self.elo_ratings)} team Elo ratings")

        # Metadata
        meta_path = CONFIG_DIR / "model_meta.json"
        if meta_path.exists():
            with open(meta_path, 'r', encoding='utf-8') as f:
                self.metadata = json.load(f)
            logger.info("Loaded model metadata")

    def _load_xgboost(self):
        """Load the XGBoost classifier from ``xgb_football.json`` if present."""
        path = TRAINED_DIR / "xgb_football.json"
        if not path.exists():
            return
        try:
            from xgboost import XGBClassifier
        except ImportError:
            logger.warning("XGBoost not installed")
            return
        model = XGBClassifier()
        model.load_model(str(path))
        self.models['xgb'] = model
        logger.info("Loaded XGBoost model")

    def _load_lightgbm(self):
        """Load the LightGBM booster from ``lgb_football.txt`` if present."""
        path = TRAINED_DIR / "lgb_football.txt"
        if not path.exists():
            return
        try:
            import lightgbm as lgb
        except ImportError:
            logger.warning("LightGBM not installed")
            return
        self.models['lgb'] = lgb.Booster(model_file=str(path))
        logger.info("Loaded LightGBM model")

    def _load_catboost(self):
        """Load the CatBoost classifier from ``cat_football.cbm`` if present."""
        path = TRAINED_DIR / "cat_football.cbm"
        if not path.exists():
            return
        try:
            from catboost import CatBoostClassifier
        except ImportError:
            logger.warning("CatBoost not installed")
            return
        model = CatBoostClassifier()
        model.load_model(str(path))
        self.models['cat'] = model
        logger.info("Loaded CatBoost model")

    def _load_neural_net(self):
        """Load the PyTorch network from ``nn_football.pt`` if present.

        The layer sizes are read from the checkpoint's first linear layer so
        a network trained with a different input width or hidden size still
        loads. The defaults (8 inputs, 128 hidden units) are used when the
        checkpoint does not contain that tensor.
        """
        path = TRAINED_DIR / "nn_football.pt"
        if not path.exists():
            return
        try:
            import torch
            import torch.nn as nn
        except ImportError:
            logger.warning("PyTorch not installed")
            return

        class FootballNet(nn.Module):
            def __init__(self, input_dim=8, hidden=128):
                super().__init__()
                self.net = nn.Sequential(
                    nn.Linear(input_dim, hidden),
                    nn.ReLU(), nn.Dropout(0.3),
                    nn.Linear(hidden, 64),
                    nn.ReLU(), nn.Dropout(0.2),
                    nn.Linear(64, 3)
                )

            def forward(self, x):
                return self.net(x)

        state = torch.load(path, map_location='cpu')
        # nn.Linear stores its weight as (out_features, in_features), so the
        # first layer's weight gives (hidden, input_dim).
        first_weight = state.get('net.0.weight') if isinstance(state, dict) else None
        if first_weight is not None:
            hidden, input_dim = first_weight.shape
            model = FootballNet(input_dim=input_dim, hidden=hidden)
        else:
            model = FootballNet()
        model.load_state_dict(state)
        model.eval()
        self.models['nn'] = model
        logger.info("Loaded PyTorch neural network")

    def _load_onnx(self):
        """Load the ONNX session from ``football_transformer.onnx`` if present.

        The session is stored under ``'onnx'`` but predict() does not
        consume it.
        """
        path = TRAINED_DIR / "football_transformer.onnx"
        if not path.exists():
            return
        try:
            import onnxruntime as ort
        except ImportError:
            logger.warning("ONNX Runtime not installed")
            return
        self.models['onnx'] = ort.InferenceSession(str(path))
        logger.info("Loaded ONNX transformer")

    def get_elo(self, team: str) -> float:
        """Return the Elo rating for ``team``.

        An exact key match wins. Otherwise the first stored team whose
        lower-cased name contains, or is contained in, the lower-cased query
        is used. Unmatched (or empty) names get the default rating of 1500.
        """
        if team in self.elo_ratings:
            return self.elo_ratings[team]

        team_lower = team.lower()
        if not team_lower:
            # "" is a substring of every name; without this guard an empty
            # query would return whichever team happens to be stored first.
            return self._DEFAULT_ELO

        # Fuzzy match
        for known, elo in self.elo_ratings.items():
            known_lower = known.lower()
            if not known_lower:
                continue  # an empty key would match every query
            if known_lower in team_lower or team_lower in known_lower:
                return elo
        return self._DEFAULT_ELO

    def build_features(self, home_team: str, away_team: str, league: str = 'premier_league') -> np.ndarray:
        """Build the feature matrix for one match.

        The comprehensive feature builder is tried first (the original
        docstring describes it as a 153-feature vector; that count cannot be
        confirmed from this file). If it fails for any reason, a basic
        fallback row is built instead.

        Returns:
            The comprehensive builder's result, or a float32 array of shape
            (1, 8): encoded home/away team, home/away Elo, Elo difference,
            and the current year, month and weekday.
        """
        try:
            # Use comprehensive feature builder
            from .comprehensive_features import build_match_features
            features = build_match_features(home_team, away_team, league)
            logger.debug(f"Built {features.shape[1]} features for {home_team} vs {away_team}")
            return features
        except Exception as e:
            logger.warning(f"Comprehensive features failed, using fallback: {e}")

        # Fallback to basic features
        home_elo = self.get_elo(home_team)
        away_elo = self.get_elo(away_team)

        # Encode teams; names the encoder cannot transform fall back to 0.
        home_enc, away_enc = 0, 0
        team_enc = self.encoders.get('team_enc')
        if team_enc is not None:
            try:
                home_enc = team_enc.transform([home_team])[0]
                away_enc = team_enc.transform([away_team])[0]
            except Exception:
                home_enc, away_enc = 0, 0

        # Build basic feature vector
        import datetime
        now = datetime.datetime.now()
        features = np.array([
            home_enc, away_enc,
            home_elo, away_elo,
            home_elo - away_elo,
            now.year, now.month, now.weekday()
        ], dtype=np.float32)
        return features.reshape(1, -1)

    def _xgb_probs(self, features: np.ndarray) -> np.ndarray:
        """Return the XGBoost class probabilities for the first row."""
        return self.models['xgb'].predict_proba(features)[0]

    def _lgb_probs(self, features: np.ndarray) -> np.ndarray:
        """Return normalised LightGBM class probabilities for the first row."""
        raw = np.asarray(self.models['lgb'].predict(features), dtype=float)
        # A 2-D result holds one row per sample; anything else is treated as
        # an already-flat vector of class scores.
        row = raw[0] if raw.ndim == 2 else raw.ravel()
        if row.size < 3:
            raise ValueError(f"expected 3 class scores, got {row.size}")
        row = row[:3]
        total = row.sum()
        if total <= 0:
            raise ValueError("class scores do not sum to a positive value")
        return row / total

    def _cat_probs(self, features: np.ndarray) -> np.ndarray:
        """Return the CatBoost class probabilities for the first row."""
        return self.models['cat'].predict_proba(features)[0]

    def _nn_probs(self, features: np.ndarray) -> np.ndarray:
        """Return the softmax output of the neural network for the first row."""
        import torch
        scaled = self.scaler.transform(features) if self.scaler is not None else features
        with torch.no_grad():
            logits = self.models['nn'](torch.FloatTensor(scaled))
            return torch.softmax(logits, dim=1).numpy()[0]

    def predict(self, home_team: str, away_team: str) -> Dict:
        """Return the weighted-ensemble prediction for one match.

        Models are loaded on first use. A model that raises, or that does
        not return exactly three finite probabilities, is skipped and does
        not count towards the ensemble.

        Returns:
            ``{'error': ...}`` when no model is loaded or none produced a
            usable prediction. Otherwise a dict with the team names, the
            home/draw/away probabilities, the predicted outcome label, its
            probability (``confidence``) and ``models_used`` - the keys of
            the models that actually contributed.
        """
        if not self._loaded:
            self.load_all()
        if not self.models:
            return {'error': 'No trained models available'}

        features = self.build_features(home_team, away_team)

        # Ensemble weights
        weights = self.metadata.get('ensemble_weights', {
            'xgb': 0.3, 'lgb': 0.3, 'cat': 0.25, 'nn': 0.15
        })

        # (model key, fallback weight, log label, probability function)
        members = (
            ('xgb', 0.3, 'XGBoost', self._xgb_probs),
            ('lgb', 0.3, 'LightGBM', self._lgb_probs),
            ('cat', 0.25, 'CatBoost', self._cat_probs),
            ('nn', 0.15, 'Neural Net', self._nn_probs),
        )

        probs = np.zeros(3)
        total_weight = 0.0
        models_used = []
        for key, fallback_weight, label, probability_fn in members:
            if key not in self.models:
                continue
            try:
                model_probs = np.asarray(probability_fn(features), dtype=float).ravel()
                if model_probs.shape != (3,) or not np.isfinite(model_probs).all():
                    raise ValueError(f"unusable output of shape {model_probs.shape}")
            except Exception as e:
                # Typically a feature-count mismatch - skip this model
                logger.debug(f"{label} skipped: {e}")
                continue
            weight = weights.get(key, fallback_weight)
            probs += weight * model_probs
            total_weight += weight
            models_used.append(key)

        prob_sum = probs.sum()
        if total_weight <= 0 or not np.isfinite(prob_sum) or prob_sum <= 0:
            # Nothing contributed; dividing would only produce NaNs.
            return {'error': 'No trained model produced a usable prediction'}

        # Normalize (this also absorbs the division by the total weight)
        probs = probs / prob_sum

        # Get classes
        classes = self.metadata.get('classes', ['A', 'D', 'H'])
        pred_idx = int(probs.argmax())
        predicted = classes[pred_idx]

        return {
            'home_team': home_team,
            'away_team': away_team,
            'home_win_prob': float(probs[classes.index('H')] if 'H' in classes else probs[0]),
            'draw_prob': float(probs[classes.index('D')] if 'D' in classes else probs[1]),
            'away_win_prob': float(probs[classes.index('A')] if 'A' in classes else probs[2]),
            # Whole-label lookup: chained str.replace would corrupt class
            # names that merely contain an 'H', 'A' or 'D'.
            'predicted_outcome': self._OUTCOME_LABELS.get(predicted, str(predicted)),
            'confidence': float(probs[pred_idx]),
            'models_used': models_used
        }
# Shared loader instance, created lazily by get_trained_loader().
_loader: Optional[TrainedModelLoader] = None


def get_trained_loader() -> TrainedModelLoader:
    """Return the module-wide loader, creating and loading it on first call."""
    global _loader
    if _loader is not None:
        return _loader
    _loader = TrainedModelLoader()
    _loader.load_all()
    return _loader
def predict_with_trained(home: str, away: str) -> Dict:
    """Predict ``home`` vs ``away`` using the shared trained-model loader."""
    loader = get_trained_loader()
    return loader.predict(home, away)
|