# footypredict-pro/src/models/trained_loader.py
# nananie143's picture
# feat: Add src/models/trained_loader.py
# 2e6fc13 verified
"""
Trained Model Loader
Loads models trained on Kaggle and exported to models/trained/
Supports: XGBoost, LightGBM, CatBoost, PyTorch, ONNX
"""
import os
import json
import pickle
import logging
from pathlib import Path
from typing import Dict, Optional, Any
import numpy as np
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# The "models" directory sits three levels above this file.
MODELS_DIR = Path(__file__).parent.parent.parent.joinpath("models")
# Exported model files (XGBoost, LightGBM, CatBoost, PyTorch, ONNX).
TRAINED_DIR = MODELS_DIR.joinpath("trained")
# Encoders, Elo ratings and model metadata.
CONFIG_DIR = MODELS_DIR.joinpath("config")
class TrainedModelLoader:
    """Load models trained on Kaggle and serve ensemble predictions.

    Model files are read from ``TRAINED_DIR`` and the auxiliary artefacts
    (encoders, Elo ratings, metadata) from ``CONFIG_DIR``. Every artefact is
    optional: a file that is missing, unreadable, or whose library is not
    installed is skipped without affecting the others.

    Note: the ONNX session is loaded into ``self.models['onnx']`` but is not
    part of the ensemble computed by :meth:`predict`.
    """

    # Elo rating returned when a team cannot be matched at all.
    _DEFAULT_ELO = 1500.0
    # Ensemble weights used when the metadata does not provide them.
    _DEFAULT_WEIGHTS = {'xgb': 0.3, 'lgb': 0.3, 'cat': 0.25, 'nn': 0.15}
    # Class order used when the metadata does not provide a valid one.
    _DEFAULT_CLASSES = ('A', 'D', 'H')
    # Human-readable names for the single-letter outcome labels.
    _OUTCOME_LABELS = {'H': 'Home Win', 'D': 'Draw', 'A': 'Away Win'}

    def __init__(self):
        self.models: Dict[str, Any] = {}
        self.encoders: Dict[str, Any] = {}
        self.elo_ratings: Dict[str, float] = {}
        self.metadata: Dict[str, Any] = {}
        self.scaler = None
        self._loaded = False

    def load_all(self) -> bool:
        """Load all available trained models.

        Returns:
            True when at least one model was loaded, False otherwise
            (including when an unexpected error interrupts loading).
        """
        try:
            self._load_config()
            self._load_xgboost()
            self._load_lightgbm()
            self._load_catboost()
            self._load_neural_net()
            self._load_onnx()
            self._loaded = bool(self.models)
            logger.info(f"Loaded {len(self.models)} trained models")
            return self._loaded
        except Exception as e:
            # Safety net only: each loader already handles its own failures.
            logger.error(f"Error loading models: {e}")
            return False

    @staticmethod
    def _read_json_dict(path, label):
        """Return the JSON object stored at *path*, or None if absent/invalid."""
        if not path.exists():
            return None
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            logger.error(f"Could not read {label} from {path}: {e}")
            return None
        if not isinstance(data, dict):
            logger.warning(f"Ignoring {label} in {path}: expected a JSON object")
            return None
        return data

    def _load_config(self):
        """Load encoders, elo ratings, and metadata.

        Each file is handled independently so that one broken file does not
        prevent the others (or the models) from loading.
        """
        # Encoders
        enc_path = CONFIG_DIR / "encoders.pkl"
        if enc_path.exists():
            try:
                # SECURITY: unpickling executes arbitrary code. This file must
                # only ever come from a trusted training run.
                with open(enc_path, 'rb') as f:
                    data = pickle.load(f)
            except Exception as e:
                logger.error(f"Could not load encoders from {enc_path}: {e}")
            else:
                if isinstance(data, dict):
                    self.encoders = data
                    self.scaler = data.get('scaler')
                    logger.info("Loaded encoders")
                else:
                    logger.warning(f"Ignoring encoders in {enc_path}: expected a dict")

        # Elo ratings
        elo = self._read_json_dict(CONFIG_DIR / "elo_ratings.json", "Elo ratings")
        if elo is not None:
            self.elo_ratings = elo
            logger.info(f"Loaded {len(self.elo_ratings)} team Elo ratings")

        # Metadata
        meta = self._read_json_dict(CONFIG_DIR / "model_meta.json", "model metadata")
        if meta is not None:
            self.metadata = meta
            logger.info("Loaded model metadata")

    def _load_xgboost(self):
        """Load XGBoost model"""
        path = TRAINED_DIR / "xgb_football.json"
        if not path.exists():
            return
        try:
            from xgboost import XGBClassifier
            model = XGBClassifier()
            model.load_model(str(path))
            self.models['xgb'] = model
            logger.info("Loaded XGBoost model")
        except ImportError:
            logger.warning("XGBoost not installed")
        except Exception as e:
            # A corrupt file must not stop the remaining models from loading.
            logger.error(f"Failed to load XGBoost model from {path}: {e}")

    def _load_lightgbm(self):
        """Load LightGBM model"""
        path = TRAINED_DIR / "lgb_football.txt"
        if not path.exists():
            return
        try:
            import lightgbm as lgb
            model = lgb.Booster(model_file=str(path))
            self.models['lgb'] = model
            logger.info("Loaded LightGBM model")
        except ImportError:
            logger.warning("LightGBM not installed")
        except Exception as e:
            logger.error(f"Failed to load LightGBM model from {path}: {e}")

    def _load_catboost(self):
        """Load CatBoost model"""
        path = TRAINED_DIR / "cat_football.cbm"
        if not path.exists():
            return
        try:
            from catboost import CatBoostClassifier
            model = CatBoostClassifier()
            model.load_model(str(path))
            self.models['cat'] = model
            logger.info("Loaded CatBoost model")
        except ImportError:
            logger.warning("CatBoost not installed")
        except Exception as e:
            logger.error(f"Failed to load CatBoost model from {path}: {e}")

    def _load_neural_net(self):
        """Load PyTorch neural network.

        The input and first hidden layer sizes are taken from the checkpoint's
        first linear layer when available, so a network trained on a feature
        vector wider than the default 8 still loads.
        """
        path = TRAINED_DIR / "nn_football.pt"
        if not path.exists():
            return
        try:
            import torch
            import torch.nn as nn

            class FootballNet(nn.Module):
                def __init__(self, input_dim=8, hidden=128):
                    super().__init__()
                    self.net = nn.Sequential(
                        nn.Linear(input_dim, hidden),
                        nn.ReLU(), nn.Dropout(0.3),
                        nn.Linear(hidden, 64),
                        nn.ReLU(), nn.Dropout(0.2),
                        nn.Linear(64, 3)
                    )

                def forward(self, x):
                    return self.net(x)

            state = torch.load(path, map_location='cpu')
            # nn.Linear stores its weight as (out_features, in_features).
            first_weight = state.get('net.0.weight') if isinstance(state, dict) else None
            if first_weight is not None and getattr(first_weight, 'ndim', 0) == 2:
                hidden, input_dim = (int(d) for d in first_weight.shape)
                model = FootballNet(input_dim=input_dim, hidden=hidden)
            else:
                model = FootballNet()
            model.load_state_dict(state)
            model.eval()
            self.models['nn'] = model
            logger.info("Loaded PyTorch neural network")
        except ImportError:
            logger.warning("PyTorch not installed")
        except Exception as e:
            logger.error(f"Failed to load PyTorch neural network from {path}: {e}")

    def _load_onnx(self):
        """Load ONNX model for fast inference"""
        path = TRAINED_DIR / "football_transformer.onnx"
        if not path.exists():
            return
        try:
            import onnxruntime as ort
            session = ort.InferenceSession(str(path))
            self.models['onnx'] = session
            logger.info("Loaded ONNX transformer")
        except ImportError:
            logger.warning("ONNX Runtime not installed")
        except Exception as e:
            logger.error(f"Failed to load ONNX transformer from {path}: {e}")

    def get_elo(self, team: str) -> float:
        """Get Elo rating for a team.

        An exact key match wins. Otherwise the first stored team whose
        lower-cased name contains, or is contained in, the lower-cased query
        is used. Blank queries and unmatched teams get the default of 1500.0.
        """
        if team in self.elo_ratings:
            return self.elo_ratings[team]

        team_lower = team.strip().lower() if isinstance(team, str) else ''
        if not team_lower:
            # An empty string is a substring of every name and would
            # otherwise "match" the first team in the table.
            return self._DEFAULT_ELO

        # Fuzzy match
        for name, elo in self.elo_ratings.items():
            name_lower = str(name).lower()
            if not name_lower:
                continue  # an empty key would match every query
            if name_lower in team_lower or team_lower in name_lower:
                return elo
        return self._DEFAULT_ELO  # Default

    def build_features(self, home_team: str, away_team: str, league: str = 'premier_league') -> np.ndarray:
        """Build the feature vector for a match.

        The comprehensive feature builder (described by the original author
        as producing 153 features) is tried first. If it is unavailable or
        fails, a basic 8-feature row is built instead: encoded home/away
        team, home/away Elo, Elo difference, and the current year, month and
        weekday.

        Returns:
            The comprehensive builder's result, or a ``(1, 8)`` float32 array
            from the fallback path.
        """
        try:
            # Use comprehensive feature builder
            from .comprehensive_features import build_match_features
            features = build_match_features(home_team, away_team, league)
            logger.debug(f"Built {features.shape[1]} features for {home_team} vs {away_team}")
            return features
        except Exception as e:
            logger.warning(f"Comprehensive features failed, using fallback: {e}")

        # Fallback to basic features
        home_elo = self.get_elo(home_team)
        away_elo = self.get_elo(away_team)

        # Encode teams; both fall back to 0 if either team is unknown.
        home_enc, away_enc = 0, 0
        team_enc = self.encoders.get('team_enc')
        if team_enc is not None:
            try:
                home_enc = team_enc.transform([home_team])[0]
                away_enc = team_enc.transform([away_team])[0]
            except Exception:
                home_enc, away_enc = 0, 0

        # Build basic feature vector
        import datetime
        now = datetime.datetime.now()
        features = np.array([
            home_enc, away_enc,
            home_elo, away_elo,
            home_elo - away_elo,
            now.year, now.month, now.weekday()
        ], dtype=np.float32)
        return features.reshape(1, -1)

    def _proba_from_classifier(self, key, features):
        """Return the first row of ``predict_proba`` for the model at *key*."""
        return self.models[key].predict_proba(features)[0]

    def _proba_from_lightgbm(self, features):
        """Return three normalised class probabilities from the LightGBM booster.

        Raises:
            ValueError: if the output is not 1-D/2-D, has fewer than three
                values, or does not sum to a positive number.
        """
        raw = np.asarray(self.models['lgb'].predict(features), dtype=float)
        if raw.ndim == 2:
            raw = raw[0]
        elif raw.ndim != 1:
            raise ValueError(f"unexpected output with {raw.ndim} dimensions")
        if raw.size < 3:
            raise ValueError(f"expected at least 3 class scores, got {raw.size}")
        row = raw[:3]
        total = row.sum()
        if not np.isfinite(total) or total <= 0:
            raise ValueError("class scores do not sum to a positive value")
        return row / total

    def _proba_from_neural_net(self, features):
        """Return softmax class probabilities from the PyTorch network."""
        import torch
        scaled = self.scaler.transform(features) if self.scaler is not None else features
        with torch.no_grad():
            inputs = torch.as_tensor(np.asarray(scaled), dtype=torch.float32)
            return torch.softmax(self.models['nn'](inputs), dim=1).numpy()[0]

    def predict(self, home_team: str, away_team: str, league: str = 'premier_league') -> Dict:
        """Get ensemble prediction.

        Each available model (XGBoost, LightGBM, CatBoost, neural net)
        contributes its class probabilities, weighted by the metadata's
        ``ensemble_weights`` (or built-in defaults). A model that raises or
        returns an unusable output is skipped.

        Args:
            home_team: Name of the home team.
            away_team: Name of the away team.
            league: League identifier forwarded to the feature builder.

        Returns:
            A dict with the team names, ``home_win_prob``, ``draw_prob``,
            ``away_win_prob``, ``predicted_outcome``, ``confidence`` and
            ``models_used`` (the models that actually contributed); or a dict
            with a single ``error`` key when no prediction could be made.
        """
        if not self._loaded:
            self.load_all()
        if not self.models:
            return {'error': 'No trained models available'}

        features = self.build_features(home_team, away_team, league)

        # Ensemble weights
        weights = self.metadata.get('ensemble_weights')
        if not isinstance(weights, dict) or not weights:
            weights = self._DEFAULT_WEIGHTS

        scorers = (
            ('xgb', 'XGBoost', lambda: self._proba_from_classifier('xgb', features)),
            ('lgb', 'LightGBM', lambda: self._proba_from_lightgbm(features)),
            ('cat', 'CatBoost', lambda: self._proba_from_classifier('cat', features)),
            ('nn', 'Neural Net', lambda: self._proba_from_neural_net(features)),
        )

        probs = np.zeros(3)
        total_weight = 0.0
        models_used = []
        for key, label, scorer in scorers:
            if key not in self.models:
                continue
            try:
                model_probs = np.asarray(scorer(), dtype=float).reshape(-1)
                if model_probs.shape != (3,):
                    raise ValueError(f"expected 3 class probabilities, got {model_probs.size}")
                if not np.all(np.isfinite(model_probs)):
                    raise ValueError("non-finite class probabilities")
            except Exception as e:
                # Typically a feature-count or scaler mismatch - skip this model
                logger.debug(f"{label} skipped: {e}")
                continue
            weight = weights.get(key, self._DEFAULT_WEIGHTS[key])
            probs += weight * model_probs
            total_weight += weight
            models_used.append(key)

        if total_weight <= 0:
            # Every model was skipped (or only non-ensemble models are loaded);
            # dividing here would produce NaN probabilities.
            return {'error': 'No trained model produced a usable prediction'}

        probs = probs / total_weight
        norm = probs.sum()
        if not np.isfinite(norm) or norm <= 0:
            return {'error': 'No trained model produced a usable prediction'}
        # Normalize
        probs = probs / norm

        # Get classes; anything other than three labels cannot index `probs`.
        classes = self.metadata.get('classes')
        if not isinstance(classes, (list, tuple)) or len(classes) != 3:
            classes = self._DEFAULT_CLASSES
        classes = list(classes)

        def class_prob(label, fallback_idx):
            idx = classes.index(label) if label in classes else fallback_idx
            return float(probs[idx])

        pred_idx = int(probs.argmax())
        predicted = classes[pred_idx]
        return {
            'home_team': home_team,
            'away_team': away_team,
            'home_win_prob': class_prob('H', 0),
            'draw_prob': class_prob('D', 1),
            'away_win_prob': class_prob('A', 2),
            # Exact lookup: chained str.replace would mangle longer labels.
            'predicted_outcome': self._OUTCOME_LABELS.get(predicted, str(predicted)),
            'confidence': float(probs[pred_idx]),
            'models_used': models_used
        }
# Process-wide loader, created on the first call to get_trained_loader().
_loader: Optional[TrainedModelLoader] = None


def get_trained_loader() -> TrainedModelLoader:
    """Return the shared TrainedModelLoader, creating and loading it on first use."""
    global _loader
    if _loader is not None:
        return _loader
    # Publish the instance before loading, exactly as callers have always seen it.
    _loader = TrainedModelLoader()
    _loader.load_all()
    return _loader
def predict_with_trained(home: str, away: str) -> Dict:
    """Predict the outcome of *home* vs *away* using the shared trained-model loader."""
    loader = get_trained_loader()
    return loader.predict(home, away)