""" backend/services/model_service.py ───────────────────────────────────────────── Carga, gestión e inferencia de los 3 modelos finales: 1. XGBoost (Optuna, EXP-C) – tabular, fast 2. LSTM + Self-Attention (EXP-E) – sequential, best AUC 3. Logistic Regression – calibrated baseline Diseñado para carga única al startup de FastAPI. ───────────────────────────────────────────── """ from __future__ import annotations import logging from typing import Dict, List, Optional import joblib import numpy as np import pandas as pd import torch import torch.nn as nn from backend.config import ( FEATURE_COLUMNS, LOGREG_MODEL_PATH, LSTM_DROPOUT, LSTM_HIDDEN_DIM, LSTM_MAX_SEQ_LEN, LSTM_MODEL_PATH, SCALER_PATH, XGBOOST_MODEL_PATH, ) logger = logging.getLogger(__name__) # ───────────────────────────────────────────── # LSTM Architecture (must match training exactly) # ───────────────────────────────────────────── class LSTMWithAttention(nn.Module): """LSTM + Self-Attention para predicción de win probability. Replica exacta de la arquitectura entrenada en notebook 08 (EXP-E). """ def __init__(self, input_dim: int, hidden_dim: int, dropout: float = 0.3): super().__init__() self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True) self.attention = nn.Linear(hidden_dim, 1) self.dropout = nn.Dropout(dropout) self.fc = nn.Linear(hidden_dim, 1) def forward(self, x: torch.Tensor) -> torch.Tensor: out, _ = self.lstm(x) # (B, T, H) attn_w = torch.softmax(self.attention(out), dim=1) # (B, T, 1) context = (attn_w * out).sum(dim=1) # (B, H) return self.fc(self.dropout(context)) # ───────────────────────────────────────────── # Model Service (Singleton-ish via module state) # ───────────────────────────────────────────── class ModelService: """Servicio centralizado de carga e inferencia. Se inicializa una sola vez al arrancar la app y mantiene los 3 modelos en memoria para inferencia rápida. """ def __init__(self): self._xgb_model = None self._lstm_model: Optional[LSTMWithAttention] = None self._logreg_model = None self._scaler = None self._device = "cpu" # No GPU en HF Spaces gratuito self._loaded = False @property def is_loaded(self) -> bool: return self._loaded # ── Load ────────────────────────────────────────────────── def load_models(self) -> None: """Carga los 3 modelos desde disco. Llamar una sola vez.""" if self._loaded: logger.info("Models already loaded — skipping.") return logger.info("Loading ML models…") # 1. XGBoost try: self._xgb_model = joblib.load(XGBOOST_MODEL_PATH) logger.info("✅ XGBoost loaded from %s", XGBOOST_MODEL_PATH) except Exception as e: logger.error("❌ Failed to load XGBoost: %s", e) raise # 2. Logistic Regression + Scaler try: self._logreg_model = joblib.load(LOGREG_MODEL_PATH) self._scaler = joblib.load(SCALER_PATH) logger.info("✅ LogReg + Scaler loaded") except Exception as e: logger.error("❌ Failed to load LogReg/Scaler: %s", e) raise # 3. LSTM + Self-Attention try: input_dim = len(FEATURE_COLUMNS) self._lstm_model = LSTMWithAttention( input_dim=input_dim, hidden_dim=LSTM_HIDDEN_DIM, dropout=LSTM_DROPOUT, ) checkpoint = torch.load( LSTM_MODEL_PATH, map_location=self._device, weights_only=False, ) # El modelo fue guardado como un diccionario con metadatos if "model_state_dict" in checkpoint: self._lstm_model.load_state_dict(checkpoint["model_state_dict"]) else: self._lstm_model.load_state_dict(checkpoint) self._lstm_model.to(self._device) self._lstm_model.eval() logger.info("✅ LSTM loaded from %s (device=%s)", LSTM_MODEL_PATH, self._device) except Exception as e: logger.error("❌ Failed to load LSTM: %s", e) raise self._loaded = True logger.info("All models loaded successfully.") # ── Predict ─────────────────────────────────────────────── def predict(self, features_df: pd.DataFrame) -> Dict[str, List[float]]: """Genera predicciones minuto-a-minuto con los 3 modelos. Args: features_df: DataFrame con columnas == FEATURE_COLUMNS. Cada fila es un minuto de la partida. Returns: Dict con keys "xgboost", "lstm", "logreg", cada uno una lista de floats (probabilidad de blue_win por minuto). """ if not self._loaded: raise RuntimeError("Models not loaded. Call load_models() first.") # Validate columns missing = set(FEATURE_COLUMNS) - set(features_df.columns) if missing: raise ValueError(f"Missing features in input: {missing}") # Ensure correct column order X = features_df[FEATURE_COLUMNS].astype(np.float32) results: Dict[str, List[float]] = {} # ── XGBoost (tabular, row-by-row) ───────────────────── try: xgb_probs = self._xgb_model.predict_proba(X)[:, 1] results["xgboost"] = xgb_probs.tolist() except Exception as e: logger.error("XGBoost prediction failed: %s", e) results["xgboost"] = [] # ── Logistic Regression (scaled, row-by-row) ────────── try: X_scaled = self._scaler.transform(X) logreg_probs = self._logreg_model.predict_proba(X_scaled)[:, 1] results["logreg"] = logreg_probs.tolist() except Exception as e: logger.error("LogReg prediction failed: %s", e) results["logreg"] = [] # ── LSTM (sequence, padded to MAX_SEQ_LEN) ──────────── try: results["lstm"] = self._predict_lstm(X.values) except Exception as e: logger.error("LSTM prediction failed: %s", e) results["lstm"] = [] return results def _predict_lstm(self, X_arr: np.ndarray) -> List[float]: """Inferencia LSTM minuto-a-minuto (acumulativa). Para cada minuto t, construimos la secuencia [0..t] (max 20 min), la pasamos por el modelo, y obtenemos P(blue_win) hasta ese punto. Esto simula cómo se usaría en producción: la partida avanza y el modelo ve la secuencia acumulada. """ n_minutes = len(X_arr) probs: List[float] = [] for t in range(n_minutes): # Secuencia acumulada hasta minuto t (inclusive) seq = X_arr[:t + 1] # Truncar a MAX_SEQ_LEN (tomar los últimos N minutos) if len(seq) > LSTM_MAX_SEQ_LEN: seq = seq[-LSTM_MAX_SEQ_LEN:] # Post-pad with zeros after real data — matches training exactly # (pad_sequences in notebook fills [:l] then leaves zeros at the end) seq_len = len(seq) if seq_len < LSTM_MAX_SEQ_LEN: pad = np.zeros( (LSTM_MAX_SEQ_LEN - seq_len, seq.shape[1]), dtype=np.float32, ) seq = np.concatenate([seq, pad], axis=0) # (1, MAX_SEQ_LEN, n_features) tensor = torch.tensor(seq, dtype=torch.float32).unsqueeze(0).to(self._device) with torch.no_grad(): logit = self._lstm_model(tensor).squeeze() prob = torch.sigmoid(logit).item() probs.append(prob) return probs # ── Module-level singleton ──────────────────────────────────── model_service = ModelService()