Spaces:
Runtime error
Runtime error
| """ | |
| CatBoost Model Wrapper | |
| Standardized wrapper for CatBoost football prediction. | |
| Part of the complete blueprint implementation. | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| from typing import Dict, List, Optional | |
| from pathlib import Path | |
| import logging | |
| logger = logging.getLogger(__name__) | |
| try: | |
| from catboost import CatBoostClassifier, CatBoostRegressor | |
| CAT_AVAILABLE = True | |
| except ImportError: | |
| CAT_AVAILABLE = False | |
| class CatBoostModel: | |
| """ | |
| CatBoost model wrapper for football predictions. | |
| Supports: | |
| - Native categorical feature handling | |
| - GPU training | |
| - Ordered boosting | |
| """ | |
| DEFAULT_PARAMS = { | |
| 'learning_rate': 0.05, | |
| 'depth': 6, | |
| 'iterations': 200, | |
| 'random_seed': 42, | |
| 'verbose': False, | |
| 'loss_function': 'MultiClass' | |
| } | |
| def __init__( | |
| self, | |
| params: Dict = None, | |
| model_dir: str = "models/saved_models/catboost" | |
| ): | |
| self.params = {**self.DEFAULT_PARAMS, **(params or {})} | |
| self.model_dir = Path(model_dir) | |
| self.model_dir.mkdir(parents=True, exist_ok=True) | |
| self.result_model = None | |
| self.goals_model = None | |
| self.btts_model = None | |
| self.feature_names: List[str] = [] | |
| self.is_fitted = False | |
| def fit( | |
| self, | |
| X: pd.DataFrame, | |
| y_result: pd.Series, | |
| y_home_goals: pd.Series = None, | |
| y_away_goals: pd.Series = None, | |
| y_btts: pd.Series = None, | |
| cat_features: List[str] = None | |
| ) -> 'CatBoostModel': | |
| """Fit all prediction models.""" | |
| if not CAT_AVAILABLE: | |
| logger.error("CatBoost not installed") | |
| return self | |
| self.feature_names = X.columns.tolist() | |
| cat_idx = [X.columns.get_loc(c) for c in (cat_features or []) if c in X.columns] | |
| # Result model | |
| logger.info("Training CatBoost result model...") | |
| self.result_model = CatBoostClassifier(**self.params) | |
| self.result_model.fit(X, y_result, cat_features=cat_idx if cat_idx else None) | |
| # Goals models | |
| if y_home_goals is not None: | |
| logger.info("Training CatBoost goals model...") | |
| goals_params = {**self.params} | |
| goals_params['loss_function'] = 'RMSE' | |
| self.goals_model = { | |
| 'home': CatBoostRegressor(**goals_params), | |
| 'away': CatBoostRegressor(**goals_params) | |
| } | |
| self.goals_model['home'].fit(X, y_home_goals, cat_features=cat_idx if cat_idx else None) | |
| if y_away_goals is not None: | |
| self.goals_model['away'].fit(X, y_away_goals, cat_features=cat_idx if cat_idx else None) | |
| # BTTS model | |
| if y_btts is not None: | |
| logger.info("Training CatBoost BTTS model...") | |
| btts_params = {**self.params} | |
| btts_params['loss_function'] = 'Logloss' | |
| self.btts_model = CatBoostClassifier(**btts_params) | |
| self.btts_model.fit(X, y_btts, cat_features=cat_idx if cat_idx else None) | |
| self.is_fitted = True | |
| logger.info("CatBoost training complete") | |
| return self | |
| def predict(self, X: pd.DataFrame) -> Dict: | |
| """Make predictions.""" | |
| if not self.is_fitted: | |
| return {} | |
| predictions = {} | |
| if self.result_model: | |
| probs = self.result_model.predict_proba(X) | |
| predictions['1x2'] = { | |
| 'home': float(probs[0, 0]), | |
| 'draw': float(probs[0, 1]), | |
| 'away': float(probs[0, 2]) | |
| } | |
| predictions['result'] = ['H', 'D', 'A'][np.argmax(probs[0])] | |
| if self.goals_model: | |
| predictions['home_goals'] = float(self.goals_model['home'].predict(X)[0]) | |
| predictions['away_goals'] = float(self.goals_model['away'].predict(X)[0]) | |
| total = predictions['home_goals'] + predictions['away_goals'] | |
| predictions['over_2.5'] = 1 / (1 + np.exp(-(total - 2.5))) | |
| if self.btts_model: | |
| btts_prob = self.btts_model.predict_proba(X) | |
| predictions['btts'] = float(btts_prob[0, 1]) if btts_prob.shape[1] > 1 else 0.5 | |
| return predictions | |
| def get_feature_importance(self, top_n: int = 20) -> pd.DataFrame: | |
| """Get feature importance.""" | |
| if not self.result_model: | |
| return pd.DataFrame() | |
| importance = self.result_model.feature_importances_ | |
| return pd.DataFrame({ | |
| 'feature': self.feature_names, | |
| 'importance': importance | |
| }).sort_values('importance', ascending=False).head(top_n) | |
| def save(self, name: str = "catboost_model"): | |
| """Save models.""" | |
| if self.result_model: | |
| self.result_model.save_model(str(self.model_dir / f"{name}_result.cbm")) | |
| if self.goals_model: | |
| self.goals_model['home'].save_model(str(self.model_dir / f"{name}_home_goals.cbm")) | |
| self.goals_model['away'].save_model(str(self.model_dir / f"{name}_away_goals.cbm")) | |
| if self.btts_model: | |
| self.btts_model.save_model(str(self.model_dir / f"{name}_btts.cbm")) | |
| def load(self, name: str = "catboost_model") -> bool: | |
| """Load models.""" | |
| try: | |
| result_path = self.model_dir / f"{name}_result.cbm" | |
| if result_path.exists(): | |
| self.result_model = CatBoostClassifier() | |
| self.result_model.load_model(str(result_path)) | |
| home_path = self.model_dir / f"{name}_home_goals.cbm" | |
| away_path = self.model_dir / f"{name}_away_goals.cbm" | |
| if home_path.exists(): | |
| self.goals_model = { | |
| 'home': CatBoostRegressor(), | |
| 'away': CatBoostRegressor() | |
| } | |
| self.goals_model['home'].load_model(str(home_path)) | |
| if away_path.exists(): | |
| self.goals_model['away'].load_model(str(away_path)) | |
| btts_path = self.model_dir / f"{name}_btts.cbm" | |
| if btts_path.exists(): | |
| self.btts_model = CatBoostClassifier() | |
| self.btts_model.load_model(str(btts_path)) | |
| self.is_fitted = self.result_model is not None | |
| return True | |
| except Exception as e: | |
| logger.error(f"Load failed: {e}") | |
| return False | |
| _model: Optional[CatBoostModel] = None | |
| def get_model() -> CatBoostModel: | |
| global _model | |
| if _model is None: | |
| _model = CatBoostModel() | |
| return _model | |