Spaces:
Sleeping
Sleeping
| """CatBoost model for stock prediction.""" | |
| import logging | |
| from typing import Optional | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| from catboost import CatBoostClassifier, CatBoostRegressor | |
| from src.models.base import BaseModel, PredictionResult | |
| logger = logging.getLogger(__name__) | |
class CatBoostModel(BaseModel):
    """Multi-output CatBoost model.

    Holds three independent CatBoost estimators trained on the same feature
    matrix:

    * ``direction_model``  - 3-class ``CatBoostClassifier`` (MultiClass loss)
    * ``magnitude_model``  - ``CatBoostRegressor`` (RMSE loss)
    * ``volatility_model`` - ``CatBoostRegressor`` (RMSE loss)

    Target columns are looked up by name as ``direction_{horizon}d``,
    ``magnitude_{horizon}d`` and ``volatility_{horizon}d``.
    """

    def __init__(self, stock_type: str = "large_cap", horizon: int = 5, **kwargs):
        """Configure hyper-parameters; no estimator is created until ``fit``.

        Args:
            stock_type: Forwarded to ``BaseModel.__init__``.
            horizon: Forwarded to ``BaseModel.__init__``; also used to build
                the target column names in ``fit``.
            **kwargs: Optional overrides for ``depth``, ``learning_rate``,
                ``iterations`` and ``l2_leaf_reg``. Other keys are ignored.
        """
        super().__init__(name="catboost", stock_type=stock_type, horizon=horizon)
        # Shared by all three estimators; seed and verbosity are fixed.
        self.params = {
            "depth": kwargs.get("depth", 6),
            "learning_rate": kwargs.get("learning_rate", 0.05),
            "iterations": kwargs.get("iterations", 500),
            "l2_leaf_reg": kwargs.get("l2_leaf_reg", 3),
            "random_seed": 42,
            "verbose": 0,
        }
        self.direction_model = None
        self.magnitude_model = None
        self.volatility_model = None

    def fit(
        self,
        X_train: pd.DataFrame,
        y_train: pd.DataFrame,
        X_val: Optional[pd.DataFrame] = None,
        y_val: Optional[pd.DataFrame] = None,
    ) -> "CatBoostModel":
        """Train the direction, magnitude and volatility estimators.

        Missing target values are replaced with 0 before training. Validation
        data is passed to CatBoost as ``eval_set`` only when both ``X_val``
        and ``y_val`` are given; otherwise ``eval_set`` is ``None``.

        Args:
            X_train: Training features.
            y_train: Training targets; must contain the three horizon columns.
            X_val: Optional validation features.
            y_val: Optional validation targets with the same columns.

        Returns:
            ``self``, to allow call chaining.
        """
        dir_col = f"direction_{self.horizon}d"
        mag_col = f"magnitude_{self.horizon}d"
        vol_col = f"volatility_{self.horizon}d"
        has_val = X_val is not None and y_val is not None

        # Map direction {-1, 0, 1} to {0, 1, 2} for CatBoost
        y_dir = y_train[dir_col].fillna(0).astype(int) + 1
        self.direction_model = CatBoostClassifier(
            **self.params, loss_function="MultiClass", classes_count=3
        )
        eval_set = None
        if has_val:
            eval_set = (X_val, y_val[dir_col].fillna(0).astype(int) + 1)
        self.direction_model.fit(X_train, y_dir, eval_set=eval_set)

        self.magnitude_model = CatBoostRegressor(**self.params, loss_function="RMSE")
        eval_mag = (X_val, y_val[mag_col].fillna(0)) if has_val else None
        self.magnitude_model.fit(X_train, y_train[mag_col].fillna(0), eval_set=eval_mag)

        self.volatility_model = CatBoostRegressor(**self.params, loss_function="RMSE")
        eval_vol = (X_val, y_val[vol_col].fillna(0)) if has_val else None
        self.volatility_model.fit(X_train, y_train[vol_col].fillna(0), eval_set=eval_vol)

        self.is_fitted = True
        return self

    def predict(self, X: pd.DataFrame) -> PredictionResult:
        """Predict direction, magnitude and volatility for ``X``.

        Args:
            X: Feature matrix.

        Returns:
            A ``PredictionResult`` where ``direction`` is the arg-max class
            shifted back to {-1, 0, 1}, ``direction_proba`` is the raw class
            probability matrix and ``confidence`` is its row-wise maximum.

        Raises:
            RuntimeError: If ``is_fitted`` is falsy.
        """
        if not self.is_fitted:
            raise RuntimeError("Model not fitted")
        dir_proba = self.direction_model.predict_proba(X)
        direction = np.argmax(dir_proba, axis=1) - 1  # map {0,1,2} back to {-1,0,1}
        magnitude = self.magnitude_model.predict(X)
        volatility = self.volatility_model.predict(X)
        confidence = np.max(dir_proba, axis=1)
        return PredictionResult(
            direction=direction,
            direction_proba=dir_proba,
            magnitude=magnitude,
            volatility=volatility,
            confidence=confidence,
        )

    def save(self, path: str) -> None:
        """Serialize the three estimators and configuration to ``path`` with joblib."""
        joblib.dump(
            {
                "direction_model": self.direction_model,
                "magnitude_model": self.magnitude_model,
                "volatility_model": self.volatility_model,
                "params": self.params,
                "stock_type": self.stock_type,
                "horizon": self.horizon,
            },
            path,
        )

    @classmethod
    def load(cls, path: str) -> "CatBoostModel":
        """Rebuild a model from a file written by ``save``.

        Declared as a classmethod so it can be called as
        ``CatBoostModel.load(path)``; without the decorator the path would be
        bound to ``cls`` and the call would fail.

        Args:
            path: Location of the joblib file.

        Returns:
            A new instance with the stored estimators and parameters,
            marked as fitted.
        """
        # NOTE: joblib.load unpickles arbitrary objects - only load trusted files.
        data = joblib.load(path)
        model = cls(stock_type=data["stock_type"], horizon=data["horizon"])
        model.direction_model = data["direction_model"]
        model.magnitude_model = data["magnitude_model"]
        model.volatility_model = data["volatility_model"]
        model.params = data["params"]
        model.is_fitted = True
        return model