"""CatBoost model for stock prediction."""

import logging
from typing import Optional

import joblib
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, CatBoostRegressor

from src.models.base import BaseModel, PredictionResult

logger = logging.getLogger(__name__)


class CatBoostModel(BaseModel):
    """Multi-output CatBoost model.

    Wraps three independent CatBoost estimators trained on the same features:

    * a 3-class classifier for the ``direction_{horizon}d`` target,
    * an RMSE regressor for the ``magnitude_{horizon}d`` target,
    * an RMSE regressor for the ``volatility_{horizon}d`` target.
    """

    def __init__(self, stock_type: str = "large_cap", horizon: int = 5, **kwargs):
        """Store hyper-parameters; no estimator is built until ``fit``.

        Args:
            stock_type: Forwarded to ``BaseModel.__init__``.
            horizon: Forwarded to ``BaseModel.__init__``; also used to build
                the target column names (``direction_{horizon}d`` etc.).
            **kwargs: Optional overrides for ``depth``, ``learning_rate``,
                ``iterations`` and ``l2_leaf_reg``. Any other key is ignored.
        """
        super().__init__(name="catboost", stock_type=stock_type, horizon=horizon)
        # Shared by all three estimators; seed and verbosity are fixed.
        self.params = {
            "depth": kwargs.get("depth", 6),
            "learning_rate": kwargs.get("learning_rate", 0.05),
            "iterations": kwargs.get("iterations", 500),
            "l2_leaf_reg": kwargs.get("l2_leaf_reg", 3),
            "random_seed": 42,
            "verbose": 0,
        }
        self.direction_model = None
        self.magnitude_model = None
        self.volatility_model = None

    @staticmethod
    def _encode_direction(labels: pd.Series) -> pd.Series:
        """Map direction labels {-1, 0, 1} to {0, 1, 2}; missing values count as 0."""
        return labels.fillna(0).astype(int) + 1

    def fit(
        self,
        X_train: pd.DataFrame,
        y_train: pd.DataFrame,
        X_val: Optional[pd.DataFrame] = None,
        y_val: Optional[pd.DataFrame] = None,
    ) -> "CatBoostModel":
        """Train the direction, magnitude and volatility estimators.

        Args:
            X_train: Training features.
            y_train: Training targets; must contain the ``direction_``,
                ``magnitude_`` and ``volatility_`` columns for this horizon.
            X_val: Optional validation features.
            y_val: Optional validation targets (same columns as ``y_train``).
                Validation is used only when both ``X_val`` and ``y_val``
                are given.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            KeyError: If a required target column is missing. Raised before
                any estimator is trained.
        """
        dir_col = f"direction_{self.horizon}d"
        mag_col = f"magnitude_{self.horizon}d"
        vol_col = f"volatility_{self.horizon}d"

        has_val = X_val is not None and y_val is not None
        if (X_val is None) != (y_val is None):
            # A half-specified validation set is dropped; make that visible.
            logger.warning(
                "Only one of X_val / y_val was provided; "
                "training without a validation set"
            )

        # Resolve every target up front so a missing column fails fast
        # instead of after the first estimator has already been trained.
        y_dir = self._encode_direction(y_train[dir_col])
        y_mag = y_train[mag_col].fillna(0)
        y_vol = y_train[vol_col].fillna(0)

        eval_dir = eval_mag = eval_vol = None
        if has_val:
            eval_dir = (X_val, self._encode_direction(y_val[dir_col]))
            eval_mag = (X_val, y_val[mag_col].fillna(0))
            eval_vol = (X_val, y_val[vol_col].fillna(0))

        # classes_count=3 is passed explicitly alongside the {0, 1, 2} encoding.
        self.direction_model = CatBoostClassifier(
            **self.params, loss_function="MultiClass", classes_count=3
        )
        self.direction_model.fit(X_train, y_dir, eval_set=eval_dir)

        self.magnitude_model = CatBoostRegressor(**self.params, loss_function="RMSE")
        self.magnitude_model.fit(X_train, y_mag, eval_set=eval_mag)

        self.volatility_model = CatBoostRegressor(**self.params, loss_function="RMSE")
        self.volatility_model.fit(X_train, y_vol, eval_set=eval_vol)

        self.is_fitted = True
        return self

    def predict(self, X: pd.DataFrame) -> PredictionResult:
        """Run all three estimators on ``X``.

        Args:
            X: Feature frame to score.

        Returns:
            A ``PredictionResult`` holding the predicted direction in
            {-1, 0, 1}, the class probabilities, the magnitude and volatility
            predictions, and the confidence (highest class probability).

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        if not self.is_fitted:
            raise RuntimeError("Model not fitted")

        dir_proba = self.direction_model.predict_proba(X)
        # Undo the +1 label shift applied in fit: {0, 1, 2} -> {-1, 0, 1}.
        direction = np.argmax(dir_proba, axis=1) - 1
        magnitude = self.magnitude_model.predict(X)
        volatility = self.volatility_model.predict(X)
        confidence = np.max(dir_proba, axis=1)

        return PredictionResult(
            direction=direction,
            direction_proba=dir_proba,
            magnitude=magnitude,
            volatility=volatility,
            confidence=confidence,
        )

    def save(self, path: str) -> None:
        """Serialize the estimators and configuration to ``path`` with joblib.

        Args:
            path: Destination file path passed to ``joblib.dump``.
        """
        joblib.dump(
            {
                "direction_model": self.direction_model,
                "magnitude_model": self.magnitude_model,
                "volatility_model": self.volatility_model,
                "params": self.params,
                "stock_type": self.stock_type,
                "horizon": self.horizon,
            },
            path,
        )

    @classmethod
    def load(cls, path: str) -> "CatBoostModel":
        """Rebuild a model from a file written by ``save``.

        Args:
            path: File previously produced by ``save``.

        Returns:
            A ``CatBoostModel`` with the stored estimators and parameters.
            It is marked fitted only when all three estimators are present,
            so an archive saved before ``fit`` still raises "Model not
            fitted" on ``predict``.
        """
        # SECURITY: joblib.load unpickles arbitrary objects and can execute
        # code embedded in the file. Only load archives from trusted sources.
        data = joblib.load(path)

        model = cls(stock_type=data["stock_type"], horizon=data["horizon"])
        model.direction_model = data["direction_model"]
        model.magnitude_model = data["magnitude_model"]
        model.volatility_model = data["volatility_model"]
        model.params = data["params"]
        model.is_fitted = all(
            estimator is not None
            for estimator in (
                model.direction_model,
                model.magnitude_model,
                model.volatility_model,
            )
        )
        return model