m
Initial deployment: ensemble stock predictor with trained models
bcceb77
"""CatBoost model for stock prediction."""
import logging
from typing import Optional
import joblib
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, CatBoostRegressor
from src.models.base import BaseModel, PredictionResult
logger = logging.getLogger(__name__)
class CatBoostModel(BaseModel):
    """Three-headed CatBoost model for one stock type and forecast horizon.

    Three independent CatBoost estimators are trained on the same features:

    * ``direction_model``  -- 3-class classifier (``MultiClass`` loss),
    * ``magnitude_model``  -- regressor (``RMSE`` loss),
    * ``volatility_model`` -- regressor (``RMSE`` loss).

    Targets are read from the columns ``direction_{horizon}d``,
    ``magnitude_{horizon}d`` and ``volatility_{horizon}d`` of the target
    frame passed to :meth:`fit`.
    """

    # Direction labels {-1, 0, 1} are shifted by this offset to {0, 1, 2} for
    # training, and shifted back when decoding predictions.
    _DIRECTION_OFFSET = 1

    def __init__(self, stock_type: str = "large_cap", horizon: int = 5, **kwargs):
        """Create an unfitted model.

        Args:
            stock_type: Forwarded to ``BaseModel.__init__``.
            horizon: Forwarded to ``BaseModel.__init__``; ``self.horizon`` is
                later used to build the target column names
                (presumably stored by the base class -- confirm there).
            **kwargs: Optional hyper-parameter overrides. Only ``depth``,
                ``learning_rate``, ``iterations`` and ``l2_leaf_reg`` are
                read; any other key is ignored.
        """
        super().__init__(name="catboost", stock_type=stock_type, horizon=horizon)
        # Shared by all three estimators; seed and verbosity are fixed.
        self.params = {
            "depth": kwargs.get("depth", 6),
            "learning_rate": kwargs.get("learning_rate", 0.05),
            "iterations": kwargs.get("iterations", 500),
            "l2_leaf_reg": kwargs.get("l2_leaf_reg", 3),
            "random_seed": 42,
            "verbose": 0,
        }
        self.direction_model = None
        self.magnitude_model = None
        self.volatility_model = None

    @staticmethod
    def _require_columns(frame, columns, frame_name: str) -> None:
        """Raise ``KeyError`` naming every entry of *columns* absent from *frame*."""
        missing = [col for col in columns if col not in frame]
        if missing:
            raise KeyError(f"{frame_name} is missing target column(s): {missing}")

    @classmethod
    def _direction_labels(cls, series: pd.Series) -> pd.Series:
        """Encode direction labels {-1, 0, 1} as class ids {0, 1, 2} (NaN -> 0 -> 1)."""
        return series.fillna(0).astype(int) + cls._DIRECTION_OFFSET

    def _fit_regressor(self, column, X_train, y_train, X_val, y_val) -> CatBoostRegressor:
        """Train one RMSE regressor on *column*, treating NaN targets as 0."""
        eval_set = None
        if X_val is not None and y_val is not None:
            eval_set = (X_val, y_val[column].fillna(0))
        regressor = CatBoostRegressor(**self.params, loss_function="RMSE")
        regressor.fit(X_train, y_train[column].fillna(0), eval_set=eval_set)
        return regressor

    def fit(
        self,
        X_train: pd.DataFrame,
        y_train: pd.DataFrame,
        X_val: Optional[pd.DataFrame] = None,
        y_val: Optional[pd.DataFrame] = None,
    ) -> "CatBoostModel":
        """Train the direction, magnitude and volatility estimators.

        Missing target values are replaced with 0 before training (for the
        direction target this happens before the label shift, so NaN becomes
        class id 1).

        Args:
            X_train: Training features.
            y_train: Training targets; must contain the three
                ``*_{horizon}d`` target columns.
            X_val: Optional validation features.
            y_val: Optional validation targets. A validation set is passed to
                CatBoost only when both ``X_val`` and ``y_val`` are given.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            KeyError: If a required target column is missing from ``y_train``
                (or from ``y_val`` when a validation set is used). Raised
                before any training starts.
        """
        dir_col = f"direction_{self.horizon}d"
        mag_col = f"magnitude_{self.horizon}d"
        vol_col = f"volatility_{self.horizon}d"
        target_cols = (dir_col, mag_col, vol_col)
        use_validation = X_val is not None and y_val is not None

        # Fail fast: otherwise a missing column would only surface after one
        # or two estimators had already been trained.
        self._require_columns(y_train, target_cols, "y_train")
        if use_validation:
            self._require_columns(y_val, target_cols, "y_val")

        dir_eval = None
        if use_validation:
            dir_eval = (X_val, self._direction_labels(y_val[dir_col]))
        direction_model = CatBoostClassifier(
            **self.params, loss_function="MultiClass", classes_count=3
        )
        direction_model.fit(
            X_train, self._direction_labels(y_train[dir_col]), eval_set=dir_eval
        )

        magnitude_model = self._fit_regressor(mag_col, X_train, y_train, X_val, y_val)
        volatility_model = self._fit_regressor(vol_col, X_train, y_train, X_val, y_val)

        # Publish all three together so a failure part-way through training
        # never leaves the instance holding a mix of old and new estimators.
        self.direction_model = direction_model
        self.magnitude_model = magnitude_model
        self.volatility_model = volatility_model
        self.is_fitted = True
        return self

    def predict(self, X: pd.DataFrame) -> PredictionResult:
        """Predict direction, magnitude and volatility for each row of *X*.

        Args:
            X: Feature frame to score.

        Returns:
            A ``PredictionResult`` whose ``direction`` is the most probable
            class mapped back to {-1, 0, 1}, ``direction_proba`` is the raw
            class-probability output, ``confidence`` is the highest class
            probability per row, and ``magnitude`` / ``volatility`` are the
            regressors' outputs.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        if not self.is_fitted:
            raise RuntimeError("Model not fitted")
        dir_proba = self.direction_model.predict_proba(X)
        # Column index is taken as the class id {0, 1, 2}; undo the training
        # shift to recover the original {-1, 0, 1} labels.
        direction = np.argmax(dir_proba, axis=1) - self._DIRECTION_OFFSET
        return PredictionResult(
            direction=direction,
            direction_proba=dir_proba,
            magnitude=self.magnitude_model.predict(X),
            volatility=self.volatility_model.predict(X),
            confidence=np.max(dir_proba, axis=1),
        )

    def save(self, path: str) -> None:
        """Serialize the three estimators and their settings to *path* via joblib.

        No fitted-state check is performed; saving an unfitted model stores
        ``None`` for the estimators.
        """
        joblib.dump(
            {
                "direction_model": self.direction_model,
                "magnitude_model": self.magnitude_model,
                "volatility_model": self.volatility_model,
                "params": self.params,
                "stock_type": self.stock_type,
                "horizon": self.horizon,
            },
            path,
        )

    @classmethod
    def load(cls, path: str) -> "CatBoostModel":
        """Restore a model previously written by :meth:`save`.

        NOTE(security): joblib deserializes with pickle, which can execute
        arbitrary code. Only load files from a trusted source.

        Args:
            path: Location of the joblib file.

        Returns:
            A new instance carrying the stored estimators and parameters. It
            is marked as fitted only when all three estimators are present,
            so an artifact saved before training still raises the
            "Model not fitted" error on :meth:`predict`.
        """
        data = joblib.load(path)
        model = cls(stock_type=data["stock_type"], horizon=data["horizon"])
        model.direction_model = data["direction_model"]
        model.magnitude_model = data["magnitude_model"]
        model.volatility_model = data["volatility_model"]
        model.params = data["params"]
        # Identity checks (not ==) so no estimator-defined __eq__ is invoked.
        model.is_fitted = all(
            estimator is not None
            for estimator in (
                model.direction_model,
                model.magnitude_model,
                model.volatility_model,
            )
        )
        return model