# Initial deployment: ensemble stock predictor with trained models
# (commit bcceb77)
"""Multi-objective Optuna hyperparameter optimization for all models."""
import logging
from typing import Callable, Optional
import optuna
import pandas as pd
logger = logging.getLogger(__name__)
class OptunaMultiObjectiveTuner:
    """Multi-objective Optuna optimization (Sharpe ratio + directional accuracy).

    Tunes 30+ hyperparameters across classical ML, time-series, GNN, ensemble,
    and data pipeline parameters.

    The search space is declared once in ``_SEARCH_SPACE``. Every row carries
    both the key handed to ``train_evaluate_fn`` (e.g. ``xgb_learning_rate``)
    and the name registered with Optuna (e.g. ``xgb_lr``). Because
    ``trial.params`` is keyed by the Optuna name, use ``to_pipeline_params``
    (or ``get_best_params(..., pipeline_keys=True)``) to translate stored
    trial parameters back into the keys ``train_evaluate_fn`` receives.
    """

    # One row per hyperparameter, sampled in exactly this order:
    #   (pipeline key, Optuna name, suggest kind, positional args, keyword args)
    # "suggest kind" selects trial.suggest_<kind>; the args are passed through.
    _SEARCH_SPACE = (
        # === XGBoost (5 params) ===
        ("xgb_max_depth", "xgb_max_depth", "int", (3, 10), {}),
        ("xgb_learning_rate", "xgb_lr", "float", (0.01, 0.3), {"log": True}),
        ("xgb_n_estimators", "xgb_n_est", "int", (100, 2000), {}),
        ("xgb_subsample", "xgb_subsample", "float", (0.6, 1.0), {}),
        ("xgb_colsample_bytree", "xgb_colsample", "float", (0.6, 1.0), {}),
        # === LightGBM (5 params) ===
        ("lgbm_num_leaves", "lgbm_leaves", "int", (20, 150), {}),
        ("lgbm_learning_rate", "lgbm_lr", "float", (0.01, 0.3), {"log": True}),
        ("lgbm_n_estimators", "lgbm_n_est", "int", (100, 2000), {}),
        ("lgbm_feature_fraction", "lgbm_feat_frac", "float", (0.5, 1.0), {}),
        ("lgbm_bagging_fraction", "lgbm_bag_frac", "float", (0.5, 1.0), {}),
        # === CatBoost (3 params) ===
        ("catboost_depth", "cb_depth", "int", (4, 10), {}),
        ("catboost_learning_rate", "cb_lr", "float", (0.01, 0.3), {"log": True}),
        ("catboost_l2_leaf_reg", "cb_l2", "float", (1.0, 10.0), {}),
        # === TFT (3 params) ===
        ("tft_hidden_size", "tft_hidden", "categorical", ([64, 128, 256],), {}),
        ("tft_attention_heads", "tft_attn_heads", "categorical", ([1, 2, 4],), {}),
        ("tft_dropout", "tft_dropout", "float", (0.1, 0.4), {}),
        # === N-BEATS (3 params) ===
        ("nbeats_hidden_size", "nbeats_hidden", "categorical", ([64, 128, 256, 512],), {}),
        ("nbeats_n_blocks", "nbeats_blocks", "int", (2, 5), {}),
        ("nbeats_max_epochs", "nbeats_epochs", "int", (20, 50), {}),
        # === PatchTST (4 params) ===
        ("patchtst_patch_len", "patchtst_patch", "categorical", ([16, 32, 64],), {}),
        ("patchtst_encoder_layers", "patchtst_enc_layers", "categorical", ([2, 4, 6],), {}),
        ("patchtst_n_heads", "patchtst_heads", "categorical", ([4, 8, 16],), {}),
        ("patchtst_dropout", "patchtst_dropout", "float", (0.1, 0.3), {}),
        # === GNN (2 params) ===
        ("gnn_hidden_channels", "gnn_hidden", "categorical", ([32, 64, 128],), {}),
        ("gnn_correlation_threshold", "gnn_corr_thresh", "float", (0.3, 0.7), {}),
        # === Ensemble meta-learner (2 params) ===
        ("meta_num_leaves", "meta_leaves", "int", (10, 60), {}),
        ("meta_learning_rate", "meta_lr", "float", (0.01, 0.2), {"log": True}),
        # === Conformal prediction (1 param) ===
        ("conformal_alpha", "conformal_alpha", "float", (0.05, 0.20), {}),
        # === Sentiment / NLP (1 param) ===
        ("sentiment_decay_alpha", "sent_decay", "float", (0.1, 0.5), {}),
        # === Data pipeline (2 params) ===
        ("lookback_window", "lookback", "int", (60, 200), {}),
        ("purge_days", "purge_days", "int", (3, 10), {}),
    )

    def __init__(self, storage: Optional[str] = None):
        """Store the storage argument later passed to ``optuna.create_study``.

        Args:
            storage: Value forwarded unchanged as ``storage=`` when a study
                is created; ``None`` by default.
        """
        self.storage = storage

    def create_study(self, study_name: str) -> "optuna.Study":
        """Create a multi-objective study with MedianPruner.

        Both objectives (sharpe_ratio, directional_accuracy) are maximized.
        ``load_if_exists=True`` is passed so that a study of the same name
        already present in the configured storage is reused.

        Args:
            study_name: Name of the study to create or load.

        Returns:
            The Optuna study object.
        """
        # NOTE(review): Optuna does not support pruning for multi-objective
        # studies (Trial.report / Trial.should_prune raise
        # NotImplementedError there), so this pruner looks inert. It is kept
        # to preserve the existing configuration -- confirm and drop if so.
        return optuna.create_study(
            study_name=study_name,
            storage=self.storage,
            directions=["maximize", "maximize"],  # sharpe_ratio, directional_accuracy
            pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10),
            load_if_exists=True,
        )

    @staticmethod
    def define_search_space(trial: "optuna.Trial") -> dict:
        """Define hyperparameter search space for all models.

        Walks ``_SEARCH_SPACE`` in order and asks the trial for one value per
        row via ``trial.suggest_int`` / ``suggest_float`` /
        ``suggest_categorical``.

        Args:
            trial: Optuna trial used to sample each value.

        Returns:
            Dict of 30+ parameters keyed by *pipeline key*, covering: xgboost,
            lightgbm, catboost, tft, nbeats, patchtst, gnn, meta-learner,
            conformal prediction, sentiment, and data pipeline.
        """
        params = {}
        for key, name, kind, args, kwargs in OptunaMultiObjectiveTuner._SEARCH_SPACE:
            suggest = getattr(trial, f"suggest_{kind}")
            params[key] = suggest(name, *args, **kwargs)
        return params

    @staticmethod
    def to_pipeline_params(optuna_params: dict) -> dict:
        """Rename Optuna parameter names to the keys used by the pipeline.

        ``trial.params`` is keyed by the Optuna name (``xgb_lr``) while
        ``train_evaluate_fn`` receives the pipeline key
        (``xgb_learning_rate``). This translates the former into the latter.

        Args:
            optuna_params: Mapping keyed by Optuna parameter name.

        Returns:
            New dict with known names renamed; names not present in the
            search space are passed through unchanged.
        """
        rename = {
            name: key for key, name, *_ in OptunaMultiObjectiveTuner._SEARCH_SPACE
        }
        return {rename.get(name, name): value for name, value in optuna_params.items()}

    def objective(
        self,
        trial: "optuna.Trial",
        X: pd.DataFrame,
        y: pd.DataFrame,
        train_evaluate_fn: Callable[[dict, pd.DataFrame, pd.DataFrame], tuple[float, float]],
    ) -> tuple[float, float]:
        """Single trial: suggest params, train/evaluate, return objectives.

        Args:
            trial: Optuna trial object.
            X: Feature DataFrame.
            y: Target DataFrame.
            train_evaluate_fn: Callable that takes (params, X, y) and returns
                (sharpe_ratio, directional_accuracy).

        Returns:
            (sharpe_ratio, directional_accuracy)
        """
        params = self.define_search_space(trial)
        # Unpacking enforces that exactly two objective values come back.
        sharpe, dir_acc = train_evaluate_fn(params, X, y)
        return sharpe, dir_acc

    def optimize(
        self,
        X: pd.DataFrame,
        y: pd.DataFrame,
        train_evaluate_fn: Callable[[dict, pd.DataFrame, pd.DataFrame], tuple[float, float]],
        study_name: str = "ensemble_optimization",
        n_trials: int = 200,
        timeout: Optional[int] = None,
    ) -> "optuna.Study":
        """Run full multi-objective optimization.

        Args:
            X: Feature DataFrame.
            y: Target DataFrame.
            train_evaluate_fn: Callable(params, X, y) -> (sharpe, dir_acc).
            study_name: Name for the Optuna study.
            n_trials: Maximum number of trials.
            timeout: Optional timeout in seconds.

        Returns:
            Completed Optuna study.
        """
        study = self.create_study(study_name)

        def wrapped_objective(trial: "optuna.Trial") -> tuple[float, float]:
            # Bind the data and evaluation callable so Optuna only sees `trial`.
            return self.objective(trial, X, y, train_evaluate_fn)

        study.optimize(
            wrapped_objective,
            n_trials=n_trials,
            timeout=timeout,
            show_progress_bar=False,
        )
        logger.info(
            "Optimization complete: %d trials, %d Pareto-optimal",
            len(study.trials),
            len(study.best_trials),
        )
        return study

    @staticmethod
    def get_best_params(study: "optuna.Study", *, pipeline_keys: bool = False) -> list[dict]:
        """Get Pareto-optimal parameter sets from a completed study.

        Args:
            study: Study whose ``best_trials`` are read.
            pipeline_keys: When ``False`` (default) each dict is the trial's
                ``params`` as stored by Optuna, i.e. keyed by Optuna name
                (``xgb_lr``). When ``True`` the keys are translated to the
                ones ``train_evaluate_fn`` receives (``xgb_learning_rate``),
                so a result can be fed straight back into the pipeline.

        Returns:
            One parameter dict per trial in ``study.best_trials``.
        """
        if pipeline_keys:
            return [
                OptunaMultiObjectiveTuner.to_pipeline_params(t.params)
                for t in study.best_trials
            ]
        return [t.params for t in study.best_trials]

    @staticmethod
    def get_results_dataframe(study: "optuna.Study") -> pd.DataFrame:
        """Convert study trials to a DataFrame for analysis.

        Only trials in state ``COMPLETE`` are included. Each row holds the
        trial number, the two objective values, and the trial's parameters
        under their Optuna names.

        Args:
            study: Study whose trials are tabulated.

        Returns:
            DataFrame with one row per completed trial (empty if none).
        """
        complete = optuna.trial.TrialState.COMPLETE
        records = [
            {
                "trial_number": trial.number,
                "sharpe_ratio": trial.values[0],
                "directional_accuracy": trial.values[1],
                **trial.params,
            }
            for trial in study.trials
            if trial.state == complete
        ]
        return pd.DataFrame(records)