"""Multi-objective Optuna hyperparameter optimization for all models.""" import logging from typing import Callable, Optional import optuna import pandas as pd logger = logging.getLogger(__name__) class OptunaMultiObjectiveTuner: """Multi-objective Optuna optimization (Sharpe ratio + directional accuracy). Tunes 30+ hyperparameters across classical ML, time-series, GNN, ensemble, and data pipeline parameters. """ def __init__(self, storage: Optional[str] = None): self.storage = storage def create_study(self, study_name: str) -> optuna.Study: """Create a multi-objective study with MedianPruner.""" return optuna.create_study( study_name=study_name, storage=self.storage, directions=["maximize", "maximize"], # sharpe_ratio, directional_accuracy pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10), load_if_exists=True, ) @staticmethod def define_search_space(trial: optuna.Trial) -> dict: """Define hyperparameter search space for all models. Returns dict of 30+ parameters covering: xgboost, lightgbm, catboost, tft, nbeats, patchtst, gnn, meta-learner, sentiment, and data pipeline. """ params = { # === XGBoost (5 params) === "xgb_max_depth": trial.suggest_int("xgb_max_depth", 3, 10), "xgb_learning_rate": trial.suggest_float("xgb_lr", 0.01, 0.3, log=True), "xgb_n_estimators": trial.suggest_int("xgb_n_est", 100, 2000), "xgb_subsample": trial.suggest_float("xgb_subsample", 0.6, 1.0), "xgb_colsample_bytree": trial.suggest_float("xgb_colsample", 0.6, 1.0), # === LightGBM (5 params) === "lgbm_num_leaves": trial.suggest_int("lgbm_leaves", 20, 150), "lgbm_learning_rate": trial.suggest_float("lgbm_lr", 0.01, 0.3, log=True), "lgbm_n_estimators": trial.suggest_int("lgbm_n_est", 100, 2000), "lgbm_feature_fraction": trial.suggest_float("lgbm_feat_frac", 0.5, 1.0), "lgbm_bagging_fraction": trial.suggest_float("lgbm_bag_frac", 0.5, 1.0), # === CatBoost (3 params) === "catboost_depth": trial.suggest_int("cb_depth", 4, 10), "catboost_learning_rate": trial.suggest_float("cb_lr", 0.01, 0.3, log=True), "catboost_l2_leaf_reg": trial.suggest_float("cb_l2", 1.0, 10.0), # === TFT (3 params) === "tft_hidden_size": trial.suggest_categorical("tft_hidden", [64, 128, 256]), "tft_attention_heads": trial.suggest_categorical("tft_attn_heads", [1, 2, 4]), "tft_dropout": trial.suggest_float("tft_dropout", 0.1, 0.4), # === N-BEATS (3 params) === "nbeats_hidden_size": trial.suggest_categorical("nbeats_hidden", [64, 128, 256, 512]), "nbeats_n_blocks": trial.suggest_int("nbeats_blocks", 2, 5), "nbeats_max_epochs": trial.suggest_int("nbeats_epochs", 20, 50), # === PatchTST (4 params) === "patchtst_patch_len": trial.suggest_categorical("patchtst_patch", [16, 32, 64]), "patchtst_encoder_layers": trial.suggest_categorical("patchtst_enc_layers", [2, 4, 6]), "patchtst_n_heads": trial.suggest_categorical("patchtst_heads", [4, 8, 16]), "patchtst_dropout": trial.suggest_float("patchtst_dropout", 0.1, 0.3), # === GNN (2 params) === "gnn_hidden_channels": trial.suggest_categorical("gnn_hidden", [32, 64, 128]), "gnn_correlation_threshold": trial.suggest_float("gnn_corr_thresh", 0.3, 0.7), # === Ensemble meta-learner (2 params) === "meta_num_leaves": trial.suggest_int("meta_leaves", 10, 60), "meta_learning_rate": trial.suggest_float("meta_lr", 0.01, 0.2, log=True), # === Conformal prediction (1 param) === "conformal_alpha": trial.suggest_float("conformal_alpha", 0.05, 0.20), # === Sentiment / NLP (1 param) === "sentiment_decay_alpha": trial.suggest_float("sent_decay", 0.1, 0.5), # === Data pipeline (2 params) === "lookback_window": trial.suggest_int("lookback", 60, 200), "purge_days": trial.suggest_int("purge_days", 3, 10), } return params def objective( self, trial: optuna.Trial, X: pd.DataFrame, y: pd.DataFrame, train_evaluate_fn: Callable[[dict, pd.DataFrame, pd.DataFrame], tuple[float, float]], ) -> tuple[float, float]: """Single trial: suggest params, train/evaluate, return objectives. Args: trial: Optuna trial object. X: Feature DataFrame. y: Target DataFrame. train_evaluate_fn: Callable that takes (params, X, y) and returns (sharpe_ratio, directional_accuracy). Returns: (sharpe_ratio, directional_accuracy) """ params = self.define_search_space(trial) sharpe, dir_acc = train_evaluate_fn(params, X, y) return sharpe, dir_acc def optimize( self, X: pd.DataFrame, y: pd.DataFrame, train_evaluate_fn: Callable[[dict, pd.DataFrame, pd.DataFrame], tuple[float, float]], study_name: str = "ensemble_optimization", n_trials: int = 200, timeout: Optional[int] = None, ) -> optuna.Study: """Run full multi-objective optimization. Args: X: Feature DataFrame. y: Target DataFrame. train_evaluate_fn: Callable(params, X, y) -> (sharpe, dir_acc). study_name: Name for the Optuna study. n_trials: Maximum number of trials. timeout: Optional timeout in seconds. Returns: Completed Optuna study. """ study = self.create_study(study_name) def wrapped_objective(trial: optuna.Trial) -> tuple[float, float]: return self.objective(trial, X, y, train_evaluate_fn) study.optimize( wrapped_objective, n_trials=n_trials, timeout=timeout, show_progress_bar=False, ) logger.info( f"Optimization complete: {len(study.trials)} trials, " f"{len(study.best_trials)} Pareto-optimal" ) return study @staticmethod def get_best_params(study: optuna.Study) -> list[dict]: """Get Pareto-optimal parameter sets from a completed study.""" return [t.params for t in study.best_trials] @staticmethod def get_results_dataframe(study: optuna.Study) -> pd.DataFrame: """Convert study trials to a DataFrame for analysis.""" records = [] for trial in study.trials: if trial.state == optuna.trial.TrialState.COMPLETE: row = { "trial_number": trial.number, "sharpe_ratio": trial.values[0], "directional_accuracy": trial.values[1], **trial.params, } records.append(row) return pd.DataFrame(records)