Spaces:
Sleeping
Sleeping
| """Multi-objective Optuna hyperparameter optimization for all models.""" | |
| import logging | |
| from typing import Callable, Optional | |
| import optuna | |
| import pandas as pd | |
| logger = logging.getLogger(__name__) | |
class OptunaMultiObjectiveTuner:
    """Multi-objective Optuna optimization (Sharpe ratio + directional accuracy).

    Tunes 30+ hyperparameters across classical ML, time-series, GNN, ensemble,
    and data pipeline parameters.

    Both objectives are maximized, so a finished study exposes a set of
    Pareto-optimal trials (``study.best_trials``) rather than one best trial.
    """

    def __init__(self, storage: Optional[str] = None):
        """Remember where studies are persisted.

        Args:
            storage: Value forwarded unchanged to ``optuna.create_study`` as
                its ``storage`` argument; ``None`` is passed through as-is.
        """
        self.storage = storage

    def create_study(self, study_name: str) -> "optuna.Study":
        """Create a multi-objective study with MedianPruner.

        Args:
            study_name: Name for the Optuna study.

        Returns:
            The study created (or re-opened, because ``load_if_exists=True``)
            under ``study_name`` in ``self.storage``.
        """
        # NOTE(review): objective() never reports intermediate values, and
        # Optuna documents Trial.report / Trial.should_prune as unsupported
        # for multi-objective studies, so this pruner looks inert. It is kept
        # to preserve the existing call -- confirm before relying on pruning.
        return optuna.create_study(
            study_name=study_name,
            storage=self.storage,
            directions=["maximize", "maximize"],  # sharpe_ratio, directional_accuracy
            pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10),
            load_if_exists=True,
        )

    # Declared static: it takes no ``self`` but is invoked as
    # ``self.define_search_space(trial)`` from objective(); without the
    # decorator that call would bind the instance to ``trial`` and fail.
    @staticmethod
    def define_search_space(trial: "optuna.Trial") -> dict:
        """Define hyperparameter search space for all models.

        Args:
            trial: Optuna trial used to draw every value.

        Returns:
            Dict of 31 parameters covering: xgboost, lightgbm, catboost,
            tft, nbeats, patchtst, gnn, meta-learner, conformal prediction,
            sentiment, and data pipeline.

        Note:
            The dict keys (e.g. ``"xgb_learning_rate"``) are not always the
            names registered with Optuna (e.g. ``"xgb_lr"``). ``trial.params``
            is keyed by the latter.
        """
        params = {
            # === XGBoost (5 params) ===
            "xgb_max_depth": trial.suggest_int("xgb_max_depth", 3, 10),
            "xgb_learning_rate": trial.suggest_float("xgb_lr", 0.01, 0.3, log=True),
            "xgb_n_estimators": trial.suggest_int("xgb_n_est", 100, 2000),
            "xgb_subsample": trial.suggest_float("xgb_subsample", 0.6, 1.0),
            "xgb_colsample_bytree": trial.suggest_float("xgb_colsample", 0.6, 1.0),
            # === LightGBM (5 params) ===
            "lgbm_num_leaves": trial.suggest_int("lgbm_leaves", 20, 150),
            "lgbm_learning_rate": trial.suggest_float("lgbm_lr", 0.01, 0.3, log=True),
            "lgbm_n_estimators": trial.suggest_int("lgbm_n_est", 100, 2000),
            "lgbm_feature_fraction": trial.suggest_float("lgbm_feat_frac", 0.5, 1.0),
            "lgbm_bagging_fraction": trial.suggest_float("lgbm_bag_frac", 0.5, 1.0),
            # === CatBoost (3 params) ===
            "catboost_depth": trial.suggest_int("cb_depth", 4, 10),
            "catboost_learning_rate": trial.suggest_float("cb_lr", 0.01, 0.3, log=True),
            "catboost_l2_leaf_reg": trial.suggest_float("cb_l2", 1.0, 10.0),
            # === TFT (3 params) ===
            "tft_hidden_size": trial.suggest_categorical("tft_hidden", [64, 128, 256]),
            "tft_attention_heads": trial.suggest_categorical("tft_attn_heads", [1, 2, 4]),
            "tft_dropout": trial.suggest_float("tft_dropout", 0.1, 0.4),
            # === N-BEATS (3 params) ===
            "nbeats_hidden_size": trial.suggest_categorical("nbeats_hidden", [64, 128, 256, 512]),
            "nbeats_n_blocks": trial.suggest_int("nbeats_blocks", 2, 5),
            "nbeats_max_epochs": trial.suggest_int("nbeats_epochs", 20, 50),
            # === PatchTST (4 params) ===
            "patchtst_patch_len": trial.suggest_categorical("patchtst_patch", [16, 32, 64]),
            "patchtst_encoder_layers": trial.suggest_categorical("patchtst_enc_layers", [2, 4, 6]),
            "patchtst_n_heads": trial.suggest_categorical("patchtst_heads", [4, 8, 16]),
            "patchtst_dropout": trial.suggest_float("patchtst_dropout", 0.1, 0.3),
            # === GNN (2 params) ===
            "gnn_hidden_channels": trial.suggest_categorical("gnn_hidden", [32, 64, 128]),
            "gnn_correlation_threshold": trial.suggest_float("gnn_corr_thresh", 0.3, 0.7),
            # === Ensemble meta-learner (2 params) ===
            "meta_num_leaves": trial.suggest_int("meta_leaves", 10, 60),
            "meta_learning_rate": trial.suggest_float("meta_lr", 0.01, 0.2, log=True),
            # === Conformal prediction (1 param) ===
            "conformal_alpha": trial.suggest_float("conformal_alpha", 0.05, 0.20),
            # === Sentiment / NLP (1 param) ===
            "sentiment_decay_alpha": trial.suggest_float("sent_decay", 0.1, 0.5),
            # === Data pipeline (2 params) ===
            "lookback_window": trial.suggest_int("lookback", 60, 200),
            "purge_days": trial.suggest_int("purge_days", 3, 10),
        }
        return params

    def objective(
        self,
        trial: "optuna.Trial",
        X: pd.DataFrame,
        y: pd.DataFrame,
        train_evaluate_fn: Callable[[dict, pd.DataFrame, pd.DataFrame], tuple[float, float]],
    ) -> tuple[float, float]:
        """Single trial: suggest params, train/evaluate, return objectives.

        Args:
            trial: Optuna trial object.
            X: Feature DataFrame.
            y: Target DataFrame.
            train_evaluate_fn: Callable that takes (params, X, y) and returns
                (sharpe_ratio, directional_accuracy).

        Returns:
            (sharpe_ratio, directional_accuracy)
        """
        params = self.define_search_space(trial)
        sharpe, dir_acc = train_evaluate_fn(params, X, y)
        return sharpe, dir_acc

    def optimize(
        self,
        X: pd.DataFrame,
        y: pd.DataFrame,
        train_evaluate_fn: Callable[[dict, pd.DataFrame, pd.DataFrame], tuple[float, float]],
        study_name: str = "ensemble_optimization",
        n_trials: int = 200,
        timeout: Optional[int] = None,
    ) -> "optuna.Study":
        """Run full multi-objective optimization.

        Args:
            X: Feature DataFrame.
            y: Target DataFrame.
            train_evaluate_fn: Callable(params, X, y) -> (sharpe, dir_acc).
            study_name: Name for the Optuna study.
            n_trials: Maximum number of trials.
            timeout: Optional timeout in seconds.

        Returns:
            Completed Optuna study.
        """
        study = self.create_study(study_name)

        # Optuna calls the objective with the trial only, so bind the data
        # and the evaluation callback through a closure.
        def wrapped_objective(trial: "optuna.Trial") -> tuple[float, float]:
            return self.objective(trial, X, y, train_evaluate_fn)

        study.optimize(
            wrapped_objective,
            n_trials=n_trials,
            timeout=timeout,
            show_progress_bar=False,
        )
        # Lazy %-style arguments: the message is only formatted when emitted.
        logger.info(
            "Optimization complete: %d trials, %d Pareto-optimal",
            len(study.trials),
            len(study.best_trials),
        )
        return study

    @staticmethod
    def get_best_params(study: "optuna.Study") -> list[dict]:
        """Get Pareto-optimal parameter sets from a completed study.

        Args:
            study: Study whose ``best_trials`` are read.

        Returns:
            One ``trial.params`` dict per Pareto-optimal trial. These dicts
            are keyed by the names registered with Optuna (e.g. ``"xgb_lr"``),
            which differ from some keys of the dict built by
            ``define_search_space`` (e.g. ``"xgb_learning_rate"``).
        """
        return [t.params for t in study.best_trials]

    @staticmethod
    def get_results_dataframe(study: "optuna.Study") -> pd.DataFrame:
        """Convert study trials to a DataFrame for analysis.

        Args:
            study: Study whose ``trials`` are read.

        Returns:
            One row per trial in state ``COMPLETE``, with ``trial_number``,
            ``sharpe_ratio``, ``directional_accuracy`` and one column per
            entry of ``trial.params``. Trials in any other state are skipped.
        """
        complete = optuna.trial.TrialState.COMPLETE
        records = [
            {
                "trial_number": trial.number,
                "sharpe_ratio": trial.values[0],
                "directional_accuracy": trial.values[1],
                **trial.params,
            }
            for trial in study.trials
            if trial.state == complete
        ]
        return pd.DataFrame(records)