| |
|
|
| import os |
| import sys |
| import pandas as pd |
| import numpy as np |
| import optuna |
| from stable_baselines3 import SAC |
| from stable_baselines3.common.vec_env import DummyVecEnv |
| from stable_baselines3.common.callbacks import EvalCallback |
| from stable_baselines3.common.logger import configure |
|
|
| from environment import PortfolioEnv |
|
|
| |
| |
| |
|
|
# --- File-system layout: input data, Optuna logs, per-trial checkpoints ---
TRAIN_DATA_PATH = 'data/train.csv'
EVAL_DATA_PATH = 'data/eval.csv'
OPTUNA_LOG_DIR = 'optuna_logs'
CHECKPOINT_DIR = 'checkpoints/optuna_sac_trials'

# Make sure output directories exist before any trial tries to write to them.
for _out_dir in (OPTUNA_LOG_DIR, CHECKPOINT_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# Load the full training set and the held-out evaluation set,
# indexed by date so the environment sees a time-ordered frame.
df_full_train = pd.read_csv(TRAIN_DATA_PATH, index_col='Date', parse_dates=True)
df_eval = pd.read_csv(EVAL_DATA_PATH, index_col='Date', parse_dates=True)

# Chronological 80/20 split of the training data: the first 80% is used to
# train each Optuna trial, the last 20% to score it (no shuffling — this is
# time-series data).
train_split_point = int(len(df_full_train) * 0.8)
df_train_tune = df_full_train.iloc[:train_split_point]
df_validation_tune = df_full_train.iloc[train_split_point:]

print(f"Total training data points: {len(df_full_train)}")
print(f"Optuna training data points: {len(df_train_tune)}")
print(f"Optuna validation data points: {len(df_validation_tune)}")
|
|
|
|
| |
| |
| |
|
|
def make_env(df, window_size=30, initial_balance=10000, transaction_cost_pct=0.001):
    """
    Return a zero-argument factory that builds a PortfolioEnv.

    DummyVecEnv expects a list of callables rather than env instances, so
    this wraps the construction arguments in a closure.

    Args:
        df: Price dataframe the environment will step through.
        window_size: Number of past observations visible to the agent.
        initial_balance: Starting cash for the portfolio.
        transaction_cost_pct: Proportional cost charged per trade.

    Returns:
        A callable that instantiates a fresh PortfolioEnv when invoked.
    """
    env_kwargs = dict(
        df=df,
        initial_balance=initial_balance,
        window_size=window_size,
        transaction_cost_pct=transaction_cost_pct,
    )

    def _init():
        return PortfolioEnv(**env_kwargs)

    return _init
|
|
| |
| |
| |
|
|
def objective(trial: optuna.Trial) -> float:
    """
    Optuna objective: train a SAC agent with sampled hyperparameters and
    score it by total return on the held-out validation slice.

    Args:
        trial: Active Optuna trial used to sample hyperparameters.

    Returns:
        Fractional total return of the best checkpoint on the validation
        environment, or NaN if the trial fails for any reason (Optuna
        records NaN-valued trials as failed without aborting the study).
    """
    # --- Sample SAC hyperparameters ---
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    gamma = trial.suggest_float('gamma', 0.9, 0.999)
    tau = trial.suggest_float('tau', 0.005, 0.02)
    buffer_size = trial.suggest_int('buffer_size', 50000, 1000000, log=True)
    batch_size = trial.suggest_categorical('batch_size', [64, 128, 256, 512])
    ent_coef = trial.suggest_float('ent_coef', 0.001, 0.1, log=True)

    # --- Sample the policy network architecture (1-3 hidden layers) ---
    n_layers = trial.suggest_int('n_layers', 1, 3)
    net_arch = [
        trial.suggest_categorical(f'layer_size_{i}', [64, 128, 256])
        for i in range(n_layers)
    ]
    policy_kwargs = dict(net_arch=net_arch)

    # Single-env vectorized wrappers for training and validation.
    train_env = DummyVecEnv([make_env(df_train_tune)])
    eval_env = DummyVecEnv([make_env(df_validation_tune)])

    # Dedicated logger per trial so concurrent/sequential runs don't
    # clobber each other's CSV/tensorboard output.
    trial_log_path = os.path.join(OPTUNA_LOG_DIR, f"trial_{trial.number}")
    new_logger = configure(trial_log_path, ["stdout", "csv", "tensorboard"])

    model = SAC(
        "MlpPolicy",
        train_env,
        learning_rate=learning_rate,
        gamma=gamma,
        tau=tau,
        buffer_size=buffer_size,
        batch_size=batch_size,
        ent_coef=ent_coef,
        policy_kwargs=policy_kwargs,
        verbose=0,
        seed=42,  # fixed seed so trials differ only in hyperparameters
        tensorboard_log=OPTUNA_LOG_DIR
    )
    model.set_logger(new_logger)

    # Periodically evaluate on the validation env and snapshot the best
    # model seen so far into this trial's checkpoint directory.
    eval_callback = EvalCallback(
        eval_env,
        best_model_save_path=os.path.join(CHECKPOINT_DIR, f"best_sac_trial_{trial.number}"),
        log_path=trial_log_path,
        eval_freq=5000,
        deterministic=True,
        render=False,
        n_eval_episodes=1
    )

    try:
        total_timesteps_per_trial = 50000
        model.learn(total_timesteps=total_timesteps_per_trial,
                    callback=eval_callback, progress_bar=False)

        # Prefer the best checkpoint saved by EvalCallback; fall back to
        # the final model if no checkpoint was ever written.
        best_model_path = os.path.join(
            CHECKPOINT_DIR, f"best_sac_trial_{trial.number}", "best_model.zip")
        if os.path.exists(best_model_path):
            model = SAC.load(best_model_path, env=eval_env)
        else:
            print(f"Warning: No best model saved for trial {trial.number}, using last model.")

        # Roll out one deterministic episode on the validation env, tracking
        # the portfolio value the env reports through `info`.
        obs = eval_env.reset()
        portfolio_values = [eval_env.envs[0].initial_balance]
        done = False
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, infos = eval_env.step(action)
            # VecEnv.step returns arrays; unwrap the single-env flag
            # explicitly instead of relying on ndarray truthiness (which is
            # ambiguous and would break if n_envs ever exceeded 1).
            done = bool(dones[0])
            portfolio_values.append(infos[0]['portfolio_value'])

        total_return = (portfolio_values[-1] / portfolio_values[0]) - 1
        print(f"Trial {trial.number} finished. Total Return on validation: {total_return:.4f}")
        return total_return

    except Exception as e:
        # Report the failure and signal Optuna via NaN rather than letting
        # one bad trial kill the whole study.
        print(f"Trial {trial.number} failed due to: {e}")
        return float('nan')

    finally:
        # Always release env resources, success or failure.
        train_env.close()
        eval_env.close()
|
|
|
|
| |
| |
| |
|
|
if __name__ == '__main__':
    # Reproducible TPE search maximizing validation total return.
    study = optuna.create_study(
        direction='maximize',
        sampler=optuna.samplers.TPESampler(seed=42)
    )

    n_trials_to_run = 50
    study.optimize(objective, n_trials=n_trials_to_run, n_jobs=1)

    print("\n--- Optimization finished. ---")

    # best_trial raises ValueError when no trial completed successfully
    # (e.g. every objective call returned NaN) — exit cleanly in that case.
    try:
        trial = study.best_trial
    except ValueError:
        print("No successful trials; nothing to report.")
        sys.exit(1)

    print("Best trial:")
    print(f"  Value: {trial.value:.4f}")
    print("  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    # Persist the winning hyperparameters for the final training run.
    best_params = trial.params
    with open('checkpoints/best_sac_params.txt', 'w') as f:
        f.write(str(best_params))
    print("\n✅ Best parameters saved to checkpoints/best_sac_params.txt")

    # Optional diagnostics — only if plotly is installed.
    try:
        import plotly  # noqa: F401 -- availability probe for optuna plots
        from optuna.visualization import plot_optimization_history, plot_param_importances

        fig1 = plot_optimization_history(study)
        fig1.show()

        fig2 = plot_param_importances(study)
        fig2.show()
    except ImportError:
        print("\nInstall plotly and kaleido to visualize Optuna results: !pip install plotly kaleido")