Premchan369 committed on
Commit
60e7ce4
·
verified ·
1 Parent(s): 96e73d5

Add hyperparameter sweep infrastructure: grid, random, Latin Hypercube search

Browse files
Files changed (1) hide show
  1. hyperparameter_sweep.py +381 -0
hyperparameter_sweep.py ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Hyperparameter Sweep Infrastructure
2
+
3
+ Grid/random search over key parameters with automatic evaluation.
4
+ No more hand-tuning one parameter at a time — let the machine find the best config.
5
+ """
6
+ import numpy as np
7
+ import pandas as pd
8
+ from itertools import product
9
+ from typing import Dict, List, Optional, Callable, Any, Tuple
10
+ import json
11
+ from dataclasses import dataclass
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
14
+
15
+
16
@dataclass
class SweepConfig:
    """Configuration for a hyperparameter sweep.

    Plain value container: it performs no validation and is not referenced
    by the other definitions visible in this file.
    """
    # Candidate values for each hyperparameter, keyed by parameter name.
    param_grid: Dict[str, List[Any]]
    # Name of the metric to optimize.
    metric: str = 'sharpe_ratio'
    # Optimization direction; defaults to 'maximize'.
    # NOTE(review): presumably 'minimize' is the only other intended value — confirm.
    metric_direction: str = 'maximize'
    n_trials: Optional[int] = None  # For random search
    # Seed intended for reproducible sampling.
    random_seed: int = 42
24
+
25
+
26
def grid_search(param_grid: Dict[str, List[Any]]) -> List[Dict[str, Any]]:
    """
    Generate all combinations from parameter grid.

    Combinations are emitted in Cartesian-product order: keys keep the
    insertion order of ``param_grid`` and the last key varies fastest.

    Example:
        param_grid = {
            'learning_rate': [1e-4, 1e-3, 1e-2],
            'hidden_size': [64, 128, 256],
            'dropout': [0.1, 0.2, 0.3]
        }
        → 3 × 3 × 3 = 27 combinations

    WARNING: Grid search is exponential in parameters.
    Use random search for high-dimensional spaces.

    Args:
        param_grid: {param_name: [candidate values]}

    Returns:
        One dict per combination. An empty grid yields a single empty
        config; a grid containing any empty value list yields no configs.
    """
    keys = list(param_grid)
    return [dict(zip(keys, combo)) for combo in product(*param_grid.values())]
49
+
50
+
51
def random_search(param_grid: Dict[str, List[Any]],
                  n_trials: int,
                  random_seed: int = 42) -> List[Dict[str, Any]]:
    """
    Random search over parameter grid.

    Often more efficient than grid search (Bergstra & Bengio, 2012):
    Random search finds good hyperparameters faster than grid search
    in high-dimensional spaces.

    Each trial independently picks one candidate per parameter, uniformly
    and with replacement, so duplicate configurations are possible.

    Args:
        param_grid: {param_name: [candidate values]}
        n_trials: Number of configurations to draw
        random_seed: Seed for the private random generator

    Returns:
        List of ``n_trials`` configs whose values are the original objects
        from ``param_grid`` (no NumPy scalar conversion).

    Raises:
        ValueError: If a parameter has no candidate values.
    """
    # A private generator keeps sampling reproducible without reseeding the
    # process-wide NumPy RNG that the caller's training code may rely on.
    rng = np.random.RandomState(random_seed)

    names = list(param_grid)
    candidates = [list(param_grid[name]) for name in names]
    for name, options in zip(names, candidates):
        if not options:
            raise ValueError(f"Parameter '{name}' has no candidate values")

    # Index into the candidate list instead of np.random.choice(values):
    # choice() converts the list to an array, which coerces mixed types to
    # strings, returns NumPy scalars, and rejects tuple/list-valued options.
    return [
        {name: options[rng.randint(len(options))]
         for name, options in zip(names, candidates)}
        for _ in range(n_trials)
    ]
71
+
72
+
73
def latin_hypercube_sampling(param_ranges: Dict[str, Tuple[float, float]],
                             n_trials: int,
                             discrete_params: Optional[Dict[str, List]] = None,
                             random_seed: int = 42) -> List[Dict[str, Any]]:
    """
    Latin Hypercube Sampling for efficient space coverage.

    Divides each dimension into n equal strata and samples once from each.
    Ensures better coverage of the parameter space than random.

    Only the continuous parameters are stratified; each discrete parameter
    is drawn uniformly at random per trial.

    Args:
        param_ranges: {param_name: (min, max)} for continuous params
        n_trials: Number of samples
        discrete_params: {param_name: [values]} for discrete params
        random_seed: Seed for the private random generator

    Returns:
        List of ``n_trials`` configs. Continuous values are Python floats;
        discrete values are the original objects from ``discrete_params``.
        Returns an empty list when ``n_trials`` is not positive.

    Raises:
        ValueError: If a discrete parameter has no candidate values.
    """
    # Nothing to sample; also avoids dividing by zero for the stratum width.
    if n_trials <= 0:
        return []

    # Private generator: do not reseed the process-wide NumPy RNG.
    rng = np.random.RandomState(random_seed)

    param_names = list(param_ranges)
    discrete = {name: list(values)
                for name, values in (discrete_params or {}).items()}
    for name, values in discrete.items():
        if not values:
            raise ValueError(f"Parameter '{name}' has no candidate values")

    # Left edges of the n equal-width strata on [0, 1); identical for every
    # dimension, so computed once.
    stratum_width = 1 / n_trials
    left_edges = np.linspace(0, 1, n_trials + 1)[:-1]

    unit_samples = np.zeros((n_trials, len(param_names)))
    for col in range(len(param_names)):
        # One uniform draw inside each stratum ...
        points = left_edges + rng.uniform(0, stratum_width, n_trials)
        # ... then shuffle so strata are paired randomly across dimensions.
        rng.shuffle(points)
        unit_samples[:, col] = points

    combinations = []
    for row in unit_samples:
        config = {}
        for name, unit_value in zip(param_names, row):
            low, high = param_ranges[name]
            # Rescale from the unit interval to [low, high).
            config[name] = float(low + unit_value * (high - low))

        # Index into the list rather than np.random.choice(values), which
        # would coerce mixed types and return NumPy scalars.
        for name, values in discrete.items():
            config[name] = values[rng.randint(len(values))]

        combinations.append(config)

    return combinations
122
+
123
+
124
class HyperparameterTuner:
    """
    Hyperparameter tuner with multiple search strategies.

    Supported strategies are 'grid', 'random' and 'lhs'. The raw per-trial
    result dicts of the most recent ``search`` call are kept in
    ``self.results``.

    Usage:
        tuner = HyperparameterTuner(strategy='random')
        best_config, results = tuner.search(
            param_grid,
            train_fn=train_and_evaluate,
            n_trials=50
        )
    """

    # Prefix given to flattened config columns in the results DataFrame.
    _PARAM_PREFIX = 'param_'
    # Column layout of the frame returned by analyze_importance.
    _IMPORTANCE_COLUMNS = ['parameter', 'correlation',
                           'abs_correlation', 'importance_rank']

    def __init__(self, strategy: str = 'random'):
        self.strategy = strategy
        self.results = []

    @staticmethod
    def _has_valid_score(result: Dict[str, Any], metric: str) -> bool:
        """Return True if the trial reported a non-NaN, float-convertible metric."""
        try:
            return not np.isnan(float(result.get(metric)))
        except (TypeError, ValueError):
            return False

    @classmethod
    def _results_to_frame(cls, results: List[Dict[str, Any]]) -> pd.DataFrame:
        """Build the trials DataFrame, expanding 'config' into param_* columns."""
        results_df = pd.DataFrame(results)
        if 'config' in results_df.columns:
            config_df = pd.json_normalize(results_df['config'].tolist())
            config_df.columns = [f'{cls._PARAM_PREFIX}{c}' for c in config_df.columns]
            results_df = pd.concat([results_df.drop('config', axis=1), config_df], axis=1)
        return results_df

    def search(self,
               param_grid: Dict[str, List[Any]],
               train_fn: Callable[[Dict], Dict[str, float]],
               n_trials: Optional[int] = None,
               metric: str = 'sharpe_ratio',
               direction: str = 'maximize',
               verbose: bool = True) -> Tuple[Dict, pd.DataFrame]:
        """
        Run hyperparameter search.

        Args:
            param_grid: Parameter grid. For the 'lhs' strategy, 2-tuples are
                treated as continuous (min, max) ranges and everything else
                as discrete candidate lists.
            train_fn: Function(params) -> dict of metrics. Any exception it
                raises is recorded as a failed trial instead of aborting.
            n_trials: Number of trials (for random/LHS); falls back to 20
                when omitted.
            metric: Metric to optimize
            direction: 'maximize' or 'minimize' (any value other than
                'maximize' minimizes)
            verbose: Print per-trial progress

        Returns:
            best_config: Best hyperparameter configuration, or an empty dict
                when no successful trial reported a usable metric value
            results_df: DataFrame of all trials with configs flattened into
                'param_*' columns

        Raises:
            ValueError: If the tuner's strategy is not recognized.
        """
        # Generate configurations
        if self.strategy == 'grid':
            configs = grid_search(param_grid)
        elif self.strategy == 'random':
            configs = random_search(param_grid, n_trials or 20)
        elif self.strategy == 'lhs':
            # Separate continuous and discrete
            continuous = {k: v for k, v in param_grid.items()
                          if isinstance(v, tuple) and len(v) == 2}
            discrete = {k: v for k, v in param_grid.items()
                        if k not in continuous}
            configs = latin_hypercube_sampling(continuous, n_trials or 20, discrete)
        else:
            raise ValueError(f"Unknown strategy: {self.strategy}")

        print(f"Running {len(configs)} trials with {self.strategy} search...")

        # Evaluate each configuration
        results = []
        for i, config in enumerate(configs):
            if verbose:
                print(f"\nTrial {i+1}/{len(configs)}: {config}")

            # Deliberately broad: one bad configuration must not abort the
            # whole sweep; the failure is recorded in the results instead.
            try:
                metrics = train_fn(config)

                result = {
                    'trial': i,
                    'status': 'success',
                    'config': config,
                    **metrics
                }

                if verbose:
                    print(f" → {metric} = {metrics.get(metric, 'N/A')}")

            except Exception as e:
                result = {
                    'trial': i,
                    'status': 'failed',
                    'error': str(e),
                    'config': config
                }
                if verbose:
                    print(f" → FAILED: {e}")

            results.append(result)

        # Keep the raw trial records available on the instance.
        self.results = results

        # Same flattened layout whether or not a best config is found.
        results_df = self._results_to_frame(results)

        # Find best configuration
        valid_results = [r for r in results if r.get('status') == 'success']

        if not valid_results:
            print("WARNING: All trials failed!")
            return {}, results_df

        # Missing / NaN / non-numeric metric values cannot be ranked: NaN
        # breaks max()/min() ordering and None cannot be formatted below.
        scored_results = [r for r in valid_results
                          if self._has_valid_score(r, metric)]

        if not scored_results:
            print(f"WARNING: No successful trial reported a valid '{metric}' value!")
            return {}, results_df

        pick = max if direction == 'maximize' else min
        best_result = pick(scored_results, key=lambda r: float(r[metric]))
        best_config = best_result['config']

        print(f"\n{'='*60}")
        print("BEST CONFIGURATION:")
        print(f" {metric}: {float(best_result[metric]):.4f}")
        for k, v in best_config.items():
            print(f" {k}: {v}")
        print(f"{'='*60}")

        return best_config, results_df

    def analyze_importance(self, results_df: pd.DataFrame,
                           metric: str) -> pd.DataFrame:
        """
        Analyze which hyperparameters matter most.

        Uses correlation between each parameter and the metric. This is a
        linear (Pearson) measure, so non-numeric parameters and parameters
        that never vary are skipped.

        Args:
            results_df: Trials DataFrame with 'param_*' columns
            metric: Name of the metric column

        Returns:
            DataFrame with columns parameter, correlation, abs_correlation
            and importance_rank (1 = strongest), sorted by abs_correlation
            descending. Empty (with those columns) when nothing qualifies.
        """
        empty = pd.DataFrame(columns=self._IMPORTANCE_COLUMNS)

        param_cols = [c for c in results_df.columns
                      if c.startswith(self._PARAM_PREFIX)]
        if not param_cols or metric not in results_df.columns:
            return empty

        scores = pd.to_numeric(results_df[metric], errors='coerce')
        if not pd.api.types.is_numeric_dtype(scores):
            return empty
        scores = scores.astype(float)

        importance = []
        for col in param_cols:
            values = pd.to_numeric(results_df[col], errors='coerce')
            if not pd.api.types.is_numeric_dtype(values):
                continue  # e.g. tuple-valued parameters
            values = values.astype(float)

            # Calculate correlation with metric on rows where both exist
            mask = values.notna() & scores.notna()
            if mask.sum() <= 3:
                continue
            x = values[mask].to_numpy()
            y = scores[mask].to_numpy()
            # Correlation is undefined for a constant series.
            if x.std() == 0 or y.std() == 0:
                continue

            corr = float(np.corrcoef(x, y)[0, 1])
            if np.isnan(corr):
                continue

            importance.append({
                # Strip only the leading prefix so parameter names that
                # themselves contain 'param_' are kept intact.
                'parameter': col[len(self._PARAM_PREFIX):],
                'correlation': corr,
                'abs_correlation': abs(corr),
            })

        if not importance:
            return empty

        importance_df = pd.DataFrame(importance)
        importance_df = importance_df.sort_values('abs_correlation', ascending=False)
        importance_df['importance_rank'] = range(1, len(importance_df) + 1)

        return importance_df
275
+
276
+
277
def create_alpha_model_sweep() -> Dict:
    """
    Pre-configured sweep for AlphaForge alpha model.

    Parameter groups in the returned grid:
        - lookback_window: How much history to use
        - lstm_hidden_size / lstm_num_layers / lstm_dropout: LSTM settings
        - transformer_d_model / transformer_nhead / transformer_num_layers:
          Transformer settings
        - learning_rate / batch_size: Optimization
        - xgb_max_depth / xgb_n_estimators: XGBoost settings
        - ensemble_*_weight: How to combine models (each weight is listed
          independently; nothing here normalizes a combination)

    Returns:
        A new {param_name: [candidate values]} dict on every call.
    """
    sweep = {'lookback_window': [30, 60, 90, 120]}

    # LSTM branch
    sweep.update(
        lstm_hidden_size=[64, 128, 256],
        lstm_num_layers=[1, 2, 3],
        lstm_dropout=[0.1, 0.2, 0.3],
    )
    # Transformer branch
    sweep.update(
        transformer_d_model=[64, 128],
        transformer_nhead=[2, 4],
        transformer_num_layers=[1, 2],
    )
    # Optimizer settings
    sweep.update(
        learning_rate=[1e-5, 5e-5, 1e-4, 5e-4],
        batch_size=[32, 64, 128],
    )
    # XGBoost branch
    sweep.update(
        xgb_max_depth=[4, 6, 8],
        xgb_n_estimators=[100, 200, 500],
    )
    # Ensemble mixing weights
    sweep.update(
        ensemble_lstm_weight=[0.2, 0.3, 0.4],
        ensemble_transformer_weight=[0.2, 0.3, 0.4],
        ensemble_xgboost_weight=[0.2, 0.4, 0.5],
    )
    return sweep
305
+
306
+
307
def create_portfolio_sweep() -> Dict:
    """Pre-configured sweep for portfolio optimizer.

    Returns:
        A new {param_name: [candidate values]} dict on every call.
    """
    sweep: Dict = {}
    sweep['max_weight'] = [0.15, 0.20, 0.25, 0.30]
    sweep['risk_aversion'] = [0.5, 1.0, 2.0, 3.0]
    sweep['turnover_penalty'] = [0.0005, 0.001, 0.002]
    # NOTE(review): presumably expressed in trading days — confirm with the optimizer.
    sweep['rebalance_freq'] = [1, 3, 5, 10, 21]
    sweep['risk_free_rate'] = [0.02, 0.03, 0.04, 0.05]
    return sweep
316
+
317
+
318
def create_mtl_sweep() -> Dict:
    """Pre-configured sweep for Multi-Task Learning model.

    Returns:
        A new {param_name: [candidate values]} dict on every call, covering
        model size, regularization, learning rate, the four per-task
        ``weight_*`` entries and the gradient-norm limit.
    """
    return dict(
        hidden_dim=[64, 128, 256],
        n_lstm_layers=[1, 2, 3],
        dropout=[0.1, 0.15, 0.2, 0.3],
        learning_rate=[1e-5, 5e-5, 1e-4],
        weight_return=[0.5, 1.0, 2.0],
        weight_volatility=[0.25, 0.5, 1.0],
        weight_portfolio=[1.0, 2.0, 3.0],
        weight_direction=[0.1, 0.3, 0.5],
        max_grad_norm=[0.1, 0.5, 1.0],
    )
331
+
332
+
333
def example_sweep():
    """Example of running a hyperparameter sweep.

    Runs a 20-trial random search against a synthetic objective, prints the
    parameter-importance table, and returns the search output.

    Returns:
        (best_config, results) exactly as returned by
        ``HyperparameterTuner.search``.
    """
    # Dedicated, seeded generator for the synthetic noise so the demo is
    # reproducible on its own instead of relying on whatever state the
    # process-wide NumPy RNG happens to be in.
    noise_rng = np.random.RandomState(42)

    # Define a simple objective function
    def mock_train(config):
        # Simulate training with different parameters
        lr = config.get('learning_rate', 1e-4)
        hidden = config.get('hidden_size', 128)
        dropout = config.get('dropout', 0.2)

        # Mock metric: Sharpe ratio (simulate a surface)
        # Best around lr=5e-5, hidden=128, dropout=0.15
        sharpe = 0.5 + np.exp(-((np.log10(lr) - (-4.3))**2) * 10) * 0.5
        sharpe += np.exp(-((hidden - 128)**2) / 5000) * 0.3
        sharpe += (0.2 - abs(dropout - 0.15)) * 0.2
        sharpe += noise_rng.randn() * 0.1  # Noise

        return {
            'sharpe_ratio': sharpe,
            'ic': sharpe * 0.3,
            'max_drawdown': -0.15 + noise_rng.rand() * 0.1
        }

    # Parameter grid
    param_grid = {
        'learning_rate': [1e-5, 5e-5, 1e-4, 5e-4],
        'hidden_size': [64, 128, 256],
        'dropout': [0.1, 0.2, 0.3]
    }

    # Run random search
    tuner = HyperparameterTuner(strategy='random')
    best_config, results = tuner.search(
        param_grid,
        mock_train,
        n_trials=20,
        metric='sharpe_ratio',
        direction='maximize'
    )

    # Analyze importance
    importance = tuner.analyze_importance(results, 'sharpe_ratio')
    print("\nParameter Importance:")
    print(importance.to_string())

    return best_config, results
378
+
379
+
380
# Script entry point: run the demo sweep only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    best_config, results = example_sweep()