Premchan369 committed on
Commit
6dafcf9
·
verified ·
1 Parent(s): 24ff2ee

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +362 -0
main.py ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AlphaForge - Complete Quantitative Trading System
2
+
3
+ Usage:
4
+ python main.py --mode train --tickers SPY QQQ AAPL MSFT
5
+ python main.py --mode backtest --start 2020-01-01 --end 2024-01-01
6
+ python main.py --mode live --config config.yaml
7
+ """
8
+ import argparse
9
+ import numpy as np
10
+ import pandas as pd
11
+ import torch
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
14
+
15
+ from market_data import MarketDataPipeline
16
+ from alpha_model import AlphaEnsemble
17
+ from sentiment_model import SentimentAlphaModel
18
+ from volatility_model import VolatilityEngine
19
+ from portfolio_optimizer import PortfolioOptimizer
20
+ from options_pricer import MLOptionsPricer
21
+ from backtest_engine import BacktestEngine, compute_information_coefficient, RegimeDetector
22
+
23
+
24
def parse_args(argv=None):
    """Parse the AlphaForge command-line options.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the process command line, which is
            the original behaviour.

    Returns:
        argparse.Namespace with the attributes ``mode``, ``tickers``,
        ``start``, ``end``, ``lookback``, ``horizon``, ``epochs``,
        ``device``, ``initial_capital``, ``output`` and ``config``.
    """
    parser = argparse.ArgumentParser(description='AlphaForge Quant System')
    parser.add_argument('--mode', type=str, default='train',
                        choices=['train', 'backtest', 'live', 'options'])
    parser.add_argument('--tickers', type=str, nargs='+',
                        default=['SPY', 'QQQ', 'AAPL', 'MSFT', 'GOOGL',
                                 'AMZN', 'META', 'NVDA', 'TSLA', 'JPM'])
    parser.add_argument('--start', type=str, default='2020-01-01')
    parser.add_argument('--end', type=str, default='2024-01-01')
    parser.add_argument('--lookback', type=int, default=60)
    parser.add_argument('--horizon', type=int, default=5)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--device', type=str, default='cpu')
    parser.add_argument('--initial_capital', type=float, default=1_000_000)
    parser.add_argument('--output', type=str, default='results/')
    # The module usage text shows `--mode live --config config.yaml`; without
    # this option argparse rejects that command line as unrecognized.
    parser.add_argument('--config', type=str, default=None)
    return parser.parse_args(argv)
39
+
40
+
41
def train_alpha_model(args):
    """Train the multi-asset alpha ensemble and save its weights.

    Fetches market data for ``args.tickers``, builds feature sequences,
    splits them by position into 70% train / 15% validation / 15% test,
    fits an ``AlphaEnsemble`` and reports the information coefficient on
    the test slice.

    Args:
        args: Parsed CLI namespace. Reads ``tickers``, ``start``, ``end``,
            ``lookback``, ``horizon``, ``epochs``, ``device`` and ``output``.

    Returns:
        Tuple ``(ensemble, metrics, test_ic)``: the fitted ensemble, the
        value returned by ``ensemble.fit`` and the value returned by
        ``compute_information_coefficient`` for the test slice.
    """
    import os

    print("=" * 60)
    print("ALPHA FORGE - Multi-Asset Alpha Model Training")
    print("=" * 60)

    # Create the output directory up front: torch.save does not create
    # missing directories, and failing only after training would discard
    # the whole run.
    os.makedirs(args.output, exist_ok=True)

    # Fetch data
    pipeline = MarketDataPipeline(args.tickers, args.start, args.end)
    # The returned data is not needed here; the call is kept because the
    # feature matrix is presumably built from what it loads (TODO confirm).
    pipeline.fetch_data()

    # Create features
    features_df = pipeline.create_feature_matrix()
    X, y, _tickers, _dates = pipeline.create_sequences(
        features_df, lookback=args.lookback, forecast_horizon=args.horizon
    )

    print(f"\nDataset: {len(X)} samples, {X.shape[2]} features, seq_len={args.lookback}")

    # Train/val/test split (time-based)
    n = len(X)
    train_end = int(n * 0.7)
    val_end = int(n * 0.85)

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[train_end:val_end], y[train_end:val_end]
    X_test, y_test = X[val_end:], y[val_end:]

    print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

    # Train ensemble
    ensemble = AlphaEnsemble(
        input_size=X.shape[2],
        seq_len=args.lookback,
        device=args.device
    )

    metrics = ensemble.fit(
        X_train, y_train,
        X_val, y_val,
        epochs=args.epochs,
        batch_size=64,
        lr=1e-4
    )

    # Test predictions
    test_pred = ensemble.predict(X_test)
    test_ic = compute_information_coefficient(
        pd.Series(test_pred),
        pd.Series(y_test),
        by_date=False
    )

    print(f"\nTest IC: {test_ic['mean_ic']:.4f}")
    print(f"LSTM final val IC: {metrics['lstm']['val_ic'][-1]:.4f}")
    print(f"Transformer final val IC: {metrics['transformer']['val_ic'][-1]:.4f}")

    # Save model
    torch.save(ensemble.lstm.state_dict(),
               os.path.join(args.output, "lstm_model.pt"))
    torch.save(ensemble.transformer.state_dict(),
               os.path.join(args.output, "transformer_model.pt"))

    return ensemble, metrics, test_ic
102
+
103
+
104
def run_backtest(args):
    """Run the alpha -> volatility -> optimisation -> backtest pipeline.

    Steps, in order:
      1. Fetch data, build sequences and split them by position
         (first 70% for training, last 15% for testing; the slice in
         between is not used here).
      2. Train an ``AlphaEnsemble`` and predict the test slice.
      3. Fit a GARCH model per ticker on daily log returns.
      4. On every 5th test date, compute max-Sharpe weights.
      5. Run ``BacktestEngine`` on those weights, print the metrics and
         write ``backtest_results.json`` and ``weights_history.csv`` into
         ``args.output``.

    Args:
        args: Parsed CLI namespace. Reads ``tickers``, ``start``, ``end``,
            ``lookback``, ``horizon``, ``device``, ``initial_capital`` and
            ``output``.

    Returns:
        Tuple ``(metrics, engine)``: the dict returned by
        ``engine.run_backtest`` and the ``BacktestEngine`` instance.

    Raises:
        ValueError: If no rebalance date has at least 3 predictions, so no
            portfolio weights could be produced.
    """
    import json
    import os

    print("=" * 60)
    print("ALPHA FORGE - Full Pipeline Backtest")
    print("=" * 60)

    # Fetch data
    pipeline = MarketDataPipeline(args.tickers, args.start, args.end)
    data = pipeline.fetch_data()
    features_df = pipeline.create_feature_matrix()

    X, y, tickers_arr, dates = pipeline.create_sequences(
        features_df, lookback=args.lookback, forecast_horizon=args.horizon
    )

    # Split
    n = len(X)
    train_end = int(n * 0.7)
    val_end = int(n * 0.85)

    X_train, y_train = X[:train_end], y[:train_end]
    X_test, y_test = X[val_end:], y[val_end:]
    dates_test = dates[val_end:]
    tickers_test = tickers_arr[val_end:]

    # Train alpha model
    # NOTE: the epoch count is fixed at 30 here; --epochs is not consulted.
    print("\n[1/4] Training Alpha Model...")
    ensemble = AlphaEnsemble(input_size=X.shape[2], seq_len=args.lookback, device=args.device)
    ensemble.fit(X_train, y_train, epochs=30, batch_size=64, lr=1e-4)

    # Generate predictions
    alpha_pred = ensemble.predict(X_test)

    # Build prediction DataFrame. Dates are converted to datetimes once so
    # the per-date filter below compares them with the Timestamp rebalance
    # dates on equal terms, whatever type create_sequences returned.
    pred_df = pd.DataFrame({
        'date': pd.to_datetime(dates_test),
        'ticker': tickers_test,
        'predicted_return': alpha_pred,
        'actual_return': y_test
    })

    # Compute IC
    ic_metrics = compute_information_coefficient(
        pred_df['predicted_return'],
        pred_df['actual_return'],
        by_date=True
    )
    print(f"Mean IC: {ic_metrics['mean_ic']:.4f} +/- {ic_metrics['ic_std']:.4f}")
    print(f"IC IR: {ic_metrics['ic_ir']:.4f}")

    # Train volatility model
    print("\n[2/4] Training Volatility Model...")
    vol_engine = VolatilityEngine()

    # Build returns matrix for covariance (daily log returns per ticker)
    returns_dict = {}
    for ticker in args.tickers:
        if ticker in data:
            close = data[ticker]['Close'].values.flatten()
            returns_dict[ticker] = pd.Series(
                np.log(close[1:] / close[:-1]),
                index=data[ticker].index[1:]
            )
    returns_df = pd.DataFrame(returns_dict).fillna(0)

    # Fit GARCH for each ticker
    for ticker in args.tickers:
        if ticker in returns_df.columns:
            vol_engine.fit_garch(returns_df[ticker], ticker)

    # Portfolio optimization and backtest
    print("\n[3/4] Running Portfolio Optimization...")

    # Get unique test dates
    test_dates = sorted(pd.to_datetime(pred_df['date'].unique()))

    # Rebalance every 5 days
    rebalance_dates = test_dates[::5]

    optimizer = PortfolioOptimizer(
        max_weight=0.25,
        risk_aversion=2.0,
        transaction_cost=0.0003,
        turnover_penalty=0.001
    )

    weights_history = []

    for rebalance_date in rebalance_dates:
        # Get predictions for this date
        day_preds = pred_df[pred_df['date'] == rebalance_date]

        if len(day_preds) < 3:
            continue

        # Build mu vector (tickers without a prediction get 0)
        mu = day_preds.set_index('ticker')['predicted_return'].reindex(args.tickers).fillna(0).values

        # Build covariance matrix
        try:
            Sigma = vol_engine.build_covariance_matrix(returns_df, rebalance_date)
            Sigma = Sigma.reindex(index=args.tickers, columns=args.tickers).fillna(0)
            Sigma = Sigma.values
        except Exception as exc:
            # Best-effort fallback to a diagonal matrix, but report it and
            # let KeyboardInterrupt/SystemExit propagate.
            print(f"Warning: covariance build failed for {rebalance_date} ({exc}); "
                  "using diagonal fallback")
            Sigma = np.eye(len(args.tickers)) * 0.04

        # Optimize
        result = optimizer.optimize_max_sharpe(mu, Sigma)

        weights_row = pd.Series(result['weights'], index=args.tickers)
        weights_row.name = rebalance_date
        weights_history.append(weights_row)

    if not weights_history:
        raise ValueError(
            "No rebalance date had at least 3 predictions; "
            "cannot build portfolio weights for the backtest"
        )

    weights_df = pd.DataFrame(weights_history)

    # Build returns for backtest
    # NOTE(review): returns are reindexed to the rebalance dates only, so the
    # engine receives one return row per rebalance date - confirm this is
    # what BacktestEngine.run_backtest expects.
    backtest_returns = returns_df.reindex(weights_df.index).fillna(0)

    # Run backtest
    print("\n[4/4] Running Backtest...")
    engine = BacktestEngine(
        initial_capital=args.initial_capital,
        transaction_cost=0.0003,
        slippage=0.0001
    )

    metrics = engine.run_backtest(
        backtest_returns,
        weights_df,
        rebalance_dates=weights_df.index
    )

    # Regime detection
    if 'SPY' in returns_df.columns:
        regime_detector = RegimeDetector()
        spy_returns = returns_df['SPY'].reindex(weights_df.index).fillna(0)
        # Return value unused; the call is kept because get_regime_stats
        # presumably depends on the detector having been run (TODO confirm).
        regime_detector.detect_regimes(spy_returns)
        regime_stats = regime_detector.get_regime_stats(spy_returns)
        print("\nRegime Statistics:")
        print(regime_stats.to_string())

    # Print results
    print("\n" + "=" * 60)
    print("BACKTEST RESULTS")
    print("=" * 60)
    print(f"Total Return: {metrics['total_return']*100:.2f}%")
    print(f"Annualized Return: {metrics['annualized_return']*100:.2f}%")
    print(f"Volatility: {metrics['volatility']*100:.2f}%")
    print(f"Sharpe Ratio: {metrics['sharpe_ratio']:.3f}")
    print(f"Sortino Ratio: {metrics['sortino_ratio']:.3f}")
    print(f"Max Drawdown: {metrics['max_drawdown']*100:.2f}%")
    print(f"Calmar Ratio: {metrics['calmar_ratio']:.3f}")
    print(f"Win Rate: {metrics['win_rate']*100:.1f}%")
    print(f"Alpha: {metrics['alpha']*100:.2f}%")
    print(f"Beta: {metrics['beta']:.3f}")
    print(f"Information Ratio: {metrics['information_ratio']:.3f}")
    print(f"Avg Turnover: {metrics['avg_turnover']*100:.2f}%")
    print(f"Total Costs: ${metrics['total_transaction_costs']:,.2f}")
    print(f"Final Capital: ${metrics['final_capital']:,.2f}")
    print(f"Trades: {metrics['n_trades']}")

    # Save results
    os.makedirs(args.output, exist_ok=True)

    # Weights go to their own CSV below, so they are not part of the JSON.
    results = {
        'metrics': metrics,
        'ic_metrics': ic_metrics,
        'equity_curve': engine.get_equity_curve().to_dict(),
    }

    with open(f"{args.output}/backtest_results.json", 'w') as f:
        # default=str stringifies anything json cannot encode natively
        # (e.g. timestamp keys/values coming from pandas).
        json.dump(results, f, indent=2, default=str)

    weights_df.to_csv(f"{args.output}/weights_history.csv")

    print(f"\nResults saved to {args.output}/")

    return metrics, engine
286
+
287
+
288
def train_options_model(args):
    """Fit the ML options pricer on synthetic data and save its weights.

    Generates synthetic training (50000) and validation (10000) samples,
    fits ``MLOptionsPricer``, prints a five-row comparison of the true price
    against the ML and Black-Scholes prices, and writes the model state dict
    to ``<args.output>/options_model.pt``.

    Args:
        args: Parsed CLI namespace. Reads ``device`` and ``output``.

    Returns:
        Tuple ``(pricer, metrics)``: the fitted pricer and the value returned
        by ``pricer.fit``.
    """
    import os

    rule = "=" * 60
    print(rule)
    print("ALPHA FORGE - Options Pricing Model")
    print(rule)

    pricer = MLOptionsPricer(device=args.device)

    # Synthetic data: one draw for training, a separate draw for validation.
    print("Generating synthetic option data...")
    train_df = pricer.generate_synthetic_options(n_samples=50000)
    val_df = pricer.generate_synthetic_options(n_samples=10000)

    X_train = pricer.prepare_features(train_df)
    y_train = train_df['price'].values
    X_val = pricer.prepare_features(val_df)
    y_val = val_df['price'].values

    print(f"Training samples: {len(X_train)}, Validation: {len(X_val)}")

    # Fit the pricer
    metrics = pricer.fit(X_train, y_train, X_val, y_val, epochs=100, batch_size=256)

    # Small fresh sample for a side-by-side sanity check
    test_df = pricer.generate_synthetic_options(n_samples=5)
    X_test = pricer.prepare_features(test_df)
    n_examples = len(test_df)

    ml_prices = pricer.predict(X_test)

    def _black_scholes_price(row):
        # Choose the closed-form pricer matching the option type of this row.
        is_call = test_df['option_type'].iloc[row] == 'call'
        pricing_fn = pricer.bs.call_price if is_call else pricer.bs.put_price
        inputs = [test_df[col].iloc[row] for col in ('S', 'K', 'T', 'r', 'sigma_hist')]
        return pricing_fn(*inputs)

    bs_prices = [_black_scholes_price(row) for row in range(n_examples)]

    print("\nSample Predictions:")
    print(f"{'True':>10} {'ML':>10} {'BS':>10} {'ML Err%':>10} {'BS Err%':>10}")
    for row in range(n_examples):
        actual = test_df['price'].iloc[row]
        ml_quote = ml_prices[row]
        bs_quote = bs_prices[row]
        # Absolute error as a percentage of the true price
        ml_err = abs(ml_quote - actual) / actual * 100
        bs_err = abs(bs_quote - actual) / actual * 100
        print(f"{actual:>10.2f} {ml_quote:>10.2f} {bs_quote:>10.2f} {ml_err:>10.2f} {bs_err:>10.2f}")

    # Persist the trained network
    os.makedirs(args.output, exist_ok=True)
    torch.save(pricer.model.state_dict(), f"{args.output}/options_model.pt")

    return pricer, metrics
346
+
347
+
348
def main():
    """Parse the CLI options and run the workflow selected by ``--mode``.

    ``train``, ``backtest`` and ``options`` each call their workflow function
    with the parsed namespace; any other mode prints a notice that live mode
    is not implemented.
    """
    args = parse_args()

    workflows = {
        'train': train_alpha_model,
        'backtest': run_backtest,
        'options': train_options_model,
    }
    workflow = workflows.get(args.mode)
    if workflow is None:
        print("Live mode not implemented in this version")
        return
    workflow(args)


if __name__ == '__main__':
    main()