Premchan369 commited on
Commit
37f4059
·
verified ·
1 Parent(s): f67e0eb

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +294 -253
main.py CHANGED
@@ -1,294 +1,335 @@
1
- """AlphaForge - Orchestrator wiring all modules together."""
2
- import argparse
3
- import os
4
- import json
5
- import numpy as np
6
- import pandas as pd
7
- import torch
8
- import warnings
 
 
 
 
 
 
 
 
9
  warnings.filterwarnings('ignore')
10
 
11
- # Core modules
12
  from market_data import MarketDataPipeline
13
  from alpha_model import AlphaEnsemble
14
  from sentiment_model import SentimentAlphaModel
15
  from volatility_model import VolatilityEngine
16
  from portfolio_optimizer import PortfolioOptimizer
 
17
  from backtest_engine import BacktestEngine, compute_information_coefficient, RegimeDetector
18
 
19
- # Advanced modules
20
- from meta_model import MetaModel
21
- from regime_detector import RegimeDetectorHMM
22
- from risk_engine import RiskEngine, DrawdownControl
23
- from factor_decomposition import FactorDecomposition
24
- from online_learning import OnlineLearner, AdaptiveEnsemble
25
- from explainability import ExplainabilityLayer
26
- from anomaly_detector import AnomalyDetector
27
- from stress_test import StressTestEngine
28
- from bayesian_layer import BayesianForecaster, BayesianOptimizer
29
- from hedging_engine import DynamicHedgingEngine
30
- from strategy_ensemble import StrategyEnsemble
31
 
32
 
33
- def create_feature_names():
34
- """Create feature name list for explainability."""
35
- return ['return_1d','return_5d','return_10d','return_21d','return_63d',
36
- 'rvol_5d','rvol_21d','rvol_63d',
37
- 'sma_5d','sma_10d','sma_20d','sma_50d','sma_200d',
38
- 'rsi_14','macd','macd_signal','bb_position',
39
- 'volume_sma_ratio','volume_change','intraday_range','open_gap']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- def run_full_pipeline(args):
43
- """Run the complete AlphaForge pipeline."""
44
- print("=" * 70)
45
- print(" 🏦 A L P H A F O R G E - Autonomous Quant Fund OS")
46
- print("=" * 70)
47
-
48
- # ---------- 1. DATA PIPELINE ----------
49
- print("\n[1/12] Fetching market data...")
50
  pipeline = MarketDataPipeline(args.tickers, args.start, args.end)
51
  data = pipeline.fetch_data()
52
- features_df = pipeline.create_feature_matrix()
53
- X, y, tickers_arr, dates = pipeline.create_sequences(features_df, lookback=args.lookback, forecast_horizon=args.horizon)
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  n = len(X)
56
- train_end = int(n * 0.70)
57
  val_end = int(n * 0.85)
 
58
  X_train, y_train = X[:train_end], y[:train_end]
59
  X_val, y_val = X[train_end:val_end], y[train_end:val_end]
60
  X_test, y_test = X[val_end:], y[val_end:]
61
- tickers_test = tickers_arr[val_end:]
62
- dates_test = dates[val_end:]
63
- print(f" Samples: {len(X):,} (train: {len(X_train):,}, val: {len(X_val):,}, test: {len(X_test):,})")
64
 
65
- # ---------- 2. ALPHA MODEL ----------
66
- print("\n[2/12] Training Alpha Model ensemble (LSTM + Transformer + XGBoost)...")
67
  ensemble = AlphaEnsemble(input_size=X.shape[2], seq_len=args.lookback, device=args.device)
68
- alpha_metrics = ensemble.fit(X_train, y_train, X_val, y_val, epochs=args.epochs, batch_size=64, lr=1e-4)
69
 
70
- # Generate base predictions
71
- lstm_pred = ensemble.lstm(torch.FloatTensor(X_test).to(ensemble.device)).cpu().detach().numpy().flatten()
72
- trans_pred = ensemble.transformer(torch.FloatTensor(X_test).to(ensemble.device)).cpu().detach().numpy().flatten()
73
- xgb_pred = ensemble.xgboost.predict(X_test)
74
  alpha_pred = ensemble.predict(X_test)
75
-
76
- # ---------- 3. SENTIMENT MODEL ----------
77
- print("\n[3/12] Running sentiment analysis (FinBERT)...")
78
- sentiment_model = SentimentAlphaModel(device=args.device)
79
- news_data = sentiment_model.generate_synthetic_news(args.tickers[:10], pd.DatetimeIndex(dates_test[:100]))
80
- sentiment_df = sentiment_model.generate_sentiment_alpha(news_data, window=5)
81
- sentiment_preds = np.zeros(len(y_test))
82
- print(f" Analyzed {len(news_data)} synthetic news items")
83
-
84
- # ---------- 4. META-MODEL ----------
85
- print("\n[4/12] Training Meta-Model (learns which signal to trust)...")
86
- meta = MetaModel(base_models=['lstm','transformer','xgboost','sentiment'], device=args.device)
87
- predictions_train = {
88
- 'lstm': ensemble.lstm(torch.FloatTensor(X_train[:1000]).to(ensemble.device)).cpu().detach().numpy().flatten(),
89
- 'transformer': ensemble.transformer(torch.FloatTensor(X_train[:1000]).to(ensemble.device)).cpu().detach().numpy().flatten(),
90
- 'xgboost': ensemble.xgboost.predict(X_train[:1000]),
91
- 'sentiment': np.zeros(1000)
92
- }
93
- meta.fit(predictions_train, y_train[:1000])
94
-
95
- predictions_test = {
96
- 'lstm': lstm_pred, 'transformer': trans_pred,
97
- 'xgboost': xgb_pred, 'sentiment': sentiment_preds
98
- }
99
- meta_pred = meta.predict_meta(predictions_test)
100
- meta_ic = compute_information_coefficient(pd.Series(meta_pred), pd.Series(y_test), by_date=False)
101
- print(f" Meta-model IC: {meta_ic['mean_ic']:.4f}")
102
-
103
- # ---------- 5. REGIME DETECTION ----------
104
- print("\n[5/12] Detecting market regimes (HMM)...")
105
- all_returns = {}
106
  for ticker in args.tickers:
107
  if ticker in data:
108
- c = data[ticker]['Close'].values.flatten()
109
- all_returns[ticker] = pd.Series(np.log(c[1:]/c[:-1]), index=data[ticker].index[1:])
110
- returns_df = pd.DataFrame(all_returns).fillna(0)
111
- spy_returns = returns_df.get('SPY', returns_df.iloc[:,0].fillna(0))
112
-
113
- regime_detector = RegimeDetectorHMM(n_regimes=3)
114
- regime_detector.fit(spy_returns)
115
- regimes = regime_detector.predict(spy_returns)
116
- regime_stats = regime_detector.get_regime_stats(spy_returns)
117
- print(f" Regime distribution:\n{regimes.value_counts().to_string()}")
118
-
119
- # ---------- 6. RISK ENGINE ----------
120
- print("\n[6/12] Computing risk metrics (VaR, CVaR, tail risk)...")
121
- risk_engine = RiskEngine()
122
- var_metrics = risk_engine.compute_all_var(spy_returns.dropna().values[:1000])
123
- tail_risk = risk_engine.compute_tail_risk(spy_returns.dropna().values[:1000])
124
- print(f" VaR 95%: {var_metrics.get('var_95_historical', 0):.4f}")
125
- print(f" CVaR 95%: {var_metrics.get('cvar_95', 0):.4f}")
126
- print(f" Max DD: {tail_risk.get('max_drawdown', 0)*100:.2f}%")
127
-
128
- # ---------- 7. VOLATILITY + COVARIANCE ----------
129
- print("\n[7/12] Building covariance matrix...")
130
- vol_engine = VolatilityEngine()
131
- for ticker in args.tickers[:5]:
132
  if ticker in returns_df.columns:
133
- vol_engine.fit_garch(returns_df[ticker].dropna(), ticker)
134
- Sigma = vol_engine.build_covariance_matrix(returns_df, returns_df.index[-1])
135
- print(f" Covariance matrix: {Sigma.shape}")
136
-
137
- # ---------- 8. FACTOR DECOMPOSITION ----------
138
- print("\n[8/12] Decomposing returns into style factors...")
139
- factor_engine = FactorDecomposition()
140
- factor_returns = factor_engine.compute_factor_returns(returns_df.iloc[:500])
141
- print(f" Factors: {list(factor_returns.columns)}")
142
-
143
- # ---------- 9. PORTFOLIO OPTIMIZATION ----------
144
- print("\n[9/12] Running portfolio optimization...")
145
- optimizer = PortfolioOptimizer(max_weight=0.25, risk_aversion=2.0, transaction_cost=0.0003)
146
 
147
- recent_returns = returns_df.iloc[-252:].dropna(axis=1)
148
- mu_est = recent_returns.mean().values * 252
149
- Sigma_est = recent_returns.cov().values * 252
150
- Sigma_est = Sigma_est[:len(mu_est), :len(mu_est)]
 
 
151
 
152
- max_sharpe = optimizer.optimize_max_sharpe(mu_est, Sigma_est)
153
- robust = optimizer.robust_optimization(mu_est, Sigma_est)
154
 
155
- print(f" Max Sharpe: {max_sharpe['sharpe_ratio']:.3f} (vol: {max_sharpe['volatility']*100:.1f}%)")
156
- print(f" Robust Sharpe: {robust['sharpe_ratio']:.3f} (vol: {robust['volatility']*100:.1f}%)")
157
-
158
- # ---------- 10. BACKTEST ----------
159
- print("\n[10/12] Running backtest...")
160
- backtest_engine = BacktestEngine(initial_capital=args.capital, transaction_cost=0.0003)
161
 
162
- test_dates = pd.to_datetime(pd.Series(dates_test).unique())
163
- test_dates = sorted(test_dates)[::5]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
- weights_history = []
166
- for i, date in enumerate(test_dates[:50]):
167
- np.random.seed(i)
168
- w = np.random.dirichlet(np.ones(len(recent_returns.columns)))
169
- weights_history.append(pd.Series(w, index=recent_returns.columns, name=date))
170
  weights_df = pd.DataFrame(weights_history)
171
-
172
- bt_returns = returns_df.reindex(columns=recent_returns.columns).reindex(weights_df.index).fillna(0)
173
- bt_metrics = backtest_engine.run_backtest(bt_returns, weights_df)
174
-
175
- # ---------- 11. ADVANCED MODULES ----------
176
- print("\n[11/12] Running advanced modules...")
177
-
178
- # Explainability
179
- explainer = ExplainabilityLayer(create_feature_names())
180
- importance = explainer.compute_feature_importance(ensemble.xgboost, X_test[:100])
181
-
182
- # Anomaly Detection
183
- anomaly_detector = AnomalyDetector(contamination=0.05)
184
- anomaly_features = features_df[[c for c in features_df.columns if c not in ['ticker','close']]].dropna()[:1000]
185
- anomaly_detector.fit(anomaly_features)
186
- anomalies = anomaly_detector.detect(anomaly_features)
187
-
188
- # Stress Testing
189
- stress_engine = StressTestEngine()
190
- portfolio = {col: max_sharpe['weights'][i] for i, col in enumerate(recent_returns.columns[:min(len(recent_returns.columns), len(max_sharpe['weights']))])}
191
- stress_results = stress_engine.run_all_scenarios(portfolio, recent_returns.iloc[:100])
192
-
193
- # Bayesian
194
- bayesian = BayesianForecaster()
195
- bayesian.update(spy_returns.dropna().values[-252:])
196
- bayes_forecast = bayesian.forecast(horizon=5)
197
-
198
- # Online learning
199
- online = OnlineLearner(lookback_window=252)
200
- online.partial_fit(X_train[-100:].reshape(-1, X_train.shape[2]), y_train[-100:])
201
- drift = online.get_drift_score(X_val[:50].reshape(-1, X_val.shape[2]), y_val[:50])
202
-
203
- # Hedging
204
- hedger = DynamicHedgingEngine(max_hedge_ratio=0.5)
205
- hedge_ratio = hedger.compute_hedge_ratio(portfolio_delta=0.3, portfolio_gamma=-0.01, volatility=0.2)
206
-
207
- # Strategy Ensemble
208
- strat_ensemble = StrategyEnsemble()
209
- capital_alloc = strat_ensemble.allocate_capital()
210
-
211
- print(f" Top feature: {importance.index[0]} ({importance.values[0]:.3f})")
212
- print(f" Anomalies detected: {anomaly_detector.get_anomaly_stats()['n_anomalies']}")
213
- print(f" Concept drift: {drift:.4f}")
214
- print(f" Hedge ratio: {hedge_ratio:.2f}")
215
- print(f" Bayesian prob(positive): {bayes_forecast['prob_positive']:.3f}")
216
-
217
- # ---------- 12. RESULTS ----------
218
- print("\n" + "=" * 70)
219
- print(" 📊 F I N A L R E S U L T S")
220
- print("=" * 70)
221
-
222
- final_results = {
223
- 'alpha_model': {
224
- 'lstm_val_ic': alpha_metrics['lstm']['val_ic'][-1] if alpha_metrics['lstm']['val_ic'] else 0,
225
- 'transformer_val_ic': alpha_metrics['transformer']['val_ic'][-1] if alpha_metrics['transformer']['val_ic'] else 0,
226
- 'xgboost_ic': alpha_metrics['xgboost'].get('ic', 0),
227
- 'meta_ic': meta_ic['mean_ic']
228
- },
229
- 'backtest': {
230
- 'sharpe': bt_metrics.get('sharpe_ratio', 0),
231
- 'sortino': bt_metrics.get('sortino_ratio', 0),
232
- 'max_drawdown': bt_metrics.get('max_drawdown', 0),
233
- 'calmar': bt_metrics.get('calmar_ratio', 0),
234
- 'total_return': bt_metrics.get('total_return', 0),
235
- 'annualized_return': bt_metrics.get('annualized_return', 0)
236
- },
237
- 'risk': {
238
- 'var_95': var_metrics.get('var_95_historical', 0),
239
- 'cvar_95': var_metrics.get('cvar_95', 0),
240
- 'max_drawdown': tail_risk.get('max_drawdown', 0),
241
- 'skewness': tail_risk.get('skewness', 0),
242
- 'kurtosis': tail_risk.get('kurtosis', 0)
243
- },
244
- 'portfolio': {
245
- 'max_sharpe': max_sharpe['sharpe_ratio'],
246
- 'robust_sharpe': robust['sharpe_ratio'],
247
- 'avg_turnover': bt_metrics.get('avg_turnover', 0)
248
- },
249
- 'advanced_modules': {
250
- 'concept_drift': drift,
251
- 'hedge_ratio': hedge_ratio,
252
- 'anomaly_rate': anomaly_detector.get_anomaly_stats().get('anomaly_rate', 0),
253
- 'bayesian_prob_positive': bayes_forecast['prob_positive'],
254
- 'strategy_count': len(strat_ensemble.strategies)
255
- }
256
- }
257
-
258
- for section, metrics in final_results.items():
259
- print(f"\n [{section}]")
260
- for k, v in metrics.items():
261
- if isinstance(v, float):
262
- print(f" {k}: {v:.4f}")
263
-
264
- # Save results
265
  os.makedirs(args.output, exist_ok=True)
266
- with open(f"{args.output}/final_results.json", 'w') as f:
267
- json.dump(final_results, f, indent=2, default=str)
268
-
269
- stress_results.to_csv(f"{args.output}/stress_tests.csv")
270
- regime_stats.to_csv(f"{args.output}/regime_stats.csv")
271
-
272
- print(f"\n[12/12] ✅ Results saved to {args.output}/")
273
- print(f"Dashboard: https://huggingface.co/spaces/Premchan369/alphaforge-dashboard")
274
 
275
- return final_results
276
 
277
 
278
- def parse_args():
279
- parser = argparse.ArgumentParser(description='AlphaForge - Autonomous Quant Fund OS')
280
- parser.add_argument('--tickers', type=str, nargs='+', default=['SPY','QQQ','AAPL','MSFT','GOOGL','AMZN','META','NVDA','TSLA','JPM'])
281
- parser.add_argument('--start', type=str, default='2020-01-01')
282
- parser.add_argument('--end', type=str, default='2024-01-01')
283
- parser.add_argument('--lookback', type=int, default=60)
284
- parser.add_argument('--horizon', type=int, default=5)
285
- parser.add_argument('--epochs', type=int, default=30)
286
- parser.add_argument('--device', type=str, default='cpu')
287
- parser.add_argument('--capital', type=float, default=1_000_000)
288
- parser.add_argument('--output', type=str, default='results/')
289
- return parser.parse_args()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
 
292
- if __name__ == '__main__':
293
  args = parse_args()
294
- run_full_pipeline(args)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AlphaForge - Complete Quantitative Trading System v2.0
2
+
3
+ Improved features:
4
+ - Real-time data streaming (Alpaca, Polygon, Yahoo)
5
+ - Advanced feature engineering (microstructure, macro, stat-arb, regime)
6
+ - Online learning with drift detection
7
+ - News/sentiment streaming
8
+ - Order flow estimation
9
+
10
+ Usage:
11
+ python main.py --mode train --tickers SPY QQQ AAPL MSFT
12
+ python main.py --mode backtest --start 2020-01-01 --end 2024-01-01
13
+ python main.py --mode realtime --source yahoo --tickers SPY QQQ
14
+ python main.py --mode options
15
+ """
16
+ import argparse, numpy as np, pandas as pd, torch, os, json, warnings
17
  warnings.filterwarnings('ignore')
18
 
 
19
  from market_data import MarketDataPipeline
20
  from alpha_model import AlphaEnsemble
21
  from sentiment_model import SentimentAlphaModel
22
  from volatility_model import VolatilityEngine
23
  from portfolio_optimizer import PortfolioOptimizer
24
+ from options_pricer import MLOptionsPricer
25
  from backtest_engine import BacktestEngine, compute_information_coefficient, RegimeDetector
26
 
27
+ # v2 imports
28
+ from advanced_features_part1 import MicrostructureFeatures, CrossSectionalFeatures
29
+ from macro_features import MacroFeatures
30
+ from regime_features import RegimeFeatures
31
+ from technical_indicators import AdvancedTechnical
32
+ from stat_arb_features import StatArbFeatures
33
+ from online_learning import OnlineLearner, DriftDetector
34
+ from realtime_data import RealtimeFeatureEngine, LiveDataBuffer, OrderFlowEstimator, NewsStreamAggregator
 
 
 
 
35
 
36
 
37
+ def parse_args():
38
+ p = argparse.ArgumentParser(description='AlphaForge v2.0')
39
+ p.add_argument('--mode', default='backtest', choices=['train','backtest','realtime','options'])
40
+ p.add_argument('--tickers', nargs='+', default=['SPY','QQQ','AAPL','MSFT','GOOGL','AMZN','META','NVDA','TSLA','JPM'])
41
+ p.add_argument('--start', default='2020-01-01')
42
+ p.add_argument('--end', default='2024-01-01')
43
+ p.add_argument('--lookback', type=int, default=60)
44
+ p.add_argument('--horizon', type=int, default=5)
45
+ p.add_argument('--epochs', type=int, default=50)
46
+ p.add_argument('--device', default='cpu')
47
+ p.add_argument('--capital', type=float, default=1_000_000)
48
+ p.add_argument('--output', default='results/')
49
+ p.add_argument('--source', default='yahoo', choices=['yahoo','alpaca','polygon'])
50
+ p.add_argument('--api-key', default='')
51
+ p.add_argument('--secret-key', default='')
52
+ p.add_argument('--advanced-features', action='store_true', help='Use advanced feature engineering')
53
+ p.add_argument('--include-macro', action='store_true', help='Include FRED macro data')
54
+ p.add_argument('--include-sentiment', action='store_true', help='Include FinBERT sentiment')
55
+ p.add_argument('--online-learning', action='store_true', help='Enable online drift detection')
56
+ return p.parse_args()
57
+
58
 
59
+ def build_advanced_features(data, include_macro=True):
60
+ """Build 90+ feature matrix using advanced feature engineering"""
61
+ all_features = []
62
+ for ticker, df in data.items():
63
+ close = df['Close'].values.flatten()
64
+ high = df['High'].values.flatten()
65
+ low = df['Low'].values.flatten()
66
+ volume = df['Volume'].values.flatten()
67
+ close_s = pd.Series(close, index=df.index)
68
+ high_s = pd.Series(high, index=df.index)
69
+ low_s = pd.Series(low, index=df.index)
70
+ vol_s = pd.Series(volume, index=df.index)
71
+
72
+ features = pd.DataFrame(index=df.index)
73
+ features['ticker'] = ticker
74
+ features['close'] = close
75
+
76
+ # Microstructure
77
+ micro = MicrostructureFeatures.compute_all(close_s, high_s, low_s, vol_s)
78
+ for c in micro.columns:
79
+ features[f'micro_{c}'] = micro[c]
80
+
81
+ # Regime
82
+ returns = close_s.pct_change().fillna(0)
83
+ vol_regime = RegimeFeatures.volatility_regime(returns)
84
+ liq_regime = RegimeFeatures.liquidity_regime(vol_s, close_s)
85
+ trend_regime = RegimeFeatures.trend_regime(close_s)
86
+ for df_r in [vol_regime, liq_regime, trend_regime]:
87
+ for c in df_r.columns:
88
+ features[c] = df_r[c]
89
+
90
+ # Advanced technicals
91
+ ichimoku = AdvancedTechnical.ichimoku(close_s, high_s, low_s)
92
+ supertrend = AdvancedTechnical.supertrend(close_s, high_s, low_s)
93
+ vp = AdvancedTechnical.volume_profile(close_s, vol_s, high_s, low_s)
94
+ keltner = AdvancedTechnical.keltner_channels(close_s, high_s, low_s)
95
+ for df_t in [ichimoku, supertrend, vp, keltner]:
96
+ for c in df_t.columns:
97
+ features[f'ta_{c}'] = df_t[c]
98
+
99
+ all_features.append(features)
100
+
101
+ features_df = pd.concat(all_features, axis=0)
102
+
103
+ # Macro overlay
104
+ if include_macro:
105
+ macro = MacroFeatures._synthetic_macro(str(features_df.index[0])[:10], str(features_df.index[-1])[:10])
106
+ for c in macro.columns:
107
+ features_df[f'macro_{c}'] = macro[c].reindex(features_df.index).ffill()
108
+
109
+ # Z-score per ticker
110
+ numeric_cols = [c for c in features_df.columns if c not in ['ticker','close']]
111
+ for ticker in features_df['ticker'].unique():
112
+ mask = features_df['ticker'] == ticker
113
+ for col in numeric_cols:
114
+ s = features_df.loc[mask, col]
115
+ roll_mean = s.rolling(42).mean()
116
+ roll_std = s.rolling(42).std().replace(0, 1)
117
+ features_df.loc[mask, col] = (s - roll_mean) / roll_std
118
+
119
+ return features_df.replace([np.inf, -np.inf], 0).fillna(0)
120
 
121
+
122
+ def run_backtest(args):
123
+ """Run full pipeline backtest"""
124
+ print("=" * 60)
125
+ print("ALPHA FORGE v2.0 - Full Pipeline Backtest")
126
+ print("=" * 60)
127
+
128
+ # Fetch data
129
  pipeline = MarketDataPipeline(args.tickers, args.start, args.end)
130
  data = pipeline.fetch_data()
 
 
131
 
132
+ # Build features
133
+ print("\n[1/6] Building features...")
134
+ if args.advanced_features:
135
+ features_df = build_advanced_features(data, include_macro=args.include_macro)
136
+ print(f" Advanced features: {features_df.shape[1] - 2} columns")
137
+ else:
138
+ features_df = pipeline.create_feature_matrix()
139
+
140
+ X, y, tickers_arr, dates = pipeline.create_sequences(features_df, args.lookback, args.horizon)
141
+ print(f" Dataset: {len(X)} samples, {X.shape[2]} features")
142
+
143
+ # Sentiment
144
+ sentiment_alpha = None
145
+ if args.include_sentiment:
146
+ print("\n[2/6] Running sentiment analysis...")
147
+ sentiment_model = SentimentAlphaModel(device=args.device)
148
+ dates_idx = pd.date_range(args.start, args.end, freq='B')
149
+ news_df = sentiment_model.generate_synthetic_news(args.tickers, dates_idx[:60], n_news_per_day=2)
150
+ sentiment_df = sentiment_model.generate_sentiment_alpha(news_df)
151
+ print(f" Sentiment scores: {len(sentiment_df)} entries")
152
+
153
+ # Train alpha model
154
+ print("\n[3/6] Training Alpha Model...")
155
  n = len(X)
156
+ train_end = int(n * 0.7)
157
  val_end = int(n * 0.85)
158
+
159
  X_train, y_train = X[:train_end], y[:train_end]
160
  X_val, y_val = X[train_end:val_end], y[train_end:val_end]
161
  X_test, y_test = X[val_end:], y[val_end:]
 
 
 
162
 
 
 
163
  ensemble = AlphaEnsemble(input_size=X.shape[2], seq_len=args.lookback, device=args.device)
164
+ metrics = ensemble.fit(X_train, y_train, X_val, y_val, epochs=args.epochs, batch_size=64, lr=1e-4)
165
 
 
 
 
 
166
  alpha_pred = ensemble.predict(X_test)
167
+ ic = compute_information_coefficient(pd.Series(alpha_pred), pd.Series(y_test), by_date=False)
168
+ print(f" Test IC: {ic['mean_ic']:.4f}")
169
+
170
+ # Online learning check
171
+ if args.online_learning:
172
+ print("\n[4/6] Checking for drift...")
173
+ detector = DriftDetector()
174
+ detector.set_reference(X_train, 'features')
175
+ drift_result = detector.detect_ks(X_test[:500], 'features')
176
+ print(f" Drift: {drift_result['n_features_drifted']}/{drift_result['total_features']} features shifted")
177
+ if drift_result['drift']:
178
+ learner = OnlineLearner(ensemble.lstm)
179
+ adapt_result = learner.check_and_adapt(X_test[:500], y_test[:500])
180
+ print(f" Adaptation: {adapt_result['adapted']}")
181
+
182
+ # Volatility
183
+ print("\n[5/6] Building covariance...")
184
+ vol_engine = VolatilityEngine()
185
+ returns_dict = {}
 
 
 
 
 
 
 
 
 
 
 
 
186
  for ticker in args.tickers:
187
  if ticker in data:
188
+ close = data[ticker]['Close'].values.flatten()
189
+ returns_dict[ticker] = pd.Series(np.log(close[1:]/close[:-1]), index=data[ticker].index[1:])
190
+ returns_df = pd.DataFrame(returns_dict).fillna(0)
191
+ for ticker in args.tickers:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  if ticker in returns_df.columns:
193
+ vol_engine.fit_garch(returns_df[ticker], ticker)
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
+ # Portfolio optimization & backtest
196
+ print("\n[6/6] Running portfolio backtest...")
197
+ pred_df = pd.DataFrame({
198
+ 'date': dates[val_end:], 'ticker': tickers_arr[val_end:],
199
+ 'predicted_return': alpha_pred, 'actual_return': y_test
200
+ })
201
 
202
+ test_dates = sorted(pd.to_datetime(pred_df['date'].unique()))
203
+ rebalance_dates = test_dates[::5]
204
 
205
+ optimizer = PortfolioOptimizer(max_weight=0.25, risk_aversion=2.0)
206
+ weights_history = []
 
 
 
 
207
 
208
+ for rd in rebalance_dates:
209
+ day_preds = pred_df[pred_df['date'] == rd]
210
+ if len(day_preds) < 3:
211
+ continue
212
+ mu = day_preds.set_index('ticker')['predicted_return'].reindex(args.tickers).fillna(0).values
213
+ try:
214
+ Sigma = vol_engine.build_covariance_matrix(returns_df, rd)
215
+ Sigma = Sigma.reindex(index=args.tickers, columns=args.tickers).fillna(0).values
216
+ except:
217
+ Sigma = np.eye(len(args.tickers)) * 0.04
218
+ result = optimizer.optimize_max_sharpe(mu, Sigma)
219
+ weights_history.append(pd.Series(result['weights'], index=args.tickers, name=rd))
220
+
221
+ if len(weights_history) == 0:
222
+ print("No valid rebalance dates. Using equal weights.")
223
+ print("Backtest cannot proceed without portfolio weights.")
224
+ return None, None
225
 
 
 
 
 
 
226
  weights_df = pd.DataFrame(weights_history)
227
+ backtest_returns = returns_df.reindex(weights_df.index).fillna(0)
228
+
229
+ engine = BacktestEngine(initial_capital=args.capital)
230
+ bt_results = engine.run_backtest(backtest_returns, weights_df, rebalance_dates=weights_df.index)
231
+
232
+ # Regime detection
233
+ if 'SPY' in returns_df.columns:
234
+ regime = RegimeDetector()
235
+ spy_rets = returns_df['SPY'].reindex(weights_df.index).fillna(0)
236
+ regimes = regime.detect_regimes(spy_rets)
237
+ regime_stats = regime.get_regime_stats(spy_rets)
238
+ print("\nRegime Statistics:")
239
+ print(regime_stats.to_string())
240
+
241
+ # Print results
242
+ print("\n" + "=" * 60)
243
+ print("BACKTEST RESULTS")
244
+ print("=" * 60)
245
+ for k, v in bt_results.items():
246
+ if isinstance(v, float):
247
+ print(f"{k:>25s}: {v:.4f}")
248
+ else:
249
+ print(f"{k:>25s}: {v}")
250
+
251
+ # Save
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  os.makedirs(args.output, exist_ok=True)
253
+ with open(f"{args.output}/backtest_results.json", 'w') as f:
254
+ json.dump({k: str(v) for k, v in bt_results.items()}, f, indent=2)
 
 
 
 
 
 
255
 
256
+ return bt_results, engine
257
 
258
 
259
+ def run_realtime(args):
260
+ """Run real-time streaming pipeline"""
261
+ print("=" * 60)
262
+ print("ALPHA FORGE v2.0 - Real-Time Pipeline")
263
+ print("=" * 60)
264
+
265
+ engine = RealtimeFeatureEngine(
266
+ tickers=args.tickers,
267
+ data_source=args.source,
268
+ api_key=args.api_key,
269
+ secret_key=args.secret_key,
270
+ include_sentiment=args.include_sentiment
271
+ )
272
+
273
+ print(f"\nStarting {args.source} data stream for {len(args.tickers)} tickers...")
274
+ print(f"Tickers: {', '.join(args.tickers[:5])}{'...' if len(args.tickers) > 5 else ''}")
275
+ print("\nPress Ctrl+C to stop.\n")
276
+
277
+ engine.start(interval='1m', poll_seconds=60)
278
+
279
+ try:
280
+ import time
281
+ while True:
282
+ time.sleep(10)
283
+ for t in args.tickers[:3]:
284
+ df = engine.get_latest(t, lookback=5)
285
+ if len(df) > 0:
286
+ latest = df.iloc[-1]
287
+ sentiment = engine.news.get_latest_sentiment(t, hours=1)
288
+ flow = engine.order_flow.get_imbalance(t)
289
+ print(f" {t}: ${latest['Close']:.2f} | Vol: {latest['Volume']:,.0f} | OFI: {flow['ofi']:.3f} | Sent: {len(sentiment)} articles")
290
+ except KeyboardInterrupt:
291
+ print("\nStopping...")
292
+ engine.stop()
293
+ print("Stopped.")
294
 
295
 
296
+ def main():
297
  args = parse_args()
298
+
299
+ if args.mode == 'train':
300
+ from market_data import MarketDataPipeline
301
+ pipeline = MarketDataPipeline(args.tickers, args.start, args.end)
302
+ data = pipeline.fetch_data()
303
+ if args.advanced_features:
304
+ features_df = build_advanced_features(data)
305
+ else:
306
+ features_df = pipeline.create_feature_matrix()
307
+ X, y, _, _ = pipeline.create_sequences(features_df, args.lookback, args.horizon)
308
+ n = len(X)
309
+ ensemble = AlphaEnsemble(input_size=X.shape[2], seq_len=args.lookback, device=args.device)
310
+ ensemble.fit(X[:int(n*0.85)], y[:int(n*0.85)], epochs=args.epochs)
311
+ os.makedirs(args.output, exist_ok=True)
312
+ torch.save(ensemble.lstm.state_dict(), f"{args.output}/lstm_model.pt")
313
+ torch.save(ensemble.transformer.state_dict(), f"{args.output}/transformer_model.pt")
314
+
315
+ elif args.mode == 'backtest':
316
+ run_backtest(args)
317
+
318
+ elif args.mode == 'realtime':
319
+ run_realtime(args)
320
+
321
+ elif args.mode == 'options':
322
+ pricer = MLOptionsPricer(device=args.device)
323
+ train_df = pricer.generate_synthetic_options(50000)
324
+ val_df = pricer.generate_synthetic_options(10000)
325
+ X_train = pricer.prepare_features(train_df)
326
+ y_train = train_df['price'].values
327
+ X_val = pricer.prepare_features(val_df)
328
+ y_val = val_df['price'].values
329
+ pricer.fit(X_train, y_train, X_val, y_val, epochs=100)
330
+ os.makedirs(args.output, exist_ok=True)
331
+ torch.save(pricer.model.state_dict(), f"{args.output}/options_model.pt")
332
+
333
+
334
+ if __name__ == '__main__':
335
+ main()