saadrizvi09 committed on
Commit
5bf05bb
·
verified ·
1 Parent(s): b4f22a3

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +355 -0
src/streamlit_app.py CHANGED
@@ -8,6 +8,16 @@ from sklearn.preprocessing import StandardScaler
8
  import plotly.graph_objects as go
9
  import plotly.express as px
10
  from datetime import datetime, timedelta
 
 
 
 
 
 
 
 
 
 
11
 
12
  # --- Config ---
13
  st.set_page_config(page_title="Hybrid HMM-SVR Strategy Backtester", layout="wide")
@@ -178,6 +188,351 @@ st.markdown("""
178
  * *If SVR predicts lower risk -> Increase Position Size.*
179
  """)
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  # Sidebar Inputs
182
  with st.sidebar:
183
  st.header("Settings")
 
8
  import plotly.graph_objects as go
9
  import plotly.express as px
10
  from datetime import datetime, timedelta
11
+ import streamlit as st
12
+ import yfinance as yf
13
+ import pandas as pd
14
+ import numpy as np
15
+ from hmmlearn.hmm import GaussianHMM
16
+ from sklearn.svm import SVR
17
+ from sklearn.preprocessing import StandardScaler
18
+ import plotly.graph_objects as go
19
+ import plotly.express as px
20
+ from datetime import datetime, timedelta
21
 
22
  # --- Config ---
23
  st.set_page_config(page_title="Hybrid HMM-SVR Strategy Backtester", layout="wide")
 
188
  * *If SVR predicts lower risk -> Increase Position Size.*
189
  """)
190
 
191
# Sidebar Inputs
# Collects every user-tunable setting for the backtest. The names bound here
# (ticker, backtest_start, backtest_end, short_window, long_window, n_states)
# are read by the "Run Hybrid Backtest" section further down the script.
with st.sidebar:
    st.header("Settings")

    ticker = st.selectbox(
        "Ticker",
        ["BTC-USD", "BNB-USD", "SOL-USD"]
    )

    backtest_start = st.date_input("Backtest Start Date", datetime.now() - timedelta(days=1425))
    backtest_end = st.date_input("Backtest End Date", datetime.now())

    st.caption("Note: Models will automatically train on the **4 years** of data prior to your selected Start Date.")

    st.divider()
    # The second positional parameter of st.number_input is min_value, not
    # value. Passing 12 / 26 positionally made them the lowest selectable
    # windows; they are meant to be the defaults, so pass value= by keyword.
    short_window = st.number_input("Fast EMA", min_value=1, value=12)
    long_window = st.number_input("Slow EMA", min_value=2, value=26)
    # Slider arguments are (label, min, max, default): 2 to 4 states, default 3.
    n_states = st.slider("HMM States", 2, 4, 3)
209
+
210
# Runs the whole pipeline when the button is pressed: download prices, build
# features, fit the HMM and SVR on the data before the backtest start date,
# simulate the strategy on the data from that date onward, then render the
# metrics, charts and trade log.
if st.button("Run Hybrid Backtest"):
    # The models are fitted on the four years that precede the backtest window.
    train_start_date = pd.Timestamp(backtest_start) - pd.DateOffset(years=4)

    df = fetch_data(ticker, train_start_date, backtest_end)

    if df is None or len(df) < 200:
        st.error(f"Not enough data found for {ticker}. Ensure the ticker existed 4 years prior to {backtest_start}.")
    else:
        # 1. Feature Engineering
        df['Log_Returns'] = np.log(df['Close'] / df['Close'].shift(1))
        df['Volatility'] = df['Log_Returns'].rolling(window=10).std()

        # Keep only the negative returns (everything else becomes 0) so the
        # rolling std measures downside dispersion.
        df['Downside_Returns'] = df['Log_Returns'].where(df['Log_Returns'] < 0, 0)
        df['Downside_Vol'] = df['Downside_Returns'].rolling(window=10).std()

        df['EMA_Short'] = df['Close'].ewm(span=short_window, adjust=False).mean()
        df['EMA_Long'] = df['Close'].ewm(span=long_window, adjust=False).mean()

        # SVR target: the next row's realised volatility.
        df['Target_Next_Vol'] = df['Volatility'].shift(-1)

        df = df.dropna()

        # 2. Split Data
        split_point = pd.Timestamp(backtest_start)
        train_df = df[df.index < split_point].copy()
        test_df = df[df.index >= split_point].copy()

        if len(train_df) < 10:
            # Previously this case only produced a warning and then crashed on
            # train_df.index[0] (empty frame) or inside the model fit.
            st.error("Not enough data for training range.")
        elif len(test_df) < 10:
            st.error("Not enough data for backtesting range.")
        else:
            if len(train_df) < 365:
                st.warning(f"Warning: Only {len(train_df)} days found for training. HMM performs best with >2 years of data.")

            st.info(f"Training on {len(train_df)} days ({train_df.index[0].date()} to {train_df.index[-1].date()}). Backtesting on {len(test_df)} days.")

            with st.spinner("Training HMM (Regime Detection)..."):
                hmm_model, state_map = train_hmm_model(train_df, n_states)

                # Same scaling (x100) as used inside train_hmm_model.
                X_train_hmm = train_df[['Log_Returns', 'Volatility']].values * 100
                train_raw_states = hmm_model.predict(X_train_hmm)
                train_df['Regime'] = [state_map.get(s, s) for s in train_raw_states]

            with st.spinner("Training SVR (Volatility Forecasting)..."):
                svr_model, svr_scaler = train_svr_model(train_df)

            with st.spinner("Running Backtest Loop..."):
                # --- OUT OF SAMPLE BACKTEST ---

                # NOTE(review): predict() decodes the whole test sequence in
                # one call, so a day's regime label may be influenced by later
                # observations - confirm whether this look-ahead is acceptable.
                X_test_hmm = test_df[['Log_Returns', 'Volatility']].values * 100
                test_raw_states = hmm_model.predict(X_test_hmm)
                test_df['Regime'] = [state_map.get(s, s) for s in test_raw_states]

                X_test_svr = test_df[['Log_Returns', 'Volatility', 'Downside_Vol', 'Regime']].values
                X_test_svr_scaled = svr_scaler.transform(X_test_svr)
                test_df['Predicted_Vol'] = svr_model.predict(X_test_svr_scaled)

                # state_map orders states by average volatility, so the last
                # index is the most volatile regime.
                high_vol_state = n_states - 1

                test_df['Signal'] = np.where(test_df['EMA_Short'] > test_df['EMA_Long'], 1, 0)

                avg_train_vol = train_df['Volatility'].mean()

                # SVR output is unbounded. A zero forecast would give an
                # infinite 1/ratio, and a negative one a negative size that the
                # clip below turns into 0 exposure - the opposite of
                # "lower predicted risk -> larger position". Floor the forecast
                # used for sizing; the raw forecast stays in Predicted_Vol.
                min_predicted_vol = 1e-8
                sizing_vol = test_df['Predicted_Vol'].clip(lower=min_predicted_vol)

                test_df['Risk_Ratio'] = sizing_vol / avg_train_vol
                test_df['Position_Size'] = (1.0 / test_df['Risk_Ratio']).clip(upper=1.0, lower=0.0)

                # HMM filter: no exposure in the highest-volatility regime.
                test_df['Position_Size'] = np.where(
                    test_df['Regime'] == high_vol_state,
                    0.0,
                    test_df['Position_Size']
                )

                # shift(1): the position decided on one row is applied to the
                # next row's return.
                test_df['Final_Position'] = (test_df['Signal'] * test_df['Position_Size']).shift(1)

                test_df['Simple_Returns'] = test_df['Close'].pct_change()
                test_df['Strategy_Returns'] = test_df['Final_Position'] * test_df['Simple_Returns']
                test_df['Buy_Hold_Returns'] = test_df['Simple_Returns']

                test_df['Strategy_Value'] = (1 + test_df['Strategy_Returns'].fillna(0)).cumprod()
                test_df['Buy_Hold_Value'] = (1 + test_df['Buy_Hold_Returns'].fillna(0)).cumprod()

                test_df.dropna(inplace=True)

                # --- EXTRACT TRADES ---
                trade_log = generate_trade_log(test_df)

            # --- RESULTS ---

            metrics_df = calculate_metrics(test_df)
            st.subheader("Performance Metrics")
            st.table(metrics_df)

            # Charts
            col1, col2 = st.columns([2, 1])

            with col1:
                st.subheader("Equity Curve & Trade Executions")
                fig = go.Figure()

                # 1. Equity Curves
                fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Buy_Hold_Value'], name='Buy & Hold', line=dict(color='gray', dash='dot')))
                fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Strategy_Value'], name='Hybrid Strategy', line=dict(color='#00CC96', width=2)))

                # 2. Add Trade Markers
                if not trade_log.empty:
                    # Place each marker on the strategy equity curve at the
                    # trade's entry / exit date.
                    buy_points = trade_log.set_index('Entry Date')
                    buy_vals = test_df.loc[buy_points.index, 'Strategy_Value']

                    sell_points = trade_log.set_index('Exit Date')
                    sell_vals = test_df.loc[sell_points.index, 'Strategy_Value']

                    fig.add_trace(go.Scatter(
                        x=buy_points.index,
                        y=buy_vals,
                        mode='markers',
                        name='Buy Signal',
                        marker=dict(symbol='triangle-up', size=10, color='lime')
                    ))

                    fig.add_trace(go.Scatter(
                        x=sell_points.index,
                        y=sell_vals,
                        mode='markers',
                        name='Sell Signal',
                        marker=dict(symbol='triangle-down', size=10, color='red')
                    ))

                st.plotly_chart(fig, use_container_width=True)

            with col2:
                st.subheader("Position Sizing (SVR Effect)")
                st.caption("How SVR adjusted trade size over time (0.0 to 1.0)")
                fig_size = px.area(test_df, x=test_df.index, y='Position_Size', title="Dynamic Exposure")
                st.plotly_chart(fig_size, use_container_width=True)

            # --- Trade Log Table ---
            st.divider()
            st.subheader("📝 Detailed Trade Log")
            if not trade_log.empty:
                # Format a copy for display; trade_log itself stays numeric.
                display_log = trade_log.copy()
                display_log['Entry Date'] = display_log['Entry Date'].dt.date
                display_log['Exit Date'] = display_log['Exit Date'].dt.date
                display_log['Trade PnL'] = display_log['Trade PnL'].map('{:.2%}'.format)
                display_log['Entry Price (Approx)'] = display_log['Entry Price (Approx)'].map('{:.2f}'.format)
                display_log['Exit Price'] = display_log['Exit Price'].map('{:.2f}'.format)

                st.dataframe(display_log, use_container_width=True)
            else:
                st.write("No trades executed in this period.")

            st.subheader("SVR Prediction Accuracy (Test Set)")
            fig_svr = go.Figure()
            # Only the last 100 rows are plotted.
            slice_df = test_df.iloc[-100:]
            fig_svr.add_trace(go.Scatter(x=slice_df.index, y=slice_df['Target_Next_Vol'], name='Actual Volatility'))
            fig_svr.add_trace(go.Scatter(x=slice_df.index, y=slice_df['Predicted_Vol'], name='SVR Prediction', line=dict(dash='dot')))
            st.plotly_chart(fig_svr, use_container_width=True)
367
# --- Config ---
# The page configuration (title "Hybrid HMM-SVR Strategy Backtester", wide
# layout) is already applied by the st.set_page_config call directly after the
# imports at the top of this script. The identical second call that used to sit
# here ran after other Streamlit commands had executed, which Streamlit
# documents as unsupported, so it has been dropped.
369
+
370
+ # --- Helper Functions ---
371
+
372
@st.cache_data(ttl=3600)
def fetch_data(ticker, start_date, end_date):
    """
    Download price history for ``ticker`` via yfinance, cached for one hour.

    Args:
        ticker: Symbol to download; surrounding whitespace is stripped and the
            symbol is upper-cased before the request.
        start_date: Start of the range. Strings are passed through unchanged;
            any object with ``strftime`` is converted to 'YYYY-MM-DD'.
        end_date: End of the range, normalised the same way as ``start_date``.

    Returns:
        A DataFrame with single-level columns and all-NaN rows removed, or
        ``None`` when the download fails, comes back empty, or yields fewer
        than 10 rows.
    """
    ticker = ticker.strip().upper()

    # The previous check only matched datetime / pd.Timestamp, so other date
    # types with strftime (presumably the plain dates returned by
    # st.date_input - confirm) were forwarded unconverted. Duck-typing on
    # strftime normalises all of them to the same string form.
    if hasattr(start_date, 'strftime'):
        start_date = start_date.strftime('%Y-%m-%d')
    if hasattr(end_date, 'strftime'):
        end_date = end_date.strftime('%Y-%m-%d')

    # Deliberately best-effort: any failure while downloading or cleaning is
    # reported on stdout and turned into None, which callers treat as
    # "no data".
    try:
        df = yf.download(ticker, start=start_date, end=end_date, progress=False)

        if df is None or df.empty:
            return None

        # Keep only the first level of a MultiIndex header so columns can be
        # addressed as plain names such as 'Close'.
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)

        df = df.dropna(how='all')

        if len(df) < 10:
            return None

        return df

    except Exception as e:
        print(f"Error fetching data: {e}")
        return None
403
+
404
def calculate_metrics(df, strategy_col='Strategy_Value', benchmark_col='Buy_Hold_Value'):
    """Summarise total return, Sharpe ratio and max drawdown for two equity curves.

    Args:
        df: DataFrame holding the two equity-curve columns; must have at least
            one row.
        strategy_col: Column with the strategy's cumulative value.
        benchmark_col: Column with the buy-and-hold cumulative value.

    Returns:
        DataFrame with one column per curve ('Hybrid Strategy', 'Buy & Hold')
        and rows 'Total Return', 'Sharpe Ratio', 'Max Drawdown', all formatted
        as strings.
    """
    stats = {}

    for col, name in ((strategy_col, 'Hybrid Strategy'), (benchmark_col, 'Buy & Hold')):
        equity = df[col]

        initial = equity.iloc[0]
        final = equity.iloc[-1]
        total_return = (final - initial) / initial

        daily_ret = equity.pct_change().dropna()
        ret_std = daily_ret.std()

        # std() is NaN when there are fewer than two returns. The old
        # `std != 0` test let NaN through and reported the ratio as "nan";
        # treat "no dispersion information" like zero dispersion.
        if pd.isna(ret_std) or ret_std == 0:
            sharpe = 0
        else:
            # Annualised with sqrt(365), i.e. one row per calendar day.
            sharpe = (daily_ret.mean() / ret_std) * np.sqrt(365)

        # Drawdown is measured against the running peak of the curve.
        rolling_max = equity.cummax()
        drawdown = (equity - rolling_max) / rolling_max
        max_drawdown = drawdown.min()

        stats[name] = {
            "Total Return": f"{total_return:.2%}",
            "Sharpe Ratio": f"{sharpe:.2f}",
            "Max Drawdown": f"{max_drawdown:.2%}"
        }

    return pd.DataFrame(stats)
428
+
429
def train_hmm_model(train_df, n_states):
    """Fit a Gaussian HMM on returns and volatility and rank its states by volatility.

    Args:
        train_df: DataFrame with 'Log_Returns' and 'Volatility' columns.
        n_states: Number of hidden states to fit.

    Returns:
        (model, mapping): the fitted GaussianHMM and a dict from the model's
        raw state index to a rank, where 0 is the state with the lowest
        average volatility and n_states - 1 the highest.
    """
    # Both features are multiplied by 100 before fitting; callers must apply
    # the same scaling before calling model.predict.
    X_train = train_df[['Log_Returns', 'Volatility']].values * 100

    model = GaussianHMM(n_components=n_states, covariance_type="full", n_iter=100, random_state=42)
    model.fit(X_train)

    hidden_states = model.predict(X_train)
    state_vol = []
    for i in range(n_states):
        in_state = hidden_states == i
        if in_state.any():
            avg_vol = X_train[in_state, 1].mean()
        else:
            # A state that never appears in the decoded path has no samples;
            # the mean of an empty slice is NaN and would make the sort order
            # below undefined. Rank it by its fitted emission mean for the
            # volatility feature instead.
            avg_vol = model.means_[i, 1]
        state_vol.append((i, avg_vol))
    state_vol.sort(key=lambda x: x[1])

    # raw state index -> volatility rank
    mapping = {old: new for new, (old, _) in enumerate(state_vol)}

    return model, mapping
446
+
447
def train_svr_model(train_df):
    """Fit an RBF-kernel SVR that forecasts the 'Target_Next_Vol' column.

    Args:
        train_df: DataFrame containing the feature columns 'Log_Returns',
            'Volatility', 'Downside_Vol', 'Regime' and the target column
            'Target_Next_Vol'.

    Returns:
        (model, scaler): the fitted SVR and the StandardScaler fitted on the
        training features; the same scaler must transform any features
        passed to model.predict.
    """
    features = train_df[['Log_Returns', 'Volatility', 'Downside_Vol', 'Regime']].values
    target = train_df['Target_Next_Vol'].values

    # Standardise the features before fitting the RBF kernel.
    scaler = StandardScaler()
    scaler.fit(features)
    features_scaled = scaler.transform(features)

    model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.01)
    model.fit(features_scaled, target)

    return model, scaler
462
+
463
_TRADE_LOG_COLUMNS = [
    'Entry Date',
    'Exit Date',
    'Entry Price (Approx)',
    'Exit Price',
    'Duration (Days)',
    'Trade PnL',
]


def _build_trade_record(entry_date, exit_date, entry_price, exit_price, trade_returns):
    """Assemble one trade-log row; PnL compounds the per-row strategy returns."""
    # (1 + r1) * (1 + r2) * ... - 1
    cum_trade_ret = np.prod([1 + r for r in trade_returns]) - 1
    return {
        'Entry Date': entry_date,
        'Exit Date': exit_date,
        'Entry Price (Approx)': entry_price,
        'Exit Price': exit_price,
        'Duration (Days)': len(trade_returns),
        'Trade PnL': cum_trade_ret,
    }


def generate_trade_log(df):
    """
    Scan a backtest frame and list the individual trade cycles.

    A trade is a maximal run of consecutive rows with 'Final_Position' > 0.
    It is closed on the first following row whose position is exactly 0, or
    on the last row of the frame if it is still open there.

    Args:
        df: DataFrame indexed by date with 'Final_Position', 'Close' and
            'Strategy_Returns' columns.

    Returns:
        DataFrame with one row per trade and the columns 'Entry Date',
        'Exit Date', 'Entry Price (Approx)', 'Exit Price', 'Duration (Days)'
        and 'Trade PnL'. When there are no trades the frame is empty but
        still carries these columns.
    """
    if df.empty:
        return pd.DataFrame(columns=_TRADE_LOG_COLUMNS)

    trades = []
    in_trade = False
    entry_date = None
    entry_price = 0
    trade_returns = []

    # Column-wise iteration avoids building a Series per row (iterrows).
    rows = zip(df.index, df['Final_Position'], df['Close'], df['Strategy_Returns'])
    for date, pos, close_price, strategy_ret in rows:
        if pos > 0:
            if not in_trade:
                # Entry: position goes from 0 to > 0.
                in_trade = True
                entry_date = date
                entry_price = close_price  # Approximation for log visualization
                trade_returns = []
            # Every row spent in the trade contributes its return.
            trade_returns.append(strategy_ret)
        elif pos == 0 and in_trade:
            # Exit: position is back to 0. This row's own return is not part
            # of the trade.
            in_trade = False
            trades.append(_build_trade_record(entry_date, date, entry_price, close_price, trade_returns))
            trade_returns = []

    # A trade still open on the last row is closed at the last row's values.
    if in_trade:
        trades.append(
            _build_trade_record(entry_date, df.index[-1], entry_price, df['Close'].iloc[-1], trade_returns)
        )

    return pd.DataFrame(trades, columns=_TRADE_LOG_COLUMNS)
523
+
524
+ # --- Main Logic ---
525
+
526
+ st.title("🧠 Hybrid HMM-SVR Strategy Backtester")
527
+ st.markdown("""
528
+ **The Hybrid Strategy:**
529
+ 1. **Driver:** EMA Crossover (Fast > Slow = Bullish).
530
+ 2. **Filter (HMM):** If Regime is "High Vol/Crash", **Block Trade** (Size = 0).
531
+ 3. **Sizing (SVR):** If Regime is Safe, adjust size based on predicted risk.
532
+ * *If SVR predicts higher risk -> Reduce Position Size.*
533
+ * *If SVR predicts lower risk -> Increase Position Size.*
534
+ """)
535
+
536
  # Sidebar Inputs
537
  with st.sidebar:
538
  st.header("Settings")