saadrizvi09 committed on
Commit
b793be9
·
verified ·
1 Parent(s): 7a20800

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +244 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,246 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import yfinance as yf
3
+ import pandas as pd
4
+ import numpy as np
5
+ from hmmlearn.hmm import GaussianHMM
6
+ from sklearn.svm import SVR
7
+ from sklearn.preprocessing import StandardScaler
8
+ import plotly.graph_objects as go
9
+ import plotly.express as px
10
+ from datetime import datetime, timedelta
11
+
12
+ # --- Config ---
13
+ st.set_page_config(page_title="Hybrid HMM-SVR Strategy Backtester", layout="wide")
14
+
15
+ # --- Helper Functions ---
16
+
17
@st.cache_data
def fetch_data(ticker, start_date, end_date):
    """Download price history for ``ticker`` between ``start_date`` and ``end_date``.

    The download is delegated to ``yf.download``. If the returned frame has
    MultiIndex columns, they are collapsed to their first level so the rest
    of the app can use single-label lookups such as ``df['Close']``.
    The function is wrapped in ``st.cache_data``.
    """
    prices = yf.download(ticker, start=start_date, end=end_date)
    columns = prices.columns
    if not isinstance(columns, pd.MultiIndex):
        return prices
    # Keep only level 0 of the column index.
    prices.columns = columns.get_level_values(0)
    return prices
23
+
24
def calculate_metrics(df, strategy_col='Strategy_Value', benchmark_col='Buy_Hold_Value',
                      periods_per_year=365):
    """Summarise total return, annualised Sharpe ratio and max drawdown.

    Args:
        df: DataFrame containing one equity-curve column per series.
        strategy_col: Column holding the strategy's equity curve.
        benchmark_col: Column holding the buy-and-hold equity curve.
        periods_per_year: Annualisation factor for the Sharpe ratio. The
            default of 365 matches markets that trade every calendar day
            (e.g. crypto); pass 252 for exchange-traded assets.

    Returns:
        DataFrame with one column per series ('Hybrid Strategy',
        'Buy & Hold') and rows 'Total Return', 'Sharpe Ratio' and
        'Max Drawdown'. Every cell is a pre-formatted string.
    """
    stats = {}
    annualiser = np.sqrt(periods_per_year)

    for col, name in [(strategy_col, 'Hybrid Strategy'), (benchmark_col, 'Buy & Hold')]:
        curve = df[col]

        # Total return over the whole window.
        initial = curve.iloc[0]
        final = curve.iloc[-1]
        total_return = (final - initial) / initial

        # Period-over-period returns of the equity curve.
        daily_ret = curve.pct_change().dropna()

        # std() is NaN when fewer than two returns exist, and NaN != 0 is
        # True, so a plain zero check would let "nan" through to the table.
        spread = daily_ret.std()
        if pd.isna(spread) or spread == 0:
            sharpe = 0.0
        else:
            sharpe = (daily_ret.mean() / spread) * annualiser

        # Max drawdown: deepest relative drop from the running peak.
        rolling_max = curve.cummax()
        drawdown = (curve - rolling_max) / rolling_max
        max_drawdown = drawdown.min()

        stats[name] = {
            "Total Return": f"{total_return:.2%}",
            "Sharpe Ratio": f"{sharpe:.2f}",
            "Max Drawdown": f"{max_drawdown:.2%}",
        }

    return pd.DataFrame(stats)
52
+
53
def train_hmm_model(train_df, n_states):
    """Fit a Gaussian HMM on in-sample data and rank its states by volatility.

    Args:
        train_df: DataFrame with 'Log_Returns' and 'Volatility' columns.
        n_states: Number of hidden states to fit.

    Returns:
        Tuple ``(model, mapping)``: the fitted ``GaussianHMM`` and a dict
        mapping each raw state id to its volatility rank (0 = lowest).
    """
    # Features: log returns and volatility, both scaled by 100.
    X_train = train_df[['Log_Returns', 'Volatility']].values * 100

    model = GaussianHMM(n_components=n_states, covariance_type="full", n_iter=100, random_state=42)
    model.fit(X_train)

    # Rank states by the average volatility of the observations assigned to them.
    hidden_states = model.predict(X_train)
    state_vol = []
    for i in range(n_states):
        members = X_train[hidden_states == i, 1]
        if members.size:
            avg_vol = members.mean()
        else:
            # The decoded path never visited this state. The mean of an empty
            # slice is NaN, which would make the sort order undefined, so use
            # the state's fitted mean for the volatility feature instead.
            avg_vol = model.means_[i, 1]
        state_vol.append((i, avg_vol))
    state_vol.sort(key=lambda x: x[1])

    # Mapping: {raw state id: rank by volatility}
    mapping = {old: new for new, (old, _) in enumerate(state_vol)}

    return model, mapping
73
+
74
def train_svr_model(train_df):
    """Fit a standardising scaler and an RBF-kernel SVR on the training frame.

    Inputs are the 'Log_Returns', 'Volatility', 'Downside_Vol' and 'Regime'
    columns; the regression target is the 'Target_Next_Vol' column.

    Returns:
        Tuple ``(model, scaler)``: the fitted SVR and the fitted
        ``StandardScaler`` that must be applied to features before predicting.
    """
    scaler = StandardScaler()
    # Standardise the four input columns in one step.
    features = scaler.fit_transform(
        train_df[['Log_Returns', 'Volatility', 'Downside_Vol', 'Regime']].values
    )
    targets = train_df['Target_Next_Vol'].values

    model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.01)
    model.fit(features, targets)

    return model, scaler
92
+
93
+ # --- Main Logic ---
94
+
95
# Page header and strategy summary.
st.title("🧠 Saad Rizvi Gand phad strategy")
st.markdown("""
**The Hybrid Strategy:**
1. **Driver:** EMA Crossover (Fast > Slow = Bullish).
2. **Filter (HMM):** If Regime is "High Vol/Crash", **Block Trade** (Size = 0).
3. **Sizing (SVR):** If Regime is Safe, adjust size based on predicted risk.
""")

# Sidebar Inputs
with st.sidebar:
    st.header("Settings")
    ticker = st.text_input("Ticker", "BTC-USD")

    # The user selects the trading (backtest) period; training data is derived from it.
    backtest_start = st.date_input("Backtest Start Date", datetime.now() - timedelta(days=365))
    backtest_end = st.date_input("Backtest End Date", datetime.now())

    st.caption("Note: Models will automatically train on the **4 years** of data prior to your selected Start Date.")

    st.divider()
    # number_input's second positional parameter is min_value, not value.
    # Passing the default positionally made 12 / 26 hard lower bounds, so
    # the defaults are now given explicitly via value=.
    short_window = st.number_input("Fast EMA", min_value=1, value=12)
    long_window = st.number_input("Slow EMA", min_value=1, value=26)
    n_states = st.slider("HMM States", 2, 4, 3)

if st.button("Run Hybrid Backtest"):
    # Training window starts 4 years before the backtest start.
    train_start_date = pd.Timestamp(backtest_start) - pd.DateOffset(years=4)

    # Fetch ALL data (training period + backtest period) in one call.
    df = fetch_data(ticker, train_start_date, backtest_end)

    if df is None or len(df) < 200:
        st.error("Not enough data to backtest. Ensure the ticker existed 4 years prior to your start date.")
    else:
        # 1. Feature Engineering
        df['Log_Returns'] = np.log(df['Close'] / df['Close'].shift(1))
        df['Volatility'] = df['Log_Returns'].rolling(window=10).std()

        # Downside volatility: keep negative returns, replace everything else
        # (including NaN) with 0, then take the rolling standard deviation.
        df['Downside_Returns'] = df['Log_Returns'].where(df['Log_Returns'] < 0, 0)
        df['Downside_Vol'] = df['Downside_Returns'].rolling(window=10).std()

        # Strategy indicators
        df['EMA_Short'] = df['Close'].ewm(span=short_window, adjust=False).mean()
        df['EMA_Long'] = df['Close'].ewm(span=long_window, adjust=False).mean()

        # Target for SVR: the next row's volatility.
        df['Target_Next_Vol'] = df['Volatility'].shift(-1)

        df = df.dropna()

        # 2. Split data on the backtest start date.
        split_point = pd.Timestamp(backtest_start)
        train_df = df[df.index < split_point].copy()
        test_df = df[df.index >= split_point].copy()

        if len(train_df) < 365:
            st.warning(f"Warning: Only {len(train_df)} days found for training. HMM performs best with >2 years of data.")

        # Hard floor for training rows. An empty training frame would crash
        # on train_df.index[0] and inside the model fits below.
        min_train_rows = 30

        if len(train_df) < min_train_rows:
            st.error("Not enough training data before the Backtest Start Date. Choose a later start date or a ticker with a longer history.")
        elif len(test_df) < 10:
            st.error("Not enough data for backtesting range.")
        else:
            st.info(f"Training on {len(train_df)} days ({train_df.index[0].date()} to {train_df.index[-1].date()}). Backtesting on {len(test_df)} days.")

            with st.spinner("Training HMM (Regime Detection)..."):
                hmm_model, state_map = train_hmm_model(train_df, n_states)

                # In-sample regimes are an input feature for the SVR.
                X_train_hmm = train_df[['Log_Returns', 'Volatility']].values * 100
                train_raw_states = hmm_model.predict(X_train_hmm)
                train_df['Regime'] = [state_map.get(s, s) for s in train_raw_states]

            with st.spinner("Training SVR (Volatility Forecasting)..."):
                svr_model, svr_scaler = train_svr_model(train_df)

            with st.spinner("Running Backtest Loop..."):
                # --- OUT OF SAMPLE BACKTEST ---

                # 1. Predict regimes for the test data.
                # NOTE(review): predict() decodes the whole test sequence in
                # one call, so a day's regime may depend on later
                # observations (possible look-ahead) - confirm against the
                # hmmlearn decoding documentation.
                X_test_hmm = test_df[['Log_Returns', 'Volatility']].values * 100
                test_raw_states = hmm_model.predict(X_test_hmm)
                test_df['Regime'] = [state_map.get(s, s) for s in test_raw_states]

                # 2. Predict volatility for the test data with the SVR.
                X_test_svr = test_df[['Log_Returns', 'Volatility', 'Downside_Vol', 'Regime']].values
                X_test_svr_scaled = svr_scaler.transform(X_test_svr)
                test_df['Predicted_Vol'] = svr_model.predict(X_test_svr_scaled)

                # 3. Strategy logic
                # The highest-ranked regime is treated as the crash state.
                high_vol_state = n_states - 1

                # Base signal: long when the fast EMA is above the slow EMA.
                test_df['Signal'] = np.where(test_df['EMA_Short'] > test_df['EMA_Long'], 1, 0)

                # Baseline risk: average volatility seen in training.
                avg_train_vol = train_df['Volatility'].mean()

                # Position size (the "dimmer switch"):
                #   Size = Average_Vol / Predicted_Vol, clipped to [0, 1].
                #   Predicted > Average -> Size < 1.0 (reduce risk)
                #   Predicted < Average -> Size would exceed 1.0, capped at 1.0
                # NOTE(review): a non-positive SVR prediction yields a
                # negative/infinite ratio that the clip turns into 0.0 / 1.0.
                test_df['Risk_Ratio'] = test_df['Predicted_Vol'] / avg_train_vol
                test_df['Position_Size'] = (1.0 / test_df['Risk_Ratio']).clip(upper=1.0, lower=0.0)

                # Override: if the HMM says CRASH, size = 0.
                test_df['Position_Size'] = np.where(
                    test_df['Regime'] == high_vol_state,
                    0.0,
                    test_df['Position_Size']
                )

                # Final position: Signal * Size, shifted by one row because
                # today's size is applied to tomorrow's return.
                test_df['Final_Position'] = (test_df['Signal'] * test_df['Position_Size']).shift(1)

                # 4. Returns
                # Position weights scale simple returns, and (1 + r).cumprod()
                # compounds simple returns, so convert the log returns first.
                simple_returns = np.expm1(test_df['Log_Returns'])
                test_df['Strategy_Returns'] = test_df['Final_Position'] * simple_returns
                test_df['Buy_Hold_Returns'] = simple_returns

                # Cumulative equity curves (starting capital = 1).
                test_df['Strategy_Value'] = (1 + test_df['Strategy_Returns']).cumprod()
                test_df['Buy_Hold_Value'] = (1 + test_df['Buy_Hold_Returns']).cumprod()
                # Drops the first row, whose shifted position is NaN.
                test_df.dropna(inplace=True)

            # --- RESULTS ---

            metrics_df = calculate_metrics(test_df)
            st.subheader("Performance Metrics")
            st.table(metrics_df)

            # Charts
            col1, col2 = st.columns([2, 1])

            with col1:
                st.subheader("Equity Curve")
                fig = go.Figure()
                fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Buy_Hold_Value'], name='Buy & Hold', line=dict(color='gray', dash='dot')))
                fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Strategy_Value'], name='Hybrid Strategy', line=dict(color='#00CC96', width=2)))
                st.plotly_chart(fig, use_container_width=True)

            with col2:
                st.subheader("Position Sizing (SVR Effect)")
                st.caption("How SVR adjusted trade size over time (0.0 to 1.0)")
                fig_size = px.area(test_df, x=test_df.index, y='Position_Size', title="Dynamic Exposure")
                st.plotly_chart(fig_size, use_container_width=True)

            st.subheader("SVR Prediction Accuracy (Test Set)")
            fig_svr = go.Figure()
            # Show only the last 100 rows to avoid clutter.
            slice_df = test_df.iloc[-100:]
            fig_svr.add_trace(go.Scatter(x=slice_df.index, y=slice_df['Target_Next_Vol'], name='Actual Volatility'))
            fig_svr.add_trace(go.Scatter(x=slice_df.index, y=slice_df['Predicted_Vol'], name='SVR Prediction', line=dict(dash='dot')))
            st.plotly_chart(fig_svr, use_container_width=True)