File size: 13,345 Bytes
d99dcd5
b793be9
 
 
 
 
 
 
3e1a1b1
 
5bf05bb
133b2c9
5bf05bb
 
 
 
 
 
 
 
 
 
 
 
 
dbc05f5
5bf05bb
 
 
dbc05f5
5bf05bb
 
dbc05f5
5bf05bb
 
 
 
3e1a1b1
5bf05bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3e1a1b1
5bf05bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3e1a1b1
5bf05bb
 
 
 
dbc05f5
5bf05bb
 
 
 
3e1a1b1
 
 
5bf05bb
 
3e1a1b1
5bf05bb
 
 
 
 
3e1a1b1
5bf05bb
dbc05f5
 
 
5bf05bb
 
 
3e1a1b1
5bf05bb
 
 
3e1a1b1
5bf05bb
dbc05f5
 
 
5bf05bb
 
 
 
 
 
48797b7
5bf05bb
dbc05f5
 
 
 
 
5bf05bb
 
 
 
a9cc75c
dbc05f5
 
b793be9
3e1a1b1
dbc05f5
 
d99dcd5
ba82550
b793be9
 
 
 
3e1a1b1
b793be9
 
 
 
 
 
 
 
 
b4f22a3
b793be9
 
 
dbc05f5
 
b793be9
dbc05f5
b793be9
dbc05f5
 
b793be9
 
dbc05f5
b793be9
 
 
 
3e1a1b1
b793be9
dbc05f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b793be9
dbc05f5
 
 
b793be9
dbc05f5
 
 
b793be9
dbc05f5
 
 
3e1a1b1
dbc05f5
 
 
 
 
 
 
 
b793be9
dbc05f5
b793be9
dbc05f5
 
 
 
 
 
 
 
 
 
b793be9
dbc05f5
 
 
 
3e1a1b1
4f45ede
dbc05f5
 
 
 
 
 
 
 
 
 
 
71b7beb
dbc05f5
 
 
71b7beb
dbc05f5
71b7beb
 
 
 
dbc05f5
71b7beb
 
 
 
dbc05f5
71b7beb
 
dbc05f5
71b7beb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554907e
71b7beb
 
 
 
554907e
dbc05f5
71b7beb
 
 
 
 
554907e
71b7beb
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
import streamlit as st
import yfinance as yf
import pandas as pd
import numpy as np
from hmmlearn.hmm import GaussianHMM
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
from datetime import datetime, date

# --- Config ---
st.set_page_config(page_title="HMM-SVR Leverage Sniper", layout="wide")

# --- Helper Functions ---

@st.cache_data(ttl=3600)
def fetch_data(ticker, start_date, end_date):
    """Download daily OHLCV history for *ticker* from Yahoo Finance.

    Parameters
    ----------
    ticker : str
        Symbol; whitespace-trimmed and upper-cased before download.
    start_date, end_date : str | date | datetime | pd.Timestamp
        Date bounds; non-string values are normalised to 'YYYY-MM-DD'.

    Returns
    -------
    pd.DataFrame | None
        Price history, or None when the download is empty or too short
        (< 10 rows) to be usable. Download errors are shown via st.error.
    """
    ticker = ticker.strip().upper()
    # `datetime` and `pd.Timestamp` are both subclasses of `date`, so a
    # single `date` check also normalises the plain `date` objects that
    # st.date_input returns (the original check missed those).
    if isinstance(start_date, date):
        start_date = start_date.strftime('%Y-%m-%d')
    if isinstance(end_date, date):
        end_date = end_date.strftime('%Y-%m-%d')

    try:
        df = yf.download(ticker, start=start_date, end=end_date, progress=False)
        if df.empty:
            return None
        # yfinance may return MultiIndex columns (field, ticker); flatten
        # to the plain field names so downstream code can use df['Close'].
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        df = df.dropna(how='all')
        if len(df) < 10:
            return None
        return df
    except Exception as e:
        st.error(f"Error: {e}")
        return None

def calculate_metrics(df, strategy_col='Strategy_Value', benchmark_col='Buy_Hold_Value'):
    """Summarise total return, Sharpe ratio and max drawdown.

    Computes the three metrics for both the strategy equity curve and the
    buy-and-hold benchmark, pre-formatted as display strings, and returns
    them as a DataFrame with one column per series.
    """
    stats = {}
    labelled = ((strategy_col, 'Smart Leverage Strategy'),
                (benchmark_col, 'Buy & Hold'))
    for col, label in labelled:
        series = df[col]
        first, last = series.iloc[0], series.iloc[-1]
        total_return = (last - first) / first

        # Daily Sharpe annualised with 365 periods (crypto trades every day).
        returns = series.pct_change().dropna()
        vol = returns.std()
        sharpe = (returns.mean() / vol) * np.sqrt(365) if vol != 0 else 0

        # Max drawdown: worst peak-to-trough decline of the equity curve.
        running_peak = series.cummax()
        max_drawdown = ((series - running_peak) / running_peak).min()

        stats[label] = {
            "Total Return": f"{total_return:.2%}",
            "Sharpe Ratio": f"{sharpe:.2f}",
            "Max Drawdown": f"{max_drawdown:.2%}",
        }
    return pd.DataFrame(stats)

def train_hmm_model(train_df, n_states):
    """Fit a Gaussian HMM on (log-return, volatility) features.

    Also builds a relabelling that orders states by average volatility,
    so 0 is the calmest regime and n_states-1 the most volatile.

    Returns the fitted model and the {raw_state: ordered_state} mapping.
    """
    # Scale by 100 to keep the covariance estimation numerically stable.
    features = train_df[['Log_Returns', 'Volatility']].values * 100
    model = GaussianHMM(n_components=n_states, covariance_type="full",
                        n_iter=100, random_state=42)
    model.fit(features)

    states = model.predict(features)
    # Average volatility (feature column 1) per raw state, ranked low -> high.
    vol_by_state = [(s, features[states == s, 1].mean()) for s in range(n_states)]
    vol_by_state.sort(key=lambda pair: pair[1])
    mapping = {raw: rank for rank, (raw, _) in enumerate(vol_by_state)}
    return model, mapping

def train_svr_model(train_df):
    """Fit an RBF-kernel SVR that predicts next-day volatility.

    Inputs are today's return/volatility features plus the HMM regime
    label; the target is tomorrow's volatility ('Target_Next_Vol').

    Returns the fitted model together with the StandardScaler applied to
    the inputs — callers must use the same scaler at prediction time.
    """
    features = train_df[['Log_Returns', 'Volatility', 'Downside_Vol', 'Regime']].values
    targets = train_df['Target_Next_Vol'].values

    scaler = StandardScaler()
    model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.01)
    model.fit(scaler.fit_transform(features), targets)
    return model, scaler

def generate_trade_log(df):
    """Reconstruct discrete trades from the backtest frame.

    Scans `Final_Position` bar by bar: a transition from flat to > 0 opens
    a trade, a transition back to 0 closes it. A position still open at
    the end of the frame is closed at the last bar.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain 'Final_Position', 'Close', 'Position_Size' and
        'Strategy_Returns' columns, indexed by date.

    Returns
    -------
    pd.DataFrame
        One row per trade: entry/exit date and price, duration in bars,
        average leverage and compounded PnL.
    """
    trades = []
    in_trade = False
    entry_date = None
    entry_price = 0
    trade_returns = []
    leverages = []

    def _record(exit_date, exit_price):
        # Compound the per-bar strategy returns into the trade's total PnL.
        trades.append({
            'Entry Date': entry_date, 'Exit Date': exit_date,
            'Entry Price': entry_price, 'Exit Price': exit_price,
            'Duration': len(trade_returns),
            'Avg Leverage': f"{np.mean(leverages):.1f}x",
            'Trade PnL': np.prod([1 + r for r in trade_returns]) - 1,
        })

    # Loop variable renamed from `date` so it no longer shadows the
    # imported datetime.date.
    for bar_date, row in df.iterrows():
        pos = row['Final_Position']
        close_price = row['Close']

        if pos > 0 and not in_trade:
            # Open a new trade at this bar.
            in_trade = True
            entry_date = bar_date
            entry_price = close_price
            trade_returns = [row['Strategy_Returns']]
            leverages = [row['Position_Size']]
        elif pos > 0 and in_trade:
            trade_returns.append(row['Strategy_Returns'])
            leverages.append(row['Position_Size'])
        elif pos == 0 and in_trade:
            in_trade = False
            _record(bar_date, close_price)
            trade_returns = []
            leverages = []

    if in_trade:
        # Position still open at the end of the data: close at the last bar.
        _record(df.index[-1], df.iloc[-1]['Close'])
    return pd.DataFrame(trades)

# --- Main Logic ---

# --- UI: page header and strategy description ---
st.title("⚡ HMM-SVR Leverage Backtester")
st.markdown("""
**The "Strict Rules" Strategy (No Lookahead Bias):**
1. **Baseline:** Buy when Fast EMA > Slow EMA.
2. **Safety (HMM):** Calculates market regime using ONLY past data.
3. **Leverage Boost:** Uses SVR to predict *tomorrow's* volatility based on *today's* data.
**Timing:** Uses End-of-Day (EOD) data to make decisions for the next trading day.
""")

# Sidebar inputs: ticker/date range plus the two leverage-rule knobs
# consumed by the backtest below.
with st.sidebar:
    st.header("Settings")
    ticker = st.selectbox("Ticker", ["BNB-USD", "ETH-USD", "SOL-USD", "LINK-USD", "BTC-USD"])
    backtest_start = st.date_input("Backtest Start Date", date(2022, 1, 1))
    backtest_end = st.date_input("Backtest End Date", datetime.now())
    st.divider()
    st.subheader("Leverage Rules")
    # Multiplier applied when the model is confident (calm regime + low predicted vol).
    leverage_mult = st.number_input("Boost Leverage", value=3.0, step=0.5)
    # Risk_Ratio must fall below this threshold for the leverage boost to trigger.
    risk_threshold = st.slider("Certainty Threshold", 0.1, 1.0, 0.5)

if st.button("Run Backtest"):
    # Pull 4 extra years before the backtest start so the models get a
    # training window that never overlaps the evaluation period.
    train_start_date = pd.Timestamp(backtest_start) - pd.DateOffset(years=4)
    df = fetch_data(ticker, train_start_date, backtest_end)

    if df is None or len(df) < 200:
        st.error(f"Not enough data found for {ticker}.")
    else:
        # 1. Feature Engineering
        df['Log_Returns'] = np.log(df['Close'] / df['Close'].shift(1))
        df['Volatility'] = df['Log_Returns'].rolling(window=10).std()
        df['Downside_Returns'] = df['Log_Returns'].apply(lambda x: x if x < 0 else 0)
        df['Downside_Vol'] = df['Downside_Returns'].rolling(window=10).std()
        # SVR target is tomorrow's volatility; shift(-1) makes the last row
        # NaN so dropna() removes it.
        df['Target_Next_Vol'] = df['Volatility'].shift(-1)
        df = df.dropna()
        
        # 2. Split Data: everything strictly before the backtest start is
        # training-only; the rest is the out-of-sample test period.
        train_df = df[df.index < pd.Timestamp(backtest_start)].copy()
        test_df = df[df.index >= pd.Timestamp(backtest_start)].copy()
        
        if len(train_df) < 365 or len(test_df) < 10:
            st.error("Data split error. Adjust dates.")
        else:
            n_states = 3
            
            with st.spinner("1. Training Models on History..."):
                # Train HMM on past data only
                hmm_model, state_map = train_hmm_model(train_df, n_states)
                
                # Label the training set with regimes so the SVR can learn from them
                X_train_hmm = train_df[['Log_Returns', 'Volatility']].values * 100
                train_raw_states = hmm_model.predict(X_train_hmm)
                train_df['Regime'] = [state_map.get(s, s) for s in train_raw_states]
                
                # Train SVR
                svr_model, svr_scaler = train_svr_model(train_df)
            
            # --- HONEST WALK-FORWARD BACKTEST ---
            
            st.info("2. Running Walk-Forward Simulation (Step-by-Step)... This simulates real-time trading.")
            progress_bar = st.progress(0)
            
            # Prepare lists for storing honest predictions
            honest_regimes = []
            honest_predicted_vols = []
            
            # Concatenate for sliding window access
            all_data = pd.concat([train_df, test_df])
            start_idx = len(train_df)
            total_steps = len(test_df)
            
            # We use a fixed lookback window for HMM inference to keep it fast enough
            # Looking back 252 days (1 year) is usually sufficient for regime detection
            lookback_window = 252 
            
            for i in range(total_steps):
                # Update UI (only every 10 steps to limit redraw overhead)
                if i % 10 == 0: progress_bar.progress((i + 1) / total_steps)
                
                # Define the window: From (Now - Lookback) to Now
                curr_pointer = start_idx + i
                window_start = max(0, curr_pointer - lookback_window)
                
                # Slice up to and INCLUDING day 'i': the decision is made at
                # the close of day 'i' for the next trading day, so today's
                # features are legitimately available.
                history_slice = all_data.iloc[window_start : curr_pointer + 1]
                
                # --- A. Honest Regime Detection ---
                # HMM determines the path of states that best fits this specific history
                X_slice = history_slice[['Log_Returns', 'Volatility']].values * 100
                
                try:
                    # Predict sequence
                    hidden_states_slice = hmm_model.predict(X_slice)
                    # We only care about the LAST state (the state of "Today")
                    current_state_raw = hidden_states_slice[-1]
                    current_state = state_map.get(current_state_raw, current_state_raw)
                except Exception:
                    # Narrowed from a bare except; fall back to the neutral
                    # middle regime if HMM inference fails on this window.
                    current_state = 1
                
                honest_regimes.append(current_state)
                
                # --- B. Honest Volatility Prediction ---
                # Prepare single row input for SVR: [Log_Ret, Vol, Down_Vol, Regime]
                # Note: We use the 'current_state' we just calculated
                row = test_df.iloc[i]
                svr_features = np.array([[
                    row['Log_Returns'], 
                    row['Volatility'], 
                    row['Downside_Vol'], 
                    current_state
                ]])
                
                # Scale and Predict
                svr_feat_scaled = svr_scaler.transform(svr_features)
                pred_vol = svr_model.predict(svr_feat_scaled)[0]
                honest_predicted_vols.append(pred_vol)
                
                # Calculate EMAs using only the history up to the current day
                test_df.loc[test_df.index[i], 'EMA_Short'] = history_slice['Close'].ewm(span=12).mean().iloc[-1]
                test_df.loc[test_df.index[i], 'EMA_Long'] = history_slice['Close'].ewm(span=26).mean().iloc[-1]
            
            # Assign the honest predictions back to dataframe
            test_df['Regime'] = honest_regimes
            test_df['Predicted_Vol'] = honest_predicted_vols
            
            progress_bar.empty()
            
            # --- STRATEGY LOGIC ---
            
            # Baseline trend signal: long when the fast EMA is above the slow EMA.
            test_df['Signal'] = np.where(test_df['EMA_Short'] > test_df['EMA_Long'], 1, 0)
            avg_train_vol = train_df['Volatility'].mean()
            # Predicted volatility relative to the historical (training) average.
            test_df['Risk_Ratio'] = test_df['Predicted_Vol'] / avg_train_vol
            
            test_df['Position_Size'] = 1.0
            
            # Leverage rules
            cond_safe = (test_df['Regime'] == 0)
            cond_low_risk = (test_df['Risk_Ratio'] < risk_threshold)
            cond_crash = (test_df['Regime'] == (n_states - 1))
            
            # Boost leverage only in the calmest regime with low predicted risk
            test_df['Position_Size'] = np.where(cond_safe & cond_low_risk, leverage_mult, test_df['Position_Size'])
            # Cut exposure entirely in the most volatile (crash) regime
            test_df['Position_Size'] = np.where(cond_crash, 0.0, test_df['Position_Size'])
            
            # Calculate returns: shift(1) so today's decision earns tomorrow's return
            test_df['Final_Position'] = (test_df['Signal'] * test_df['Position_Size']).shift(1)
            test_df['Simple_Returns'] = test_df['Close'].pct_change()
            test_df['Strategy_Returns'] = test_df['Final_Position'] * test_df['Simple_Returns']
            
            # Metrics & Plots
            test_df['Strategy_Value'] = (1 + test_df['Strategy_Returns'].fillna(0)).cumprod()
            test_df['Buy_Hold_Value'] = (1 + test_df['Simple_Returns'].fillna(0)).cumprod()
            test_df.dropna(inplace=True)
            
            metrics_df = calculate_metrics(test_df)
            st.subheader("Performance vs Benchmark")
            st.table(metrics_df)
            
            st.subheader("Equity Curve")
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Buy_Hold_Value'], name='Buy & Hold', line=dict(color='gray', dash='dot')))
            fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Strategy_Value'], name='Smart Leverage', line=dict(color='#00CC96', width=2)))
            # BUG FIX: `width=True` is not a valid argument for these Streamlit
            # display calls; use_container_width=True is the supported way to
            # stretch the element to the container width.
            st.plotly_chart(fig, use_container_width=True)
            
            st.subheader("Leverage Deployment")
            fig_lev = go.Figure()
            fig_lev.add_trace(go.Scatter(x=test_df.index, y=test_df['Position_Size'], mode='lines', fill='tozeroy', name='Lev', line=dict(color='#636EFA')))
            st.plotly_chart(fig_lev, use_container_width=True)
            
            trade_log = generate_trade_log(test_df)
            st.subheader("📝 Trade Log")
            if not trade_log.empty:
                display_log = trade_log.copy()
                display_log['Trade PnL'] = display_log['Trade PnL'].map('{:.2%}'.format)
                st.dataframe(display_log, use_container_width=True)
            else:
                st.write("No trades generated.")