# backtest / src /streamlit_app.py
# saadrizvi09's picture
# Update src/streamlit_app.py
# 554907e verified
import streamlit as st
import yfinance as yf
import pandas as pd
import numpy as np
from hmmlearn.hmm import GaussianHMM
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
from datetime import datetime, date
# --- Config ---
# Page-level settings: the page title and the wide layout.
st.set_page_config(
    page_title="HMM-SVR Leverage Sniper",
    layout="wide",
)
# --- Helper Functions ---
@st.cache_data(ttl=3600)
def fetch_data(ticker, start_date, end_date):
    """Download price history for one ticker, or return None when unusable.

    The ticker is stripped and upper-cased, and datetime/Timestamp bounds are
    rendered as 'YYYY-MM-DD' strings before being handed to ``yf.download``.
    MultiIndex columns are flattened to their first level and rows that are
    entirely NaN are dropped.

    Returns:
        The cleaned DataFrame, or None when the download is empty, fewer than
        10 rows remain, or the download/cleanup raises (in which case the
        error is also shown with ``st.error``).
    """

    def _as_iso_day(value):
        # Only datetime-like values are reformatted; anything else is passed through.
        if isinstance(value, (datetime, pd.Timestamp)):
            return value.strftime('%Y-%m-%d')
        return value

    symbol = ticker.strip().upper()
    start = _as_iso_day(start_date)
    end = _as_iso_day(end_date)

    try:
        frame = yf.download(symbol, start=start, end=end, progress=False)
        if frame.empty:
            return None
        if isinstance(frame.columns, pd.MultiIndex):
            # Keep only the first column level.
            frame.columns = frame.columns.get_level_values(0)
        frame = frame.dropna(how='all')
        return frame if len(frame) >= 10 else None
    except Exception as exc:
        st.error(f"Error: {exc}")
        return None
def calculate_metrics(df, strategy_col='Strategy_Value', benchmark_col='Buy_Hold_Value',
                      periods_per_year=365):
    """Summarise the strategy and benchmark equity curves as formatted strings.

    Args:
        df: DataFrame holding both equity-curve columns (at least one row).
        strategy_col: Column with the strategy's equity curve.
        benchmark_col: Column with the buy-and-hold equity curve.
        periods_per_year: Number of return periods per year used to annualise
            the Sharpe ratio. Defaults to 365, the value previously hard-coded.

    Returns:
        DataFrame with one column per curve ('Smart Leverage Strategy',
        'Buy & Hold') and rows 'Total Return', 'Sharpe Ratio', 'Max Drawdown',
        all pre-formatted as strings.
    """
    stats = {}
    labelled_cols = (
        (strategy_col, 'Smart Leverage Strategy'),
        (benchmark_col, 'Buy & Hold'),
    )
    for col, name in labelled_cols:
        equity = df[col]
        initial = equity.iloc[0]
        final = equity.iloc[-1]
        total_return = (final - initial) / initial

        daily_ret = equity.pct_change().dropna()
        ret_std = daily_ret.std()
        # With fewer than two returns the sample std is NaN; NaN != 0 is True,
        # so a plain "!= 0" test would let a NaN Sharpe ratio through.
        if np.isfinite(ret_std) and ret_std > 0:
            sharpe = (daily_ret.mean() / ret_std) * np.sqrt(periods_per_year)
        else:
            sharpe = 0.0

        # Drawdown is measured against the running peak of the curve.
        rolling_max = equity.cummax()
        drawdown = (equity - rolling_max) / rolling_max
        max_drawdown = drawdown.min()

        stats[name] = {
            "Total Return": f"{total_return:.2%}",
            "Sharpe Ratio": f"{sharpe:.2f}",
            "Max Drawdown": f"{max_drawdown:.2%}",
        }
    return pd.DataFrame(stats)
def train_hmm_model(train_df, n_states):
    """Fit a Gaussian HMM on returns/volatility and rank its states by volatility.

    The model is fitted on the 'Log_Returns' and 'Volatility' columns, both
    multiplied by 100. The fitted model then labels the same training rows,
    and each raw state id is ranked by the mean of the scaled 'Volatility'
    values assigned to it.

    Returns:
        (model, mapping) where ``mapping`` maps a raw HMM state id to its rank,
        0 being the state with the lowest mean volatility.
    """
    observations = train_df[['Log_Returns', 'Volatility']].values * 100

    model = GaussianHMM(n_components=n_states, covariance_type="full", n_iter=100, random_state=42)
    model.fit(observations)
    decoded = model.predict(observations)

    # Column 1 of the observation matrix is the scaled volatility.
    mean_vol_by_state = [
        observations[decoded == state, 1].mean() for state in range(n_states)
    ]
    ranked_states = sorted(range(n_states), key=lambda state: mean_vol_by_state[state])
    mapping = {raw_state: rank for rank, raw_state in enumerate(ranked_states)}
    return model, mapping
def train_svr_model(train_df):
    """Fit a standardising scaler and an RBF-kernel SVR for next-step volatility.

    Inputs are the 'Log_Returns', 'Volatility', 'Downside_Vol' and 'Regime'
    columns; the target is 'Target_Next_Vol'. The scaler is fitted on the
    inputs and the SVR is trained on the scaled inputs.

    Returns:
        (model, scaler): the fitted SVR and the fitted StandardScaler, which
        must be applied to any features before calling ``model.predict``.
    """
    predictors = train_df[['Log_Returns', 'Volatility', 'Downside_Vol', 'Regime']].values
    targets = train_df['Target_Next_Vol'].values

    scaler = StandardScaler()
    scaled_predictors = scaler.fit_transform(predictors)

    model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.01)
    model.fit(scaled_predictors, targets)
    return model, scaler
def _build_trade_record(entry_date, entry_price, exit_date, exit_price, trade_returns, leverages):
    """Turn the values accumulated during one open position into a trade-log row."""
    return {
        'Entry Date': entry_date, 'Exit Date': exit_date,
        'Entry Price': entry_price, 'Exit Price': exit_price,
        'Duration': len(trade_returns),
        'Avg Leverage': f"{np.mean(leverages):.1f}x",
        # Compound the per-row strategy returns over the life of the trade.
        'Trade PnL': np.prod([1 + r for r in trade_returns]) - 1,
    }


def generate_trade_log(df):
    """Group consecutive rows with a positive position into a log of trades.

    A trade opens on the first row whose 'Final_Position' is > 0 and closes on
    the first later row whose 'Final_Position' equals 0; a trade still open at
    the end is closed on the last row. Rows whose position is neither > 0 nor
    == 0 (for example NaN) are ignored.

    Args:
        df: DataFrame indexed by date with 'Final_Position', 'Close' and
            'Strategy_Returns' columns.

    Returns:
        DataFrame with one row per trade and the columns 'Entry Date',
        'Exit Date', 'Entry Price', 'Exit Price', 'Duration' (number of rows
        held), 'Avg Leverage' (formatted like '2.0x') and 'Trade PnL'
        (compounded 'Strategy_Returns'). Empty when there are no trades.

    NOTE(review): entry/exit dates and prices are read from the rows where the
    position is first seen open / first seen flat. The caller builds
    'Final_Position' with a one-row shift, so these may sit one row after the
    close at which the position actually changed - confirm the intended
    convention.
    """
    trades = []
    in_trade = False
    entry_date = None
    entry_price = 0
    trade_returns = []
    leverages = []

    for timestamp, row in df.iterrows():
        pos = row['Final_Position']
        if pos > 0:
            if not in_trade:
                in_trade = True
                entry_date = timestamp
                entry_price = row['Close']
                trade_returns = []
                leverages = []
            trade_returns.append(row['Strategy_Returns'])
            # 'Final_Position' is the exposure the caller multiplies into this
            # row's 'Strategy_Returns', so it is the leverage actually held
            # here; 'Position_Size' on the same row is a different (unshifted)
            # value.
            leverages.append(pos)
        elif pos == 0 and in_trade:
            in_trade = False
            trades.append(_build_trade_record(
                entry_date, entry_price, timestamp, row['Close'],
                trade_returns, leverages,
            ))
            trade_returns = []
            leverages = []

    if in_trade:
        # Position still open on the final row: close it there.
        trades.append(_build_trade_record(
            entry_date, entry_price, df.index[-1], df.iloc[-1]['Close'],
            trade_returns, leverages,
        ))
    return pd.DataFrame(trades)
# --- Main Logic ---
# Page heading and a short description of the strategy rules.
st.title("⚡ HMM-SVR Leverage Backtester")
strategy_intro = """
**The "Strict Rules" Strategy (No Lookahead Bias):**
1. **Baseline:** Buy when Fast EMA > Slow EMA.
2. **Safety (HMM):** Calculates market regime using ONLY past data.
3. **Leverage Boost:** Uses SVR to predict *tomorrow's* volatility based on *today's* data.
**Timing:** Uses End-of-Day (EOD) data to make decisions for the next trading day.
"""
st.markdown(strategy_intro)

# Sidebar inputs, created through the sidebar object instead of a `with` block.
sidebar = st.sidebar
sidebar.header("Settings")
ticker = sidebar.selectbox(
    "Ticker",
    ["BNB-USD", "ETH-USD", "SOL-USD", "LINK-USD", "BTC-USD"],
)
backtest_start = sidebar.date_input("Backtest Start Date", date(2022, 1, 1))
backtest_end = sidebar.date_input("Backtest End Date", datetime.now())
sidebar.divider()
sidebar.subheader("Leverage Rules")
# Position multiplier used when the boost condition is met.
leverage_mult = sidebar.number_input("Boost Leverage", value=3.0, step=0.5)
# Upper bound on the predicted-vol / average-training-vol ratio for a boost.
risk_threshold = sidebar.slider("Certainty Threshold", 0.1, 1.0, 0.5)
# Runs the whole pipeline when the button is pressed: download, feature
# engineering, model training on pre-backtest history, a day-by-day
# walk-forward simulation, and finally metrics, charts and the trade log.
if st.button("Run Backtest"):
    # Fetch four extra years before the backtest start to train the models on.
    train_start_date = pd.Timestamp(backtest_start) - pd.DateOffset(years=4)
    df = fetch_data(ticker, train_start_date, backtest_end)
    if df is None or len(df) < 200:
        st.error(f"Not enough data found for {ticker}.")
    else:
        # 1. Feature Engineering
        df['Log_Returns'] = np.log(df['Close'] / df['Close'].shift(1))
        df['Volatility'] = df['Log_Returns'].rolling(window=10).std()
        # Keep negative returns only; everything else (including NaN) becomes 0.
        df['Downside_Returns'] = df['Log_Returns'].where(df['Log_Returns'] < 0, 0.0)
        df['Downside_Vol'] = df['Downside_Returns'].rolling(window=10).std()
        # SVR target: the following row's rolling volatility.
        df['Target_Next_Vol'] = df['Volatility'].shift(-1)
        df = df.dropna()

        # 2. Split Data
        split_ts = pd.Timestamp(backtest_start)
        train_df = df[df.index < split_ts].copy()
        test_df = df[df.index >= split_ts].copy()
        if len(train_df) < 365 or len(test_df) < 10:
            st.error("Data split error. Adjust dates.")
        else:
            n_states = 3
            hmm_cols = ['Log_Returns', 'Volatility']
            with st.spinner("1. Training Models on History..."):
                # Train HMM on past data only.
                hmm_model, state_map = train_hmm_model(train_df, n_states)
                # Label the training rows with ranked regimes so the SVR can
                # use 'Regime' as a feature.
                X_train_hmm = train_df[hmm_cols].values * 100
                train_raw_states = hmm_model.predict(X_train_hmm)
                train_df['Regime'] = [state_map.get(s, s) for s in train_raw_states]
                # Train SVR
                svr_model, svr_scaler = train_svr_model(train_df)

            # --- HONEST WALK-FORWARD BACKTEST ---
            st.info("2. Running Walk-Forward Simulation (Step-by-Step)... This simulates real-time trading.")
            progress_bar = st.progress(0)

            # Per-day outputs of the walk-forward loop.
            honest_regimes = []
            honest_predicted_vols = []
            ema_short_vals = []
            ema_long_vals = []

            # Concatenate so each test day can look back into the training rows.
            all_data = pd.concat([train_df, test_df])
            start_idx = len(train_df)
            total_steps = len(test_df)
            # Fixed lookback (in rows) for HMM inference and the EMAs, which
            # keeps each step's work bounded.
            lookback_window = 252

            for i in range(total_steps):
                # Update UI
                if i % 10 == 0:
                    progress_bar.progress((i + 1) / total_steps)

                # Window: from (now - lookback) up to and including now.
                curr_pointer = start_idx + i
                window_start = max(0, curr_pointer - lookback_window)
                # The "+ 1" keeps the current row in the slice: the decision is
                # made at the close of day i and applied to the next row.
                history_slice = all_data.iloc[window_start:curr_pointer + 1]

                # --- A. Honest Regime Detection ---
                # Decode the window with the already-trained HMM and keep only
                # the state of its last row (the current day).
                X_slice = history_slice[hmm_cols].values * 100
                try:
                    hidden_states_slice = hmm_model.predict(X_slice)
                except Exception:
                    # Best effort: fall back to the middle regime if inference fails.
                    current_state = 1
                else:
                    current_state_raw = hidden_states_slice[-1]
                    current_state = state_map.get(current_state_raw, current_state_raw)
                honest_regimes.append(current_state)

                # --- B. Honest Volatility Prediction ---
                # Single-row SVR input: [Log_Ret, Vol, Down_Vol, Regime], using
                # the regime just computed for this day.
                row = test_df.iloc[i]
                svr_features = np.array([[
                    row['Log_Returns'],
                    row['Volatility'],
                    row['Downside_Vol'],
                    current_state,
                ]])
                svr_feat_scaled = svr_scaler.transform(svr_features)
                honest_predicted_vols.append(svr_model.predict(svr_feat_scaled)[0])

                # EMAs computed from the history window ending on the current day.
                window_close = history_slice['Close']
                ema_short_vals.append(window_close.ewm(span=12).mean().iloc[-1])
                ema_long_vals.append(window_close.ewm(span=26).mean().iloc[-1])

            # Assign the walk-forward outputs back to the test frame.
            test_df['EMA_Short'] = ema_short_vals
            test_df['EMA_Long'] = ema_long_vals
            test_df['Regime'] = honest_regimes
            test_df['Predicted_Vol'] = honest_predicted_vols
            progress_bar.empty()

            # --- STRATEGY LOGIC ---
            test_df['Signal'] = np.where(test_df['EMA_Short'] > test_df['EMA_Long'], 1, 0)
            avg_train_vol = train_df['Volatility'].mean()
            test_df['Risk_Ratio'] = test_df['Predicted_Vol'] / avg_train_vol
            test_df['Position_Size'] = 1.0

            cond_safe = (test_df['Regime'] == 0)
            cond_low_risk = (test_df['Risk_Ratio'] < risk_threshold)
            cond_crash = (test_df['Regime'] == (n_states - 1))
            # Boost: lowest-volatility regime and low predicted risk.
            test_df['Position_Size'] = np.where(cond_safe & cond_low_risk, leverage_mult, test_df['Position_Size'])
            # Cut: highest-volatility regime goes flat (applied after the boost).
            test_df['Position_Size'] = np.where(cond_crash, 0.0, test_df['Position_Size'])

            # Calculate Returns. The shift applies each day's decision to the
            # following row's return.
            test_df['Final_Position'] = (test_df['Signal'] * test_df['Position_Size']).shift(1)
            test_df['Simple_Returns'] = test_df['Close'].pct_change()
            test_df['Strategy_Returns'] = test_df['Final_Position'] * test_df['Simple_Returns']

            # Metrics & Plots
            test_df['Strategy_Value'] = (1 + test_df['Strategy_Returns'].fillna(0)).cumprod()
            test_df['Buy_Hold_Value'] = (1 + test_df['Simple_Returns'].fillna(0)).cumprod()
            test_df.dropna(inplace=True)

            metrics_df = calculate_metrics(test_df)
            st.subheader("Performance vs Benchmark")
            st.table(metrics_df)

            st.subheader("Equity Curve")
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Buy_Hold_Value'], name='Buy & Hold', line=dict(color='gray', dash='dot')))
            fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Strategy_Value'], name='Smart Leverage', line=dict(color='#00CC96', width=2)))
            # "stretch" fills the container width; a boolean is not a valid width.
            st.plotly_chart(fig, width="stretch")

            st.subheader("Leverage Deployment")
            fig_lev = go.Figure()
            fig_lev.add_trace(go.Scatter(x=test_df.index, y=test_df['Position_Size'], mode='lines', fill='tozeroy', name='Lev', line=dict(color='#636EFA')))
            st.plotly_chart(fig_lev, width="stretch")

            trade_log = generate_trade_log(test_df)
            st.subheader("📝 Trade Log")
            if not trade_log.empty:
                display_log = trade_log.copy()
                display_log['Trade PnL'] = display_log['Trade PnL'].map('{:.2%}'.format)
                st.dataframe(display_log, width="stretch")
            else:
                st.write("No trades generated.")