Spaces:

saadrizvi09
/

backtest

Sleeping

App Files Files Community

backtest / src /streamlit_app.py

saadrizvi09

Update src/streamlit_app.py

554907e verified about 2 months ago

raw

history blame contribute delete

13.3 kB

	import streamlit as st
	import yfinance as yf
	import pandas as pd
	import numpy as np
	from hmmlearn.hmm import GaussianHMM
	from sklearn.svm import SVR
	from sklearn.preprocessing import StandardScaler
	import plotly.graph_objects as go
	from datetime import datetime, date

	# --- Config ---
	st.set_page_config(page_title="HMM-SVR Leverage Sniper", layout="wide")

	# --- Helper Functions ---

	@st.cache_data(ttl=3600)
	def _download_prices(ticker, start_date, end_date):
	    """Download and tidy price history; raise instead of returning a failure.

	    Failures are signalled with exceptions so that only usable frames end up
	    in the Streamlit cache.

	    Raises:
	        LookupError: if the download is empty or has fewer than 10 rows.
	    """
	    df = yf.download(ticker, start=start_date, end=end_date, progress=False)
	    if df.empty:
	        raise LookupError(f"No data returned for {ticker}")
	    # Keep only the first level when the columns come back as a MultiIndex.
	    if isinstance(df.columns, pd.MultiIndex):
	        df.columns = df.columns.get_level_values(0)
	    df = df.dropna(how='all')
	    if len(df) < 10:
	        raise LookupError(f"Too few rows returned for {ticker}")
	    return df


	def fetch_data(ticker, start_date, end_date):
	    """Return price history for ``ticker`` or ``None`` when unavailable.

	    Args:
	        ticker: Symbol to download; surrounding whitespace is stripped and
	            the symbol is upper-cased.
	        start_date: Start of the range. ``date``/``datetime``/``Timestamp``
	            values are converted to ``YYYY-MM-DD`` strings.
	        end_date: End of the range, normalised the same way.

	    Returns:
	        A DataFrame with at least 10 rows, or ``None`` if the download was
	        empty, too short, or raised (in which case the error is shown).
	    """
	    ticker = ticker.strip().upper()
	    # ``datetime`` and ``pd.Timestamp`` are subclasses of ``date``, so this
	    # single check also covers plain ``date`` objects.
	    if isinstance(start_date, date):
	        start_date = start_date.strftime('%Y-%m-%d')
	    if isinstance(end_date, date):
	        end_date = end_date.strftime('%Y-%m-%d')

	    try:
	        return _download_prices(ticker, start_date, end_date)
	    except LookupError:
	        # Empty/short result: stay silent, the caller reports missing data.
	        return None
	    except Exception as e:
	        st.error(f"Error: {e}")
	        return None

	def calculate_metrics(df, strategy_col='Strategy_Value', benchmark_col='Buy_Hold_Value', periods_per_year=365):
	    """Summarise total return, Sharpe ratio and max drawdown for two curves.

	    Args:
	        df: Frame holding the two equity-curve columns.
	        strategy_col: Column with the strategy's equity curve.
	        benchmark_col: Column with the buy-and-hold equity curve.
	        periods_per_year: Number of rows per year used to annualise the
	            Sharpe ratio (defaults to 365).

	    Returns:
	        DataFrame with one column per curve and the rows "Total Return",
	        "Sharpe Ratio" and "Max Drawdown", all formatted as strings.
	    """
	    stats = {}
	    for col, name in [(strategy_col, 'Smart Leverage Strategy'), (benchmark_col, 'Buy & Hold')]:
	        curve = df[col]
	        initial = curve.iloc[0]
	        final = curve.iloc[-1]
	        total_return = (final - initial) / initial

	        daily_ret = curve.pct_change().dropna()
	        ret_std = daily_ret.std()
	        # std is NaN with fewer than two returns and 0 for a flat curve;
	        # report a Sharpe of 0 in both cases instead of NaN/inf.
	        if np.isfinite(ret_std) and ret_std > 0:
	            sharpe = (daily_ret.mean() / ret_std) * np.sqrt(periods_per_year)
	        else:
	            sharpe = 0.0

	        # Drawdown is measured against the running peak of the curve.
	        rolling_max = curve.cummax()
	        drawdown = (curve - rolling_max) / rolling_max
	        max_drawdown = drawdown.min()

	        stats[name] = {
	            "Total Return": f"{total_return:.2%}",
	            "Sharpe Ratio": f"{sharpe:.2f}",
	            "Max Drawdown": f"{max_drawdown:.2%}",
	        }
	    return pd.DataFrame(stats)

	def train_hmm_model(train_df, n_states):
	    """Fit a Gaussian HMM and rank its states by average volatility.

	    Args:
	        train_df: Frame with 'Log_Returns' and 'Volatility' columns.
	        n_states: Number of hidden states to fit.

	    Returns:
	        Tuple ``(model, mapping)`` where ``mapping`` translates the model's
	        raw state ids to ranks: 0 is the lowest-volatility state and
	        ``n_states - 1`` the highest.
	    """
	    # Both features are multiplied by 100 before fitting.
	    X_train = train_df[['Log_Returns', 'Volatility']].values * 100
	    model = GaussianHMM(n_components=n_states, covariance_type="full", n_iter=100, random_state=42)
	    model.fit(X_train)

	    hidden_states = model.predict(X_train)
	    state_vol = []
	    for i in range(n_states):
	        member_vol = X_train[hidden_states == i, 1]
	        if member_vol.size:
	            avg_vol = member_vol.mean()
	        else:
	            # No sample was decoded into this state: the mean of an empty
	            # slice is NaN, which would make the sort order undefined, so
	            # rank the state by its fitted volatility mean instead.
	            avg_vol = model.means_[i, 1]
	        state_vol.append((i, avg_vol))

	    state_vol.sort(key=lambda x: x[1])
	    mapping = {old: new for new, (old, _) in enumerate(state_vol)}
	    return model, mapping

	def train_svr_model(train_df):
	    """Fit the volatility regressor on standardised training features.

	    Args:
	        train_df: Frame with the 'Log_Returns', 'Volatility', 'Downside_Vol'
	            and 'Regime' feature columns plus the 'Target_Next_Vol' target.

	    Returns:
	        Tuple ``(model, scaler)``: the fitted RBF-kernel SVR and the fitted
	        StandardScaler that must be applied to inputs before predicting.
	    """
	    inputs = train_df[['Log_Returns', 'Volatility', 'Downside_Vol', 'Regime']].values
	    targets = train_df['Target_Next_Vol'].values

	    # Fit the scaler first, then train the regressor on the scaled inputs.
	    scaler = StandardScaler()
	    scaler.fit(inputs)
	    model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.01)
	    model.fit(scaler.transform(inputs), targets)
	    return model, scaler

	def _build_trade_record(entry_date, exit_date, entry_price, exit_price, trade_returns, leverages):
	    """Build one trade-log row from the bars collected while in a trade."""
	    return {
	        'Entry Date': entry_date, 'Exit Date': exit_date,
	        'Entry Price': entry_price, 'Exit Price': exit_price,
	        'Duration': len(trade_returns),
	        'Avg Leverage': f"{np.mean(leverages):.1f}x",
	        # Compound the per-bar strategy returns over the trade.
	        'Trade PnL': np.prod(1 + np.asarray(trade_returns)) - 1,
	    }


	def generate_trade_log(df):
	    """Group consecutive bars with a positive position into trades.

	    A trade opens on the first bar where 'Final_Position' is > 0 and closes
	    on the first later bar where it is exactly 0; a trade still open on the
	    last bar is closed there. Bars whose position is NaN are ignored.

	    Args:
	        df: Frame with 'Final_Position', 'Close' and 'Strategy_Returns'
	            columns; its index supplies the entry/exit dates.

	    Returns:
	        DataFrame with one row per trade ('Entry Date', 'Exit Date',
	        'Entry Price', 'Exit Price', 'Duration', 'Avg Leverage',
	        'Trade PnL'); empty when no trade occurred.
	    """
	    trades = []
	    if df.empty:
	        return pd.DataFrame(trades)

	    in_trade = False
	    entry_date = None
	    entry_price = 0
	    trade_returns = []
	    leverages = []

	    bars = zip(df.index, df['Final_Position'], df['Close'], df['Strategy_Returns'])
	    for bar_date, pos, close_price, strat_ret in bars:
	        if pos > 0:
	            if not in_trade:
	                in_trade = True
	                entry_date, entry_price = bar_date, close_price
	                trade_returns, leverages = [], []
	            trade_returns.append(strat_ret)
	            # Final_Position is the exposure in force on this bar, so it is
	            # what "Avg Leverage" has to average.
	            leverages.append(pos)
	        elif pos == 0 and in_trade:
	            # NOTE(review): prices are the closes of the first in-trade bar
	            # and the first flat bar, while the PnL covers the returns of
	            # the in-trade bars only - confirm this is the intended display.
	            trades.append(_build_trade_record(
	                entry_date, bar_date, entry_price, close_price, trade_returns, leverages))
	            in_trade = False

	    if in_trade:
	        trades.append(_build_trade_record(
	            entry_date, df.index[-1], entry_price, df['Close'].iloc[-1], trade_returns, leverages))
	    return pd.DataFrame(trades)

	# --- Main Logic ---

	# Page header and a short description of the strategy rules.
	st.title("⚡ HMM-SVR Leverage Backtester")
	strategy_summary = """
	The "Strict Rules" Strategy (No Lookahead Bias):
	1. Baseline: Buy when Fast EMA > Slow EMA.
	2. Safety (HMM): Calculates market regime using ONLY past data.
	3. Leverage Boost: Uses SVR to predict tomorrow's volatility based on today's data.
	Timing: Uses End-of-Day (EOD) data to make decisions for the next trading day.
	"""
	st.markdown(strategy_summary)

	# Sidebar inputs: instrument, backtest window and leverage rules.
	with st.sidebar:
	    st.header("Settings")
	    ticker = st.selectbox(
	        "Ticker",
	        options=["BNB-USD", "ETH-USD", "SOL-USD", "LINK-USD", "BTC-USD"],
	    )
	    backtest_start = st.date_input("Backtest Start Date", value=date(2022, 1, 1))
	    backtest_end = st.date_input("Backtest End Date", value=datetime.now())

	    st.divider()
	    st.subheader("Leverage Rules")
	    # Position multiplier applied when the boost conditions are met.
	    leverage_mult = st.number_input("Boost Leverage", value=3.0, step=0.5)
	    # Upper bound on predicted/average volatility for the boost to apply.
	    risk_threshold = st.slider("Certainty Threshold", min_value=0.1, max_value=1.0, value=0.5)

	if st.button("Run Backtest"):
	    # Download four extra years before the backtest window for training.
	    train_start_date = pd.Timestamp(backtest_start) - pd.DateOffset(years=4)
	    df = fetch_data(ticker, train_start_date, backtest_end)

	    if df is None or len(df) < 200:
	        st.error(f"Not enough data found for {ticker}.")
	    else:
	        # 1. Feature Engineering
	        df['Log_Returns'] = np.log(df['Close'] / df['Close'].shift(1))
	        df['Volatility'] = df['Log_Returns'].rolling(window=10).std()
	        # Keep negative returns and zero everything else (NaN becomes 0 too).
	        df['Downside_Returns'] = df['Log_Returns'].where(df['Log_Returns'] < 0, 0.0)
	        df['Downside_Vol'] = df['Downside_Returns'].rolling(window=10).std()
	        # SVR target: the next row's rolling volatility.
	        df['Target_Next_Vol'] = df['Volatility'].shift(-1)
	        df = df.dropna()

	        # 2. Split Data: everything before the backtest start is training data.
	        split_ts = pd.Timestamp(backtest_start)
	        train_df = df[df.index < split_ts].copy()
	        test_df = df[df.index >= split_ts].copy()

	        if len(train_df) < 365 or len(test_df) < 10:
	            st.error("Data split error. Adjust dates.")
	        else:
	            n_states = 3
	            # Middle-ranked regime, used when regime inference fails.
	            neutral_state = n_states // 2
	            hmm_cols = ['Log_Returns', 'Volatility']

	            with st.spinner("1. Training Models on History..."):
	                # Train HMM on past data only.
	                hmm_model, state_map = train_hmm_model(train_df, n_states)

	                # Label the training rows with regimes so the SVR can use them.
	                X_train_hmm = train_df[hmm_cols].values * 100
	                train_raw_states = hmm_model.predict(X_train_hmm)
	                train_df['Regime'] = [state_map.get(s, s) for s in train_raw_states]

	                # Train SVR
	                svr_model, svr_scaler = train_svr_model(train_df)

	            # --- HONEST WALK-FORWARD BACKTEST ---

	            st.info("2. Running Walk-Forward Simulation (Step-by-Step)... This simulates real-time trading.")
	            progress_bar = st.progress(0)

	            # Per-day results, collected in order and assigned after the loop.
	            honest_regimes = []
	            honest_predicted_vols = []
	            ema_short_vals = []
	            ema_long_vals = []

	            # Concatenate for sliding window access
	            all_data = pd.concat([train_df, test_df])
	            start_idx = len(train_df)
	            total_steps = len(test_df)

	            # Fixed lookback (in rows) for HMM inference to keep each step fast.
	            lookback_window = 252

	            for i in range(total_steps):
	                # Update UI
	                if i % 10 == 0:
	                    progress_bar.progress((i + 1) / total_steps)

	                # Window runs from (now - lookback) up to and including the
	                # current day: the decision is made at that day's close and
	                # applied to the next day via the shift(1) further below.
	                curr_pointer = start_idx + i
	                window_start = max(0, curr_pointer - lookback_window)
	                history_slice = all_data.iloc[window_start:curr_pointer + 1]

	                # --- A. Honest Regime Detection ---
	                # Decode the state path for this window only and keep the
	                # last state, i.e. the regime of the current day.
	                X_slice = history_slice[hmm_cols].values * 100
	                try:
	                    current_state_raw = hmm_model.predict(X_slice)[-1]
	                    current_state = state_map.get(current_state_raw, current_state_raw)
	                except Exception:
	                    # Best-effort fallback to the neutral regime. Catching
	                    # Exception (not a bare except) lets interpreter-level
	                    # signals such as KeyboardInterrupt propagate.
	                    current_state = neutral_state

	                honest_regimes.append(current_state)

	                # --- B. Honest Volatility Prediction ---
	                # Single-row SVR input: [Log_Ret, Vol, Down_Vol, Regime],
	                # using the regime just inferred above.
	                row = test_df.iloc[i]
	                svr_features = np.array([[
	                    row['Log_Returns'],
	                    row['Volatility'],
	                    row['Downside_Vol'],
	                    current_state,
	                ]])
	                svr_feat_scaled = svr_scaler.transform(svr_features)
	                honest_predicted_vols.append(svr_model.predict(svr_feat_scaled)[0])

	                # EMAs computed only from the history window up to today.
	                window_close = history_slice['Close']
	                ema_short_vals.append(window_close.ewm(span=12).mean().iloc[-1])
	                ema_long_vals.append(window_close.ewm(span=26).mean().iloc[-1])

	            # Assign the walk-forward results back to the dataframe.
	            test_df['EMA_Short'] = ema_short_vals
	            test_df['EMA_Long'] = ema_long_vals
	            test_df['Regime'] = honest_regimes
	            test_df['Predicted_Vol'] = honest_predicted_vols

	            progress_bar.empty()

	            # --- STRATEGY LOGIC ---

	            test_df['Signal'] = np.where(test_df['EMA_Short'] > test_df['EMA_Long'], 1, 0)
	            avg_train_vol = train_df['Volatility'].mean()
	            test_df['Risk_Ratio'] = test_df['Predicted_Vol'] / avg_train_vol

	            test_df['Position_Size'] = 1.0

	            cond_safe = (test_df['Regime'] == 0)
	            cond_low_risk = (test_df['Risk_Ratio'] < risk_threshold)
	            cond_crash = (test_df['Regime'] == (n_states - 1))

	            # Boost in the lowest-volatility regime when predicted risk is low.
	            test_df['Position_Size'] = np.where(cond_safe & cond_low_risk, leverage_mult, test_df['Position_Size'])
	            # Cut to zero in the highest-volatility regime (overrides the boost).
	            test_df['Position_Size'] = np.where(cond_crash, 0.0, test_df['Position_Size'])

	            # Calculate Returns: today's decision is applied to tomorrow's return.
	            test_df['Final_Position'] = (test_df['Signal'] * test_df['Position_Size']).shift(1)
	            test_df['Simple_Returns'] = test_df['Close'].pct_change()
	            test_df['Strategy_Returns'] = test_df['Final_Position'] * test_df['Simple_Returns']

	            # Metrics & Plots
	            test_df['Strategy_Value'] = (1 + test_df['Strategy_Returns'].fillna(0)).cumprod()
	            test_df['Buy_Hold_Value'] = (1 + test_df['Simple_Returns'].fillna(0)).cumprod()
	            test_df.dropna(inplace=True)

	            metrics_df = calculate_metrics(test_df)
	            st.subheader("Performance vs Benchmark")
	            st.table(metrics_df)

	            # `width` takes "stretch", "content" or a pixel count; "stretch"
	            # fills the container (the replacement for use_container_width=True).
	            st.subheader("Equity Curve")
	            fig = go.Figure()
	            fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Buy_Hold_Value'], name='Buy & Hold', line=dict(color='gray', dash='dot')))
	            fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Strategy_Value'], name='Smart Leverage', line=dict(color='#00CC96', width=2)))
	            st.plotly_chart(fig, width="stretch")

	            st.subheader("Leverage Deployment")
	            fig_lev = go.Figure()
	            fig_lev.add_trace(go.Scatter(x=test_df.index, y=test_df['Position_Size'], mode='lines', fill='tozeroy', name='Lev', line=dict(color='#636EFA')))
	            st.plotly_chart(fig_lev, width="stretch")

	            trade_log = generate_trade_log(test_df)
	            st.subheader("📝 Trade Log")
	            if not trade_log.empty:
	                display_log = trade_log.copy()
	                display_log['Trade PnL'] = display_log['Trade PnL'].map('{:.2%}'.format)
	                st.dataframe(display_log, width="stretch")
	            else:
	                st.write("No trades generated.")