File size: 15,316 Bytes
b1f38ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
import yfinance as yf
import pandas as pd
import numpy as np
from hmmlearn.hmm import GaussianHMM
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from datetime import datetime
import joblib
import os

def fetch_data(ticker, start_date, end_date):
    """
    Download price history for ``ticker`` between ``start_date`` and ``end_date``.

    The download is requested with ``auto_adjust=True`` and the progress bar
    disabled. When the result carries MultiIndex columns they are flattened to
    the level that holds the price field names: level 0 if it contains
    'Close', otherwise level 1.

    Returns:
        The downloaded DataFrame with rows containing NaN removed, or None
        when the download produced an empty frame.
    """
    prices = yf.download(
        ticker,
        start=start_date,
        end=end_date,
        progress=False,
        auto_adjust=True,
    )

    if prices.empty:
        return None

    columns = prices.columns
    if isinstance(columns, pd.MultiIndex):
        # Usual layout is (Price, Ticker); fall back to (Ticker, Price).
        top_level = columns.get_level_values(0)
        if 'Close' in top_level:
            prices.columns = top_level
        else:
            prices.columns = columns.get_level_values(1)

    return prices.dropna()

def generate_trade_log(df):
    """
    Walk the backtest frame row by row and collect one record per trade cycle.

    A trade opens on the first row whose 'Final_Position' is positive, keeps
    accumulating rows while the position stays positive, and closes on the
    first following row whose position equals zero. A trade that is still
    open after the last row is reported with the last index label and the
    last 'Close' as its exit.

    Args:
        df: DataFrame with 'Final_Position', 'Close', 'Position_Size' and
            'Strategy_Returns' columns.

    Returns:
        List of dicts with keys 'entry_date', 'exit_date', 'entry_price',
        'exit_price', 'duration_days', 'avg_leverage', 'trade_pnl' and
        'trade_pnl_percent'.
    """
    def _build_record(closed_on, closed_at):
        # Compound the per-row strategy returns collected for this trade.
        compounded = np.prod([1 + r for r in held_returns]) - 1
        return {
            'entry_date': opened_on,
            'exit_date': closed_on,
            'entry_price': opened_at,
            'exit_price': closed_at,
            'duration_days': len(held_returns),
            'avg_leverage': np.mean(held_leverage),
            'trade_pnl': compounded,
            'trade_pnl_percent': compounded * 100
        }

    log = []
    is_open = False
    opened_on = None
    opened_at = 0
    held_returns = []
    held_leverage = []

    for stamp, bar in df.iterrows():
        position = bar['Final_Position']
        price = bar['Close']
        leverage = bar['Position_Size']

        if position > 0:
            if not is_open:
                # Entry: start a fresh trade on this row.
                is_open = True
                opened_on = stamp
                opened_at = price
                held_returns = []
                held_leverage = []
            # Entry row and every later in-trade row both contribute.
            held_returns.append(bar['Strategy_Returns'])
            held_leverage.append(leverage)
            continue

        if is_open and position == 0:
            # Exit: position went flat while a trade was running.
            log.append(_build_record(stamp, price))
            is_open = False
            held_returns = []
            held_leverage = []

    if is_open:
        # Trade still running at the end of the data.
        log.append(_build_record(df.index[-1], df.iloc[-1]['Close']))

    return log


def train_hmm_model(train_df, n_states=3):
    """
    Trains HMM on historical data and sorts states by volatility.
    State 0 = Lowest Volatility (Safe)
    State N-1 = Highest Volatility (Crash)

    Args:
        train_df: DataFrame with 'Log_Returns' and 'Volatility' columns.
        n_states: Number of hidden states to fit.

    Returns:
        (model, mapping): the fitted GaussianHMM and a dict that maps each raw
        state id to its rank when states are ordered by average volatility.
    """
    # Both features are multiplied by 100 before fitting; callers must apply
    # the same scaling before calling model.predict.
    X_train = train_df[['Log_Returns', 'Volatility']].values * 100

    model = GaussianHMM(n_components=n_states, covariance_type="full", n_iter=100, random_state=42)
    model.fit(X_train)

    # Calculate average volatility per state
    hidden_states = model.predict(X_train)
    state_vol = []
    for i in range(n_states):
        members = X_train[hidden_states == i, 1]
        if members.size:
            avg_vol = members.mean()
        else:
            # No training sample decoded to this state: mean() of an empty
            # slice is NaN, and NaN sort keys make the ranking unreliable.
            # Use the fitted emission mean of the volatility feature instead.
            avg_vol = model.means_[i, 1]
        state_vol.append((i, avg_vol))

    # Sort states by volatility: State 0 = Lowest, State N-1 = Highest
    state_vol.sort(key=lambda x: x[1])
    mapping = {old: new for new, (old, _) in enumerate(state_vol)}

    return model, mapping


def train_svr_model(train_df):
    """
    Fit an RBF-kernel SVR that maps today's features to 'Target_Next_Vol'.

    Features are 'Log_Returns', 'Volatility', 'Downside_Vol' and 'Regime',
    standardised with a StandardScaler fitted on the same training rows.

    Returns:
        (model, scaler): the fitted SVR and the fitted StandardScaler; the
        scaler must be applied to features before calling model.predict.
    """
    inputs = train_df[['Log_Returns', 'Volatility', 'Downside_Vol', 'Regime']].values
    targets = train_df['Target_Next_Vol'].values

    scaler = StandardScaler()
    regressor = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.01)
    regressor.fit(scaler.fit_transform(inputs), targets)

    return regressor, scaler

def train_models_and_backtest(ticker, start_date, end_date, short_window, long_window, n_states):
    """
    HMM-SVR Honest Leverage Strategy (Walk-Forward):
    Uses strict walk-forward simulation to eliminate lookahead bias.
    Each prediction uses only data available up to that point in time.

    Args:
        ticker: Symbol passed to fetch_data.
        start_date: First day of the backtest; rows before it (fetched from
            four years earlier) are used for training.
        end_date: End of the download range.
        short_window: Span of the short EMA used for the crossover signal.
        long_window: Span of the long EMA used for the crossover signal.
        n_states: Number of HMM regimes; regime n_states-1 is treated as the
            crash regime and forces a zero position.

    Returns:
        {"error": message} when there is not enough data, otherwise a dict
        with "metrics" (pre-formatted strings plus the trade count),
        "chart_data" (one entry per test day) and "trades" (one entry per
        trade cycle).

    Side effects:
        Writes the fitted models to 'hmm_model.pkl' next to this file via
        joblib and prints progress messages.
    """
    # 1. Fetch Data (extended training period)
    train_start = pd.Timestamp(start_date) - pd.DateOffset(years=4)
    df = fetch_data(ticker, train_start, end_date)

    if df is None or len(df) < 200:
        return {"error": "Not enough data"}

    # Feature Engineering
    df['Log_Returns'] = np.log(df['Close'] / df['Close'].shift(1))
    df['Volatility'] = df['Log_Returns'].rolling(window=10).std()

    # Downside volatility (std of negative returns only)
    df['Downside_Returns'] = df['Log_Returns'].apply(lambda x: x if x < 0 else 0)
    df['Downside_Vol'] = df['Downside_Returns'].rolling(10).std()

    # SVR target: next day's volatility
    # (shift(-1) leaves the final row without a target, so dropna removes it)
    df['Target_Next_Vol'] = df['Volatility'].shift(-1)
    df = df.dropna()

    # Split Data
    train_df = df[df.index < pd.Timestamp(start_date)].copy()
    test_df = df[df.index >= pd.Timestamp(start_date)].copy()

    if len(train_df) < 365 or len(test_df) < 10:
        return {"error": "Data split error. Adjust dates."}

    print(f"βœ… Training on {len(train_df)} days, Testing on {len(test_df)} days")

    # 2. Train HMM (sorted by volatility)
    print("πŸ”„ Training HMM on historical data...")
    hmm_model, state_mapping = train_hmm_model(train_df, n_states=n_states)

    # Predict regimes on train and remap
    X_train = train_df[['Log_Returns', 'Volatility']].values * 100
    train_regimes = hmm_model.predict(X_train)
    train_df['Regime'] = [state_mapping[s] for s in train_regimes]

    # Calculate average training volatility for risk ratio
    avg_train_vol = train_df['Volatility'].mean()

    # 3. Train SVR
    print("πŸ”„ Training SVR for volatility prediction...")
    svr_model, svr_scaler = train_svr_model(train_df)

    # Save models for live trading
    model_data = {
        'hmm_model': hmm_model,
        'svr_model': svr_model,
        'svr_scaler': svr_scaler,
        'state_mapping': state_mapping,
        'avg_train_vol': avg_train_vol,
        'n_states': n_states,
        'trained_at': datetime.now().isoformat()
    }
    model_path = os.path.join(os.path.dirname(__file__), 'hmm_model.pkl')
    joblib.dump(model_data, model_path)
    print(f"βœ… HMM-SVR Model saved to {model_path}")
    print(f"   States: 0=Low Vol, {n_states-1}=High Vol (Crash)")
    print(f"   Avg training volatility: {avg_train_vol:.6f}")

    # --- HONEST WALK-FORWARD BACKTEST ---
    print("\nπŸ”„ Running Walk-Forward Simulation (No Lookahead Bias)...")

    # Prepare containers for honest predictions
    honest_regimes = []
    honest_predicted_vols = []
    honest_ema_short = []
    honest_ema_long = []

    # Concatenate for sliding window access
    all_data = pd.concat([train_df, test_df])
    start_idx = len(train_df)
    total_steps = len(test_df)
    lookback_window = 252  # 1 year lookback for regime detection

    # Regime used when decoding fails: the middle of the volatility-ranked
    # states (equals 1 for both 2 and 3 states).
    fallback_state = n_states // 2

    # Walk forward one day at a time
    for i in range(total_steps):
        # Progress indicator
        if i % 50 == 0 and i > 0:
            print(f"   Processing day {i}/{total_steps}...")

        # Current position in full dataset
        curr_pointer = start_idx + i
        window_start = max(0, curr_pointer - lookback_window)

        # Slice history up to current day (inclusive)
        history_slice = all_data.iloc[window_start : curr_pointer + 1]

        # A. Honest Regime Detection (uses only history)
        X_slice = history_slice[['Log_Returns', 'Volatility']].values * 100
        try:
            hidden_states_slice = hmm_model.predict(X_slice)
            current_state_raw = hidden_states_slice[-1]
            current_state = state_mapping.get(current_state_raw, current_state_raw)
        except Exception:
            # Best-effort fallback, but let KeyboardInterrupt/SystemExit
            # propagate instead of being swallowed by a bare except.
            current_state = fallback_state  # Fallback to neutral

        honest_regimes.append(current_state)

        # B. Honest Volatility Prediction (uses today's data to predict tomorrow)
        row = test_df.iloc[i]
        svr_features = np.array([[
            row['Log_Returns'],
            row['Volatility'],
            row['Downside_Vol'],
            current_state
        ]])

        svr_feat_scaled = svr_scaler.transform(svr_features)
        pred_vol = svr_model.predict(svr_feat_scaled)[0]
        honest_predicted_vols.append(pred_vol)

        # C. Honest EMA Calculation (uses only history)
        ema_short_val = history_slice['Close'].ewm(span=short_window).mean().iloc[-1]
        ema_long_val = history_slice['Close'].ewm(span=long_window).mean().iloc[-1]
        honest_ema_short.append(ema_short_val)
        honest_ema_long.append(ema_long_val)

    print(f"βœ… Walk-forward simulation complete!")

    # Assign honest predictions to test dataframe
    test_df['Regime'] = honest_regimes
    test_df['Predicted_Vol'] = honest_predicted_vols
    test_df['EMA_Short'] = honest_ema_short
    test_df['EMA_Long'] = honest_ema_long

    # 4. Generate trading signals (EMA crossover)
    test_df['Signal'] = np.where(test_df['EMA_Short'] > test_df['EMA_Long'], 1, 0)
    test_df['Risk_Ratio'] = test_df['Predicted_Vol'] / avg_train_vol

    # 5. Calculate leverage based on regime and risk
    test_df['Position_Size'] = 1.0  # Default

    # Boost: 3x in certainty (low vol + low risk)
    cond_safe = (test_df['Regime'] == 0)
    cond_low_risk = (test_df['Risk_Ratio'] < 0.5)
    test_df.loc[cond_safe & cond_low_risk, 'Position_Size'] = 3.0

    # Cut: 0x in crash regime (applied last, so it overrides the boost)
    cond_crash = (test_df['Regime'] == (n_states - 1))
    test_df.loc[cond_crash, 'Position_Size'] = 0.0

    # 6. Calculate final position (signal today decides position tomorrow)
    test_df['Final_Position'] = (test_df['Signal'] * test_df['Position_Size']).shift(1)

    # 7. Returns Calculation
    test_df['Simple_Returns'] = test_df['Close'].pct_change()
    test_df['Strategy_Returns'] = test_df['Final_Position'] * test_df['Simple_Returns']

    # --- CUMULATIVE CURVES ---
    test_df['Strategy_Equity'] = (1 + test_df['Strategy_Returns'].fillna(0)).cumprod()
    test_df['BuyHold_Equity'] = (1 + test_df['Simple_Returns'].fillna(0)).cumprod()
    # Drops the first test row, whose shifted position and return are NaN.
    test_df.dropna(inplace=True)

    # 8. Generate Trade Log with leverage info
    trades_list = generate_trade_log(test_df)

    trades_data = []
    for trade in trades_list:
        trades_data.append({
            'entry_date': trade['entry_date'].strftime('%Y-%m-%d'),
            'exit_date': trade['exit_date'].strftime('%Y-%m-%d'),
            'entry_price': float(trade['entry_price']),
            'exit_price': float(trade['exit_price']),
            'duration_days': int(trade['duration_days']),
            'avg_leverage': float(trade['avg_leverage']),
            'trade_pnl': float(trade['trade_pnl']),
            'trade_pnl_percent': float(trade['trade_pnl_percent']),
            'regime': int(test_df.loc[trade['entry_date'], 'Regime']) if trade['entry_date'] in test_df.index else 0
        })

    # 9. JSON Response - Chart Data
    chart_data = []
    for date, row in test_df.iterrows():
        chart_data.append({
            'date': date.strftime('%Y-%m-%d'),
            'strategy': float(row['Strategy_Equity']),
            'buy_hold': float(row['BuyHold_Equity']),
            'regime': int(row['Regime']),
            'leverage': float(row['Position_Size'])
        })

    # 10. Calculate Advanced Metrics
    strat_total = test_df['Strategy_Equity'].iloc[-1] - 1
    bh_total = test_df['BuyHold_Equity'].iloc[-1] - 1

    strategy_returns = test_df['Strategy_Returns'].dropna()
    sharpe = (strategy_returns.mean() / strategy_returns.std()) * np.sqrt(252) if strategy_returns.std() != 0 else 0

    # Max Drawdown
    rolling_max_strategy = test_df['Strategy_Equity'].cummax()
    drawdown_strategy = (test_df['Strategy_Equity'] - rolling_max_strategy) / rolling_max_strategy
    max_drawdown_strategy = drawdown_strategy.min()

    rolling_max_bh = test_df['BuyHold_Equity'].cummax()
    drawdown_bh = (test_df['BuyHold_Equity'] - rolling_max_bh) / rolling_max_bh
    max_drawdown_bh = drawdown_bh.min()

    # Win Rate Calculation
    # Trades whose exit date is the last test day are treated as still open.
    closed_trades = [t for t in trades_list if t['exit_date'] != test_df.index[-1]]
    winning_closed_trades = [t for t in closed_trades if t['trade_pnl'] > 0]

    win_rate = (len(winning_closed_trades) / len(closed_trades) * 100) if closed_trades else 0

    # Sortino Ratio
    negative_returns = strategy_returns[strategy_returns < 0]
    # The sample std of a single observation is NaN, which would slip past
    # the "!= 0" guard below and produce a "nan" metric; need at least two.
    downside_std = negative_returns.std() if len(negative_returns) > 1 else 0
    sortino = (strategy_returns.mean() / downside_std) * np.sqrt(252) if downside_std != 0 else 0

    # Profit Factor (All trades including open ones)
    winning_trades = [t for t in trades_list if t['trade_pnl'] > 0]
    losing_trades = [t for t in trades_list if t['trade_pnl'] < 0]

    total_wins = sum([t['trade_pnl'] for t in winning_trades]) if winning_trades else 0
    total_losses = abs(sum([t['trade_pnl'] for t in losing_trades])) if losing_trades else 0
    profit_factor = total_wins / total_losses if total_losses != 0 else (float('inf') if total_wins > 0 else 0)

    # Risk Reward
    avg_win = (total_wins / len(winning_trades)) if winning_trades else 0
    avg_loss = (total_losses / len(losing_trades)) if losing_trades else 0
    risk_reward = avg_win / avg_loss if avg_loss != 0 else 0

    recovery_factor = abs(strat_total / max_drawdown_strategy) if max_drawdown_strategy != 0 else 0

    # Average leverage used
    avg_leverage_used = test_df[test_df['Final_Position'] > 0]['Position_Size'].mean() if (test_df['Final_Position'] > 0).any() else 0

    return {
        "metrics": {
            "strategy_return": f"{strat_total:.2%}",
            "buy_hold_return": f"{bh_total:.2%}",
            "final_value": f"${test_df['Strategy_Equity'].iloc[-1] * 10000:.2f}",
            "sharpe_ratio": f"{sharpe:.2f}",
            "sortino_ratio": f"{sortino:.2f}",
            "max_drawdown": f"{max_drawdown_strategy:.2%}",
            "max_drawdown_bh": f"{max_drawdown_bh:.2%}",
            "profit_factor": f"{profit_factor:.2f}" if profit_factor != float('inf') else "∞",
            "risk_reward": f"{risk_reward:.2f}",
            "recovery_factor": f"{recovery_factor:.2f}",
            "win_rate": f"{win_rate:.1f}%",
            "avg_leverage": f"{avg_leverage_used:.2f}x",
            "total_trades": len(trades_data)
        },
        "chart_data": chart_data,
        "trades": trades_data
    }