"""Compare the V68 soft-tranche back-test with and without a news-sentiment filter.

Script flow (runs at import time, top to bottom):

1. Page through the Supabase ``news_articles`` table.
2. Shift the news dates so the newest article lands on ``PRICE_DATA_END``.
3. Build a rolling mean of the daily per-ticker sentiment score.
4. Run the staggered-tranche back-test once without and once with the
   sentiment filter, then print and save the mean Sharpe / CAGR / MaxDD
   measured over the (shifted) news window.
"""
import os
import sys

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from supabase import create_client, Client

# --- Configuration -----------------------------------------------------------
PAGE_SIZE = 1000                  # rows requested per Supabase page
PRICE_DATA_END = '2025-12-30'     # date the newest news article is shifted onto
SENTIMENT_WINDOW_DAYS = 14        # rolling-mean window of the sentiment filter
TRANCHES_PER_RUN = 4              # staggered sub-portfolios averaged per run
TRANCHE_SPACING = 10              # rows between consecutive tranche starts
EVAL_REBAL_DAYS = 40              # rebalance interval used for the evaluation
EVAL_TXN_BPS = 60                 # transaction cost (bps) used for the evaluation
RESULTS_FILE = 'bert_results.txt'


def _fetch_news_rows(client, page_size=PAGE_SIZE):
    """Return all ``news_articles`` rows (ticker, published, sentiment_score).

    Rows are read in pages of ``page_size``. A failing request is printed and
    ends the download; the rows fetched so far are still returned, so the
    caller may receive a partial data set (best effort).
    """
    rows = []
    offset = 0
    while True:
        try:
            # NOTE(review): no ``order()`` is applied, so paging relies on the
            # server returning rows in a stable order between requests - confirm.
            response = (
                client.table('news_articles')
                .select('ticker,published,sentiment_score')
                .range(offset, offset + page_size - 1)
                .execute()
            )
            data = response.data
        except Exception as e:
            print(f"Supabase error: {e}")
            break
        if not data:
            break
        rows.extend(data)
        if len(data) < page_size:
            # A short page means the table is exhausted.
            break
        offset += page_size
    return rows


def run_v68_soft_with_filter(dc, spy, vf, daily_ret, sent_filter=None,
                             rebal_days=40, vol_target=0.18,
                             riskoff_haircut=0.50, sma_lookback=200,
                             mom_long=175, mom_short=21, txn_bps=20,
                             consistency_window=63, top_n=15,
                             use_dd_stop=True):
    """Simulate the momentum/consistency strategy and return its NAV curve.

    Args:
        dc: DataFrame of per-ticker levels (presumably prices - confirm);
            ``dc[vf]`` feeds the momentum and consistency signals.
        spy: Series compared against its own ``sma_lookback`` rolling mean
            to decide whether the risk-off haircut applies.
        vf: Column labels of ``dc`` that are eligible for selection.
        daily_ret: DataFrame of per-ticker daily returns; row ``i`` is read
            positionally, so it is assumed to be aligned with ``dc``.
        sent_filter: Optional DataFrame (dates x tickers). On a rebalance
            date present in its index, tickers whose value is below 0 are
            dropped; tickers missing from its columns are kept.
        rebal_days: Number of simulated days between rebalances.
        vol_target: Annualised volatility target used to scale exposure.
        riskoff_haircut: Exposure multiplier applied when the previous
            day's ``spy`` is at or below its SMA (or the SMA is undefined).
        sma_lookback: Window of the ``spy`` moving average.
        mom_long: Lag (rows) of the denominator of the momentum ratio.
        mom_short: Lag (rows) of the numerator of the momentum ratio.
        txn_bps: Cost in basis points charged on stop events and, scaled
            by the fraction of replaced names, on rebalances.
        consistency_window: Window over which the share of positive-return
            days is averaged.
        top_n: Number of tickers held after a rebalance.
        use_dd_stop: Enable the drawdown stop on the paper portfolio.

    Returns:
        Series of NAV values (starting from ``CAP``), one per simulated
        day, indexed by ``dc.index[1:]``.
    """
    price_mom = (dc[vf].shift(mom_short) / dc[vf].shift(mom_long)) - 1
    signal_ret = dc[vf].pct_change()
    # Share of up-days in the window; days without a return stay NaN.
    rolling_ret = (
        signal_ret.gt(0)
        .where(signal_ret.notna())
        .rolling(consistency_window)
        .mean()
    )
    sma = spy.rolling(sma_lookback).mean()

    nav = CAP
    # The "paper" NAV keeps tracking the strategy while the stop is active,
    # so the re-entry rule has something to measure the recovery against.
    paper_nav = CAP
    peak_paper_nav = CAP
    trough_paper_nav = CAP
    stop_active = False

    pick_tks = []
    current_weights = pd.Series(dtype=float)
    port_rets = []
    hist = []
    txn_frac = txn_bps / 10000.0
    days = 0
    spy_vals = spy.values
    sma_vals = sma.values

    for i in range(1, len(dc)):
        date = dc.index[i]

        # Exposure scale: target vol over realised vol (last 60 returns once
        # available, otherwise last 21); fixed 0.5 until 21 returns exist.
        if len(port_rets) >= 21:
            w_window = port_rets[-60:] if len(port_rets) >= 60 else port_rets[-21:]
            vs = vol_target / (np.std(w_window) * np.sqrt(252) + 1e-8)
        else:
            vs = 0.5

        # Regime check uses the previous row only.
        sp, sm = spy_vals[i - 1], sma_vals[i - 1]
        if pd.isna(sm) or sp <= sm:
            vs *= riskoff_haircut
        vs = float(np.clip(vs, 0.05, 1.0))

        day_ret = 0.0
        if pick_tks:
            held = [t for t in pick_tks if t in daily_ret.columns]
            lr = daily_ret.iloc[i][held].dropna()
            if not lr.empty:
                wt = current_weights.reindex(lr.index).fillna(0)
                if wt.sum() > 0:
                    # Renormalise over the names that have a return today.
                    wt = wt / wt.sum()
                    day_ret = (lr * wt).sum() * vs

        paper_nav *= (1 + day_ret)
        if not stop_active:
            nav *= (1 + day_ret)
        port_rets.append(day_ret)
        hist.append(nav)

        peak_paper_nav = max(peak_paper_nav, paper_nav)
        paper_dd = (paper_nav / peak_paper_nav) - 1.0

        if use_dd_stop:
            if not stop_active:
                if paper_dd <= -0.15:
                    # Stop out: freeze the real NAV and pay the exit cost.
                    stop_active = True
                    trough_paper_nav = paper_nav
                    nav -= nav * txn_frac
            else:
                trough_paper_nav = min(trough_paper_nav, paper_nav)
                if paper_nav >= trough_paper_nav * 1.05:
                    # Re-enter after a 5% recovery from the trough.
                    stop_active = False
                    peak_paper_nav = paper_nav
                    nav -= nav * txn_frac

        days += 1
        if days < rebal_days:
            continue

        # --- Rebalance ---
        days = 0
        mom_row = price_mom.iloc[i].dropna()
        cons_row = rolling_ret.iloc[i].dropna()
        valid_tks = [t for t in vf if t in mom_row.index and t in cons_row.index]

        # --- BERT sentiment filter ---
        if sent_filter is not None and date in sent_filter.index:
            sent_row = sent_filter.loc[date]
            valid_tks = [
                t for t in valid_tks
                if t not in sent_row.index or sent_row.get(t, 0) >= 0
            ]

        if not valid_tks:
            # Nothing eligible: keep the current holdings until next rebalance.
            continue

        # Rank by the sector-relative z-score of momentum x consistency.
        comp_scores = mom_row[valid_tks] * cons_row[valid_tks]
        s_map = pd.Series(SECTOR_MAP)
        tk_sectors = s_map.reindex(valid_tks).fillna('Unknown')
        grouped = comp_scores.groupby(tk_sectors)
        means = grouped.transform('mean')
        # Single-member or constant sectors have no usable std; use a tiny
        # positive value instead so the division stays defined.
        stds = grouped.transform('std').fillna(1e-8).replace(0, 1e-8)
        z_scores = ((comp_scores - means) / stds).sort_values(ascending=False)
        new_picks = list(z_scores.head(top_n).index)

        if pick_tks and new_picks:
            # Cost is proportional to the share of the book that is replaced.
            swaps = len(set(new_picks) - set(pick_tks))
            turnover_cost = (swaps / top_n) * txn_frac
            nav -= nav * turnover_cost
            paper_nav -= paper_nav * turnover_cost

        if new_picks:
            current_weights = pd.Series(1.0 / len(new_picks), index=new_picks)
            pick_tks = new_picks

    return pd.Series(hist, index=dc.index[1:len(hist) + 1])


def _evaluate_tranches(label, sent_filter, dc, spy, vf, daily_ret,
                       start_date, end_date, tranche_offsets):
    """Run the staggered-tranche back-test and return the per-offset metrics.

    For each base offset, ``TRANCHES_PER_RUN`` simulations are started
    ``TRANCHE_SPACING`` rows apart, their NAV curves are averaged, and the
    average is scored with ``evaluate_slice`` between ``start_date`` and
    ``end_date``. Offsets whose evaluation fails are reported on stderr and
    skipped.
    """
    print(label, end='', flush=True)

    params = V30_PARAMS.copy()
    params['rebal_days'] = EVAL_REBAL_DAYS
    params['txn_bps'] = EVAL_TXN_BPS

    results = []
    for base_off in tranche_offsets:
        curves = []
        for t_idx in range(TRANCHES_PER_RUN):
            off = base_off + (t_idx * TRANCHE_SPACING)
            c = run_v68_soft_with_filter(
                dc.iloc[off:], spy.iloc[off:], vf, daily_ret.iloc[off:],
                sent_filter=sent_filter, use_dd_stop=True, **params,
            )
            # Rows before a tranche starts have no NAV yet. Pad them with the
            # starting capital (the curve starts from CAP), not 1.0, so the
            # averaged curve has no artificial jump when a tranche begins.
            curves.append(c.reindex(dc.index).ffill().fillna(CAP))
        avg_curve = pd.concat(curves, axis=1).mean(axis=1)
        try:
            results.append(evaluate_slice(avg_curve, start_date, end_date))
        except Exception as exc:
            # Best effort: skip this offset, but say why instead of hiding it.
            print(f"\nevaluate_slice failed for offset {base_off}: {exc}",
                  file=sys.stderr)
        print(".", end='', flush=True)
    print()
    return results


def _mean_metric(results, key):
    """Return the mean of ``key`` across a list of metric dicts."""
    return np.mean([r[key] for r in results])


# --- 1. Download news ---------------------------------------------------------
load_dotenv()
SUPABASE_URL = os.getenv('SUPABASE_URL')
SUPABASE_KEY = os.getenv('SUPABASE_KEY')

print("Fetching news data from Supabase...", flush=True)
if not SUPABASE_URL or not SUPABASE_KEY:
    print("SUPABASE_URL and SUPABASE_KEY must be set (environment or .env).",
          file=sys.stderr)
    sys.exit(1)
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

all_data = _fetch_news_rows(supabase)
df_news = pd.DataFrame(all_data)
if df_news.empty:
    print("No news data found.")
    sys.exit(1)

# Reduce timestamps to timezone-naive calendar days (UTC).
df_news['published'] = (
    pd.to_datetime(df_news['published'], utc=True)
    .dt.tz_convert(None)
    .dt.normalize()
)
print(f"Original news dates: {df_news['published'].min()} to {df_news['published'].max()}")

# Shift the news so its newest day coincides with PRICE_DATA_END, making the
# news window overlap the price data.
# NOTE(review): after this shift the sentiment is no longer dated when it was
# actually published - confirm this is intended for the comparison.
date_diff = df_news['published'].max() - pd.to_datetime(PRICE_DATA_END)
df_news['published'] = df_news['published'] - date_diff
print(f"Shifted news dates: {df_news['published'].min()} to {df_news['published'].max()}")

# --- 2. Build the sentiment filter -------------------------------------------
print("Building sentiment filter...", flush=True)
df_news['sentiment_score'] = pd.to_numeric(df_news['sentiment_score'], errors='coerce')
df_news = df_news.dropna(subset=['sentiment_score'])

# Mean score per (day, ticker); days/tickers without news count as neutral 0.
df_pivot = (
    df_news.groupby(['published', 'ticker'])['sentiment_score']
    .mean()
    .unstack(fill_value=0)
)
full_date_range = pd.date_range(
    start=df_pivot.index.min(), end=df_pivot.index.max(), freq='D'
)
df_pivot = df_pivot.reindex(full_date_range).fillna(0)
rolling_sentiment = df_pivot.rolling(
    window=SENTIMENT_WINDOW_DAYS, min_periods=1
).mean()

# --- 3. Load the back-test engine --------------------------------------------
# Imported late on purpose: the engine is only needed once news data exists,
# and the search path has to be extended first.
sys.path.insert(0, os.path.abspath('backtesting'))
sys.path.insert(0, os.path.abspath('backtesting/strategies'))
from backtesting.strategies.v30_engine import load_data, evaluate_slice, V30_PARAMS, CAP  # noqa: E402
from backtesting.strategies.v36_engine import SECTOR_MAP, SECTORS  # noqa: E402

dc, spy, vf, daily_ret = load_data()

# --- 4. Evaluate --------------------------------------------------------------
start_date = df_news['published'].min().strftime('%Y-%m-%d')
end_date = df_news['published'].max().strftime('%Y-%m-%d')
print(f"Evaluating BERT Sentiment Filter between {start_date} and {end_date}")

tranche_offsets = list(range(0, 20, 1))

res_base = _evaluate_tranches(
    "Evaluating Base V68 Soft Tranche (60 bps)...", None,
    dc, spy, vf, daily_ret, start_date, end_date, tranche_offsets,
)
res_bert = _evaluate_tranches(
    "Evaluating BERT V68 Soft Tranche (60 bps)...", rolling_sentiment,
    dc, spy, vf, daily_ret, start_date, end_date, tranche_offsets,
)

# --- 5. Report ----------------------------------------------------------------
print("\n--- RESULTS DURING LIVE FORWARD PERIOD ---")
if res_base and res_bert:
    base_s = _mean_metric(res_base, 'sharpe')
    bert_s = _mean_metric(res_bert, 'sharpe')
    print(f"Baseline Sharpe: {base_s:.4f}")
    print(f"BERT Sharpe: {bert_s:.4f}")

    base_c = _mean_metric(res_base, 'cagr')
    bert_c = _mean_metric(res_bert, 'cagr')
    print(f"Baseline CAGR: {base_c:.1f}%")
    print(f"BERT CAGR: {bert_c:.1f}%")

    base_d = _mean_metric(res_base, 'mdd')
    bert_d = _mean_metric(res_bert, 'mdd')
    print(f"Baseline MaxDD: {base_d:.1f}%")
    print(f"BERT MaxDD: {bert_d:.1f}%")

    with open(RESULTS_FILE, 'w', encoding='utf-8') as f:
        f.write(f"{base_s},{bert_s},{base_c},{bert_c},{base_d},{bert_d}")
else:
    print("Error evaluating slice.")