# Spaces: Running
import os
import sys

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from supabase import create_client, Client

# Date the most recent news item is moved to when the news series is shifted
# backward so that it overlaps with the price data.
NEWS_SHIFT_ANCHOR = '2025-12-30'
# Length, in calendar days, of the trailing window used to smooth sentiment.
SENTIMENT_WINDOW_DAYS = 14

load_dotenv()
SUPABASE_URL = os.getenv('SUPABASE_URL')
SUPABASE_KEY = os.getenv('SUPABASE_KEY')
if not SUPABASE_URL or not SUPABASE_KEY:
    # Fail early with a clear message instead of an opaque client error.
    print("SUPABASE_URL and SUPABASE_KEY must be set (environment or .env file).",
          file=sys.stderr)
    sys.exit(1)

print("Fetching news data from Supabase...", flush=True)
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

# Page through the table `limit` rows at a time until a short or empty page.
# NOTE(review): the query has no explicit ordering, so page boundaries depend
# on the server's default row order -- confirm that it is stable, or add an
# .order(...) on a unique column.
offset = 0
limit = 1000
all_data = []
while True:
    try:
        response = (
            supabase.table('news_articles')
            .select('ticker,published,sentiment_score')
            .range(offset, offset + limit - 1)
            .execute()
        )
    except Exception as e:
        # Best effort: keep the rows fetched so far, but make it visible that
        # the dataset is incomplete.
        print(f"Supabase error: {e}", file=sys.stderr)
        print(f"Continuing with the {len(all_data)} rows fetched before the error.",
              file=sys.stderr)
        break
    data = response.data
    if not data:
        break
    all_data.extend(data)
    if len(data) < limit:
        break
    offset += limit

df_news = pd.DataFrame(all_data)
if df_news.empty:
    print("No news data found.")
    sys.exit(1)

# Parse timestamps as UTC, drop the timezone and truncate to midnight so each
# article is keyed by its calendar day.
df_news['published'] = pd.to_datetime(df_news['published'], utc=True).dt.tz_convert(None).dt.normalize()
print(f"Original news dates: {df_news['published'].min()} to {df_news['published'].max()}")

# SHIFT NEWS DATA BACKWARD TO OVERLAP WITH PRICE DATA
# Every date is moved by the same offset so that the latest article lands on
# NEWS_SHIFT_ANCHOR; the spacing between articles is unchanged.
date_diff = df_news['published'].max() - pd.to_datetime(NEWS_SHIFT_ANCHOR)
df_news['published'] = df_news['published'] - date_diff
print(f"Shifted news dates: {df_news['published'].min()} to {df_news['published'].max()}")

print("Building sentiment filter...", flush=True)
df_news['sentiment_score'] = pd.to_numeric(df_news['sentiment_score'], errors='coerce')
df_news = df_news.dropna(subset=['sentiment_score'])
if df_news.empty:
    # Without this guard the empty pivot fails later inside pd.date_range.
    print("No usable sentiment scores found.", file=sys.stderr)
    sys.exit(1)

# Rows are days, columns are tickers, values are the mean score of that day's
# articles. Days or tickers without articles count as 0 (neutral).
df_pivot = df_news.groupby(['published', 'ticker'])['sentiment_score'].mean().unstack(fill_value=0)
full_date_range = pd.date_range(start=df_pivot.index.min(), end=df_pivot.index.max(), freq='D')
df_pivot = df_pivot.reindex(full_date_range).fillna(0)
# Trailing mean over the window; min_periods=1 keeps the first days defined.
rolling_sentiment = df_pivot.rolling(window=SENTIMENT_WINDOW_DAYS, min_periods=1).mean()
# Put the local backtesting directories at the front of the import path.
# The resulting order is: backtesting/strategies first, then backtesting.
sys.path[:0] = [
    os.path.abspath('backtesting/strategies'),
    os.path.abspath('backtesting'),
]

from backtesting.strategies.v30_engine import CAP, V30_PARAMS, evaluate_slice, load_data
from backtesting.strategies.v36_engine import SECTORS, SECTOR_MAP

# load_data() returns four objects, used below as: dc (price frame indexed by
# date), spy (benchmark series), vf (ticker list) and daily_ret (daily returns).
# NOTE(review): these roles are inferred from how the names are used in this
# script -- confirm against v30_engine.load_data.
dc, spy, vf, daily_ret = load_data()
def run_v68_soft_with_filter(dc, spy, vf, daily_ret, sent_filter=None, rebal_days=40, vol_target=0.18,
                             riskoff_haircut=0.50, sma_lookback=200, mom_long=175,
                             mom_short=21, txn_bps=20, consistency_window=63,
                             top_n=15, use_dd_stop=True, dd_stop_level=-0.15,
                             dd_reentry_mult=1.05):
    """Simulate the momentum strategy day by day and return its NAV curve.

    At every rebalance, tickers are scored by ``momentum * consistency``,
    z-scored within their sector (``SECTOR_MAP``; unmapped tickers share the
    'Unknown' group), and the ``top_n`` highest scores are held at equal
    weight. Daily returns are scaled by a volatility-targeting factor.

    Args:
        dc: Price frame indexed by date; must contain the columns in ``vf``.
        spy: Benchmark price series, positionally aligned with ``dc``.
        vf: Iterable of ticker labels forming the tradable universe.
        daily_ret: Daily return frame, positionally aligned with ``dc``.
        sent_filter: Optional frame (dates x tickers). On a rebalance date
            present in its index, tickers whose value is below 0 are removed
            from the universe. Missing tickers or dates are not filtered.
        rebal_days: Number of simulated days between rebalances.
        vol_target: Annualised volatility targeted by the exposure scale.
        riskoff_haircut: Multiplier applied to the exposure scale when the
            previous day's ``spy`` is at or below its moving average, or when
            that average is not yet defined.
        sma_lookback: Window of the ``spy`` moving average.
        mom_long: Lag (rows) of the momentum denominator price.
        mom_short: Lag (rows) of the momentum numerator price.
        txn_bps: Transaction cost in basis points.
        consistency_window: Window for the fraction of positive daily changes.
        top_n: Maximum number of tickers held.
        use_dd_stop: Enable the drawdown stop.
        dd_stop_level: Paper drawdown (negative fraction) that triggers the
            stop.
        dd_reentry_mult: The stop is lifted once the paper NAV reaches this
            multiple of its lowest value since the stop was triggered.

    Returns:
        ``pandas.Series`` of NAV values indexed by ``dc.index[1:]``. Each
        day's value is recorded before that day's stop or turnover costs are
        deducted, so those costs show up from the following day.
    """
    prices = dc[vf]
    price_mom = (prices.shift(mom_short) / prices.shift(mom_long)) - 1
    signal_ret = prices.pct_change()
    # Share of up-days in the window; days without a return stay NaN so they
    # are not counted as down-days.
    rolling_ret = signal_ret.gt(0).where(signal_ret.notna()).rolling(consistency_window).mean()
    sma = spy.rolling(sma_lookback).mean()
    spy_vals = spy.values
    sma_vals = sma.values

    # Loop invariants, built once instead of on every rebalance or day.
    sector_of = pd.Series(SECTOR_MAP)
    ret_columns = daily_ret.columns
    txn_frac = txn_bps / 10000.0

    nav = CAP                # NAV actually earned (frozen while stopped out)
    paper_nav = CAP          # NAV of the always-invested shadow portfolio
    peak_paper_nav = CAP
    trough_paper_nav = CAP
    stop_active = False
    pick_tks = []            # tickers chosen at the last rebalance
    held_cols = []           # subset of pick_tks that has a return column
    current_weights = pd.Series(dtype=float)
    port_rets = []
    hist = []
    days = 0

    for i in range(1, len(dc)):
        date = dc.index[i]

        # --- exposure scale from realised volatility and the trend regime ---
        if len(port_rets) >= 21:
            w_window = port_rets[-60:] if len(port_rets) >= 60 else port_rets[-21:]
            vs = vol_target / (np.std(w_window) * np.sqrt(252) + 1e-8)
        else:
            vs = 0.5
        # Uses yesterday's benchmark values only.
        sp, sm = spy_vals[i - 1], sma_vals[i - 1]
        if pd.isna(sm) or sp <= sm:
            vs *= riskoff_haircut
        vs = float(np.clip(vs, 0.05, 1.0))

        # --- today's portfolio return ---
        day_ret = 0.0
        if held_cols:
            lr = daily_ret.iloc[i][held_cols].dropna()
            if not lr.empty:
                wt = current_weights.reindex(lr.index).fillna(0)
                # Renormalise over the tickers that have a return today.
                if wt.sum() > 0:
                    wt = wt / wt.sum()
                day_ret = (lr * wt).sum() * vs

        paper_nav *= (1 + day_ret)
        if not stop_active:
            nav *= (1 + day_ret)
        port_rets.append(day_ret)
        hist.append(nav)

        # --- drawdown stop, driven by the paper portfolio ---
        peak_paper_nav = max(peak_paper_nav, paper_nav)
        paper_dd = (paper_nav / peak_paper_nav) - 1.0
        if use_dd_stop:
            if not stop_active:
                if paper_dd <= dd_stop_level:
                    stop_active = True
                    trough_paper_nav = paper_nav
                    nav -= nav * txn_frac
            else:
                trough_paper_nav = min(trough_paper_nav, paper_nav)
                if paper_nav >= trough_paper_nav * dd_reentry_mult:
                    stop_active = False
                    # Restart drawdown measurement from the re-entry level.
                    peak_paper_nav = paper_nav
                    nav -= nav * txn_frac

        # --- periodic rebalance ---
        days += 1
        if days < rebal_days:
            continue
        days = 0

        mom_row = price_mom.iloc[i].dropna()
        cons_row = rolling_ret.iloc[i].dropna()
        valid_tks = [t for t in vf if t in mom_row.index and t in cons_row.index]

        # --- BERT SENTIMENT FILTER ---
        if sent_filter is not None and date in sent_filter.index:
            sent_row = sent_filter.loc[date]
            # Tickers missing from the filter default to 0 and are kept.
            valid_tks = [t for t in valid_tks if sent_row.get(t, 0) >= 0]

        if not valid_tks:
            # Nothing eligible: keep the current holdings until the next
            # rebalance, a full rebal_days away.
            continue

        comp_scores = mom_row[valid_tks] * cons_row[valid_tks]
        tk_sectors = sector_of.reindex(valid_tks).fillna('Unknown')
        grouped = comp_scores.groupby(tk_sectors)
        means = grouped.transform('mean')
        # Single-member or constant sectors have NaN or zero spread; replace
        # it with a tiny value to avoid dividing by zero.
        stds = grouped.transform('std').fillna(1e-8).replace(0, 1e-8)
        z_scores = ((comp_scores - means) / stds).sort_values(ascending=False)
        new_picks = list(z_scores.head(top_n).index)

        if pick_tks and new_picks:
            swaps = len(set(new_picks) - set(pick_tks))
            turnover_cost = (swaps / top_n) * txn_frac
            # NOTE(review): this is charged to nav even while the stop is
            # active -- confirm that is intended.
            nav -= nav * turnover_cost
            paper_nav -= paper_nav * turnover_cost

        if new_picks:
            current_weights = pd.Series(1.0 / len(new_picks), index=new_picks)
            pick_tks = new_picks
            held_cols = [t for t in pick_tks if t in ret_columns]

    # dtype is pinned so an empty run still yields a float Series.
    return pd.Series(hist, index=dc.index[1:len(hist) + 1], dtype=float)
def _evaluate_tranches(label, sent_filter, offsets, start, end):
    """Backtest staggered tranches and collect one metrics result per offset.

    For each base offset, four backtests are started 10 rows apart, their NAV
    curves are averaged, and the average is scored with ``evaluate_slice``
    over ``start``..``end``. Reads the module-level ``dc``, ``spy``, ``vf``,
    ``daily_ret`` and ``V30_PARAMS``.

    Args:
        label: Name printed in the progress line ("Base" or "BERT").
        sent_filter: Sentiment frame passed to the backtest, or None.
        offsets: Iterable of base row offsets into the price data.
        start: First date of the evaluation window ('YYYY-MM-DD').
        end: Last date of the evaluation window ('YYYY-MM-DD').

    Returns:
        List of the values returned by ``evaluate_slice``; offsets whose
        evaluation raised are reported on stderr and left out.
    """
    print(f"Evaluating {label} V68 Soft Tranche (60 bps)...", end='', flush=True)

    # Same for every run, so built once.
    params = V30_PARAMS.copy()
    params['rebal_days'] = 40
    params['txn_bps'] = 60

    results = []
    for base_off in offsets:
        curves = []
        for t_idx in range(4):
            off = base_off + (t_idx * 10)
            c = run_v68_soft_with_filter(dc.iloc[off:], spy.iloc[off:], vf, daily_ret.iloc[off:],
                                         sent_filter=sent_filter, use_dd_stop=True, **params)
            # Align to the full calendar; rows before the tranche starts
            # are filled with 1.0.
            # NOTE(review): the backtest starts at CAP, so 1.0 matches only
            # if CAP == 1.0 -- confirm.
            curves.append(c.reindex(dc.index).ffill().fillna(1.0))
        avg_curve = pd.concat(curves, axis=1).mean(axis=1)
        try:
            results.append(evaluate_slice(avg_curve, start, end))
        except Exception as exc:
            # Still best effort, but the failure is no longer hidden.
            print(f"\nevaluate_slice failed for tranche offset {base_off}: {exc}",
                  file=sys.stderr)
        print(".", end='', flush=True)
    print()
    return results


start_date = df_news['published'].min().strftime('%Y-%m-%d')
end_date = df_news['published'].max().strftime('%Y-%m-%d')
print(f"Evaluating BERT Sentiment Filter between {start_date} and {end_date}")

tranche_offsets = list(range(0, 20, 1))
res_base = _evaluate_tranches("Base", None, tranche_offsets, start_date, end_date)
res_bert = _evaluate_tranches("BERT", rolling_sentiment, tranche_offsets, start_date, end_date)

print("\n--- RESULTS DURING LIVE FORWARD PERIOD ---")
if res_base and res_bert:
    # Each metric is averaged over the offsets that evaluated successfully.
    base_s = np.mean([r['sharpe'] for r in res_base])
    bert_s = np.mean([r['sharpe'] for r in res_bert])
    print(f"Baseline Sharpe: {base_s:.4f}")
    print(f"BERT Sharpe: {bert_s:.4f}")

    base_c = np.mean([r['cagr'] for r in res_base])
    bert_c = np.mean([r['cagr'] for r in res_bert])
    print(f"Baseline CAGR: {base_c:.1f}%")
    print(f"BERT CAGR: {bert_c:.1f}%")

    base_d = np.mean([r['mdd'] for r in res_base])
    bert_d = np.mean([r['mdd'] for r in res_bert])
    print(f"Baseline MaxDD: {base_d:.1f}%")
    print(f"BERT MaxDD: {bert_d:.1f}%")

    # One CSV line: sharpe, cagr, mdd -- each as a baseline,BERT pair.
    with open('bert_results.txt', 'w', encoding='utf-8') as f:
        f.write(f"{base_s},{bert_s},{base_c},{bert_c},{base_d},{bert_d}")
else:
    print("Error evaluating slice.")