Autopsy / data /indicators.py
ARKAISW's picture
Finalizing AUTOPSY for Hackathon submission
d64fd01
# data/indicators.py
import pandas as pd
import numpy as np
def compute_velocity(series: pd.Series, window: int = 10) -> pd.Series:
    """Return the percentage change of ``series`` over ``window`` periods.

    The fractional change from ``pct_change`` is scaled by 100, so a move
    from 100 to 110 is reported as 10.0. Entries that have no value
    ``window`` rows earlier come back as NaN.
    """
    fractional_change = series.pct_change(periods=window)
    return fractional_change * 100
def compute_zscore(series: pd.Series, window: int = 252) -> pd.Series:
    """
    Rolling z-score of ``series`` against its own trailing window.

    Mean and standard deviation are taken over the last ``window`` rows and
    need at least ``window // 2`` observations; before that the result is NaN.

    FIX #9: the standard deviation is floored at 1e-8 so a (near-)constant
    window cannot blow the z-score up through division by ~0.
    """
    min_obs = window // 2
    trailing = series.rolling(window=window, min_periods=min_obs)
    rolling_mean = trailing.mean()
    # FIX #9: floor the denominator instead of dividing by a near-zero std.
    rolling_std = trailing.std().clip(lower=1e-8)
    return (series - rolling_mean) / rolling_std
def compute_rolling_correlation(s1: pd.Series, s2: pd.Series, window: int = 21) -> pd.Series:
    """Rolling correlation between the one-period returns of two series.

    Both inputs are converted to returns with ``pct_change()`` first. The
    correlation is then taken over a trailing ``window`` rows and needs at
    least ``window // 2`` observations.
    """
    min_obs = window // 2
    returns_a = s1.pct_change()
    returns_b = s2.pct_change()
    trailing = returns_a.rolling(window=window, min_periods=min_obs)
    return trailing.corr(returns_b)
def compute_realized_vol(series: pd.Series, window: int = 21) -> pd.Series:
    """Annualized realized volatility, in percent.

    Takes the rolling standard deviation of one-period log returns over
    ``window`` rows (at least ``window // 2`` observations), then scales it
    by sqrt(252) and by 100.
    """
    price_ratio = series / series.shift(1)
    log_returns = np.log(price_ratio)
    rolling_std = log_returns.rolling(window=window, min_periods=window // 2).std()
    return rolling_std * np.sqrt(252) * 100
def _has_columns(df: pd.DataFrame, *names: str) -> bool:
    """Return True when every name in ``names`` is a column of ``df``."""
    return all(name in df.columns for name in names)


def _return_spread(df: pd.DataFrame, first: str, second: str, periods: int = 21):
    """Return ``first`` minus ``second`` trailing fractional return, or 0.0 if either column is missing."""
    if not _has_columns(df, first, second):
        return 0.0
    return df[first].pct_change(periods) - df[second].pct_change(periods)


def compute_all_indicators(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the full indicator table from the raw combined DataFrame.

    Takes the raw combined DataFrame from pipeline.py and returns a new
    DataFrame on the same index with all 40 indicator columns.

    Every indicator is always present in the result. When the input columns
    an indicator needs are absent from ``df``, the indicator is filled with
    a constant instead (0.0, or 1.0 for ``vix_term_structure``).

    Final cleanup forward-fills gaps, sets any remaining NaN to 0.0 and then
    replaces +/-inf with 0.0, so the returned frame has no NaN or infinite
    values.
    """
    ind = pd.DataFrame(index=df.index)

    # ── DIMENSION 1: LIQUIDITY ────────────────────────────────────────────
    if _has_columns(df, "ted_spread"):
        ind["ted_spread_level"] = df["ted_spread"]
        ind["ted_spread_velocity"] = compute_velocity(df["ted_spread"], 10)
    else:
        ind["ted_spread_level"] = 0.0
        ind["ted_spread_velocity"] = 0.0

    # The credit-spread indicators are all-or-nothing: both the HY and the
    # IG spread must be present, otherwise all seven columns are zeroed.
    if _has_columns(df, "hy_credit_spread", "ig_credit_spread"):
        hy_spread = df["hy_credit_spread"]
        ig_spread = df["ig_credit_spread"]
        # A zero IG spread becomes NaN so the ratio never divides by zero.
        ind["hy_ig_spread_ratio"] = hy_spread / ig_spread.replace(0, np.nan)
        ind["hy_spread_velocity"] = compute_velocity(hy_spread, 10)
        ind["ig_spread_velocity"] = compute_velocity(ig_spread, 10)
        ind["hy_spread_level"] = hy_spread
        ind["ig_spread_level"] = ig_spread
        ind["hy_spread_z_score"] = compute_zscore(hy_spread)
        ind["ig_spread_z_score"] = compute_zscore(ig_spread)
    else:
        for col in ["hy_ig_spread_ratio", "hy_spread_velocity", "ig_spread_velocity",
                    "hy_spread_level", "ig_spread_level", "hy_spread_z_score",
                    "ig_spread_z_score"]:
            ind[col] = 0.0

    if _has_columns(df, "yield_curve_10y2y"):
        ind["yield_curve_level"] = df["yield_curve_10y2y"]
        ind["yield_curve_velocity"] = compute_velocity(df["yield_curve_10y2y"], 10)
    else:
        ind["yield_curve_level"] = 0.0
        ind["yield_curve_velocity"] = 0.0

    if _has_columns(df, "ief", "spy"):
        ind["bond_equity_correlation"] = compute_rolling_correlation(df["ief"], df["spy"], 21)
    else:
        ind["bond_equity_correlation"] = 0.0

    # ── DIMENSION 2: VOLATILITY ───────────────────────────────────────────
    # Prefer the "vix" column; fall back to "vix_fred" when it is absent.
    vix_col = "vix" if "vix" in df.columns else "vix_fred"
    if vix_col in df.columns:
        vix = df[vix_col]
        ind["vix_level"] = vix
        ind["vix_velocity"] = compute_velocity(vix, 5)
        ind["vix_z_score"] = compute_zscore(vix)
        ind["vol_of_vol"] = vix.diff().rolling(21).std()
    else:
        for col in ["vix_level", "vix_velocity", "vix_z_score", "vol_of_vol"]:
            ind[col] = 0.0

    if _has_columns(df, "vix_9d", "vix_3m"):
        ind["vix_term_structure"] = df["vix_9d"] / df["vix_3m"].replace(0, np.nan)
    else:
        ind["vix_term_structure"] = 1.0

    if _has_columns(df, "spy"):
        ind["realized_vol_spy"] = compute_realized_vol(df["spy"], 21)
    else:
        ind["realized_vol_spy"] = 0.0

    # Both operands were assigned above (real data or a 0.0 fallback column),
    # so the gap can always be computed directly.
    ind["implied_realized_vol_gap"] = ind["vix_level"] - ind["realized_vol_spy"]

    # FIX #5: cross_asset_vol_spike - always derived from the vix_velocity
    # column (a Series even in the fallback case), never from a bare scalar.
    ind["cross_asset_vol_spike"] = np.abs(ind["vix_velocity"])

    # ── DIMENSION 3: CORRELATION ──────────────────────────────────────────
    # NOTE(review): equity_bond_corr uses the same inputs as
    # bond_equity_correlation above with the arguments swapped - confirm
    # that keeping both columns is intentional.
    for partner, col_name in [("ief", "equity_bond_corr"),
                              ("gld", "equity_gold_corr"),
                              ("hyg", "equity_hy_corr")]:
        if _has_columns(df, "spy", partner):
            ind[col_name] = compute_rolling_correlation(df["spy"], df[partner], 21)
        else:
            ind[col_name] = 0.0

    # Safe haven demand: mean 21-period return of the available havens
    # minus the 21-period return of SPY.
    safe_haven_components = [
        df[ticker].pct_change(21)
        for ticker in ["gld", "jpyusd", "chfusd"]
        if ticker in df.columns
    ]
    if safe_haven_components and _has_columns(df, "spy"):
        avg_safe = pd.concat(safe_haven_components, axis=1).mean(axis=1)
        ind["safe_haven_demand"] = avg_safe - df["spy"].pct_change(21)
    else:
        ind["safe_haven_demand"] = 0.0

    # Cross-sector correlation: rolling correlation of every unordered pair
    # of available sector tickers, summarised by row-wise mean and std.
    sector_tickers = [t for t in ["xlf", "xle", "xlu", "qqq"] if t in df.columns]
    if len(sector_tickers) >= 2:
        sector_returns = df[sector_tickers].pct_change()
        rolling_corrs = []
        for pos, left in enumerate(sector_tickers):
            for right in sector_tickers[pos + 1:]:
                rolling_corrs.append(
                    sector_returns[left].rolling(21).corr(sector_returns[right])
                )
        # Build the pairwise table once and reuse it for both summaries.
        pair_corrs = pd.concat(rolling_corrs, axis=1)
        ind["cross_sector_correlation"] = pair_corrs.mean(axis=1)
        ind["correlation_breakdown_score"] = pair_corrs.std(axis=1)
    else:
        ind["cross_sector_correlation"] = 0.0
        ind["correlation_breakdown_score"] = 0.0

    ind["em_dm_divergence"] = _return_spread(df, "spy", "eem")

    if _has_columns(df, "uup"):
        ind["dollar_stress_signal"] = compute_velocity(df["uup"], 10)
    else:
        ind["dollar_stress_signal"] = 0.0

    # ── DIMENSION 4: CREDIT ───────────────────────────────────────────────
    if _has_columns(df, "hy_credit_spread", "ig_credit_spread"):
        hy_ig_gap = df["hy_credit_spread"] - df["ig_credit_spread"]
        ind["hy_ig_divergence_velocity"] = compute_velocity(hy_ig_gap, 10)
    else:
        ind["hy_ig_divergence_velocity"] = 0.0

    ind["lqd_hyg_return_spread"] = _return_spread(df, "lqd", "hyg")
    ind["xlf_spy_relative"] = _return_spread(df, "xlf", "spy")
    ind["credit_equity_dislocation"] = _return_spread(df, "hyg", "spy")

    # ── DIMENSION 5: POSITIONING / FLOWS ──────────────────────────────────
    for ticker, col_name in [("gld", "gold_flow_signal"),
                             ("jpyusd", "jpy_flow_signal"),
                             ("chfusd", "chf_flow_signal")]:
        if ticker in df.columns:
            ind[col_name] = compute_velocity(df[ticker], 10)
        else:
            ind[col_name] = 0.0

    ind["utilities_relative"] = _return_spread(df, "xlu", "spy")
    ind["energy_relative"] = _return_spread(df, "xle", "spy")

    if _has_columns(df, "uso"):
        ind["oil_velocity"] = compute_velocity(df["uso"], 10)
    else:
        ind["oil_velocity"] = 0.0

    if _has_columns(df, "eem"):
        # Sign flipped so that a falling EEM gives a positive outflow signal.
        ind["em_outflow_signal"] = compute_velocity(df["eem"], 10) * -1
    else:
        ind["em_outflow_signal"] = 0.0

    # Risk-off composite: all four inputs were assigned above, so the mean
    # is always taken over the same four columns.
    # NOTE(review): the three flow signals come from compute_velocity
    # (scaled by 100) while utilities_relative is an unscaled fractional
    # spread - confirm the mixed scale is intended.
    risk_off_cols = ["gold_flow_signal", "jpy_flow_signal",
                     "chf_flow_signal", "utilities_relative"]
    ind["risk_off_composite"] = ind[risk_off_cols].mean(axis=1)

    # ── FINAL CLEANUP ─────────────────────────────────────────────────────
    # Order matters: gaps are forward-filled first, leftover NaN become 0.0,
    # and only then are infinities zeroed.
    ind = ind.ffill().fillna(0.0)
    ind = ind.replace([np.inf, -np.inf], 0.0)
    return ind
# ── The 40 canonical indicator names, grouped by dimension ──────────────────
# Five dimensions with eight indicators each. Insertion order of the groups
# and of the names within each group determines the order of ALL_INDICATORS.
INDICATOR_GROUPS = {
    "Liquidity": [
        "ted_spread_level",
        "ted_spread_velocity",
        "hy_ig_spread_ratio",
        "hy_spread_velocity",
        "ig_spread_velocity",
        "yield_curve_level",
        "yield_curve_velocity",
        "bond_equity_correlation",
    ],
    "Volatility": [
        "vix_level",
        "vix_velocity",
        "vix_term_structure",
        "vix_z_score",
        "realized_vol_spy",
        "implied_realized_vol_gap",
        "vol_of_vol",
        "cross_asset_vol_spike",
    ],
    "Correlation": [
        "equity_bond_corr",
        "equity_gold_corr",
        "equity_hy_corr",
        "safe_haven_demand",
        "cross_sector_correlation",
        "em_dm_divergence",
        "dollar_stress_signal",
        "correlation_breakdown_score",
    ],
    "Credit": [
        "hy_spread_level",
        "ig_spread_level",
        "hy_ig_divergence_velocity",
        "lqd_hyg_return_spread",
        "xlf_spy_relative",
        "credit_equity_dislocation",
        "ig_spread_z_score",
        "hy_spread_z_score",
    ],
    "Positioning": [
        "gold_flow_signal",
        "jpy_flow_signal",
        "chf_flow_signal",
        "utilities_relative",
        "energy_relative",
        "oil_velocity",
        "em_outflow_signal",
        "risk_off_composite",
    ],
}

# Flat list of every indicator name, in group order.
ALL_INDICATORS = [
    name
    for members in INDICATOR_GROUPS.values()
    for name in members
]