Spaces:

ARKAISW
/

Autopsy

Sleeping

App Files Files Community

Autopsy / data /indicators.py

ARKAISW

Finalizing AUTOPSY for Hackathon submission

d64fd01 27 days ago

raw

history blame contribute delete

11 kB

	# data/indicators.py

	import pandas as pd
	import numpy as np


	def compute_velocity(series: pd.Series, window: int = 10) -> pd.Series:
	    """
	    Rate of change over ``window`` periods, expressed as a percentage.

	    Each value is compared with the value ``window`` rows earlier, so the
	    first ``window`` entries of the result are NaN. A zero base value
	    yields +/-inf, which is left in place for the caller to handle.

	    Args:
	        series: Input values.
	        window: Look-back distance in rows.

	    Returns:
	        A Series on the same index holding the change in percent
	        (10.0 means +10%).
	    """
	    fractional_change = series.pct_change(periods=window)
	    return fractional_change * 100


	def compute_zscore(series: pd.Series, window: int = 252) -> pd.Series:
	    """
	    Rolling z-score of ``series`` against its own trailing window.

	    The mean and sample standard deviation are taken over the last
	    ``window`` rows and are available once ``window // 2`` observations
	    exist. The standard deviation is floored at 1e-8 so that a flat
	    window never causes a division by zero (FIX #9).

	    Args:
	        series: Input values.
	        window: Trailing window length in rows.

	    Returns:
	        A Series on the same index with ``(value - mean) / std``; NaN where
	        the window does not yet hold enough observations.
	    """
	    # Build the window once so mean and std always share the same settings.
	    trailing = series.rolling(window=window, min_periods=window // 2)
	    mean = trailing.mean()
	    # FIX #9: floor the std so a (near-)constant window cannot divide by ~0.
	    std = trailing.std().clip(lower=1e-8)
	    return (series - mean) / std


	def compute_rolling_correlation(s1: pd.Series, s2: pd.Series, window: int = 21) -> pd.Series:
	    """
	    Rolling correlation between the one-period returns of two series.

	    Both inputs are converted to one-period percentage changes first, so
	    they should be passed as levels rather than as returns. A value is
	    produced once ``window // 2`` paired observations are in the window.

	    Args:
	        s1: First series of levels.
	        s2: Second series of levels.
	        window: Trailing window length in rows.

	    Returns:
	        A Series of correlations; NaN where too few observations exist.
	    """
	    returns_1 = s1.pct_change()
	    returns_2 = s2.pct_change()
	    trailing = returns_1.rolling(window=window, min_periods=window // 2)
	    return trailing.corr(returns_2)


	def compute_realized_vol(series: pd.Series, window: int = 21) -> pd.Series:
	    """
	    Annualized realized volatility, in percent.

	    Computes the rolling sample standard deviation of one-period log
	    returns, scaled by sqrt(252) to annualize and by 100 to express the
	    result as a percentage. A value is produced once ``window // 2``
	    returns are in the window.

	    Args:
	        series: Input levels.
	        window: Trailing window length in rows.

	    Returns:
	        A Series on the same index; NaN where too few returns exist.
	    """
	    log_returns = np.log(series / series.shift(1))
	    rolling_std = log_returns.rolling(window=window, min_periods=window // 2).std()
	    return rolling_std * np.sqrt(252) * 100


	def compute_all_indicators(df: pd.DataFrame) -> pd.DataFrame:
	    """
	    Takes raw combined DataFrame from pipeline.py.
	    Returns a new DataFrame with all 40 indicator columns.

	    Every indicator column is always emitted. When the raw column(s) an
	    indicator needs are missing from ``df``, the indicator is filled with a
	    constant placeholder instead (0.0, or 1.0 for ``vix_term_structure``).

	    Args:
	        df: Raw combined DataFrame; columns are looked up by name and all
	            of them are optional.

	    Returns:
	        A new DataFrame on ``df.index``. NaNs are forward-filled, any NaNs
	        still remaining become 0.0, and +/-inf values become 0.0.
	    """
	    ind = pd.DataFrame(index=df.index)

	    def has(*names: str) -> bool:
	        """True when every named raw column is present in df."""
	        return all(name in df.columns for name in names)

	    def fill(names, value: float = 0.0) -> None:
	        """Assign a constant placeholder to each named indicator column."""
	        for name in names:
	            ind[name] = value

	    # 21-period returns are reused by many indicators; compute each once.
	    returns_21d = {}

	    def ret21(ticker: str) -> pd.Series:
	        """21-period fractional return of a raw column (cached per ticker)."""
	        if ticker not in returns_21d:
	            returns_21d[ticker] = df[ticker].pct_change(21)
	        return returns_21d[ticker]

	    def relative_21d(name: str, lead: str, base: str) -> None:
	        """Store the lead-minus-base 21-period return, or 0.0 if a column is missing."""
	        if has(lead, base):
	            ind[name] = ret21(lead) - ret21(base)
	        else:
	            ind[name] = 0.0

	    def velocity_10d(name: str, source: str) -> None:
	        """Store the 10-period velocity of a raw column, or 0.0 if it is missing."""
	        if has(source):
	            ind[name] = compute_velocity(df[source], 10)
	        else:
	            ind[name] = 0.0

	    def pair_corr(name: str, first: str, second: str) -> None:
	        """Store the 21-period rolling return correlation, or 0.0 if a column is missing."""
	        if has(first, second):
	            ind[name] = compute_rolling_correlation(df[first], df[second], 21)
	        else:
	            ind[name] = 0.0

	    # ── DIMENSION 1: LIQUIDITY ────────────────────────────────────────────────
	    if has("ted_spread"):
	        ind["ted_spread_level"] = df["ted_spread"]
	        ind["ted_spread_velocity"] = compute_velocity(df["ted_spread"], 10)
	    else:
	        fill(["ted_spread_level", "ted_spread_velocity"])

	    if has("hy_credit_spread", "ig_credit_spread"):
	        hy = df["hy_credit_spread"]
	        ig = df["ig_credit_spread"]
	        # A zero IG spread is turned into NaN so the ratio never divides by 0.
	        ind["hy_ig_spread_ratio"] = hy / ig.replace(0, np.nan)
	        ind["hy_spread_velocity"] = compute_velocity(hy, 10)
	        ind["ig_spread_velocity"] = compute_velocity(ig, 10)
	        ind["hy_spread_level"] = hy
	        ind["ig_spread_level"] = ig
	        ind["hy_spread_z_score"] = compute_zscore(hy)
	        ind["ig_spread_z_score"] = compute_zscore(ig)
	    else:
	        fill([
	            "hy_ig_spread_ratio", "hy_spread_velocity", "ig_spread_velocity",
	            "hy_spread_level", "ig_spread_level", "hy_spread_z_score",
	            "ig_spread_z_score",
	        ])

	    if has("yield_curve_10y2y"):
	        ind["yield_curve_level"] = df["yield_curve_10y2y"]
	        ind["yield_curve_velocity"] = compute_velocity(df["yield_curve_10y2y"], 10)
	    else:
	        fill(["yield_curve_level", "yield_curve_velocity"])

	    pair_corr("bond_equity_correlation", "ief", "spy")

	    # ── DIMENSION 2: VOLATILITY ───────────────────────────────────────────────
	    # Prefer the "vix" column and fall back to "vix_fred" when it is absent.
	    vix_col = "vix" if has("vix") else "vix_fred"
	    if has(vix_col):
	        vix = df[vix_col]
	        ind["vix_level"] = vix
	        ind["vix_velocity"] = compute_velocity(vix, 5)
	        ind["vix_z_score"] = compute_zscore(vix)
	        ind["vol_of_vol"] = vix.diff().rolling(21).std()
	    else:
	        fill(["vix_level", "vix_velocity", "vix_z_score", "vol_of_vol"])

	    if has("vix_9d", "vix_3m"):
	        ind["vix_term_structure"] = df["vix_9d"] / df["vix_3m"].replace(0, np.nan)
	    else:
	        ind["vix_term_structure"] = 1.0

	    if has("spy"):
	        ind["realized_vol_spy"] = compute_realized_vol(df["spy"], 21)
	    else:
	        ind["realized_vol_spy"] = 0.0

	    # vix_level, realized_vol_spy and vix_velocity always exist at this point
	    # (real data or placeholder), so no existence checks are needed.
	    ind["implied_realized_vol_gap"] = ind["vix_level"] - ind["realized_vol_spy"]
	    # FIX #5: cross_asset_vol_spike is derived from a Series, never a scalar.
	    ind["cross_asset_vol_spike"] = np.abs(ind["vix_velocity"])

	    # ── DIMENSION 3: CORRELATION ──────────────────────────────────────────────
	    pair_corr("equity_bond_corr", "spy", "ief")
	    pair_corr("equity_gold_corr", "spy", "gld")
	    pair_corr("equity_hy_corr", "spy", "hyg")

	    # Safe haven demand: mean 21-period return of the available havens
	    # minus the SPY return over the same horizon.
	    havens = [t for t in ("gld", "jpyusd", "chfusd") if has(t)]
	    if havens and has("spy"):
	        avg_safe = pd.concat([ret21(t) for t in havens], axis=1).mean(axis=1)
	        ind["safe_haven_demand"] = avg_safe - ret21("spy")
	    else:
	        ind["safe_haven_demand"] = 0.0

	    # Cross-sector correlation: mean and dispersion of all pairwise
	    # 21-period rolling correlations between the available sector returns.
	    sector_tickers = [t for t in ("xlf", "xle", "xlu", "qqq") if has(t)]
	    if len(sector_tickers) >= 2:
	        sector_returns = df[sector_tickers].pct_change()
	        pair_corrs = pd.concat(
	            [
	                sector_returns[left].rolling(21).corr(sector_returns[right])
	                for i, left in enumerate(sector_tickers)
	                for right in sector_tickers[i + 1:]
	            ],
	            axis=1,
	        )
	        ind["cross_sector_correlation"] = pair_corrs.mean(axis=1)
	        ind["correlation_breakdown_score"] = pair_corrs.std(axis=1)
	    else:
	        fill(["cross_sector_correlation", "correlation_breakdown_score"])

	    relative_21d("em_dm_divergence", "spy", "eem")
	    velocity_10d("dollar_stress_signal", "uup")

	    # ── DIMENSION 4: CREDIT ───────────────────────────────────────────────────
	    if has("hy_credit_spread", "ig_credit_spread"):
	        hy_ig_gap = df["hy_credit_spread"] - df["ig_credit_spread"]
	        ind["hy_ig_divergence_velocity"] = compute_velocity(hy_ig_gap, 10)
	    else:
	        ind["hy_ig_divergence_velocity"] = 0.0

	    relative_21d("lqd_hyg_return_spread", "lqd", "hyg")
	    relative_21d("xlf_spy_relative", "xlf", "spy")
	    relative_21d("credit_equity_dislocation", "hyg", "spy")

	    # ── DIMENSION 5: POSITIONING / FLOWS ─────────────────────────────────────
	    velocity_10d("gold_flow_signal", "gld")
	    velocity_10d("jpy_flow_signal", "jpyusd")
	    velocity_10d("chf_flow_signal", "chfusd")

	    relative_21d("utilities_relative", "xlu", "spy")
	    relative_21d("energy_relative", "xle", "spy")
	    velocity_10d("oil_velocity", "uso")

	    if has("eem"):
	        # Sign is flipped so that a falling EEM gives a positive signal.
	        ind["em_outflow_signal"] = compute_velocity(df["eem"], 10) * -1
	    else:
	        ind["em_outflow_signal"] = 0.0

	    # Risk-off composite: all four inputs are always present by now.
	    risk_off_cols = [
	        "gold_flow_signal", "jpy_flow_signal",
	        "chf_flow_signal", "utilities_relative",
	    ]
	    ind["risk_off_composite"] = ind[risk_off_cols].mean(axis=1)

	    # ── FINAL CLEANUP ─────────────────────────────────────────────────────────
	    cleaned = ind.ffill().fillna(0.0)
	    return cleaned.replace([np.inf, -np.inf], 0.0)


	# ── Canonical indicator names: five dimensions, eight indicators each ───────
	INDICATOR_GROUPS = {
	    "Liquidity": [
	        "ted_spread_level",
	        "ted_spread_velocity",
	        "hy_ig_spread_ratio",
	        "hy_spread_velocity",
	        "ig_spread_velocity",
	        "yield_curve_level",
	        "yield_curve_velocity",
	        "bond_equity_correlation",
	    ],
	    "Volatility": [
	        "vix_level",
	        "vix_velocity",
	        "vix_term_structure",
	        "vix_z_score",
	        "realized_vol_spy",
	        "implied_realized_vol_gap",
	        "vol_of_vol",
	        "cross_asset_vol_spike",
	    ],
	    "Correlation": [
	        "equity_bond_corr",
	        "equity_gold_corr",
	        "equity_hy_corr",
	        "safe_haven_demand",
	        "cross_sector_correlation",
	        "em_dm_divergence",
	        "dollar_stress_signal",
	        "correlation_breakdown_score",
	    ],
	    "Credit": [
	        "hy_spread_level",
	        "ig_spread_level",
	        "hy_ig_divergence_velocity",
	        "lqd_hyg_return_spread",
	        "xlf_spy_relative",
	        "credit_equity_dislocation",
	        "ig_spread_z_score",
	        "hy_spread_z_score",
	    ],
	    "Positioning": [
	        "gold_flow_signal",
	        "jpy_flow_signal",
	        "chf_flow_signal",
	        "utilities_relative",
	        "energy_relative",
	        "oil_velocity",
	        "em_outflow_signal",
	        "risk_off_composite",
	    ],
	}

	# Flat list of every indicator name, in group order then in-group order.
	ALL_INDICATORS = [
	    name
	    for members in INDICATOR_GROUPS.values()
	    for name in members
	]