import numpy as np
import pandas as pd

# Model input columns produced by ``build_features`` (the target column
# ``target_return_5d`` is produced as well but is not a feature).
FEATURE_COLUMNS = [
    "ret_1d",
    "ret_5d",
    "ret_20d",
    "vol_20d",
    "ma_ratio_10_50",
    "volume_z",
    "market_return",
    "momentum_factor",
    "value_factor",
]

# Forward-looking label column computed next to the features.
_TARGET_COLUMN = "target_return_5d"


def _add_ticker_features(g: pd.DataFrame) -> pd.DataFrame:
    """Return a date-sorted copy of one ticker's rows with features and target added.

    All windows and shifts are counted in rows, so "5d"/"20d" are 5/20 trading
    days only if the input has exactly one row per trading day (not checked
    here).
    """
    g = g.sort_values("date").copy()
    close = g["close"]
    volume = g["volume"]

    # Trailing returns over 1, 5 and 20 rows.
    g["ret_1d"] = close.pct_change(1)
    g["ret_5d"] = close.pct_change(5)
    g["ret_20d"] = close.pct_change(20)

    # Realised volatility: std of the 1-row returns over a 20-row window.
    g["vol_20d"] = g["ret_1d"].rolling(20).std()

    # Relative gap between the 10-row and 50-row moving averages of close.
    ma10 = close.rolling(10).mean()
    ma50 = close.rolling(50).mean()
    g["ma_ratio_10_50"] = ma10 / ma50 - 1.0

    # Volume z-score against its own 20-row window. A zero std is turned into
    # NaN to avoid dividing by zero, and any undefined z-score (warm-up rows,
    # constant volume) is reported as 0.0.
    vol_mean = volume.rolling(20).mean()
    vol_std = volume.rolling(20).std().replace(0, np.nan)
    g["volume_z"] = ((volume - vol_mean) / vol_std).fillna(0.0)

    g["momentum_factor"] = g["ret_20d"].rolling(5).mean()
    g["value_factor"] = -g["ma_ratio_10_50"]

    # Label: return from this row's close to the close 5 rows later. The last
    # 5 rows of each ticker have no future close and therefore get NaN.
    g[_TARGET_COLUMN] = close.shift(-5) / close - 1.0
    return g


def build_features(df: pd.DataFrame, sector_map: dict[str, str]) -> pd.DataFrame:
    """Build the per-ticker feature matrix and 5-row-ahead target from market data.

    Args:
        df: Market data with at least ``ticker``, ``date``, ``close`` and
            ``volume``. The input is not modified.
        sector_map: Mapping from ticker to sector name. Tickers missing from
            the mapping are labelled ``"Unknown"``.

    Returns:
        A new frame (fresh ``RangeIndex``) holding the input columns, every
        column in ``FEATURE_COLUMNS``, ``target_return_5d`` and ``sector``.
        Only rows without any missing value are kept, so the warm-up rows at
        the start of each ticker (the 50-row moving average needs 50 rows) and
        the last 5 rows of each ticker are removed. Rows whose engineered
        values are infinite (e.g. caused by a zero ``close``) are removed too.

    Raises:
        ValueError: If ``df`` is empty or no usable row remains.
    """
    if df.empty:
        raise ValueError("Input market dataframe is empty")

    out = df.sort_values(["ticker", "date"]).copy()

    # Market proxy: day-over-day change of the cross-sectional mean close.
    # The first date has no predecessor, so its change is set to 0.0.
    market_daily = (
        out.groupby("date")["close"]
        .mean()
        .pct_change()
        .fillna(0.0)
        .rename("market_return")
    )
    out = out.merge(market_daily, on="date", how="left")

    pieces = [_add_ticker_features(g) for _, g in out.groupby("ticker")]
    if not pieces:
        # groupby skips NaN tickers; without this guard pd.concat([]) would
        # raise an opaque "No objects to concatenate" ValueError.
        raise ValueError("No usable rows after feature engineering")

    out = pd.concat(pieces, ignore_index=True)
    out["sector"] = out["ticker"].map(sector_map).fillna("Unknown")

    # dropna() does not treat +/-inf as missing, so division by a zero price
    # or zero moving average would otherwise leak infinite features/targets
    # into the result. Turn them into NaN so those rows are dropped as well.
    engineered = [
        col for col in (*FEATURE_COLUMNS, _TARGET_COLUMN) if col in out.columns
    ]
    out[engineered] = out[engineered].replace([np.inf, -np.inf], np.nan)

    # NOTE: dropna() looks at every column, so a NaN in a pass-through input
    # column removes the row too.
    out = out.dropna().reset_index(drop=True)
    if out.empty:
        raise ValueError("No usable rows after feature engineering")
    return out