Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import pandas as pd | |
# Ordered list of feature column names. Every name here matches a column that
# build_features() adds to its output frame (presumably the model inputs —
# confirm against the training/inference code that consumes this list).
FEATURE_COLUMNS: list[str] = [
    # Trailing close-to-close returns and volatility.
    "ret_1d",
    "ret_5d",
    "ret_20d",
    "vol_20d",
    # Trend, volume and market-wide signals.
    "ma_ratio_10_50",
    "volume_z",
    "market_return",
    # Derived factor columns.
    "momentum_factor",
    "value_factor",
]
def build_features(df: pd.DataFrame, sector_map: dict[str, str]) -> pd.DataFrame:
    """Add per-ticker features and a 5-row forward-return target to market data.

    Args:
        df: Market data. The columns read here are ``ticker``, ``date``,
            ``close`` and ``volume``; any other columns are carried through.
        sector_map: Mapping from ticker to sector label. Tickers absent from
            the mapping are labelled ``"Unknown"``.

    Returns:
        A new frame ordered by ticker then date with a fresh integer index.
        It holds the input columns plus ``market_return``, ``ret_1d``,
        ``ret_5d``, ``ret_20d``, ``vol_20d``, ``ma_ratio_10_50``,
        ``volume_z``, ``momentum_factor``, ``value_factor``,
        ``target_return_5d`` and ``sector``. Rows containing a missing value
        in any column, or an infinite value in any numeric column, are
        removed.

    Raises:
        ValueError: If ``df`` is empty, or if no row survives the cleaning.
    """
    if df.empty:
        raise ValueError("Input market dataframe is empty")

    out = df.sort_values(["ticker", "date"]).copy()

    # Market proxy: day-over-day change of the cross-sectional mean close.
    # The first date has no predecessor, so its return is set to 0.0.
    market_daily = (
        out.groupby("date")["close"]
        .mean()
        .pct_change()
        .fillna(0.0)
        .rename("market_return")
    )
    out = out.merge(market_daily, on="date", how="left")

    def add_group_features(g: pd.DataFrame) -> pd.DataFrame:
        """Compute the rolling features and target for one ticker's rows."""
        g = g.sort_values("date").copy()
        close = g["close"]
        volume = g["volume"]

        g["ret_1d"] = close.pct_change(1)
        g["ret_5d"] = close.pct_change(5)
        g["ret_20d"] = close.pct_change(20)
        g["vol_20d"] = close.pct_change().rolling(20).std()

        ma10 = close.rolling(10).mean()
        ma50 = close.rolling(50).mean()
        g["ma_ratio_10_50"] = ma10 / ma50 - 1.0

        vol_mean = volume.rolling(20).mean()
        # A zero std would divide by zero; mark it missing so the z-score
        # falls back to 0.0 below instead of becoming +/-inf.
        vol_std = volume.rolling(20).std().replace(0, np.nan)
        g["volume_z"] = ((volume - vol_mean) / vol_std).fillna(0.0)

        g["momentum_factor"] = g["ret_20d"].rolling(5).mean()
        g["value_factor"] = -g["ma_ratio_10_50"]

        # Forward-looking label: uses the close 5 rows ahead, so the last
        # 5 rows of each ticker have no target and are dropped later.
        g["target_return_5d"] = close.shift(-5) / close - 1.0
        return g

    out = pd.concat(
        [add_group_features(g) for _, g in out.groupby("ticker")],
        ignore_index=True,
    )
    out["sector"] = out["ticker"].map(sector_map).fillna("Unknown")

    # dropna() does not treat +/-inf as missing. A zero close yields infinite
    # returns/targets (division by zero), so discard those rows explicitly.
    numeric_cols = out.select_dtypes(include="number").columns
    has_inf = np.isinf(out[numeric_cols]).any(axis=1)
    out = out.loc[~has_inf].dropna().reset_index(drop=True)

    if out.empty:
        raise ValueError("No usable rows after feature engineering")
    return out