Adisri99's picture
Upload 13 files
e98cfad verified
import numpy as np
import pandas as pd
# Names of the model-input columns produced by build_features(), in order.
# The label column ("target_return_5d") and "sector" are intentionally absent.
FEATURE_COLUMNS = [
    # Percentage change of "close" over 1, 5 and 20 rows, per ticker.
    "ret_1d",
    "ret_5d",
    "ret_20d",
    # Rolling 20-row standard deviation of the 1-row close returns.
    "vol_20d",
    # 10-row mean close divided by 50-row mean close, minus 1.
    "ma_ratio_10_50",
    # 20-row rolling z-score of "volume" (0.0 where undefined).
    "volume_z",
    # Percentage change of the per-date mean close across all tickers.
    "market_return",
    # 5-row rolling mean of ret_20d.
    "momentum_factor",
    # Negated ma_ratio_10_50.
    "value_factor",
]
def build_features(df: pd.DataFrame, sector_map: dict[str, str]) -> pd.DataFrame:
    """Engineer per-ticker features and a 5-row forward-return target.

    All rolling windows and shifts are counted in rows of each ticker's
    date-sorted history (presumably one row per trading day -- confirm
    against the data loader).

    Args:
        df: Market data with at least the columns ``"ticker"``, ``"date"``,
            ``"close"`` and ``"volume"``. The input frame is not modified.
        sector_map: Mapping from ticker to sector label. Tickers missing
            from the mapping are labelled ``"Unknown"``.

    Returns:
        A new frame ordered by ticker then date, with a fresh ``0..n-1``
        index, containing the input columns plus ``market_return``,
        ``ret_1d``, ``ret_5d``, ``ret_20d``, ``vol_20d``, ``ma_ratio_10_50``,
        ``volume_z``, ``momentum_factor``, ``value_factor``,
        ``target_return_5d`` and ``sector``. Rows with a NaN in any column
        or a non-finite (+/-inf) value in any numeric column are removed.

    Raises:
        ValueError: If ``df`` is empty, or if no rows survive the cleanup
            (for example, fewer than ~55 rows per ticker).
    """
    if df.empty:
        raise ValueError("Input market dataframe is empty")

    out = df.sort_values(["ticker", "date"]).copy()

    # Market proxy: change in the cross-ticker mean close from one date to the
    # next. The first date has no predecessor, so its return is set to 0.0.
    market_daily = (
        out.groupby("date")["close"]
        .mean()
        .pct_change()
        .fillna(0.0)
        .rename("market_return")
    )
    out = out.merge(market_daily, on="date", how="left")

    def add_group_features(g: pd.DataFrame) -> pd.DataFrame:
        """Add the feature and target columns for a single ticker's rows."""
        g = g.sort_values("date").copy()
        close = g["close"]
        volume = g["volume"]

        # The 1-row return feeds both ret_1d and the volatility estimate.
        daily_ret = close.pct_change()
        g["ret_1d"] = daily_ret
        g["ret_5d"] = close.pct_change(5)
        g["ret_20d"] = close.pct_change(20)
        g["vol_20d"] = daily_ret.rolling(20).std()

        ma10 = close.rolling(10).mean()
        ma50 = close.rolling(50).mean()
        g["ma_ratio_10_50"] = ma10 / ma50 - 1.0

        vol_mean = volume.rolling(20).mean()
        # A zero std would divide by zero; turn it into NaN so the z-score
        # falls through to the 0.0 fill below instead of becoming inf.
        vol_std = volume.rolling(20).std().replace(0, np.nan)
        g["volume_z"] = ((volume - vol_mean) / vol_std).fillna(0.0)

        g["momentum_factor"] = g["ret_20d"].rolling(5).mean()
        g["value_factor"] = -g["ma_ratio_10_50"]

        # Label: return from this row's close to the close 5 rows later.
        g["target_return_5d"] = close.shift(-5) / close - 1.0
        return g

    pieces = [add_group_features(g) for _, g in out.groupby("ticker")]
    out = pd.concat(pieces, ignore_index=True)
    out["sector"] = out["ticker"].map(sector_map).fillna("Unknown")

    # dropna() does not remove +/-inf, which appears whenever a return or
    # ratio divides by a zero price. Drop those rows explicitly so that no
    # non-finite feature or target value reaches the caller.
    numeric = out.select_dtypes(include=[np.number])
    has_inf = np.isinf(numeric.to_numpy(dtype=float, na_value=np.nan)).any(axis=1)
    out = out.loc[~has_inf].dropna().reset_index(drop=True)

    if out.empty:
        raise ValueError("No usable rows after feature engineering")
    return out