import pandas as pd
import pandas_ta as ta
import numpy as np

from .fracdiff import frac_diff_ffd
from .fractals import get_fractal_features


def _first_column_containing(frame: pd.DataFrame, token: str) -> str:
    """Return the first column label of *frame* that contains *token*.

    Raises:
        IndexError: If no column label contains *token*.
    """
    return [label for label in frame.columns if token in label][0]


def build_advanced_features(data: pd.DataFrame) -> pd.DataFrame:
    """Build the advanced feature matrix for financial ML models.

    Args:
        data: Price frame with "close", "high" and "low" columns. A "volume"
            column is used when present, otherwise "tick_volume"; when
            neither exists, a constant series of ones stands in for volume.

    Returns:
        A frame indexed by the rows of ``data.index`` that survive cleaning.
        Columns, in order: returns/momentum, fractional differentiation,
        Bollinger-band ratios and MACD ratios (each group only when
        pandas_ta returns a result), volatility, four selected fractal
        features, and price-volume-trend features. Infinite values are
        replaced by NaN, gaps are forward-filled, and every row that still
        holds a NaN is dropped.
    """
    px_close = data["close"]
    px_high = data["high"]
    px_low = data["low"]

    # Volume fallback chain: "volume" -> "tick_volume" -> constant ones.
    unit_volume = pd.Series(1, index=data.index)
    tick_volume = data.get("tick_volume", unit_volume)
    traded_volume = data.get("volume", tick_volume)

    feats = pd.DataFrame(index=data.index)

    # 1. Basic returns & momentum
    for column, periods in (("ret_1", 1), ("ret_5", 5), ("ret_20", 20)):
        feats[column] = px_close.pct_change(periods)
    momentum = ta.mom(px_close, length=10)
    feats["mom_10"] = momentum / px_close  # scaled by price
    strength = ta.rsi(px_close, length=14)
    feats["rsi_14"] = strength / 100.0  # rescale by 1/100

    # 2. Fractional differentiation (preserving memory)
    feats["ffd_04"] = frac_diff_ffd(px_close, d=0.4, thres=1e-4)

    # 3. Bollinger Bands, expressed relative to the bands themselves
    bands = ta.bbands(px_close, length=20, std=2)
    if bands is not None:
        # Locate the columns by name pattern rather than by exact label.
        middle = bands[_first_column_containing(bands, "BBM")]
        upper = bands[_first_column_containing(bands, "BBU")]
        lower = bands[_first_column_containing(bands, "BBL")]
        band_span = upper - lower

        # Distance of close from the middle band, as a fraction of it.
        feats["bb_mid_rel"] = (px_close - middle) / middle
        # Bandwidth.
        feats["bb_width"] = band_span / middle
        # %B: position inside the bands; a zero-width band becomes NaN
        # instead of dividing by zero.
        feats["bb_pct_b"] = (px_close - lower) / band_span.replace(0, np.nan)

    # 4. MACD (trend & momentum), scaled by price
    macd_frame = ta.macd(px_close, fast=12, slow=26, signal=9)
    if macd_frame is not None:
        hist_label = _first_column_containing(macd_frame, "MACDh")
        # The MACD line is the column whose label has neither an "h"
        # (histogram) nor an "s" (signal) in it.
        line_label = [
            label
            for label in macd_frame.columns
            if "MACD" in label and not ("h" in label or "s" in label)
        ][0]
        feats["macd_hist"] = macd_frame[hist_label] / px_close
        feats["macd_rel"] = macd_frame[line_label] / px_close

    # 5. Volatility & risk
    atr = ta.atr(px_high, px_low, px_close, length=14)
    feats["atr_rel"] = atr / px_close
    one_bar_returns = px_close.pct_change()
    feats["vol_20"] = one_bar_returns.rolling(20).std()

    # 6. Fractal features (advanced structural points)
    # NOTE(review): fractal features might be slow; n=2 is presumably a
    # small lookback chosen to keep this cheap -- confirm against
    # get_fractal_features.
    fractal_frame = get_fractal_features(data, volatility=atr, n=2)
    # Keep only the most relevant fractal columns to avoid a
    # dimensionality blow-up.
    kept_fractals = [
        "fractal_trend_strength",
        "fractal_trend_direction",
        "distance_to_resistance",
        "distance_to_support",
    ]
    feats = feats.join(fractal_frame[kept_fractals])

    # 7. Volume dynamics
    feats["vpt"] = ta.pvt(px_close, traded_volume)  # Price Volume Trend
    feats["vpt_roc"] = feats["vpt"].pct_change(5)

    # Clean up: infinities -> NaN, forward-fill, drop incomplete rows.
    finite_only = feats.replace([np.inf, -np.inf], np.nan)
    return finite_only.ffill().dropna()