Spaces:
No application file
No application file
| import pandas as pd | |
| import pandas_ta as ta | |
| import numpy as np | |
| from .fracdiff import frac_diff_ffd | |
| from .fractals import get_fractal_features | |
def _nan_if_missing(indicator, index):
    """Return ``indicator`` as-is, or an all-NaN float series on ``index`` if it is None."""
    if indicator is None:
        return pd.Series(np.nan, index=index, dtype="float64")
    return indicator


def _find_column(frame, token, exclude=()):
    """Return the first column of ``frame`` whose name contains ``token``.

    Columns whose name contains any of the strings in ``exclude`` are skipped.

    Raises:
        IndexError: if no column matches.
    """
    for name in frame.columns:
        if token in name and not any(skip in name for skip in exclude):
            return name
    raise IndexError(
        f"no column containing {token!r} among {list(frame.columns)!r}"
    )


def build_advanced_features(data: pd.DataFrame) -> pd.DataFrame:
    """
    Constructs an advanced set of features for financial machine learning models.

    Args:
        data: Price frame with "close", "high" and "low" columns. "volume" is
            used when present, otherwise "tick_volume", otherwise a constant
            volume of 1.

    Returns:
        A DataFrame on (a subset of) ``data``'s index with the columns
        ret_1, ret_5, ret_20, mom_10, rsi_14, ffd_04, atr_rel, vol_20,
        the four selected fractal columns, vpt and vpt_roc, plus
        bb_mid_rel / bb_width / bb_pct_b and macd_hist / macd_rel when
        pandas_ta returns Bollinger Bands / MACD. +/-inf values are replaced
        by NaN, gaps are forward-filled, and every row that still contains a
        NaN is dropped, so the result may be empty.

    Raises:
        KeyError: if "close", "high" or "low" is missing from ``data``, or a
            selected fractal column is missing from the fractal helper's output.
        IndexError: if the Bollinger Bands / MACD frames lack the expected
            columns.
    """
    index = data.index
    close = data["close"]
    high = data["high"]
    low = data["low"]

    # Volume fallback chain: real volume -> tick volume -> constant 1.
    volume = data.get("volume", data.get("tick_volume", pd.Series(1, index=index)))

    features = pd.DataFrame(index=index)

    # NOTE: pandas_ta indicators may return None instead of a Series
    # (presumably when the input is too short for the requested length).
    # A missing indicator is turned into an all-NaN series so that the final
    # dropna() discards the affected rows instead of the arithmetic below
    # failing on None.

    # 1. Basic Returns & Momentum
    features["ret_1"] = close.pct_change(1)
    features["ret_5"] = close.pct_change(5)
    features["ret_20"] = close.pct_change(20)
    features["mom_10"] = _nan_if_missing(ta.mom(close, length=10), index) / close
    features["rsi_14"] = _nan_if_missing(ta.rsi(close, length=14), index) / 100.0

    # 2. Fractional Differentiation (Preserving Memory)
    features["ffd_04"] = frac_diff_ffd(close, d=0.4, thres=1e-4)

    # 3. Bollinger Bands (Relative)
    bbands = ta.bbands(close, length=20, std=2)
    if bbands is not None:
        # Locate the band columns by name pattern.
        mid = bbands[_find_column(bbands, "BBM")]
        upper = bbands[_find_column(bbands, "BBU")]
        lower = bbands[_find_column(bbands, "BBL")]
        # Distance from middle band as %
        features["bb_mid_rel"] = (close - mid) / mid
        # Bandwidth
        features["bb_width"] = (upper - lower) / mid
        # %B (Position within bands); a zero-width band maps to NaN rather
        # than dividing by zero.
        features["bb_pct_b"] = (close - lower) / (upper - lower).replace(0, np.nan)

    # 4. MACD (Trend & Momentum)
    macd = ta.macd(close, fast=12, slow=26, signal=9)
    if macd is not None:
        hist_col = _find_column(macd, "MACDh")
        # The MACD line is the MACD column that is neither the histogram
        # ("MACDh...") nor the signal line ("MACDs...").
        macd_col = _find_column(macd, "MACD", exclude=("MACDh", "MACDs"))
        features["macd_hist"] = macd[hist_col] / close
        features["macd_rel"] = macd[macd_col] / close

    # 5. Volatility & Risk
    atr = _nan_if_missing(ta.atr(high, low, close, length=14), index)
    features["atr_rel"] = atr / close
    features["vol_20"] = close.pct_change().rolling(20).std()

    # 6. Fractal Features (Advanced structural points)
    # Note: Fractal features might be slow, using small lookback
    fractal_features = get_fractal_features(data, volatility=atr, n=2)
    # Select only most relevant fractal features to avoid dimensionality blowup
    selected_fractals = [
        "fractal_trend_strength", "fractal_trend_direction",
        "distance_to_resistance", "distance_to_support",
    ]
    features = features.join(fractal_features[selected_fractals])

    # 7. Volume Dynamics
    features["vpt"] = _nan_if_missing(ta.pvt(close, volume), index)  # Price Volume Trend
    features["vpt_roc"] = features["vpt"].pct_change(5)

    # Clean up: +/-inf -> NaN, forward-fill gaps, drop rows that are still incomplete.
    return features.replace([np.inf, -np.inf], np.nan).ffill().dropna()