| """ |
| ML-3m-trader Feature Engineering |
| ================================= |
| Vectorized computation of all technical indicators using NumPy/Pandas. |
| No external TA library required. |
| """ |
|
|
| import numpy as np |
| import pandas as pd |
|
|
| import config as cfg |
|
|
|
|
| |
| |
| |
|
|
def sma(series: pd.Series, period: int) -> pd.Series:
    """
    Simple Moving Average.

    Returns the rolling arithmetic mean of *series* over *period* rows.
    Positions that do not yet have *period* observations are NaN.
    """
    full_window = series.rolling(period, min_periods=period)
    return full_window.mean()
|
|
|
|
def double_moving_average_signal(close: pd.Series) -> pd.Series:
    """
    Double Moving Average crossover signal.

    Compares the fast SMA (cfg.SMA_FAST_PERIOD) with the slow SMA
    (cfg.SMA_SLOW_PERIOD) of *close* and encodes the relation as:

         1 -> fast average is above the slow average (bullish)
        -1 -> fast average is below the slow average (bearish)
         0 -> averages are equal, or not enough data yet

    The returned Series shares the index of *close*.
    """
    fast_avg = sma(close, cfg.SMA_FAST_PERIOD)
    slow_avg = sma(close, cfg.SMA_SLOW_PERIOD)

    # Comparisons involving NaN are False, so warm-up rows fall through to 0.
    direction = np.select(
        [fast_avg > slow_avg, fast_avg < slow_avg],
        [1, -1],
        default=0,
    )
    signal = pd.Series(direction, index=close.index)

    # Explicitly neutralise rows where the slow average is still undefined.
    signal.loc[slow_avg.isna()] = 0
    return signal
|
|
|
|
def vroc(volume: pd.Series, period: int = cfg.VROC_PERIOD) -> pd.Series:
    """
    Volume Rate of Change, in percent.

    For each row, measures how much *volume* changed relative to the value
    *period* rows earlier. Rows whose reference volume is zero (or not yet
    available) yield NaN rather than an infinite ratio.
    """
    reference = volume.shift(period)
    change = volume - reference
    # A zero reference volume would divide by zero; turn it into NaN instead.
    safe_reference = reference.replace(0, np.nan)
    return (change / safe_reference) * 100.0
|
|
|
|
def synthetic_vix(close: pd.Series, period: int = cfg.VIX_PROXY_PERIOD) -> pd.Series:
    """
    Synthetic VIX proxy.

    Rolling standard deviation of bar-to-bar log-returns over *period* rows,
    annualized and expressed as a percentage. Rows without a full window of
    returns are NaN.
    """
    # Annualization: bars per day times 252, square-rooted.
    # NOTE(review): 6.5 * 60 presumably models a 6.5-hour trading session and
    # 252 the number of trading days per year -- confirm against the market.
    session_bars = (6.5 * 60) / cfg.TIMEFRAME_MINUTES
    scale = np.sqrt(252 * session_bars)

    returns = np.log(close / close.shift(1))
    volatility = returns.rolling(window=period, min_periods=period).std()
    return volatility * scale * 100.0
|
|
|
|
def momentum_strength_index(close: pd.Series,
                            period: int = cfg.MOMENTUM_SI_PERIOD) -> pd.Series:
    """
    Momentum Strength Index (MSI).

    Percentage (0-100) of the last *period* bars whose close was higher than
    the previous bar's close. It is count-based: the size of each move is
    ignored, only its direction matters. Rows without a full window are NaN.
    """
    # 1.0 where the close rose versus the previous bar, 0.0 otherwise
    # (the very first bar has no predecessor and therefore counts as 0.0).
    rose = close.diff().gt(0).astype(float)
    up_bars = rose.rolling(window=period, min_periods=period).sum()
    return up_bars / period * 100.0
|
|
|
|
def _wilder_smooth(values: pd.Series, period: int) -> pd.Series:
    """
    Wilder's smoothing (running-average form), used by ADX.

    The first output is the arithmetic mean of the first *period* samples;
    every later output is ``prev * (1 - 1/period) + value * (1/period)``.
    Seeding with the mean (not the sum) keeps the seed on the same scale as
    the recursion, so a constant input produces a constant output.

    Parameters
    ----------
    values : pd.Series
        Numeric samples to smooth. Leading NaNs are skipped: the warm-up
        window starts at the first non-NaN sample.
    period : int
        Smoothing length; must be >= 1.

    Returns
    -------
    pd.Series
        Float Series with the same index and name as *values*. Entries before
        the warm-up window is complete are NaN. If the input is empty, all
        NaN, or too short to fill one window, every entry is NaN.

    Raises
    ------
    ValueError
        If *period* is smaller than 1.
    """
    if period < 1:
        raise ValueError(f"period must be >= 1, got {period}")

    # Work on a plain float array: the recurrence is inherently sequential,
    # and element access on an ndarray is far cheaper than Series.iloc.
    raw = values.to_numpy(dtype=float)
    size = raw.shape[0]
    smoothed = np.full(size, np.nan)

    valid_positions = np.flatnonzero(~np.isnan(raw))
    if valid_positions.size == 0:
        return pd.Series(smoothed, index=values.index, name=values.name)

    first_valid = int(valid_positions[0])
    seed_pos = first_valid + period - 1
    if seed_pos >= size:
        # Not enough samples to complete a single warm-up window.
        return pd.Series(smoothed, index=values.index, name=values.name)

    # nanmean tolerates stray NaNs inside the warm-up window; the window
    # always contains at least the sample at first_valid.
    current = float(np.nanmean(raw[first_valid:seed_pos + 1]))
    smoothed[seed_pos] = current

    alpha = 1.0 / period
    keep = 1.0 - alpha
    for offset, sample in enumerate(raw[seed_pos + 1:], start=seed_pos + 1):
        # NaN != NaN: an undefined sample carries the previous value forward
        # instead of turning every later output into NaN.
        if sample == sample:
            current = current * keep + sample * alpha
        smoothed[offset] = current

    return pd.Series(smoothed, index=values.index, name=values.name)
|
|
|
|
def adx(high: pd.Series, low: pd.Series, close: pd.Series,
        period: int = cfg.ADX_PERIOD) -> pd.Series:
    """
    Average Directional Index via Wilder's method.

    Steps: true range and +/- directional movement per bar, each smoothed
    with ``_wilder_smooth``; the smoothed movements are divided by the
    smoothed true range to get +DI / -DI; DX is the absolute DI spread over
    the DI total (times 100); the returned ADX line is DX smoothed once more.

    Zero denominators (smoothed true range, or +DI plus -DI) are replaced by
    NaN so no division by zero occurs. Leading values are NaN while the
    smoothing warms up. The result is nominally on a 0-100 scale.
    """
    close_before = close.shift(1)

    # True range: the widest of the bar's own range and the two gaps
    # measured from the previous close.
    true_range = pd.concat(
        [
            high - low,
            (high - close_before).abs(),
            (low - close_before).abs(),
        ],
        axis=1,
    ).max(axis=1)

    # Directional movement: only the larger of the two moves counts, and
    # only when it is positive. Comparisons with the NaN first row are
    # False, so that row gets 0 on both sides.
    up_move = high - high.shift(1)
    down_move = low.shift(1) - low
    dm_up = pd.Series(
        np.where(up_move > down_move, np.maximum(up_move, 0), 0),
        index=high.index,
        dtype=float,
    )
    dm_down = pd.Series(
        np.where(down_move > up_move, np.maximum(down_move, 0), 0),
        index=high.index,
        dtype=float,
    )

    smoothed_range = _wilder_smooth(true_range, period).replace(0, np.nan)
    di_up = 100.0 * _wilder_smooth(dm_up, period) / smoothed_range
    di_down = 100.0 * _wilder_smooth(dm_down, period) / smoothed_range

    di_spread = (di_up - di_down).abs()
    di_total = (di_up + di_down).replace(0, np.nan)
    directional_index = 100.0 * di_spread / di_total
    return _wilder_smooth(directional_index, period)
|
|
|
|
def time_features(dt_series: pd.Series) -> pd.DataFrame:
    """
    Cyclical (sin/cos) time encodings for a datetime Series.

    Produces six columns, indexed like *dt_series*:
    ``hour_sin``/``hour_cos`` (fractional hour on a 24-step cycle),
    ``minute_sin``/``minute_cos`` (minute on a 60-step cycle) and
    ``dow_sin``/``dow_cos`` (day of week on a 5-step cycle).
    """
    two_pi = 2 * np.pi
    stamps = dt_series.dt

    minute_of_hour = stamps.minute
    fractional_hour = stamps.hour + minute_of_hour / 60.0
    weekday = stamps.dayofweek

    hour_angle = two_pi * fractional_hour / 24.0
    minute_angle = two_pi * minute_of_hour / 60.0
    # NOTE(review): a cycle length of 5 presumably reflects a Mon-Fri trading
    # week; weekend timestamps would alias onto weekdays -- confirm.
    weekday_angle = two_pi * weekday / 5.0

    encoded = {
        "hour_sin": np.sin(hour_angle),
        "hour_cos": np.cos(hour_angle),
        "minute_sin": np.sin(minute_angle),
        "minute_cos": np.cos(minute_angle),
        "dow_sin": np.sin(weekday_angle),
        "dow_cos": np.cos(weekday_angle),
    }
    return pd.DataFrame(encoded, index=dt_series.index)
|
|
|
|
| |
| |
| |
|
|
def build_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute all technical features from raw OHLCV data.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain columns: time, open, high, low, close, volume.
        The input frame is not modified; work happens on a copy.

    Returns
    -------
    pd.DataFrame
        The original columns followed by the indicator columns and the
        cyclical time columns. Every row containing a NaN in any column is
        removed and the index is reset to 0..n-1.
    """
    frame = df.copy()

    # Indicator columns, listed in the order they are appended to the frame.
    indicators = {
        "sma_fast": sma(frame["close"], cfg.SMA_FAST_PERIOD),
        "sma_slow": sma(frame["close"], cfg.SMA_SLOW_PERIOD),
        "dma_signal": double_moving_average_signal(frame["close"]),
        "vroc": vroc(frame["volume"], cfg.VROC_PERIOD),
        "vix_proxy": synthetic_vix(frame["close"], cfg.VIX_PROXY_PERIOD),
        "msi": momentum_strength_index(frame["close"], cfg.MOMENTUM_SI_PERIOD),
        "adx": adx(frame["high"], frame["low"], frame["close"], cfg.ADX_PERIOD),
    }
    for column, series in indicators.items():
        frame[column] = series

    # Time encodings need a real datetime column; parse it if necessary.
    if not pd.api.types.is_datetime64_any_dtype(frame["time"]):
        frame["time"] = pd.to_datetime(frame["time"])
    frame = pd.concat([frame, time_features(frame["time"])], axis=1)

    # Remove rows with any NaN (indicator look-back rows among them).
    frame = frame.dropna().reset_index(drop=True)

    print(f"[INFO] Features built: {frame.shape[1]} columns, {len(frame):,} rows "
          f"(dropped {len(df) - len(frame):,} warm-up rows)")
    return frame
|
|
|
|
def get_feature_columns() -> list:
    """
    Return the ordered names of the columns fed to the model.

    The order is: raw OHLCV columns, then the technical indicators, then the
    cyclical time encodings. A fresh list is built on every call.
    """
    raw_columns = ["open", "high", "low", "close", "volume"]
    indicator_columns = [
        "sma_fast", "sma_slow", "dma_signal",
        "vroc", "vix_proxy", "msi", "adx",
    ]
    time_columns = [
        "hour_sin", "hour_cos",
        "minute_sin", "minute_cos",
        "dow_sin", "dow_cos",
    ]
    return [*raw_columns, *indicator_columns, *time_columns]
|
|