# sol-arb-trainer / features_ohlcv.py
# commanderzee's picture
# v1_arb trainer code for sol
# 9a6e556 verified
"""
OHLCV feature extraction for ARB-MAX. 329 features, hardcoded order.
extract(window_frame, at_tick=120) -> np.ndarray shape (329,)
Uses rolling statistics over ticks [0, at_tick]. Robust to NaN: anything
non-finite is replaced with 0.0 before return.
"""
from __future__ import annotations
from typing import List
import numpy as np
import pandas as pd
# ---------------------------------------------------------------------------
# Horizon lists
# ---------------------------------------------------------------------------
# Look-back lengths, counted in rows (ticks) of the window frame. The "s"
# suffix in the generated feature names suggests one row per second —
# TODO confirm against the data loader.
# Returns; also reused as the horizon set for most of the "padded" groups.
_RETURN_HORIZONS = [1, 5, 15, 30, 60, 180]
# Rolling std of log-returns.
_STD_HORIZONS = [1, 5, 15, 30, 60, 180]
# Rolling mean/std ratio of log-returns.
_SHARPE_HORIZONS = [1, 5, 15, 30, 60, 180]
# Skewness and excess kurtosis of log-returns.
_MOMENT_HORIZONS = [60, 180]
# RSI look-backs.
_RSI_HORIZONS = [30, 60, 180]
# Bollinger-style position / width / z-score look-backs.
_BOLL_HORIZONS = [30, 60, 180]
# Trade-rate / volume-acceleration / taker-buy-ratio look-backs.
_VOL_HORIZONS = [30, 60, 180]
def _build_feature_names() -> List[str]:
    """Return the 329 feature names in their fixed, canonical order.

    The list is assembled from an ordered table of groups. A group is either
    a literal list of names or the expansion of some stems over a horizon
    list, which yields ``"{stem}_{h}s"`` horizon-major (all stems for the
    first horizon, then all stems for the next, ...). The order of the table
    is the order of the output vector, so groups must never be reordered.
    """

    def _expand(horizons, *stems) -> List[str]:
        # Horizon-major expansion: h varies slowest, stem fastest.
        return [f"{stem}_{h}s" for h in horizons for stem in stems]

    all_h = _RETURN_HORIZONS
    long_h = [30, 60, 180]
    slow_h = [60, 180]

    groups = [
        # ---- core block: 50 names ----
        _expand(_RETURN_HORIZONS, "ret"),                      # 6
        _expand(_STD_HORIZONS, "retstd"),                      # 6
        _expand(_SHARPE_HORIZONS, "sharpe"),                   # 6
        _expand(_MOMENT_HORIZONS, "retskew", "retkurt"),       # 4
        ["vwmp", "vw_ret"],                                    # 2
        ["macd", "macd_signal", "macd_hist"],                  # 3
        _expand(_RSI_HORIZONS, "rsi"),                         # 3
        _expand(_BOLL_HORIZONS, "boll_pos", "boll_width", "boll_z"),  # 9
        ["cum_ret_since_0"],                                   # 1
        ["px_over_open"],                                      # 1
        _expand(_VOL_HORIZONS, "trade_rate", "vol_accel", "taker_buy_ratio"),  # 9
        # ---- derived / finer features, padding up to 329 ----
        _expand(all_h, "retmin", "retmax", "retmean"),         # 18 -> 68
        _expand(all_h, "pxmin", "pxmax", "pxmean"),            # 18 -> 86
        _expand(all_h, "range", "atr"),                        # 12 -> 98
        _expand(
            all_h,
            "logret_mean",
            "logret_std",
            "logret_abs_sum",
            "logret_sign_sum",
        ),                                                     # 24 -> 122
        _expand(all_h, "up_count", "dn_count"),                # 12 -> 134
        ["run_length_up", "run_length_dn"],                    # 2 -> 136
        _expand(all_h, "vol_mean", "vol_std", "vol_max", "vol_sum"),  # 24 -> 160
        _expand(all_h, "qvol_mean", "qvol_sum", "qvol_std"),   # 18 -> 178
        _expand(all_h, "trades_mean", "trades_sum", "trades_std"),  # 18 -> 196
        _expand(all_h, "tbbase_sum", "tbquote_sum", "tb_imbalance"),  # 18 -> 214
        _expand(long_h, "px_q05", "px_q25", "px_q50", "px_q75", "px_q95"),  # 15 -> 229
        _expand(slow_h, "autocorr1", "autocorr5", "autocorr15"),  # 6 -> 235
        _expand(slow_h, "vol_z", "trades_z"),                  # 4 -> 239
        [
            "ema5_over_ema30",
            "ema15_over_ema60",
            "ema30_over_ema180",
            "ema60_over_ema180",
        ],                                                     # 4 -> 243
        _expand([5, 15, 60, 180], "accel"),                    # 4 -> 247
        # Hour-of-day / minute-of-hour belong to the state features, not here.
        ["at_tick", "at_tick_over_900"],                       # 2 -> 249
        _expand(
            all_h,
            "high_over_close",
            "low_over_close",
            "hl_range_over_close",
        ),                                                     # 18 -> 267
        _expand(slow_h, "absret_skew", "absret_kurt"),         # 4 -> 271
        _expand(slow_h, "corr_vol_absret"),                    # 2 -> 273
        ["ticks_since_up", "ticks_since_dn"],                  # 2 -> 275
        _expand(slow_h, "max_dd"),                             # 2 -> 277
        _expand(slow_h, "max_ru"),                             # 2 -> 279
        _expand(slow_h, "frac_above_vwmp"),                    # 2 -> 281
        _expand(slow_h, "trend_strength"),                     # 2 -> 283
        [
            "cum_logret_mean",
            "cum_logret_var",
            "cum_logret_skew",
            "cum_logret_kurt",
        ],                                                     # 4 -> 287
        _expand(all_h, "absret_sharpe"),                       # 6 -> 293
        _expand(all_h, "sortino"),                             # 6 -> 299
        _expand(all_h, "ret_rank"),                            # 6 -> 305
        _expand(all_h, "vol_rank"),                            # 6 -> 311
        _expand(all_h, "px_rank"),                             # 6 -> 317
        _expand(all_h, "pos_frac"),                            # 6 -> 323
        [
            "mom_60_180_ratio",
            "vol_60_180_ratio",
            "range_60_180_ratio",
            "trades_60_180_ratio",
            "vwmp_vs_close_pct",
            "realized_vol_full",
        ],                                                     # 6 -> 329
    ]
    return [name for group in groups for name in group]
# Canonical feature order, built once at import time. extract() documents its
# output as aligned with this list.
FEATURE_NAMES: List[str] = _build_feature_names()
# Import-time guard: fail fast if a group is added or removed without keeping
# the total at 329.
assert len(FEATURE_NAMES) == 329, f"expected 329, got {len(FEATURE_NAMES)}"
# ---------------------------------------------------------------------------
# Extraction helpers
# ---------------------------------------------------------------------------
def _tail(arr: np.ndarray, n: int) -> np.ndarray:
    """Return the last ``n`` elements of ``arr``.

    Args:
        arr: 1-D array to slice.
        n: Number of trailing elements wanted.

    Returns:
        A slice of ``arr``: empty when ``n <= 0``, the whole array when it
        holds fewer than ``n`` elements, otherwise its last ``n`` elements.
    """
    if n <= 0:
        # ``arr[-0:]`` is ``arr[0:]`` — the *entire* array — so the empty tail
        # has to be spelled explicitly.
        return arr[:0]
    # Slicing clips at the start, so a short array is returned whole.
    return arr[-n:]
def _safe_ret(series: np.ndarray, h: int) -> float:
    """Simple return of the last element versus the element ``h`` steps back.

    Returns 0.0 when there are not more than ``h`` elements, when either
    endpoint is non-finite, or when the reference value is zero.
    """
    if len(series) > h:
        start, end = series[-h - 1], series[-1]
        if np.isfinite(start) and start != 0 and np.isfinite(end):
            return float(end / start - 1.0)
    return 0.0
def _safe_std(arr: np.ndarray) -> float:
    """NaN-ignoring population standard deviation.

    Returns 0.0 for fewer than two elements or a non-finite result.
    """
    if len(arr) >= 2:
        spread = float(np.nanstd(arr))
        if np.isfinite(spread):
            return spread
    return 0.0
def _safe_mean(arr: np.ndarray) -> float:
    """NaN-ignoring mean; 0.0 for an empty array or a non-finite result."""
    if len(arr) > 0:
        centre = float(np.nanmean(arr))
        if np.isfinite(centre):
            return centre
    return 0.0
def _safe_skew(arr: np.ndarray) -> float:
    """Population skewness of the finite entries of ``arr``.

    Returns 0.0 when fewer than three finite values remain or when they have
    zero standard deviation.
    """
    finite = arr[np.isfinite(arr)]
    if len(finite) < 3:
        return 0.0
    centre = finite.mean()
    sigma = finite.std()
    if sigma == 0:
        return 0.0
    third_moment = ((finite - centre) ** 3).mean()
    return float(third_moment / (sigma ** 3))
def _safe_kurt(arr: np.ndarray) -> float:
    """Population excess kurtosis of the finite entries of ``arr``.

    Returns 0.0 when fewer than four finite values remain or when they have
    zero standard deviation.
    """
    finite = arr[np.isfinite(arr)]
    if len(finite) < 4:
        return 0.0
    centre = finite.mean()
    sigma = finite.std()
    if sigma == 0:
        return 0.0
    fourth_moment = ((finite - centre) ** 4).mean()
    return float(fourth_moment / (sigma ** 4) - 3.0)
def _ema(arr: np.ndarray, span: int) -> float:
    """Final value of an exponential moving average over ``arr``.

    The average is seeded with the first finite element and updated with
    ``alpha = 2 / (span + 1)`` for every later finite element; non-finite
    elements are skipped.

    Args:
        arr: 1-D sequence of values, oldest first.
        span: EMA span used to derive the smoothing factor.

    Returns:
        The EMA after the last finite element, or 0.0 when ``arr`` is empty
        or holds no finite value.
    """
    values = np.asarray(arr, dtype=np.float64)
    # Drop non-finite entries up front. Seeding from a raw ``arr[0]`` that is
    # NaN/inf would poison every later update and return NaN.
    finite = values[np.isfinite(values)]
    if len(finite) == 0:
        return 0.0
    alpha = 2.0 / (span + 1.0)
    out = finite[0]
    for v in finite[1:]:
        out = alpha * v + (1.0 - alpha) * out
    return float(out)
def _rsi(prices: np.ndarray, n: int) -> float:
    """RSI over the last ``n`` price changes, from plain sums of gains/losses.

    Returns the neutral value 50.0 when there are not more than ``n`` prices
    or when the window has neither gains nor losses, and 100.0 when it has
    gains but no losses.
    """
    if len(prices) <= n:
        return 50.0
    moves = np.diff(prices[-(n + 1):])
    up_total = moves[moves > 0].sum()
    down_total = -moves[moves < 0].sum()
    if down_total == 0:
        if up_total > 0:
            return 100.0
        return 50.0
    strength = up_total / down_total
    return float(100.0 - 100.0 / (1.0 + strength))
def _rank_of_last(arr: np.ndarray) -> float:
    """Fraction of finite entries that are <= the last finite entry.

    Returns 0.5 when ``arr`` holds no finite value.
    """
    finite = arr[np.isfinite(arr)]
    if len(finite) == 0:
        return 0.5
    reference = finite[-1]
    at_or_below = finite <= reference
    return float(at_or_below.mean())
def _autocorr(arr: np.ndarray, lag: int) -> float:
    """Pearson correlation between ``arr`` and itself shifted by ``lag``.

    Only positions where both the leading and the lagged value are finite are
    used. Returns 0.0 when ``arr`` is too short for the lag, when fewer than
    three usable pairs remain, or when either side is constant.
    """
    if len(arr) <= lag + 1:
        return 0.0
    lead, follow = arr[:-lag], arr[lag:]
    usable = np.isfinite(lead) & np.isfinite(follow)
    if usable.sum() < 3:
        return 0.0
    lead, follow = lead[usable], follow[usable]
    # corrcoef would divide by zero for a constant series.
    if lead.std() == 0 or follow.std() == 0:
        return 0.0
    return float(np.corrcoef(lead, follow)[0, 1])
# ---------------------------------------------------------------------------
# Main extract
# ---------------------------------------------------------------------------
def extract(window_frame: pd.DataFrame, at_tick: int = 120) -> np.ndarray:
    """Produce 329 features aligned with FEATURE_NAMES.

    Only rows ``[0, at_tick]`` of ``window_frame`` (positional, inclusive) are
    read. The caller's frame is never modified.

    Args:
        window_frame: One window of OHLCV rows. ``close``, ``open``, ``high``
            and ``low`` are required. ``volume``, ``trades``,
            ``quote_volume``, ``taker_buy_base`` and ``taker_buy_quote`` are
            optional and treated as all-zero when absent.
        at_tick: Positional index of the last row to include.

    Returns:
        ``float32`` array of shape ``(329,)`` in FEATURE_NAMES order, with
        every non-finite value replaced by 0.0. An empty window yields an
        all-zero vector.

    Raises:
        KeyError: If one of the required price columns is missing.
    """
    df = window_frame.iloc[: at_tick + 1]
    n = len(df)
    # ``close`` is forward-filled in place below, so take an explicit copy:
    # ``to_numpy`` may hand back a view of the caller's data (or a read-only
    # array under pandas Copy-on-Write).
    close = df["close"].to_numpy(dtype=np.float64, copy=True)
    open_ = df["open"].to_numpy(dtype=np.float64)
    high = df["high"].to_numpy(dtype=np.float64)
    low = df["low"].to_numpy(dtype=np.float64)
    volume = df["volume"].to_numpy(dtype=np.float64) if "volume" in df.columns else np.zeros(n)
    trades = df["trades"].to_numpy(dtype=np.float64) if "trades" in df.columns else np.zeros(n)

    if n == 0:
        # Nothing to compute from; everything below indexes [0] / [-1].
        return np.zeros(329, dtype=np.float32)

    # Forward-fill close gaps for return math (in-case first ticks missing)
    if not np.all(np.isfinite(close)):
        last = np.nan
        for i, v in enumerate(close):
            if np.isfinite(v):
                last = v
            else:
                close[i] = last
        # if still nan at start, use the first finite
        if np.isnan(close[0]):
            for v in close:
                if np.isfinite(v):
                    close[:] = np.where(np.isfinite(close), close, v)
                    break
    # Anything still non-finite (e.g. an all-NaN column) becomes 0.
    close = np.nan_to_num(close, nan=0.0, posinf=0.0, neginf=0.0)

    # log returns
    with np.errstate(divide="ignore", invalid="ignore"):
        log_close = np.where(close > 0, np.log(close), 0.0)
    logrets = np.diff(log_close, prepend=log_close[0])  # first entry is 0

    out: List[float] = []

    # ret_{h}s (6)
    for h in _RETURN_HORIZONS:
        out.append(_safe_ret(close, h))

    # retstd_{h}s (6) — on log-returns window
    for h in _STD_HORIZONS:
        out.append(_safe_std(_tail(logrets, h)))

    # sharpe_{h}s (6)
    for h in _SHARPE_HORIZONS:
        w = _tail(logrets, h)
        m = _safe_mean(w)
        s = _safe_std(w)
        out.append(m / s if s > 0 else 0.0)

    # higher moments (4): skew, kurt at 60/180
    for h in _MOMENT_HORIZONS:
        w = _tail(logrets, h)
        out.append(_safe_skew(w))
        out.append(_safe_kurt(w))

    # VWMP & VW return (2)
    # nansum keeps the guard consistent with the NaN-ignoring sums below; a
    # plain sum() turns NaN on a single missing volume and would silently
    # force the unweighted fallback.
    if np.nansum(volume) > 0:
        vwmp = float(np.nansum(close * volume) / max(np.nansum(volume), 1e-9))
    else:
        vwmp = _safe_mean(close)
    out.append(vwmp)
    out.append((close[-1] / vwmp - 1.0) if vwmp > 0 else 0.0)

    # MACD (3)
    ema12 = _ema(close, 12)
    ema26 = _ema(close, 26)
    macd = ema12 - ema26
    # signal = ema9 of macd history — approximate over last 9 ticks.
    # The history starts at prefix length min(26, n), but only its last nine
    # entries feed the signal, so only those prefixes are evaluated.
    first_k = max(min(26, n), n - 8)
    macd_hist_series = [
        _ema(close[:k], 12) - _ema(close[:k], 26) for k in range(first_k, n + 1)
    ]
    macd_signal = _ema(
        np.array(macd_hist_series) if macd_hist_series else np.array([0.0]), 9
    )
    out.append(macd)
    out.append(macd_signal)
    out.append(macd - macd_signal)

    # RSI (3)
    for h in _RSI_HORIZONS:
        out.append(_rsi(close, h))

    # Bollinger position (9): pos, width, z-score at 30/60/180
    for h in _BOLL_HORIZONS:
        w = _tail(close, h)
        m = _safe_mean(w)
        s = _safe_std(w)
        mn = float(np.nanmin(w)) if len(w) else 0.0
        mx = float(np.nanmax(w)) if len(w) else 0.0
        denom = (mx - mn) if (mx - mn) != 0 else 1.0
        pos = (close[-1] - mn) / denom
        width = (mx - mn) / m if m != 0 else 0.0
        z = (close[-1] - m) / s if s > 0 else 0.0
        out.append(pos)
        out.append(width)
        out.append(z)

    # Cumulative return since t=0 (1)
    out.append((close[-1] / close[0] - 1.0) if close[0] > 0 else 0.0)

    # Price vs window_open ratio (1)
    wopen = open_[0] if np.isfinite(open_[0]) and open_[0] > 0 else close[0]
    out.append((close[-1] / wopen - 1.0) if wopen > 0 else 0.0)

    # Trade intensity + volume accel + taker buy ratio (9)
    for h in _VOL_HORIZONS:
        tw = _tail(trades, h)
        vw = _tail(volume, h)
        out.append(_safe_mean(tw))
        # accel = latest vol vs mean vol
        mv = _safe_mean(vw)
        out.append((volume[-1] - mv) if np.isfinite(volume[-1]) else 0.0)
        # taker_buy_ratio — stub 0.5 (not always populated on df at this tick)
        out.append(0.5)

    # -------- Padded features ---------
    # retmin/max/mean at 6 horizons (18)
    for h in _RETURN_HORIZONS:
        w = _tail(logrets, h)
        out.append(float(np.nanmin(w)) if len(w) else 0.0)
        out.append(float(np.nanmax(w)) if len(w) else 0.0)
        out.append(_safe_mean(w))

    # pxmin/max/mean at 6 horizons (18)
    for h in _RETURN_HORIZONS:
        w = _tail(close, h)
        out.append(float(np.nanmin(w)) if len(w) else 0.0)
        out.append(float(np.nanmax(w)) if len(w) else 0.0)
        out.append(_safe_mean(w))

    # range/atr at 6 horizons (12)
    for h in _RETURN_HORIZONS:
        hi = _tail(high, h)
        lo = _tail(low, h)
        rng = (float(np.nanmax(hi)) - float(np.nanmin(lo))) if len(hi) else 0.0
        atr = _safe_mean(hi - lo) if len(hi) == len(lo) else 0.0
        out.append(rng)
        out.append(atr)

    # log-ret stats (24)
    for h in _RETURN_HORIZONS:
        w = _tail(logrets, h)
        out.append(_safe_mean(w))
        out.append(_safe_std(w))
        out.append(float(np.nansum(np.abs(w))))
        out.append(float(np.nansum(np.sign(w))))

    # up/down counts (12)
    for h in _RETURN_HORIZONS:
        w = _tail(logrets, h)
        out.append(float((w > 0).sum()))
        out.append(float((w < 0).sum()))

    # run lengths (2): walk back from the latest tick and count the current
    # streak; a flat tick or a change of direction ends it.
    sign = np.sign(logrets)
    up_run = 0
    dn_run = 0
    for v in sign[::-1]:
        if v > 0:
            if dn_run == 0:
                up_run += 1
            else:
                break
        elif v < 0:
            if up_run == 0:
                dn_run += 1
            else:
                break
        else:
            break
    out.append(float(up_run))
    out.append(float(dn_run))

    # volume stats (24)
    for h in _RETURN_HORIZONS:
        w = _tail(volume, h)
        out.append(_safe_mean(w))
        out.append(_safe_std(w))
        out.append(float(np.nanmax(w)) if len(w) else 0.0)
        out.append(float(np.nansum(w)))

    # quote volume stats (18) — if column present
    qv = (
        df["quote_volume"].to_numpy(dtype=np.float64)
        if "quote_volume" in df.columns
        else np.zeros(n)
    )
    for h in _RETURN_HORIZONS:
        w = _tail(qv, h)
        out.append(_safe_mean(w))
        out.append(float(np.nansum(w)))
        out.append(_safe_std(w))

    # trades stats (18)
    for h in _RETURN_HORIZONS:
        w = _tail(trades, h)
        out.append(_safe_mean(w))
        out.append(float(np.nansum(w)))
        out.append(_safe_std(w))

    # taker buy stats (18)
    tbb = (
        df["taker_buy_base"].to_numpy(dtype=np.float64)
        if "taker_buy_base" in df.columns
        else np.zeros(n)
    )
    tbq = (
        df["taker_buy_quote"].to_numpy(dtype=np.float64)
        if "taker_buy_quote" in df.columns
        else np.zeros(n)
    )
    for h in _RETURN_HORIZONS:
        wb = _tail(tbb, h)
        wq = _tail(tbq, h)
        wv = _tail(volume, h)
        out.append(float(np.nansum(wb)))
        out.append(float(np.nansum(wq)))
        denom = max(float(np.nansum(wv)), 1e-9)
        out.append(float(np.nansum(wb)) / denom)

    # price quantiles at 30/60/180 (15)
    for h in [30, 60, 180]:
        w = _tail(close, h)
        if len(w):
            qs = np.nanquantile(w, [0.05, 0.25, 0.5, 0.75, 0.95])
        else:
            qs = np.zeros(5)
        out.extend(float(x) for x in qs)

    # autocorr at 60/180 for lags 1/5/15 (6)
    for h in [60, 180]:
        w = _tail(logrets, h)
        out.append(_autocorr(w, 1))
        out.append(_autocorr(w, 5))
        out.append(_autocorr(w, 15))

    # vol & trades z at 60/180 (4)
    for h in [60, 180]:
        vw = _tail(volume, h)
        m, s = _safe_mean(vw), _safe_std(vw)
        out.append(((volume[-1] - m) / s) if s > 0 else 0.0)
        tw = _tail(trades, h)
        m, s = _safe_mean(tw), _safe_std(tw)
        out.append(((trades[-1] - m) / s) if s > 0 else 0.0)

    # EMA ratios (4)
    def _ema_ratio(a, b):
        e_a = _ema(close, a)
        e_b = _ema(close, b)
        return (e_a / e_b - 1.0) if e_b > 0 else 0.0

    out.append(_ema_ratio(5, 30))
    out.append(_ema_ratio(15, 60))
    out.append(_ema_ratio(30, 180))
    out.append(_ema_ratio(60, 180))

    # accel at 5/15/60/180 (4): h-return now minus the h-return one horizon ago
    for h in [5, 15, 60, 180]:
        if n > 2 * h:
            r_now = _safe_ret(close, h)
            r_prev = (
                float(close[-h - 1] / close[-2 * h - 1] - 1.0)
                if close[-2 * h - 1] > 0
                else 0.0
            )
            out.append(r_now - r_prev)
        else:
            out.append(0.0)

    # tick positions (2)
    out.append(float(at_tick))
    out.append(float(at_tick) / 900.0)

    # HLC patterns at 6 horizons (18)
    for h in _RETURN_HORIZONS:
        hi = _tail(high, h)
        lo = _tail(low, h)
        cl = _tail(close, h)
        c = cl[-1] if len(cl) else 1.0
        if c == 0 or not np.isfinite(c):
            c = 1.0  # neutral divisor so the ratios stay defined
        hmax = float(np.nanmax(hi)) if len(hi) else c
        lmin = float(np.nanmin(lo)) if len(lo) else c
        out.append(hmax / c)
        out.append(lmin / c)
        out.append((hmax - lmin) / c)

    # absret skew/kurt at 60/180 (4)
    for h in [60, 180]:
        w = _tail(np.abs(logrets), h)
        out.append(_safe_skew(w))
        out.append(_safe_kurt(w))

    # corr(vol, |ret|) at 60/180 (2)
    for h in [60, 180]:
        w1 = _tail(volume, h)
        w2 = _tail(np.abs(logrets), h)
        if len(w1) > 3 and len(w2) > 3 and w1.std() > 0 and w2.std() > 0:
            out.append(float(np.corrcoef(w1, w2)[0, 1]))
        else:
            out.append(0.0)

    # ticks since last up / dn (2); n when no such move exists in the window
    up_idx = np.where(logrets > 0)[0]
    dn_idx = np.where(logrets < 0)[0]
    out.append(float(n - 1 - up_idx[-1]) if len(up_idx) else float(n))
    out.append(float(n - 1 - dn_idx[-1]) if len(dn_idx) else float(n))

    # max drawdown / runup at 60/180 (4)
    for h in [60, 180]:
        w = _tail(close, h)
        if len(w) < 2:
            out.append(0.0)
            continue
        roll_max = np.maximum.accumulate(w)
        out.append(float(np.nanmin(w / roll_max - 1.0)))
    for h in [60, 180]:
        w = _tail(close, h)
        if len(w) < 2:
            out.append(0.0)
            continue
        roll_min = np.minimum.accumulate(w)
        out.append(float(np.nanmax(w / roll_min - 1.0)))

    # fraction above vwmp at 60/180 (2)
    for h in [60, 180]:
        w = _tail(close, h)
        out.append(float((w > vwmp).mean()) if len(w) else 0.0)

    # trend strength proxy at 60/180 (2)
    for h in [60, 180]:
        w = _tail(close, h)
        if len(w) < 3:
            out.append(0.0)
            continue
        x = np.arange(len(w), dtype=np.float64)
        # slope via np.polyfit deg=1, rescaled by mean price
        try:
            slope = float(np.polyfit(x, w, 1)[0])
        except Exception:
            # Best-effort feature: a failed fit counts as "no trend".
            slope = 0.0
        m = _safe_mean(w)
        out.append(slope / m if m != 0 else 0.0)

    # cumulative logret mean/var/skew/kurt over window so far (4)
    out.append(_safe_mean(logrets))
    out.append(float(np.nanvar(logrets)))
    out.append(_safe_skew(logrets))
    out.append(_safe_kurt(logrets))

    # absret sharpe at 6 horizons (6)
    for h in _RETURN_HORIZONS:
        w = _tail(np.abs(logrets), h)
        m = _safe_mean(w)
        s = _safe_std(w)
        out.append(m / s if s > 0 else 0.0)

    # sortino proxy at 6 horizons (6) — mean / downside_std
    for h in _RETURN_HORIZONS:
        w = _tail(logrets, h)
        m = _safe_mean(w)
        dn = w[w < 0]
        s = _safe_std(dn)
        out.append(m / s if s > 0 else 0.0)

    # ret rank at 6 horizons (6)
    for h in _RETURN_HORIZONS:
        out.append(_rank_of_last(_tail(logrets, h)))

    # vol rank at 6 horizons (6)
    for h in _RETURN_HORIZONS:
        out.append(_rank_of_last(_tail(volume, h)))

    # px rank at 6 horizons (6)
    for h in _RETURN_HORIZONS:
        out.append(_rank_of_last(_tail(close, h)))

    # pos frac at 6 horizons (6)
    for h in _RETURN_HORIZONS:
        w = _tail(logrets, h)
        out.append(float((w > 0).mean()) if len(w) else 0.0)

    # final 6 summary ratios
    r60 = _safe_ret(close, 60)
    r180 = _safe_ret(close, 180)
    v60 = _safe_mean(_tail(volume, 60))
    v180 = _safe_mean(_tail(volume, 180))
    rng60 = (
        (float(np.nanmax(_tail(high, 60))) - float(np.nanmin(_tail(low, 60))))
        if n >= 60
        else 0.0
    )
    rng180 = (
        (float(np.nanmax(_tail(high, 180))) - float(np.nanmin(_tail(low, 180))))
        if n >= 180
        else 0.0
    )
    t60 = _safe_mean(_tail(trades, 60))
    t180 = _safe_mean(_tail(trades, 180))
    out.append(r60 / r180 if r180 != 0 else 0.0)
    out.append(v60 / v180 if v180 != 0 else 0.0)
    out.append(rng60 / rng180 if rng180 != 0 else 0.0)
    out.append(t60 / t180 if t180 != 0 else 0.0)
    out.append((vwmp / close[-1] - 1.0) if close[-1] > 0 else 0.0)
    out.append(float(np.sqrt(np.nansum(logrets ** 2))))

    arr = np.asarray(out, dtype=np.float64)
    assert arr.shape[0] == 329, f"produced {arr.shape[0]} features, expected 329"
    # finalize — replace non-finite with 0
    arr = np.where(np.isfinite(arr), arr, 0.0).astype(np.float32)
    return arr
# Public API: the canonical name list and the extractor; helpers stay private.
__all__ = ["FEATURE_NAMES", "extract"]