# eth-arb-trainer / features_state.py
# commanderzee's picture
# v1_arb trainer code for eth
# 03d9e7b verified
"""
State / time / arb-awareness features for ARB-MAX. 15 features, fixed order.
"""
from __future__ import annotations
import math
from typing import List
import numpy as np
import pandas as pd
# Names of the entries in the vector returned by extract(), in output order.
FEATURE_NAMES: List[str] = [
    # -- position inside the 900-tick window --
    "tick_norm",  # at_tick / 900
    "seconds_remaining",  # 900 - at_tick
    "log_seconds_remaining",  # log(max(900 - at_tick + 1, 1))
    # -- best combined ask (up_ask1 + dn_ask1) seen over rows 0..at_tick --
    "min_combined_cost_so_far",  # minimum of the combined ask
    "current_best_arb_opportunity",  # that minimum minus 1.0
    # -- open-leg placeholders: always 0 at feature-extraction time --
    "leg_side_stub",
    "leg_cost_stub",
    "leg_age_stub",
    # -- hedge price that breaks even after the entry-side fee per share --
    "required_hedge_px_for_be_up",  # 1.0 - up_ask_now - fee(up_ask_now)
    "required_hedge_px_for_be_dn",  # 1.0 - dn_ask_now - fee(dn_ask_now)
    # -- current state relative to the best combined ask --
    "time_since_best_combined_cost_seen",  # at_tick - row index of the minimum
    "has_potential_arb_been_available_yet",  # 1 if the minimum is below 1
    "current_combined_cost",  # up_ask_now + dn_ask_now
    "combined_cost_minus_min",  # current combined ask - minimum so far
    "arb_potential_rank_in_window",  # share of rows with combined <= current
]
assert len(FEATURE_NAMES) == 15
def _fee_shares(p: float, shares: int = 100) -> float:
    """Return the fee charged for ``shares`` shares bought at price ``p``.

    The fee is ``0.072 * p * (1 - p) * shares``. A price that is NaN,
    infinite, or outside the open interval (0, 1) yields a fee of 0.0.
    """
    price_is_usable = np.isfinite(p) and 0 < p < 1
    if price_is_usable:
        return 0.072 * p * (1.0 - p) * shares
    return 0.0
def _col(df: pd.DataFrame, name: str) -> np.ndarray:
    """Return column ``name`` of ``df`` as a float64 array.

    When the frame has no such column, an all-NaN float64 array with one
    entry per row of ``df`` is returned instead.
    """
    if name not in df.columns:
        return np.full(len(df), np.nan, dtype=np.float64)
    return df[name].to_numpy(dtype=np.float64)
def _ff(a: np.ndarray) -> np.ndarray:
    """Return a copy of a 1-D array with every non-finite entry filled in.

    Each NaN/inf entry takes the most recent finite value before it. Entries
    ahead of the first finite value take that first finite value. When the
    array holds no finite value at all, every entry becomes 0.0.

    Args:
        a: 1-D numeric array. It is not modified.

    Returns:
        A new array of the same length and dtype that contains only finite
        values. An empty input yields an empty array.
    """
    out = a.copy()
    if out.size == 0:
        # Nothing to fill; probing out[0] on an empty array raises IndexError.
        return out

    finite = np.isfinite(out)
    if not finite.any():
        # No finite value to propagate in either direction.
        return np.zeros_like(out)

    # For every position, the index of the latest finite entry at or before
    # it. Positions ahead of the first finite entry stay at 0 for now.
    source = np.where(finite, np.arange(out.size), 0)
    source = np.maximum.accumulate(source)
    out = out[source]

    # Leading gap: back-fill with the first finite value.
    first_finite = int(np.argmax(finite))
    out[:first_finite] = out[first_finite]
    return out
def extract(window_frame: pd.DataFrame, at_tick: int = 120) -> np.ndarray:
    """Build the 15-entry state feature vector for one window at ``at_tick``.

    Only the rows selected by ``window_frame.iloc[: at_tick + 1]`` are read;
    rows are taken positionally, so row ``i`` is treated as tick ``i``. The
    ask columns ``pm_up_ask_px_1`` and ``pm_dn_ask_px_1`` are gap-filled with
    ``_ff`` (a missing column therefore becomes all zeros) and added together
    to give a combined cost per row.

    Args:
        window_frame: Frame holding the rows of one window.
        at_tick: Tick at which the features are evaluated.

    Returns:
        A float32 array of shape (15,) ordered like ``FEATURE_NAMES``, with
        any non-finite entry replaced by 0.0.
    """
    window = window_frame.iloc[: at_tick + 1]
    n_rows = len(window)

    ask_up = _ff(_col(window, "pm_up_ask_px_1"))
    ask_dn = _ff(_col(window, "pm_dn_ask_px_1"))
    pair_cost = ask_up + ask_dn

    # Clock features depend only on at_tick, not on the data.
    secs_left = float(900 - at_tick)
    log_secs_left = float(math.log(max(secs_left + 1.0, 1.0)))

    # Best and latest quotes; everything falls back to zero without rows.
    if n_rows > 0:
        best_idx = int(np.nanargmin(pair_cost))
        best_cost = float(np.nanmin(pair_cost))
        last_up = float(ask_up[-1])
        last_dn = float(ask_dn[-1])
        last_cost = float(pair_cost[-1])
    else:
        best_idx = 0
        best_cost = last_up = last_dn = last_cost = 0.0

    # Share of rows whose combined cost is at or below the latest one; a
    # single row (or none) gets the neutral value 0.5.
    if n_rows > 1:
        rank_in_window = float(np.mean(pair_cost <= last_cost))
    else:
        rank_in_window = 0.5

    # Break-even hedge price: after buying one side at its ask, the other
    # side may cost at most 1 minus that ask minus the per-share fee. Only the
    # entry-side fee is counted (fee on 100 shares divided back by 100).
    hedge_px_up = 1.0 - last_up - _fee_shares(last_up) / 100.0
    hedge_px_dn = 1.0 - last_dn - _fee_shares(last_dn) / 100.0

    features = np.array(
        [
            float(at_tick) / 900.0,
            secs_left,
            log_secs_left,
            best_cost,
            best_cost - 1.0,
            # No open leg is carried during feature extraction, so the three
            # leg features (side, cost, age) are constant zero.
            0.0,
            0.0,
            0.0,
            hedge_px_up,
            hedge_px_dn,
            float(at_tick - best_idx),
            1.0 if best_cost < 1.0 else 0.0,
            last_cost,
            last_cost - best_cost,
            rank_in_window,
        ],
        dtype=np.float64,
    )
    features = np.nan_to_num(features, nan=0.0, posinf=0.0, neginf=0.0)
    out = features.astype(np.float32)
    assert out.shape[0] == 15
    return out
# Public API of this module: the feature-name list and the extractor.
# The underscore-prefixed helpers are not exported.
__all__ = ["FEATURE_NAMES", "extract"]