Spaces:
Sleeping
Sleeping
| """ | |
| State / time / arb-awareness features for ARB-MAX. 15 features, fixed order. | |
| """ | |
| from __future__ import annotations | |
| import math | |
| from typing import List | |
| import numpy as np | |
| import pandas as pd | |
# Labels for the feature vector, in output order: entry i names element i of
# the array assembled by extract().
FEATURE_NAMES: List[str] = [
    # -- position inside the window --
    "tick_norm",  # at_tick divided by 900
    "seconds_remaining",  # 900 minus at_tick
    "log_seconds_remaining",  # natural log of (seconds_remaining + 1)
    # -- best combined ask observed up to at_tick --
    "min_combined_cost_so_far",  # smallest up_ask1 + dn_ask1 over ticks 0..at_tick
    "current_best_arb_opportunity",  # min_combined_cost_so_far minus 1.0
    # -- open-leg placeholders, always 0 when features are extracted --
    "leg_side_stub",
    "leg_cost_stub",
    "leg_age_stub",
    # -- break-even hedge prices at the current tick --
    "required_hedge_px_for_be_up",  # 1.0 - up_ask_now - per-share fee at up_ask_now
    "required_hedge_px_for_be_dn",  # 1.0 - dn_ask_now - per-share fee at dn_ask_now
    # -- how the current combined cost relates to its history --
    "time_since_best_combined_cost_seen",  # at_tick minus index of the minimum
    "has_potential_arb_been_available_yet",  # 1 when the minimum so far is below 1
    "current_combined_cost",  # up_ask_now + dn_ask_now
    "combined_cost_minus_min",  # current combined cost minus the minimum so far
    "arb_potential_rank_in_window",  # share of ticks so far with combined cost <= current
]
assert len(FEATURE_NAMES) == 15
| def _fee_shares(p: float, shares: int = 100) -> float: | |
| if not np.isfinite(p) or p <= 0 or p >= 1: | |
| return 0.0 | |
| return 0.072 * p * (1.0 - p) * shares | |
| def _col(df: pd.DataFrame, name: str) -> np.ndarray: | |
| if name in df.columns: | |
| a = df[name].to_numpy(dtype=np.float64) | |
| else: | |
| a = np.full(len(df), np.nan, dtype=np.float64) | |
| return a | |
| def _ff(a: np.ndarray) -> np.ndarray: | |
| out = a.copy() | |
| last = np.nan | |
| for i, v in enumerate(out): | |
| if np.isfinite(v): | |
| last = v | |
| else: | |
| out[i] = last | |
| if not np.isfinite(out[0]): | |
| first = np.nan | |
| for v in out: | |
| if np.isfinite(v): | |
| first = v | |
| break | |
| if np.isfinite(first): | |
| for i in range(len(out)): | |
| if np.isfinite(out[i]): | |
| break | |
| out[i] = first | |
| return np.nan_to_num(out, nan=0.0, posinf=0.0, neginf=0.0) | |
def extract(window_frame: pd.DataFrame, at_tick: int = 120) -> np.ndarray:
    """Build the 15-element state / time / arb-awareness feature vector.

    Only rows ``0..at_tick`` of ``window_frame`` are used. The best-ask
    columns ``pm_up_ask_px_1`` and ``pm_dn_ask_px_1`` are read through
    ``_col`` and gap-filled with ``_ff`` before any statistic is computed.

    Args:
        window_frame: Frame with one row per tick.
        at_tick: Index of the current tick; rows after it are ignored.

    Returns:
        A float32 array of length 15 laid out in ``FEATURE_NAMES`` order.
        Any non-finite entry is replaced by ``0.0``.
    """
    window_ticks = 900  # window length used by the time features

    df = window_frame.iloc[: at_tick + 1]
    if len(df) > 0:
        up_ask = _ff(_col(df, "pm_up_ask_px_1"))
        dn_ask = _ff(_col(df, "pm_dn_ask_px_1"))
    else:
        # Bypass the fill helper for a zero-row window so the empty-input
        # defaults below are reachable no matter how the helper treats
        # zero-length arrays.
        up_ask = np.empty(0, dtype=np.float64)
        dn_ask = np.empty(0, dtype=np.float64)
    combined = up_ask + dn_ask

    # Time features.
    tick_norm = float(at_tick) / float(window_ticks)
    seconds_remaining = float(window_ticks - at_tick)
    # Clamp the argument at 1 so the log stays defined past the window end.
    log_seconds_remaining = float(math.log(max(seconds_remaining + 1.0, 1.0)))

    # Cheapest combined ask seen so far and where it occurred.
    if len(combined) > 0:
        min_combined = float(np.nanmin(combined))
        argmin_idx = int(np.nanargmin(combined))
    else:
        # NOTE(review): with no rows this default makes has_arb_yet 1.0 and
        # current_best_arb -1.0 -- confirm that is the intended encoding.
        min_combined = 0.0
        argmin_idx = 0
    current_best_arb = min_combined - 1.0

    # Per spec these are stubs: no open leg is carried during extraction.
    leg_side_stub = 0.0
    leg_cost_stub = 0.0
    leg_age_stub = 0.0

    up_ask_now = float(up_ask[-1]) if len(up_ask) else 0.0
    dn_ask_now = float(dn_ask[-1]) if len(dn_ask) else 0.0

    # Hedge-to-breakeven: after buying one side at its ask, the other side
    # must cost at most 1 - ask - fee. Only the entry fee is counted here.
    # _fee_shares prices 100 shares by default, hence the division by 100.
    fee_up = _fee_shares(up_ask_now) / 100.0
    fee_dn = _fee_shares(dn_ask_now) / 100.0
    req_hedge_up = 1.0 - up_ask_now - fee_up
    req_hedge_dn = 1.0 - dn_ask_now - fee_dn

    time_since_best = float(at_tick - argmin_idx)
    has_arb_yet = 1.0 if min_combined < 1.0 else 0.0

    current_combined = float(combined[-1]) if len(combined) else 0.0
    combined_minus_min = current_combined - min_combined

    # Share of ticks so far whose combined cost is at or below the current one.
    if len(combined) > 1:
        rank = float((combined <= current_combined).mean())
    else:
        rank = 0.5

    out = np.array(
        [
            tick_norm,
            seconds_remaining,
            log_seconds_remaining,
            min_combined,
            current_best_arb,
            leg_side_stub,
            leg_cost_stub,
            leg_age_stub,
            req_hedge_up,
            req_hedge_dn,
            time_since_best,
            has_arb_yet,
            current_combined,
            combined_minus_min,
            rank,
        ],
        dtype=np.float64,
    )
    out = np.where(np.isfinite(out), out, 0.0).astype(np.float32)
    assert out.shape[0] == 15
    return out
# Names exported by `from <module> import *`; the underscore-prefixed helpers
# (_fee_shares, _col, _ff) are intentionally not listed.
__all__ = ["FEATURE_NAMES", "extract"]