Spaces:
Sleeping
Sleeping
File size: 4,580 Bytes
"""
State / time / arb-awareness features for ARB-MAX. 15 features, fixed order.
"""
from __future__ import annotations
import math
from typing import List
import numpy as np
import pandas as pd
# Column order of the vector produced by extract(): entry i of the output is
# the feature named FEATURE_NAMES[i]. Do not reorder without updating extract().
FEATURE_NAMES: List[str] = [
    # -- clock ---------------------------------------------------------------
    # at_tick / 900
    "tick_norm",
    # 900 - at_tick
    "seconds_remaining",
    # log(900 - at_tick + 1)
    "log_seconds_remaining",
    # -- best price seen so far ------------------------------------------------
    # min over ticks 0..at_tick of up_ask1 + dn_ask1
    "min_combined_cost_so_far",
    # min_combined_cost - 1.0
    "current_best_arb_opportunity",
    # -- open-leg placeholders (always 0 at feature-extraction time) ----------
    "leg_side_stub",
    "leg_cost_stub",
    "leg_age_stub",
    # -- break-even hedge prices -----------------------------------------------
    # 1.0 - up_ask_now - fee(up_ask_now)
    "required_hedge_px_for_be_up",
    # 1.0 - dn_ask_now - fee(dn_ask_now)
    "required_hedge_px_for_be_dn",
    # -- arb awareness -----------------------------------------------------------
    # ticks elapsed since the minimum combined cost was observed
    "time_since_best_combined_cost_seen",
    # 1 if the minimum combined cost so far is below 1
    "has_potential_arb_been_available_yet",
    # up_ask_now + dn_ask_now
    "current_combined_cost",
    # current combined cost - minimum so far
    "combined_cost_minus_min",
    # rank of the current combined cost among the history
    "arb_potential_rank_in_window",
]
assert len(FEATURE_NAMES) == 15
def _fee_shares(p: float, shares: int = 100) -> float:
    """Return the fee charged for ``shares`` contracts bought at price ``p``.

    The fee is ``0.072 * p * (1 - p) * shares``. Prices that are not finite
    or that fall outside the open interval (0, 1) yield a fee of ``0.0``.

    NOTE(review): 0.072 is presumably the venue's fee coefficient; confirm
    against the fee schedule this model targets.
    """
    price_is_priceable = np.isfinite(p) and 0 < p < 1
    if price_is_priceable:
        return 0.072 * p * (1.0 - p) * shares
    return 0.0
def _col(df: pd.DataFrame, name: str) -> np.ndarray:
    """Return column ``name`` of ``df`` as a float64 array.

    When the frame has no such column, an all-NaN float64 array with one
    entry per row of ``df`` is returned instead, so callers always receive
    an array of length ``len(df)``.
    """
    if name not in df.columns:
        return np.full(len(df), np.nan, dtype=np.float64)
    return df[name].to_numpy(dtype=np.float64)
def _ff(a: np.ndarray) -> np.ndarray:
    """Fill every non-finite entry of a 1-D series and return a new array.

    Filling happens in three steps:

    1. Forward fill: each NaN / +inf / -inf takes the most recent finite
       value before it.
    2. Back fill of the head: entries that precede the first finite value
       take that first finite value.
    3. If the series holds no finite value at all, every entry becomes 0.0.

    The input array is never modified. An empty input yields an empty copy.

    Args:
        a: 1-D numeric array, possibly containing NaN or infinities.

    Returns:
        A new array with the same shape and dtype as ``a`` whose entries are
        all finite.
    """
    values = np.asarray(a)
    if values.size == 0:
        # Nothing to fill; return an empty copy instead of indexing element 0.
        return values.copy()

    finite = np.isfinite(values)
    if not finite.any():
        # No usable sample anywhere in the series: fall back to zeros.
        return np.zeros_like(values)

    # For each position, the index of the latest finite sample at or before
    # it. Non-finite positions contribute 0 so the running maximum carries
    # the previous finite index forward.
    source = np.where(finite, np.arange(values.size), 0)
    np.maximum.accumulate(source, out=source)

    # Positions ahead of the first finite sample have nothing behind them to
    # copy from, so point them at that first finite sample instead.
    first_finite = int(np.argmax(finite))
    source[:first_finite] = first_finite

    # Fancy indexing builds a fresh array, leaving the caller's data intact.
    return values[source]
def extract(window_frame: pd.DataFrame, at_tick: int = 120) -> np.ndarray:
    """Build the 15-element state / time / arb-awareness feature vector.

    Only rows ``0..at_tick`` (inclusive) of ``window_frame`` are used. The
    best ask prices are read from the ``pm_up_ask_px_1`` and
    ``pm_dn_ask_px_1`` columns, gap-filled with ``_ff``, and summed into a
    per-tick "combined cost". A missing column is treated as all-NaN by
    ``_col``.

    Args:
        window_frame: One row per tick. Missing ask columns are tolerated.
        at_tick: Index of the tick the features are computed for. The window
            length is fixed at 900 by the clock features.

    Returns:
        A float32 array of length 15 in ``FEATURE_NAMES`` order. Any
        non-finite entry is replaced by 0. When the slice holds no rows, the
        price-derived features fall back to their empty-history defaults
        instead of raising.

    NOTE(review): a side that never shows a finite quote is filled with 0.0,
    so its combined cost drops below 1 and the "arb has been available" flag
    becomes 1. The same holds for the empty-history default of 0.0. Confirm
    that downstream consumers expect this.
    """
    df = window_frame.iloc[: at_tick + 1]

    if len(df) > 0:
        up_ask = _ff(_col(df, "pm_up_ask_px_1"))
        dn_ask = _ff(_col(df, "pm_dn_ask_px_1"))
    else:
        # Skip the fill helpers for an empty slice: there is nothing to fill,
        # and the empty-history fallbacks below cover every derived feature.
        up_ask = np.empty(0, dtype=np.float64)
        dn_ask = np.empty(0, dtype=np.float64)
    combined = up_ask + dn_ask
    has_history = combined.size > 0

    # Clock features. The max() keeps the log argument >= 1 even when
    # at_tick runs past the end of the 900-tick window.
    tick_norm = float(at_tick) / 900.0
    seconds_remaining = float(900 - at_tick)
    log_seconds_remaining = float(math.log(max(seconds_remaining + 1.0, 1.0)))

    if has_history:
        argmin_idx = int(np.nanargmin(combined))
        min_combined = float(combined[argmin_idx])
        up_ask_now = float(up_ask[-1])
        dn_ask_now = float(dn_ask[-1])
        current_combined = float(combined[-1])
    else:
        argmin_idx = 0
        min_combined = 0.0
        up_ask_now = 0.0
        dn_ask_now = 0.0
        current_combined = 0.0
    current_best_arb = min_combined - 1.0

    # Per spec, these are stubs: no open leg is carried during extraction.
    leg_side_stub = 0.0
    leg_cost_stub = 0.0
    leg_age_stub = 0.0

    # Hedge-to-breakeven: after buying one side at its current ask, the other
    # side must cost no more than 1 - ask - fee. The fee is approximated as
    # entry-only; it is computed for a 100-share lot and scaled back to a
    # per-share amount.
    lot = 100
    fee_up = _fee_shares(up_ask_now, shares=lot) / 100.0
    fee_dn = _fee_shares(dn_ask_now, shares=lot) / 100.0
    req_hedge_up = 1.0 - up_ask_now - fee_up
    req_hedge_dn = 1.0 - dn_ask_now - fee_dn

    time_since_best = float(at_tick - argmin_idx)
    has_arb_yet = 1.0 if min_combined < 1.0 else 0.0
    combined_minus_min = current_combined - min_combined

    # Fraction of the history that is at or below the current combined cost.
    # A single observation (or none) has no meaningful rank, so use 0.5.
    if combined.size > 1:
        rank = float((combined <= current_combined).mean())
    else:
        rank = 0.5

    out = np.array(
        [
            tick_norm,
            seconds_remaining,
            log_seconds_remaining,
            min_combined,
            current_best_arb,
            leg_side_stub,
            leg_cost_stub,
            leg_age_stub,
            req_hedge_up,
            req_hedge_dn,
            time_since_best,
            has_arb_yet,
            current_combined,
            combined_minus_min,
            rank,
        ],
        dtype=np.float64,
    )
    # Scrub any remaining NaN / inf before narrowing to float32.
    out = np.where(np.isfinite(out), out, 0.0).astype(np.float32)
    assert out.shape[0] == 15
    return out
# Public API of this module; the underscore-prefixed helpers stay internal.
__all__ = ["FEATURE_NAMES", "extract"]
|