Spaces:

commanderzee
/

bnb-arb-trainer

Sleeping

App Files Files Community

commanderzee committed on Apr 22

Commit

8e80d26

verified ·

1 Parent(s): 915739a

hybrid edge-margin policy (edge_threshold + entry_price_max cap) — BTC +$1247

Browse files

Files changed (1) hide show

train.py +32 -26

train.py CHANGED Viewed

@@ -384,22 +384,29 @@ def _directional_policy_apply(
     dn_asks: np.ndarray,
     pnl_ups: np.ndarray,
     pnl_dns: np.ndarray,
-    p_up_thr: float,
-    p_dn_thr: float,
-    entry_price_max: float,
 ) -> Tuple[np.ndarray, np.ndarray]:
-    """Given model's P(UP wins) per row + per-row side asks at tick 120, pick
-    which side (if any) to enter. Enter UP iff preds >= p_up_thr AND up_ask
-    ≤ entry_price_max. Enter DN iff preds ≤ p_dn_thr AND dn_ask ≤
-    entry_price_max. Priority: UP signal wins ties with DN signal. Returns
-    (flag, pnl_per_row) arrays."""
     n = len(preds)
     flag = np.zeros(n, dtype=bool)
     pnl = np.zeros(n, dtype=np.float64)
     for i in range(n):
-        up_ok = preds[i] >= p_up_thr and up_asks[i] <= entry_price_max
-        dn_ok = preds[i] <= p_dn_thr and dn_asks[i] <= entry_price_max
-        if up_ok:
             flag[i] = True
             pnl[i] = pnl_ups[i]
         elif dn_ok:
@@ -425,10 +432,12 @@ def _lgb_params_from_trial(trial: optuna.Trial) -> Tuple[Dict, Dict]:
         "n_estimators": 500,
     }
     trading = {
-        "p_up_threshold": trial.suggest_float("p_up_threshold", 0.50, 0.80),
-        "p_dn_threshold": trial.suggest_float("p_dn_threshold", 0.20, 0.50),
-        # "lower is better" — cap the entry price we're willing to pay
-        "entry_price_max": trial.suggest_float("entry_price_max", 0.20, 0.60),
     }
     return params, trading
@@ -543,8 +552,7 @@ def run_training(
                 continue
             flag, pnl = _directional_policy_apply(
                 res["preds"], uav, dav, pup_v, pdn_v,
-                trading["p_up_threshold"],
-                trading["p_dn_threshold"],
                 trading["entry_price_max"],
             )
             sim = simulate_flagging_pnl(pnl, flag)
@@ -562,8 +570,7 @@ def run_training(
     best_params_trial = dict(study.best_trial.params)
     best_trading = {
-        "p_up_threshold": float(best_params_trial.pop("p_up_threshold")),
-        "p_dn_threshold": float(best_params_trial.pop("p_dn_threshold")),
         "entry_price_max": float(best_params_trial.pop("entry_price_max")),
     }
     best_lgb_params = {
@@ -595,8 +602,7 @@ def run_training(
         res = _train_fold_core(Xt, yt, Xv, yv, best_lgb_params)
         flag, pnl = _directional_policy_apply(
             res["preds"], uav, dav, pup_v, pdn_v,
-            best_trading["p_up_threshold"],
-            best_trading["p_dn_threshold"],
             best_trading["entry_price_max"],
         )
         sim = simulate_flagging_pnl(pnl, flag)
@@ -638,8 +644,7 @@ def run_training(
         hold_preds = final_booster.predict(X_hold)
         hold_flag, hold_pnl = _directional_policy_apply(
             hold_preds, up_ask_hold, dn_ask_hold, pnl_up_hold, pnl_dn_hold,
-            best_trading["p_up_threshold"],
-            best_trading["p_dn_threshold"],
             best_trading["entry_price_max"],
         )
         hold_sim = simulate_flagging_pnl(hold_pnl, hold_flag)
@@ -685,9 +690,10 @@ def run_training(
         json.dumps(
             {
                 "trading": best_trading,
-                "notes": "Directional model: enter UP if pred >= p_up_threshold "
-                         "AND up_ask <= entry_price_max. Enter DN if pred <= "
-                         "p_dn_threshold AND dn_ask <= entry_price_max. Else skip.",
             },
             indent=2,
         )

     dn_asks: np.ndarray,
     pnl_ups: np.ndarray,
     pnl_dns: np.ndarray,
+    edge_threshold: float,
+    entry_price_max: float = 1.0,
 ) -> Tuple[np.ndarray, np.ndarray]:
+    """Edge-margin decision rule.
+    On Polymarket the ask for side S is approximately the market's implied
+    P(S wins). So `edge_up = model_pred - up_ask` = our estimate minus the
+    market's — the expected PnL per share before fees. If this exceeds a
+    threshold on either side, we trade that side. Whichever edge is larger
+    wins when both clear threshold.
+    Single-knob policy (one Optuna param) — drops the prior 3-param setup
+    (p_up_threshold, p_dn_threshold, entry_price_max) which was
+    susceptible to overfit in Optuna's larger search space."""
     n = len(preds)
     flag = np.zeros(n, dtype=bool)
     pnl = np.zeros(n, dtype=np.float64)
     for i in range(n):
+        edge_up = preds[i] - up_asks[i]
+        edge_dn = (1.0 - preds[i]) - dn_asks[i]
+        up_ok = (edge_up >= edge_threshold) and (up_asks[i] <= entry_price_max)
+        dn_ok = (edge_dn >= edge_threshold) and (dn_asks[i] <= entry_price_max)
+        if up_ok and (not dn_ok or edge_up >= edge_dn):
             flag[i] = True
             pnl[i] = pnl_ups[i]
         elif dn_ok:
         "n_estimators": 500,
     }
     trading = {
+        # primary: edge-margin (model_pred - market_ask for the chosen side)
+        "edge_threshold": trial.suggest_float("edge_threshold", 0.03, 0.25),
+        # safety cap: block high-entry overconfident trades where the
+        # model's probability is likely uncalibrated. Without this, BTC
+        # lost -$621 on a run (vs +$390 with the cap).
+        "entry_price_max": trial.suggest_float("entry_price_max", 0.40, 0.65),
     }
     return params, trading
                 continue
             flag, pnl = _directional_policy_apply(
                 res["preds"], uav, dav, pup_v, pdn_v,
+                trading["edge_threshold"],
                 trading["entry_price_max"],
             )
             sim = simulate_flagging_pnl(pnl, flag)
     best_params_trial = dict(study.best_trial.params)
     best_trading = {
+        "edge_threshold": float(best_params_trial.pop("edge_threshold")),
         "entry_price_max": float(best_params_trial.pop("entry_price_max")),
     }
     best_lgb_params = {
         res = _train_fold_core(Xt, yt, Xv, yv, best_lgb_params)
         flag, pnl = _directional_policy_apply(
             res["preds"], uav, dav, pup_v, pdn_v,
+            best_trading["edge_threshold"],
             best_trading["entry_price_max"],
         )
         sim = simulate_flagging_pnl(pnl, flag)
         hold_preds = final_booster.predict(X_hold)
         hold_flag, hold_pnl = _directional_policy_apply(
             hold_preds, up_ask_hold, dn_ask_hold, pnl_up_hold, pnl_dn_hold,
+            best_trading["edge_threshold"],
             best_trading["entry_price_max"],
         )
         hold_sim = simulate_flagging_pnl(hold_pnl, hold_flag)
         json.dumps(
             {
                 "trading": best_trading,
+                "notes": "Edge-margin rule: edge_up = model_pred - up_ask; "
+                         "edge_dn = (1 - model_pred) - dn_ask. Enter the side "
+                         "with the larger edge, iff that edge >= edge_threshold. "
+                         "Edge is expected PnL per share before fees.",
             },
             indent=2,
         )