Spaces:

KarlQuant
/

Quasar-Executo

Running

App Files Files Community

KarlQuant committed on Apr 3

Commit

640294f

verified ·

1 Parent(s): f1ebef0

Upload Quasar_axrvi_ranker.py

Browse files

Files changed (1) hide show

Quasar_axrvi_ranker.py +257 -104

Quasar_axrvi_ranker.py CHANGED Viewed

@@ -392,8 +392,8 @@ GAMMA           = 0.99
 LAMBDA_RANK     = 0.4
 LAMBDA_RISK     = 0.3
 REPLAY_CAPACITY = 10_000
-TRAIN_BATCH     = 16   # Reduced from 64 — fills after ~3–5 min of live trading
-TRAIN_EVERY_N   = 10   # Reduced from 50 — checks buffer every ~2 min
 # Connection
 WS_RECONNECT_DELAY   = 5
@@ -1751,9 +1751,14 @@ class STDPAdjacencyLayer(nn.Module):
             regime_weights: (B, N) optional multiplier (e.g., crash probability)
         """
         B, N, _ = asset_embeddings.shape
         # Activity proxy: L2 norm normalized across batch
-        activity = asset_embeddings.norm(dim=-1)
         mu = activity.mean(dim=0, keepdim=True)
         std = activity.std(dim=0, keepdim=True) + 1e-6
         z_activity = (activity - mu) / std
@@ -1787,8 +1792,16 @@ class STDPAdjacencyLayer(nn.Module):
         self.step_count += 1
     def get_adapted_bias(self, base_adj_bias: torch.Tensor, N: int) -> torch.Tensor:
-        """Return base bias + accumulated STDP delta."""
-        return base_adj_bias[:, :N, :N] + self.stdp_delta[:, :N, :N]
     def reset_plasticity(self) -> None:
         """Reset STDP state (call on regime shift detection)."""
@@ -2015,7 +2028,7 @@ class HyperbolicCrossAssetLayer(nn.Module):
             nn.Linear(d_model, d_model)
         )
-        # Learnable adjacency bias
         self.adj_bias = nn.Parameter(torch.zeros(num_heads, self.MAX_ASSETS, self.MAX_ASSETS))
         # STDP plasticity
@@ -2057,6 +2070,12 @@ class HyperbolicCrossAssetLayer(nn.Module):
         adapted_bias = self.stdp.get_adapted_bias(self.adj_bias, N)
         attn = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
         attn = attn + adapted_bias.unsqueeze(0)
@@ -2458,8 +2477,25 @@ class AXRVINet(nn.Module):
         [5] KANScoringHead → significance scores
     """
-    def __init__(self, num_assets: int = 5, config: AXRVIConfig = DEFAULT_CONFIG):
         super().__init__()
         self.num_assets = num_assets
         self.config = config
         self.d_model = config.d_model
@@ -2809,8 +2845,12 @@ def create_axrvi_v8(num_assets: int = 5,
 def _axrvi_config_from_ranker_config(rc: "AssetRankerConfig") -> AXRVIConfig:
     """
     Derive an ``AXRVIConfig`` from an ``AssetRankerConfig`` so all dimension
-    constants live in one place.  Used by QuasarAXRVIBridge.initialize() and
-    any stand-alone callers that have an AssetRankerConfig available.
     The mapping is:
         rc.feature_dim          → AXRVIConfig.feature_dim   (26)
@@ -3325,6 +3365,16 @@ class HybridTrainer:
         self.train_step   = 0
         self.loss_history = deque(maxlen=200)
     def train_on_batch(self, episodes: List[dict]) -> dict:
         """
         v7 training — 5-loss objective:
@@ -3369,88 +3419,84 @@ class HybridTrainer:
         imp_w      = torch.tensor([ep.get("importance_weight", 1.0) for ep in valid],
                                    dtype=torch.float32, device=device)
-        out       = self.model(seq_t)
-        scores    = out["significance_logits"]
-        value     = out["value"].squeeze(-1)       # (B, N) — median Q̂_{0.5}
-        log_var   = out["log_var"].squeeze(-1)     # (B, N)
-        quantiles = out["quantiles"]               # (B, N, n_quantiles)
-        with torch.no_grad():
-            next_out    = self.model(next_seq_t)
-            best_next_v = next_out["value"].squeeze(-1).max(dim=1).values
-        # L_rl — TD error
-        selected_v = value.gather(1, selected.unsqueeze(1)).squeeze(1)
-        td_target  = rewards + self.gamma * best_next_v
-        l_rl       = (imp_w * F.mse_loss(selected_v, td_target.detach(),
-                                          reduction="none")).mean()
-        # L_ce — [S1] Value-consistency: selected_v ≈ E[R_{t+τ} | F_t]
-        l_ce = (imp_w * F.mse_loss(selected_v, rewards.detach(),
-                                    reduction="none")).mean()
-        # L_rank — Ranking margin
-        best_idx  = pnl_arr.argmax(dim=1)
-        worst_idx = pnl_arr.argmin(dim=1)
-        l_rank = F.relu(
-            self.rank_margin
-            - (scores.gather(1, best_idx.unsqueeze(1)).squeeze(1)
-               - scores.gather(1, worst_idx.unsqueeze(1)).squeeze(1))
-        ).mean()
-        # L_risk — Uncertainty penalty
-        l_risk = torch.exp(log_var.gather(1, selected.unsqueeze(1)).squeeze(1)).mean()
-        # L_ql — Quantile / pinball loss [v7]: calibrates full return distribution
-        # For the selected asset: gather its quantile predictions (B, n_q)
-        sel_q = quantiles.gather(
-            1,
-            selected.unsqueeze(1).unsqueeze(2).expand(-1, 1, quantiles.shape[-1])
-        ).squeeze(1)                                        # (B, n_quantiles)
-        tau   = self.model.distributional.quantile_levels  # (n_quantiles,)
-        u     = rewards.unsqueeze(1) - sel_q               # (B, n_quantiles)
-        # ρ_τ(u) = u·τ if u≥0 else u·(τ−1)
-        l_ql  = torch.max(tau * u, (tau - 1.0) * u).mean()
-        # L_moe — MoE load-balance regularisation [v8]
-        # Pulls the scalar already computed in MoETemporalEncoder.forward()
-        l_moe  = out.get("moe_balance_loss",  torch.tensor(0.0, device=seq_t.device))
-        # L_gate — DendriticFFN gate-entropy regularisation [v8]
-        # Accumulated from every HyperbolicCrossAssetLayer in AXRVINet.forward()
-        l_gate = out.get("gate_entropy_loss", torch.tensor(0.0, device=seq_t.device))
-        # L_crps — distributional calibration (CRPS proper scoring rule) [v8 / Bug 4 fix]
-        # Previously computed only in v8_total_loss() which was never called.
-        # Now integrated here so the quantile head is directly calibrated during training.
-        ql_levels = self.model.distributional.quantile_levels   # (n_quantiles,)
-        l_crps = crps_loss(quantiles, ql_levels, rewards)
-        # L_rent — regime-router entropy regularisation [v8 / Bug 4 fix]
-        # Maximise regime diversity by minimising the negative entropy.
-        r_probs    = out["regime_probs"] + 1e-8              # (B, N, n_regimes)
-        regime_ent = -(r_probs * r_probs.log()).sum(-1).mean()  # positive scalar
-        l_rent     = -regime_ent   # negate: minimise loss → maximise entropy
-        # Total loss — 9-component objective [v8 complete]
-        # total = L_rl + λ_ce·L_ce + λ_rank·L_rank + λ_risk·L_risk
-        #       + λ_ql·L_ql + λ_moe·L_moe + λ_gate·L_gate
-        #       + λ_crps·L_crps + λ_rent·L_rent
-        loss = (l_rl
-                + self.lambda_ce    * l_ce
-                + self.lambda_rank  * l_rank
-                + self.lambda_risk  * l_risk
-                + self.lambda_ql    * l_ql
-                + self.lambda_moe   * l_moe
-                + self.lambda_gate  * l_gate
-                + self.lambda_crps  * l_crps
-                + self.lambda_rent  * l_rent)
         self.optimizer.zero_grad()
-        loss.backward()
         torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
-        self.optimizer.step()
         # scheduler.step() is called externally (once per training epoch / rank
         # cycle) via step_scheduler(), NOT here per batch.  Calling it here
         # compressed the entire T_max=1000 cosine schedule into ~14 hours of
@@ -4500,7 +4546,7 @@ class QuasarAXRVIBridge:
         reward_strategy: str  = "simple",
         hub_ws_url:      str  = os.environ.get("QUASAR_HUB_URL", "ws://localhost:7860/ws/subscribe"),
         enable_logging:  bool = True,
-        checkpoint_dir:  str  = "./Ranker2",   # folder for full-state checkpoints
         resume:          bool = False,         # start afresh by default; set True to resume
     ):
         self.config          = config or AssetRankerConfig()
@@ -4812,6 +4858,15 @@ class QuasarAXRVIBridge:
                     with self.position_mgr._lock:
                         rejected = self.position_mgr._open_trades.pop(trade_id, None)
                     if rejected:
                         logger.error(
                             f"❌ [{rejected.asset}] BROKER REJECTED buy | "
                             f"trade_id={trade_id} | code={code} | {message}"
@@ -4900,8 +4955,16 @@ class QuasarAXRVIBridge:
         else:
             logger.warning(
                 f"[Deriv] Buy confirmation — no trade_id for req_id={req_id} | "
-                f"contract_id={contract_id} (late or orphaned confirmation)"
             )
     def _on_poc_update(self, poc: dict, raw_msg: dict) -> None:
         """
@@ -5257,6 +5320,12 @@ class QuasarAXRVIBridge:
         for asset in departed:
             trade = self.position_mgr.get_open_trade_by_asset(asset)
             if trade:
                 streamer = self.price_streamers.get(asset)
                 price    = streamer.latest_mid if streamer else trade.entry_price
                 logger.info(
@@ -5270,11 +5339,18 @@ class QuasarAXRVIBridge:
     # ── Position monitoring ────────────────────────────────────────────────────────────
     async def _close_position(self, trade_id: str, exit_price: float) -> None:
-        # ── Early-exit sell on Deriv ──────────────────────────────────────────
-        # Retrieve contract_id BEFORE close_trade() removes the trade from
-        # _open_trades.  price=0 means "sell at best available market price".
         with self.position_mgr._lock:
             open_trade = self.position_mgr._open_trades.get(trade_id)
             cid = open_trade.contract_id if open_trade else None
         if cid and self.ws_client and self.ws_client.connected:
@@ -5356,13 +5432,75 @@ class QuasarAXRVIBridge:
         After any position is closed, if open_trade_count drops below 2, immediately
         call rank_and_gate() to refill.  This ensures the 2-trade minimum is maintained
         continuously without waiting for the next scheduled _rank_loop cycle.
         """
         sc = self.config.shreve_config
         while self.running:
             try:
                 closed_any = False  # track whether we closed a trade this tick
                 for trade in self.position_mgr.get_open_trades():
                     streamer = self.price_streamers.get(trade.asset)
                     if not streamer:
                         continue
@@ -5384,8 +5522,6 @@ class QuasarAXRVIBridge:
                             raw_log_ret = math.log(price / trade.entry_price)
                             sign        = 1.0 if trade.direction == TradeDirection.LONG else -1.0
                             # Fees for G_t must be on stake, NOT on spot price.
-                            # price * commission_rate would be e.g. 316738 * 0.001 = $316 (wrong)
-                            # stake * commission_rate is e.g. 1.0 * 0.001 = $0.001 (correct)
                             fees        = self.trade_config.amount * self.trade_config.commission_rate
                             slippage    = self.trade_config.slippage_bps / 10_000.0
                             g_t         = sign * raw_log_ret - fees / price - slippage
@@ -5932,6 +6068,21 @@ class QuasarAXRVIBridge:
         next_seq_t, _, _ = self._build_input_tensors()
         next_sequences   = next_seq_t.squeeze(0).numpy()           # (N, T, F)
         pnl_proxy = np.zeros(n_assets, dtype=np.float32)
         if asset_id in self.config.asset_symbols:
             pnl_proxy[self.config.asset_symbols.index(asset_id)] = float(reward)
@@ -5945,6 +6096,8 @@ class QuasarAXRVIBridge:
             # Girsanov priority metadata [S2/GirsanovReplayBuffer]
             "volatility":     ep["volatility"],
             "td_error":       abs(reward),
         })
         if asset_id in self.config.asset_symbols:
@@ -6038,7 +6191,7 @@ class RankerCheckpointManager:
                On load, missing local files are pulled from the repo first.
                Requires HF_TOKEN env-var with write permission.
-    Fallback : Local disk  ./Ranker2/
                Used when HF_TOKEN is absent or upload/download fails.
                All saves still succeed locally even without network access.
@@ -6092,7 +6245,7 @@ class RankerCheckpointManager:
     Usage
     ─────
-    mgr = RankerCheckpointManager(checkpoint_dir="./Ranker2",
                                    save_interval_seconds=1800)
     mgr.load(bridge)              # once after initialize()
     mgr.maybe_save(bridge)        # call frequently; respects save_interval_seconds
@@ -6108,7 +6261,7 @@ class RankerCheckpointManager:
     def __init__(
         self,
-        checkpoint_dir:         str   = "./Ranker2",
         save_interval_seconds:  float = 1800.0,   # 30 minutes
     ):
         import pathlib
@@ -6162,7 +6315,7 @@ class RankerCheckpointManager:
             )
             logger.info(
                 f"[RankerCheckpoint] ☁️  Uploaded {local_path.name} → "
-                f"hf://{self.HF_REPO_ID}/Ranker2/{local_path.name}"
             )
             return True
         except Exception as exc:
@@ -6193,7 +6346,7 @@ class RankerCheckpointManager:
             )
             logger.info(
                 f"[RankerCheckpoint] ⬇️  Downloaded {filename} from "
-                f"hf://{self.HF_REPO_ID}/Ranker2/"
             )
             return True
         except Exception as exc:
@@ -6734,7 +6887,7 @@ async def run_live_trading_system(
     hub_ws_url:      str  = "ws://localhost:7860/ws/subscribe",
     enable_logging:  bool = True,
     shreve_config:   Optional[ShreveConfig] = None,
-    checkpoint_dir:  str  = "./Ranker2",
     resume:          bool = False,   # start fresh by default
 ) -> None:
     config = AssetRankerConfig(
@@ -7046,8 +7199,8 @@ def _parse_args():
                         help="[S6/S8] Trade horizon τ in seconds (default 60)")
     parser.add_argument("--martingale-epsilon", type=float, default=0.05,
                         help="[S7] Gate E martingale deviation threshold (default 0.05)")
-    parser.add_argument("--checkpoint-dir", default="./Ranker2",
-                        help="Directory for full-state checkpoints (default ./Ranker2)")
     parser.add_argument("--resume", action="store_true",
                         help="Resume training from the latest saved checkpoint (default: start fresh)")
     return parser.parse_args(filtered)

 LAMBDA_RANK     = 0.4
 LAMBDA_RISK     = 0.3
 REPLAY_CAPACITY = 10_000
+TRAIN_BATCH     = 8    # Lowered from 16 — fills after ~8 closed trades (~2-4 min)
+TRAIN_EVERY_N   = 5   # Lowered from 10 — checks buffer every ~25s
 # Connection
 WS_RECONNECT_DELAY   = 5
             regime_weights: (B, N) optional multiplier (e.g., crash probability)
         """
         B, N, _ = asset_embeddings.shape
+        # Clamp N to max_assets — extra assets are simply not tracked
+        N = min(N, self.max_assets)
+        # Use only the first N (clamped) assets
+        asset_emb_n = asset_embeddings[:, :N, :]
         # Activity proxy: L2 norm normalized across batch
+        activity = asset_emb_n.norm(dim=-1)
         mu = activity.mean(dim=0, keepdim=True)
         std = activity.std(dim=0, keepdim=True) + 1e-6
         z_activity = (activity - mu) / std
         self.step_count += 1
     def get_adapted_bias(self, base_adj_bias: torch.Tensor, N: int) -> torch.Tensor:
         """Return base bias + accumulated STDP delta, always shaped (..., N, N).

         Only the first ``min(N, self.max_assets)`` rows/columns of
         ``self.stdp_delta`` contribute; assets beyond that are untracked and
         receive a zero delta.  If ``base_adj_bias`` itself covers fewer than
         ``N`` assets, the missing rows/columns are zero-filled as well, so the
         two operands always have matching shapes.  (Previously only the delta
         was padded, which made the addition fail whenever ``N`` exceeded the
         size of ``base_adj_bias``.)

         Args:
             base_adj_bias: 3-D bias tensor indexed as ``[:, row, col]``.
             N: number of assets in the current forward pass.

         Returns:
             Tensor whose last two dimensions are both ``N``.
         """
         def _fit_to_n(block: torch.Tensor) -> torch.Tensor:
             # Slicing past the end is a no-op, so after this the block is at
             # most N×N; it is smaller only when the source tensor is too small.
             block = block[:, :N, :N]
             missing_rows = N - block.shape[-2]
             missing_cols = N - block.shape[-1]
             if missing_rows > 0 or missing_cols > 0:
                 # F.pad takes (before, after) pairs starting from the LAST dim.
                 block = F.pad(block, (0, missing_cols, 0, missing_rows), value=0.0)
             return block

         # Never read STDP state beyond the registered asset count.
         n_tracked = min(N, self.max_assets)
         tracked_delta = self.stdp_delta[:, :n_tracked, :n_tracked]
         return _fit_to_n(base_adj_bias) + _fit_to_n(tracked_delta)
     def reset_plasticity(self) -> None:
         """Reset STDP state (call on regime shift detection)."""
             nn.Linear(d_model, d_model)
         )
+        # Learnable adjacency bias — sized to MAX_ASSETS; dynamically padded in forward()
         self.adj_bias = nn.Parameter(torch.zeros(num_heads, self.MAX_ASSETS, self.MAX_ASSETS))
         # STDP plasticity
         adapted_bias = self.stdp.get_adapted_bias(self.adj_bias, N)
+        # If N > MAX_ASSETS the adapted_bias may be smaller; pad with zeros
+        if adapted_bias.shape[-1] < N or adapted_bias.shape[-2] < N:
+            pad_r = N - adapted_bias.shape[-1]
+            pad_c = N - adapted_bias.shape[-2]
+            adapted_bias = F.pad(adapted_bias, (0, pad_r, 0, pad_c), value=0.0)
         attn = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
         attn = attn + adapted_bias.unsqueeze(0)
         [5] KANScoringHead → significance scores
     """
+    def __init__(self, num_assets: int = 5, config: AXRVIConfig = DEFAULT_CONFIG,
+                 feature_dim: int = None, seq_len: int = None):
+        """
+        Args:
+            num_assets  : number of parallel asset streams (N dimension)
+            config      : AXRVIConfig — all hyperparameters
+            feature_dim : override config.feature_dim (backward compat)
+            seq_len     : override config.seq_len (backward compat)
+        """
         super().__init__()
+        # Apply any direct overrides so callers that pass feature_dim/seq_len
+        # directly (e.g. test_components) still get the right architecture.
+        if feature_dim is not None or seq_len is not None:
+            import copy
+            config = copy.copy(config)
+            if feature_dim is not None:
+                config.feature_dim = feature_dim
+            if seq_len is not None:
+                config.seq_len = seq_len
         self.num_assets = num_assets
         self.config = config
         self.d_model = config.d_model
 def _axrvi_config_from_ranker_config(rc: "AssetRankerConfig") -> AXRVIConfig:
     """
     Derive an ``AXRVIConfig`` from an ``AssetRankerConfig`` so all dimension
+    constants live in one place.
+    Called by ``QuasarAXRVIBridge.initialize()`` to construct the neural-net
+    config directly from the top-level ranker config, avoiding duplicated
+    constants.  Any stand-alone caller that has an ``AssetRankerConfig`` may
+    use this function as well.
     The mapping is:
         rc.feature_dim          → AXRVIConfig.feature_dim   (26)
         self.train_step   = 0
         self.loss_history = deque(maxlen=200)
+        # ── AMP (Automatic Mixed Precision) ──────────────────────────────────
+        # Reads use_amp from the model's AXRVIConfig so the flag is the single
+        # source of truth.  Falls back to False on CPU (AMP is CUDA-only).
+        _cfg = getattr(model, "config", None)
+        _use_amp = bool(getattr(_cfg, "use_amp", False))
+        _device  = next(model.parameters()).device
+        # AMP is only meaningful on CUDA; silently disable on CPU
+        self.use_amp = _use_amp and _device.type == "cuda"
+        self.scaler  = torch.cuda.amp.GradScaler(enabled=self.use_amp)
     def train_on_batch(self, episodes: List[dict]) -> dict:
         """
         v7 training — 5-loss objective:
         imp_w      = torch.tensor([ep.get("importance_weight", 1.0) for ep in valid],
                                    dtype=torch.float32, device=device)
+        # ── Forward pass (AMP-aware) ──────────────────────────────────────────
+        with torch.cuda.amp.autocast(enabled=self.use_amp):
+            out       = self.model(seq_t)
+            scores    = out["significance_logits"]
+            value     = out["value"].squeeze(-1)       # (B, N) — median Q̂_{0.5}
+            log_var   = out["log_var"].squeeze(-1)     # (B, N)
+            quantiles = out["quantiles"]               # (B, N, n_quantiles)
+            with torch.no_grad():
+                next_out    = self.model(next_seq_t)
+                best_next_v = next_out["value"].squeeze(-1).max(dim=1).values
+            # L_rl — TD error
+            selected_v = value.gather(1, selected.unsqueeze(1)).squeeze(1)
+            td_target  = rewards + self.gamma * best_next_v
+            l_rl       = (imp_w * F.mse_loss(selected_v, td_target.detach(),
+                                              reduction="none")).mean()
+            # L_ce — [S1] Value-consistency: selected_v ≈ E[R_{t+τ} | F_t]
+            l_ce = (imp_w * F.mse_loss(selected_v, rewards.detach(),
+                                        reduction="none")).mean()
+            # L_rank — Ranking margin
+            best_idx  = pnl_arr.argmax(dim=1)
+            worst_idx = pnl_arr.argmin(dim=1)
+            l_rank = F.relu(
+                self.rank_margin
+                - (scores.gather(1, best_idx.unsqueeze(1)).squeeze(1)
+                   - scores.gather(1, worst_idx.unsqueeze(1)).squeeze(1))
+            ).mean()
+            # L_risk — Uncertainty penalty
+            l_risk = torch.exp(log_var.gather(1, selected.unsqueeze(1)).squeeze(1)).mean()
+            # L_ql — Quantile / pinball loss [v7]: calibrates full return distribution
+            sel_q = quantiles.gather(
+                1,
+                selected.unsqueeze(1).unsqueeze(2).expand(-1, 1, quantiles.shape[-1])
+            ).squeeze(1)                                        # (B, n_quantiles)
+            tau   = self.model.distributional.quantile_levels  # (n_quantiles,)
+            u     = rewards.unsqueeze(1) - sel_q               # (B, n_quantiles)
+            l_ql  = torch.max(tau * u, (tau - 1.0) * u).mean()
+            # L_moe — MoE load-balance regularisation [v8]
+            l_moe  = out.get("moe_balance_loss",  torch.tensor(0.0, device=seq_t.device))
+            # L_gate — DendriticFFN gate-entropy regularisation [v8]
+            l_gate = out.get("gate_entropy_loss", torch.tensor(0.0, device=seq_t.device))
+            # L_crps — distributional calibration (CRPS proper scoring rule) [v8]
+            ql_levels = self.model.distributional.quantile_levels
+            l_crps = crps_loss(quantiles, ql_levels, rewards)
+            # L_rent — regime-router entropy regularisation [v8]
+            r_probs    = out["regime_probs"] + 1e-8              # (B, N, n_regimes)
+            regime_ent = -(r_probs * r_probs.log()).sum(-1).mean()
+            l_rent     = -regime_ent
+            # Total loss — 9-component objective [v8 complete]
+            loss = (l_rl
+                    + self.lambda_ce    * l_ce
+                    + self.lambda_rank  * l_rank
+                    + self.lambda_risk  * l_risk
+                    + self.lambda_ql    * l_ql
+                    + self.lambda_moe   * l_moe
+                    + self.lambda_gate  * l_gate
+                    + self.lambda_crps  * l_crps
+                    + self.lambda_rent  * l_rent)
+        # ── Backward pass (AMP-aware) ─────────────────────────────────────────
         self.optimizer.zero_grad()
+        self.scaler.scale(loss).backward()
+        # Unscale before clipping so grad norms are in the original fp32 scale
+        self.scaler.unscale_(self.optimizer)
         torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
+        self.scaler.step(self.optimizer)
+        self.scaler.update()
         # scheduler.step() is called externally (once per training epoch / rank
         # cycle) via step_scheduler(), NOT here per batch.  Calling it here
         # compressed the entire T_max=1000 cosine schedule into ~14 hours of
         reward_strategy: str  = "simple",
         hub_ws_url:      str  = os.environ.get("QUASAR_HUB_URL", "ws://localhost:7860/ws/subscribe"),
         enable_logging:  bool = True,
+        checkpoint_dir:  str  = "./Ranker3",   # folder for full-state checkpoints
         resume:          bool = False,         # start afresh by default; set True to resume
     ):
         self.config          = config or AssetRankerConfig()
                     with self.position_mgr._lock:
                         rejected = self.position_mgr._open_trades.pop(trade_id, None)
                     if rejected:
+                        # ── Clean up all trade-level state so nothing leaks ──
+                        # _pending_episodes: episode was opened by _open_pending_episode
+                        #   but there will never be a close event — discard it.
+                        self._pending_episodes.pop(trade_id, None)
+                        # _trade_tick_counts: monitor_positions tracks this; clear it.
+                        self._trade_tick_counts.pop(trade_id, None)
+                        # portfolio_risk_mgr: committed capital was registered in
+                        #   process_axrvi_signal; release it with 0 PnL.
+                        self.portfolio_risk_mgr.register_close(trade_id, 0.0)
                         logger.error(
                             f"❌ [{rejected.asset}] BROKER REJECTED buy | "
                             f"trade_id={trade_id} | code={code} | {message}"
         else:
             logger.warning(
                 f"[Deriv] Buy confirmation — no trade_id for req_id={req_id} | "
+                f"contract_id={contract_id} (late or orphaned confirmation) — "
+                f"sending immediate SELL to avoid dangling open contract on broker side"
             )
+            # The contract is live on Deriv but we have no internal tracking for
+            # it.  Immediately sell at market so we don't accumulate un-tracked
+            # open contracts that drain the account without appearing in our books.
+            if self.ws_client and self.ws_client.connected:
+                asyncio.get_running_loop().create_task(
+                    self.ws_client.send_message({"sell": contract_id, "price": 0})
+                )
     def _on_poc_update(self, poc: dict, raw_msg: dict) -> None:
         """
         for asset in departed:
             trade = self.position_mgr.get_open_trade_by_asset(asset)
             if trade:
+                # Skip if already closing — SELL was already sent to broker
+                if trade.state == PositionState.CLOSING:
+                    logger.debug(
+                        f"[Rotation] ⏩ {asset} already CLOSING — skipping duplicate SELL"
+                    )
+                    continue
                 streamer = self.price_streamers.get(asset)
                 price    = streamer.latest_mid if streamer else trade.entry_price
                 logger.info(
     # ── Position monitoring ────────────────────────────────────────────────────────────
     async def _close_position(self, trade_id: str, exit_price: float) -> None:
+        # ── Prevent re-sending SELL for a contract already in CLOSING state ─
         with self.position_mgr._lock:
             open_trade = self.position_mgr._open_trades.get(trade_id)
+            if open_trade is None:
+                return   # already closed
+            if open_trade.state == PositionState.CLOSING:
+                # Sell already sent; don't spam the broker — just wait for POC
+                logger.debug(
+                    f"[{trade_id}] ⏳ Already CLOSING — skipping duplicate SELL | "
+                    f"contract_id={open_trade.contract_id}"
+                )
+                return
             cid = open_trade.contract_id if open_trade else None
         if cid and self.ws_client and self.ws_client.connected:
         After any position is closed, if open_trade_count drops below 2, immediately
         call rank_and_gate() to refill.  This ensures the 2-trade minimum is maintained
         continuously without waiting for the next scheduled _rank_loop cycle.
+        CLOSING TIMEOUT FIX (v6.1):
+        If a trade remains in CLOSING state for > 10 seconds without a terminal
+        event from the broker, force-close it locally to prevent stuck trades.
         """
         sc = self.config.shreve_config
+        CLOSING_TIMEOUT_SECONDS = 10.0   # Maximum time to wait for broker terminal event
         while self.running:
             try:
                 closed_any = False  # track whether we closed a trade this tick
                 for trade in self.position_mgr.get_open_trades():
+                    # ── CLOSING TIMEOUT HANDLER ──────────────────────────────────
+                    # If trade has been CLOSING for > CLOSING_TIMEOUT_SECONDS,
+                    # force-close it locally (broker never responded)
+                    if trade.state == PositionState.CLOSING:
+                        closing_duration = time.time() - trade.exit_time if trade.exit_time else 0
+                        if closing_duration > CLOSING_TIMEOUT_SECONDS:
+                            logger.warning(
+                                f"[{trade.asset}] ⚠️  CLOSING TIMEOUT | "
+                                f"trade_id={trade.trade_id} | "
+                                f"contract_id={trade.contract_id} | "
+                                f"stuck in CLOSING for {closing_duration:.1f}s — "
+                                f"forcing local close"
+                            )
+                            # Force close locally (broker never responded)
+                            # Use current price as exit price
+                            streamer = self.price_streamers.get(trade.asset)
+                            price = streamer.latest_mid if streamer else trade.entry_price
+                            # Estimate profit from broker data if available
+                            if trade.profit is not None:
+                                profit = trade.profit
+                            else:
+                                # Fallback: estimate from price movement
+                                if price > 0 and trade.entry_price > 0:
+                                    pct_move = (price - trade.entry_price) / trade.entry_price
+                                    sign = 1.0 if trade.direction == TradeDirection.LONG else -1.0
+                                    mult = ASSET_MULTIPLIER.get(trade.asset, 50)
+                                    stake = trade.buy_price if (trade.buy_price and trade.buy_price > 0) else 1.0
+                                    profit = sign * pct_move * stake * mult
+                                else:
+                                    profit = 0.0
+                            closed_trade = self.position_mgr.close_trade_from_broker(
+                                trade_id=trade.trade_id,
+                                status="timeout",
+                                profit=profit,
+                                sell_price=price,
+                                exit_tick=price,
+                            )
+                            if closed_trade:
+                                reward = self._reward_from_broker(closed_trade)
+                                self.portfolio_risk_mgr.register_close(trade.trade_id, closed_trade.realized_pnl)
+                                self._close_pending_episode(trade.trade_id, reward)
+                                self._trade_tick_counts.pop(trade.trade_id, None)
+                                self.stats["trades_closed"] += 1
+                                self.stats["total_pnl"] += closed_trade.realized_pnl
+                                closed_any = True
+                                logger.info(
+                                    f"💰 [{closed_trade.asset}] TRADE FORCE-CLOSED (timeout) | "
+                                    f"reward={reward:+.6f} | profit={profit:+.4f}"
+                                )
+                        continue  # Skip other checks for CLOSING trades
                     streamer = self.price_streamers.get(trade.asset)
                     if not streamer:
                         continue
                             raw_log_ret = math.log(price / trade.entry_price)
                             sign        = 1.0 if trade.direction == TradeDirection.LONG else -1.0
                             # Fees for G_t must be on stake, NOT on spot price.
                             fees        = self.trade_config.amount * self.trade_config.commission_rate
                             slippage    = self.trade_config.slippage_bps / 10_000.0
                             g_t         = sign * raw_log_ret - fees / price - slippage
         next_seq_t, _, _ = self._build_input_tensors()
         next_sequences   = next_seq_t.squeeze(0).numpy()           # (N, T, F)
+        # ── Episode validity check: s_t and s_{t+1} should differ ──────────
+        # If the two states are (near-)identical, the transition records no
+        # state change and may not produce a useful gradient.  This happens
+        # when a trade opens and closes within the same rank cycle, before any
+        # price ticks arrive.  The episode is still pushed, but its state_diff
+        # is recorded so HybridTrainer can apply a lower importance weight.
+        state_diff = float(np.mean(np.abs(next_sequences - sequences)))
+        if state_diff < 1e-6:
+            logger.warning(
+                f"[_close_pending_episode] [{asset_id}] s_t ≈ s_{{t+1}} "
+                f"(diff={state_diff:.2e}) — episode may not produce useful gradient. "
+                f"Trade may have closed before a rank cycle completed."
+            )
+            # Still push — the buffer needs data; state_diff is stored with the episode so the trainer can down-weight it
         pnl_proxy = np.zeros(n_assets, dtype=np.float32)
         if asset_id in self.config.asset_symbols:
             pnl_proxy[self.config.asset_symbols.index(asset_id)] = float(reward)
             # Girsanov priority metadata [S2/GirsanovReplayBuffer]
             "volatility":     ep["volatility"],
             "td_error":       abs(reward),
+            # State diversity marker — used for importance weighting
+            "state_diff":     state_diff,
         })
         if asset_id in self.config.asset_symbols:
                On load, missing local files are pulled from the repo first.
                Requires HF_TOKEN env-var with write permission.
+    Fallback : Local disk  ./Ranker3/
                Used when HF_TOKEN is absent or upload/download fails.
                All saves still succeed locally even without network access.
     Usage
     ─────
+    mgr = RankerCheckpointManager(checkpoint_dir="./Ranker3",
                                    save_interval_seconds=1800)
     mgr.load(bridge)              # once after initialize()
     mgr.maybe_save(bridge)        # call frequently; respects save_interval_seconds
     def __init__(
         self,
+        checkpoint_dir:         str   = "./Ranker3",
         save_interval_seconds:  float = 1800.0,   # 30 minutes
     ):
         import pathlib
             )
             logger.info(
                 f"[RankerCheckpoint] ☁️  Uploaded {local_path.name} → "
+                f"hf://{self.HF_REPO_ID}/Ranker3/{local_path.name}"
             )
             return True
         except Exception as exc:
             )
             logger.info(
                 f"[RankerCheckpoint] ⬇️  Downloaded {filename} from "
+                f"hf://{self.HF_REPO_ID}/Ranker3/"
             )
             return True
         except Exception as exc:
     hub_ws_url:      str  = "ws://localhost:7860/ws/subscribe",
     enable_logging:  bool = True,
     shreve_config:   Optional[ShreveConfig] = None,
+    checkpoint_dir:  str  = "./Ranker3",
     resume:          bool = False,   # start fresh by default
 ) -> None:
     config = AssetRankerConfig(
                         help="[S6/S8] Trade horizon τ in seconds (default 60)")
     parser.add_argument("--martingale-epsilon", type=float, default=0.05,
                         help="[S7] Gate E martingale deviation threshold (default 0.05)")
+    parser.add_argument("--checkpoint-dir", default="./Ranker3",
+                        help="Directory for full-state checkpoints (default ./Ranker3)")
     parser.add_argument("--resume", action="store_true",
                         help="Resume training from the latest saved checkpoint (default: start fresh)")
     return parser.parse_args(filtered)