Spaces:

GoshawkVortexAI
/

Goshawk_Hedge_Pro

Sleeping

App Files Files Community

GoshawkVortexAI committed on Feb 27

Commit

39bdaba

verified ·

1 Parent(s): f952974

Create ml_filter.py

Browse files

Files changed (1) hide show

ml_filter.py +234 -0

ml_filter.py ADDED Viewed

	@@ -0,0 +1,234 @@

+"""
+ml_filter.py — Production inference wrapper for the trained probability filter.
+Integration point in the pipeline:
+    Rule Engine Output
+         │
+         ▼
+    build_feature_dict()  ← feature_builder.py
+         │
+         ▼
+    TradeFilter.predict()  ← THIS MODULE
+         │
+         ├─► prob < threshold  →  SKIP (no trade)
+         │
+         └─► prob >= threshold →  risk_engine.evaluate_risk()
+                                       │
+                                       ▼
+                                  Position sizing → Execution
+Usage:
+    filter = TradeFilter.load()
+    result = filter.predict(regime_data, volume_data, scores)
+    if result.approved:
+        risk = evaluate_risk(..., regime_confidence=result.probability)
+"""
+import json
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, Any, Optional
+import numpy as np
+from ml_config import MODEL_PATH, THRESHOLD_PATH, DEFAULT_PROB_THRESHOLD, FEATURE_COLUMNS
+from feature_builder import build_feature_dict, feature_dict_to_matrix, validate_features
+logger = logging.getLogger(__name__)
+@dataclass
+class FilterResult:
+    """Structured output from the probability filter."""
+    probability: float        # P(win) from the model [0, 1]
+    threshold: float          # current threshold
+    approved: bool            # probability >= threshold
+    feature_dict: Dict        # raw features (for logging/debugging)
+    reject_reason: str = ""   # why rejected, if applicable
+    def __str__(self) -> str:
+        status = "APPROVED" if self.approved else f"REJECTED ({self.reject_reason})"
+        return f"FilterResult: p={self.probability:.4f} thresh={self.threshold:.4f} → {status}"
+class TradeFilter:
+    """
+    Singleton-friendly inference wrapper.
+    Thread-safe for read operations (predict). Not safe for concurrent reloads.
+    """
+    def __init__(self, backend, threshold: float):
+        self._backend   = backend
+        self._threshold = threshold
+        self._n_calls   = 0
+        self._n_approved = 0
+    @classmethod
+    def load(cls, model_path: Path = MODEL_PATH, threshold_path: Path = THRESHOLD_PATH) -> "TradeFilter":
+        """
+        Load model and threshold from disk.
+        Falls back to DEFAULT_PROB_THRESHOLD if threshold file missing.
+        Returns None if model file doesn't exist (not yet trained).
+        """
+        import joblib
+        if not model_path.exists():
+            logger.warning(
+                f"Model file not found at {model_path}. "
+                f"Run train.py first. TradeFilter will return None from predict()."
+            )
+            return None
+        backend = joblib.load(model_path)
+        logger.info(f"Loaded model from {model_path}")
+        threshold = DEFAULT_PROB_THRESHOLD
+        if threshold_path.exists():
+            with open(threshold_path) as f:
+                data = json.load(f)
+            threshold = float(data.get("threshold", DEFAULT_PROB_THRESHOLD))
+            logger.info(f"Loaded threshold={threshold:.4f} from {threshold_path}")
+        else:
+            logger.warning(f"Threshold file not found. Using default={threshold:.4f}")
+        return cls(backend=backend, threshold=threshold)
+    @classmethod
+    def load_or_none(cls) -> Optional["TradeFilter"]:
+        """Convenience: returns None if model not yet trained (no crash)."""
+        try:
+            return cls.load()
+        except Exception as e:
+            logger.warning(f"Could not load TradeFilter: {e}")
+            return None
+    def predict(
+        self,
+        regime_data: Dict[str, Any],
+        volume_data: Dict[str, Any],
+        scores: Dict[str, Any],
+    ) -> FilterResult:
+        """
+        Run the full inference pipeline for a single setup.
+        Args:
+            regime_data: Output of detect_regime()
+            volume_data: Output of analyze_volume()
+            scores: Output of score_token()
+        Returns:
+            FilterResult with probability and approval decision
+        """
+        self._n_calls += 1
+        # Build and validate feature vector
+        try:
+            feat = build_feature_dict(regime_data, volume_data, scores)
+        except KeyError as e:
+            logger.error(f"Feature construction failed: {e}")
+            return FilterResult(
+                probability=0.0,
+                threshold=self._threshold,
+                approved=False,
+                feature_dict={},
+                reject_reason=f"FEATURE_ERROR: {e}",
+            )
+        if not validate_features(feat):
+            return FilterResult(
+                probability=0.0,
+                threshold=self._threshold,
+                approved=False,
+                feature_dict=feat,
+                reject_reason="INVALID_FEATURES (NaN or inf detected)",
+            )
+        X = feature_dict_to_matrix(feat)
+        try:
+            prob = float(self._backend.predict_win_prob(X)[0])
+        except Exception as e:
+            logger.error(f"Model inference error: {e}")
+            return FilterResult(
+                probability=0.0,
+                threshold=self._threshold,
+                approved=False,
+                feature_dict=feat,
+                reject_reason=f"INFERENCE_ERROR: {e}",
+            )
+        approved = prob >= self._threshold
+        if approved:
+            self._n_approved += 1
+        reject_reason = "" if approved else f"prob={prob:.4f} < threshold={self._threshold:.4f}"
+        return FilterResult(
+            probability=prob,
+            threshold=self._threshold,
+            approved=approved,
+            feature_dict=feat,
+            reject_reason=reject_reason,
+        )
+    def predict_batch(
+        self,
+        feature_dicts: list,
+    ) -> np.ndarray:
+        """
+        Batch inference for 100+ symbols simultaneously.
+        Returns array of probabilities in the same order as feature_dicts.
+        Much faster than calling predict() in a loop.
+        """
+        valid_rows = []
+        valid_indices = []
+        for i, feat in enumerate(feature_dicts):
+            if validate_features(feat):
+                valid_rows.append([feat[k] for k in FEATURE_COLUMNS])
+                valid_indices.append(i)
+        probs = np.zeros(len(feature_dicts), dtype=np.float64)
+        if valid_rows:
+            X = np.array(valid_rows, dtype=np.float64)
+            batch_probs = self._backend.predict_win_prob(X)
+            for j, orig_idx in enumerate(valid_indices):
+                probs[orig_idx] = batch_probs[j]
+        return probs
+    def predict_trade_probability(self, feature_dict: Dict[str, float]) -> float:
+        """
+        Simple scalar interface: feature_dict → float.
+        Matches the interface requested in the spec.
+        Returns 0.0 on any error.
+        """
+        if not validate_features(feature_dict):
+            return 0.0
+        X = feature_dict_to_matrix(feature_dict)
+        try:
+            return float(self._backend.predict_win_prob(X)[0])
+        except Exception:
+            return 0.0
+    @property
+    def threshold(self) -> float:
+        return self._threshold
+    @threshold.setter
+    def threshold(self, value: float):
+        if not 0.0 < value < 1.0:
+            raise ValueError(f"Threshold must be in (0, 1), got {value}")
+        self._threshold = value
+    def stats(self) -> dict:
+        approval_rate = self._n_approved / self._n_calls if self._n_calls > 0 else 0.0
+        return {
+            "n_calls": self._n_calls,
+            "n_approved": self._n_approved,
+            "approval_rate": round(approval_rate, 4),
+            "threshold": self._threshold,
+        }