File size: 3,844 Bytes
a4b5ecb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Generated by Claude Code -- 2026-02-08
"""Model 2: Classical ML -- XGBoost on engineered CDM features.



Dual-head model:
  - Risk classifier (binary: high-risk vs safe)
  - Miss distance regressor (log-scale km)
"""

import pickle
import numpy as np
from pathlib import Path
from xgboost import XGBClassifier, XGBRegressor
from sklearn.preprocessing import StandardScaler


class XGBoostConjunctionModel:
    """Dual-head XGBoost model over engineered CDM features.

    One ``StandardScaler`` feeds two gradient-boosted heads:

    * ``risk_classifier`` -- binary classifier; ``predict``/``predict_risk``
      return the probability of the positive class (column 1 of
      ``predict_proba``).
    * ``miss_regressor`` -- regressor trained on log-scale miss distance;
      ``predict`` maps its output back with ``np.expm1``.
    """

    def __init__(self):
        self.scaler = StandardScaler()

        self.risk_classifier = XGBClassifier(
            n_estimators=500,
            max_depth=8,
            learning_rate=0.05,
            scale_pos_weight=50,  # severe class imbalance
            eval_metric="aucpr",
            tree_method="hist",
            random_state=42,
        )

        # NOTE(review): the targets passed to this head are already log-scale
        # (see ``fit``), and ``reg:squaredlogerror`` works on log1p of the
        # label as well -- confirm that this double log is intended.
        self.miss_regressor = XGBRegressor(
            n_estimators=500,
            max_depth=8,
            learning_rate=0.05,
            objective="reg:squaredlogerror",
            tree_method="hist",
            random_state=42,
        )

    def fit(
        self,
        X_train: np.ndarray,
        y_risk: np.ndarray,
        y_miss_log: np.ndarray,
        X_val: "np.ndarray | None" = None,
        y_risk_val: "np.ndarray | None" = None,
        y_miss_val: "np.ndarray | None" = None,
    ):
        """Fit the scaler and train both heads.

        Args:
            X_train: Training feature matrix; the scaler is fitted on it.
            y_risk: Binary risk labels for the classifier head.
            y_miss_log: Log-scale miss-distance targets for the regressor
                head. Values below ``1e-6`` are raised to ``1e-6``.
            X_val: Optional validation features. When given, they are scaled
                with the fitted scaler and used as the ``eval_set`` of both
                heads.
            y_risk_val: Validation risk labels; required with ``X_val``.
            y_miss_val: Validation miss-distance targets (same scale as
                ``y_miss_log``); required with ``X_val``. Floored at ``1e-6``
                like the training targets.

        Raises:
            ValueError: If ``X_val`` is given without both ``y_risk_val`` and
                ``y_miss_val``.
        """
        # Fail before any training happens: without this check a missing
        # validation target only surfaces after the classifier has already
        # been fitted (or as an opaque error from inside the library).
        if X_val is not None:
            missing = [
                name
                for name, value in (
                    ("y_risk_val", y_risk_val),
                    ("y_miss_val", y_miss_val),
                )
                if value is None
            ]
            if missing:
                raise ValueError(
                    "X_val was provided without " + " and ".join(missing)
                )

        # Scale features
        X_scaled = self.scaler.fit_transform(X_train)

        # Transform the validation features once; both heads share the result.
        X_val_scaled = None
        if X_val is not None:
            X_val_scaled = self.scaler.transform(X_val)

        # Risk classifier
        print(f"Training risk classifier (pos_rate={y_risk.mean():.4f}) ...")
        eval_set = None
        if X_val_scaled is not None:
            eval_set = [(X_val_scaled, y_risk_val)]
        self.risk_classifier.fit(
            X_scaled, y_risk,
            eval_set=eval_set,
            verbose=50,
        )

        # Miss distance regressor: floor the targets at 1e-6 so they are
        # strictly positive for the squared-log-error objective.
        y_miss_positive = np.clip(y_miss_log, 1e-6, None)
        print("Training miss distance regressor ...")
        eval_set_miss = None
        if X_val_scaled is not None:
            y_miss_val_pos = np.clip(y_miss_val, 1e-6, None)
            eval_set_miss = [(X_val_scaled, y_miss_val_pos)]
        self.miss_regressor.fit(
            X_scaled, y_miss_positive,
            eval_set=eval_set_miss,
            verbose=50,
        )

    def _risk_from_scaled(self, X_scaled: np.ndarray) -> np.ndarray:
        """Return the positive-class probability for already-scaled features."""
        return self.risk_classifier.predict_proba(X_scaled)[:, 1]

    def predict(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Predict risk probability and miss distance.

        Args:
            X: Unscaled feature matrix; it is transformed with the fitted
                scaler before being passed to either head.

        Returns:
            ``(risk_probs, miss_distance_km)`` where ``risk_probs`` is the
            positive-class probability and ``miss_distance_km`` is
            ``np.expm1`` of the regressor output.
        """
        X_scaled = self.scaler.transform(X)
        risk_probs = self._risk_from_scaled(X_scaled)
        miss_log = self.miss_regressor.predict(X_scaled)
        # Invert the log scale of the regression target.
        miss_km = np.expm1(miss_log)
        return risk_probs, miss_km

    def predict_risk(self, X: np.ndarray) -> np.ndarray:
        """Predict risk probability only (skips the regressor head)."""
        return self._risk_from_scaled(self.scaler.transform(X))

    def save(self, path: "Path | str"):
        """Pickle the scaler and both heads to ``path``.

        Missing parent directories are created. ``path`` may be a
        ``pathlib.Path`` or a plain string.
        """
        path = Path(path)  # tolerate str paths, as ``load`` already does
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "wb") as f:
            pickle.dump({
                "scaler": self.scaler,
                "risk_classifier": self.risk_classifier,
                "miss_regressor": self.miss_regressor,
            }, f)
        print(f"XGBoost model saved to {path}")

    @classmethod
    def load(cls, path: "Path | str") -> "XGBoostConjunctionModel":
        """Rebuild a model from a file written by ``save``.

        Raises:
            KeyError: If the pickled mapping lacks one of the expected keys
                (``scaler``, ``risk_classifier``, ``miss_regressor``).
        """
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only load model files that come from a trusted source.
        with open(path, "rb") as f:
            data = pickle.load(f)
        model = cls()
        model.scaler = data["scaler"]
        model.risk_classifier = data["risk_classifier"]
        model.miss_regressor = data["miss_regressor"]
        return model