# Generated by Claude Code -- 2026-02-08
"""Model 2: Classical ML -- XGBoost on engineered CDM features.

Dual-head model:
- Risk classifier (binary: high-risk vs safe)
- Miss distance regressor (log-scale km)
"""

import pickle
from pathlib import Path
from typing import Optional, Union

import numpy as np
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier, XGBRegressor


class XGBoostConjunctionModel:
    """XGBoost with engineered CDM features.

    Holds one shared ``StandardScaler`` and two independent XGBoost heads
    that are trained on the same scaled feature matrix:

    * ``risk_classifier`` -- binary classifier; ``predict_proba`` column 1
      is reported as the risk probability.
    * ``miss_regressor`` -- regressor trained on log-scale miss distance.
    """

    def __init__(self):
        """Create an unfitted scaler and both unfitted XGBoost heads."""
        self.scaler = StandardScaler()
        self.risk_classifier = XGBClassifier(
            n_estimators=500,
            max_depth=8,
            learning_rate=0.05,
            scale_pos_weight=50,  # severe class imbalance
            eval_metric="aucpr",
            tree_method="hist",
            random_state=42,
        )
        self.miss_regressor = XGBRegressor(
            n_estimators=500,
            max_depth=8,
            learning_rate=0.05,
            objective="reg:squaredlogerror",
            tree_method="hist",
            random_state=42,
        )

    def fit(
        self,
        X_train: np.ndarray,
        y_risk: np.ndarray,
        y_miss_log: np.ndarray,
        X_val: Optional[np.ndarray] = None,
        y_risk_val: Optional[np.ndarray] = None,
        y_miss_val: Optional[np.ndarray] = None,
    ):
        """Train both heads.

        The scaler is fitted on ``X_train`` only; validation features are
        transformed with the already-fitted scaler.

        Args:
            X_train: Training feature matrix.
            y_risk: Binary risk labels for ``X_train``.
            y_miss_log: Log-scale miss-distance targets for ``X_train``.
                NOTE(review): ``predict`` inverts with ``expm1``, so these
                are presumably ``log1p(miss_km)`` -- confirm with the
                feature pipeline.
            X_val: Optional validation feature matrix.
            y_risk_val: Optional validation risk labels. The classifier is
                evaluated during training only when both ``X_val`` and
                ``y_risk_val`` are given.
            y_miss_val: Optional validation miss-distance targets (same
                scale as ``y_miss_log``). The regressor is evaluated during
                training only when both ``X_val`` and ``y_miss_val`` are
                given.
        """
        # Scale features; transform the validation matrix once and reuse it
        # for both heads.
        X_scaled = self.scaler.fit_transform(X_train)
        X_val_scaled = None if X_val is None else self.scaler.transform(X_val)

        # Risk classifier
        print(f"Training risk classifier (pos_rate={y_risk.mean():.4f}) ...")
        eval_set = None
        if X_val_scaled is not None and y_risk_val is not None:
            eval_set = [(X_val_scaled, y_risk_val)]
        self.risk_classifier.fit(
            X_scaled,
            y_risk,
            eval_set=eval_set,
            verbose=50,
        )

        # Miss distance regressor. reg:squaredlogerror requires labels > -1;
        # clipping at 1e-6 keeps every target strictly positive.
        y_miss_positive = np.clip(y_miss_log, 1e-6, None)
        print("Training miss distance regressor ...")
        eval_set_miss = None
        if X_val_scaled is not None and y_miss_val is not None:
            y_miss_val_pos = np.clip(y_miss_val, 1e-6, None)
            eval_set_miss = [(X_val_scaled, y_miss_val_pos)]
        self.miss_regressor.fit(
            X_scaled,
            y_miss_positive,
            eval_set=eval_set_miss,
            verbose=50,
        )

    def predict(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Predict risk probability and miss distance.

        Args:
            X: Feature matrix in the same column layout used for ``fit``.

        Returns:
            (risk_probs, miss_distance_km): the positive-class probability
            from the classifier, and the regressor output mapped back from
            log scale with ``expm1``.
        """
        X_scaled = self.scaler.transform(X)
        risk_probs = self.risk_classifier.predict_proba(X_scaled)[:, 1]
        miss_log = self.miss_regressor.predict(X_scaled)
        miss_km = np.expm1(miss_log)
        return risk_probs, miss_km

    def predict_risk(self, X: np.ndarray) -> np.ndarray:
        """Predict risk probability only (skips the miss-distance head)."""
        X_scaled = self.scaler.transform(X)
        return self.risk_classifier.predict_proba(X_scaled)[:, 1]

    def save(self, path: Union[str, Path]):
        """Save all components to a single pickle file.

        Args:
            path: Destination file. Missing parent directories are created.
                A plain string is accepted as well as a ``Path``.
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "wb") as f:
            pickle.dump(
                {
                    "scaler": self.scaler,
                    "risk_classifier": self.risk_classifier,
                    "miss_regressor": self.miss_regressor,
                },
                f,
            )
        print(f"XGBoost model saved to {path}")

    @classmethod
    def load(cls, path: Union[str, Path]) -> "XGBoostConjunctionModel":
        """Load all components from a file written by ``save``.

        Args:
            path: Pickle file containing the keys ``"scaler"``,
                ``"risk_classifier"`` and ``"miss_regressor"``.

        Returns:
            A model instance whose scaler and both heads are replaced by
            the unpickled objects.

        Raises:
            KeyError: If the pickled dict lacks one of the expected keys.
        """
        # NOTE: unpickling can execute arbitrary code. Only load model files
        # that come from a trusted source.
        with open(path, "rb") as f:
            data = pickle.load(f)
        model = cls()
        model.scaler = data["scaler"]
        model.risk_classifier = data["risk_classifier"]
        model.miss_regressor = data["miss_regressor"]
        return model