Spaces:
Sleeping
Sleeping
# Generated by Claude Code -- 2026-02-08
"""Model 2: Classical ML -- XGBoost on engineered CDM features.

Dual-head model:
- Risk classifier (binary: high-risk vs safe)
- Miss distance regressor (log-scale km)
"""
| import pickle | |
| import numpy as np | |
| from pathlib import Path | |
| from xgboost import XGBClassifier, XGBRegressor | |
| from sklearn.preprocessing import StandardScaler | |
class XGBoostConjunctionModel:
    """Dual-head XGBoost model on engineered CDM features.

    One head is a binary risk classifier (``XGBClassifier``), the other a
    miss-distance regressor (``XGBRegressor``) trained on log-scale targets.
    Both heads consume features standardized by a single shared
    ``StandardScaler`` that is fitted in :meth:`fit`.
    """

    def __init__(self):
        """Create the scaler and the two (untrained) estimators."""
        self.scaler = StandardScaler()
        self.risk_classifier = XGBClassifier(
            n_estimators=500,
            max_depth=8,
            learning_rate=0.05,
            scale_pos_weight=50,  # severe class imbalance
            eval_metric="aucpr",
            tree_method="hist",
            random_state=42,
        )
        self.miss_regressor = XGBRegressor(
            n_estimators=500,
            max_depth=8,
            learning_rate=0.05,
            objective="reg:squaredlogerror",
            tree_method="hist",
            random_state=42,
        )

    def fit(
        self,
        X_train: np.ndarray,
        y_risk: np.ndarray,
        y_miss_log: np.ndarray,
        X_val: "np.ndarray | None" = None,
        y_risk_val: "np.ndarray | None" = None,
        y_miss_val: "np.ndarray | None" = None,
    ):
        """Train both heads.

        Args:
            X_train: Training feature matrix; the scaler is fitted on it.
            y_risk: Binary risk labels for the classifier head.
            y_miss_log: Log-scale miss-distance targets for the regressor head.
            X_val: Optional validation features. They are scaled with the
                scaler fitted on ``X_train``.
            y_risk_val: Optional validation labels for the classifier head.
            y_miss_val: Optional validation targets for the regressor head.

        An evaluation set is passed to a head only when ``X_val`` and that
        head's validation targets are both provided, so either head can be
        validated independently.
        """
        # Scale features
        X_scaled = self.scaler.fit_transform(X_train)
        # Transform the validation features once and reuse them for both heads.
        X_val_scaled = None if X_val is None else self.scaler.transform(X_val)

        # Risk classifier
        print(f"Training risk classifier (pos_rate={np.mean(y_risk):.4f}) ...")
        eval_set = None
        if X_val_scaled is not None and y_risk_val is not None:
            eval_set = [(X_val_scaled, y_risk_val)]
        self.risk_classifier.fit(
            X_scaled, y_risk,
            eval_set=eval_set,
            verbose=50,
        )

        # Miss distance regressor: targets are floored at 1e-6 so the
        # squared-log-error objective never sees non-positive values.
        y_miss_positive = np.clip(y_miss_log, 1e-6, None)
        print("Training miss distance regressor ...")
        eval_set_miss = None
        if X_val_scaled is not None and y_miss_val is not None:
            y_miss_val_pos = np.clip(y_miss_val, 1e-6, None)
            eval_set_miss = [(X_val_scaled, y_miss_val_pos)]
        self.miss_regressor.fit(
            X_scaled, y_miss_positive,
            eval_set=eval_set_miss,
            verbose=50,
        )

    def predict(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Predict risk probability and miss distance.

        Args:
            X: Feature matrix in the same (unscaled) layout used for training.

        Returns:
            ``(risk_probs, miss_distance_km)`` where ``risk_probs`` is the
            positive-class column of ``predict_proba`` and ``miss_distance_km``
            is ``expm1`` of the regressor output.
        """
        X_scaled = self.scaler.transform(X)
        risk_probs = self.risk_classifier.predict_proba(X_scaled)[:, 1]
        miss_log = self.miss_regressor.predict(X_scaled)
        # NOTE(review): expm1 presumes the training targets were log1p(km) --
        # confirm against the code that builds y_miss_log.
        miss_km = np.expm1(miss_log)
        return risk_probs, miss_km

    def predict_risk(self, X: np.ndarray) -> np.ndarray:
        """Predict risk probability only (positive-class column)."""
        X_scaled = self.scaler.transform(X)
        return self.risk_classifier.predict_proba(X_scaled)[:, 1]

    def save(self, path: Path):
        """Pickle the scaler and both estimators to ``path``.

        Args:
            path: Destination file (``Path`` or ``str``). Missing parent
                directories are created.
        """
        path = Path(path)  # also accept plain strings
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "wb") as f:
            pickle.dump({
                "scaler": self.scaler,
                "risk_classifier": self.risk_classifier,
                "miss_regressor": self.miss_regressor,
            }, f)
        print(f"XGBoost model saved to {path}")

    @classmethod
    def load(cls, path: Path) -> "XGBoostConjunctionModel":
        """Load a model previously written by :meth:`save`.

        Args:
            path: File produced by :meth:`save`.

        Returns:
            A new instance whose scaler and estimators come from the file.

        Raises:
            KeyError: If the pickled dict lacks one of the expected entries.
        """
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only load model files that come from a trusted source.
        with open(path, "rb") as f:
            data = pickle.load(f)
        model = cls()
        model.scaler = data["scaler"]
        model.risk_classifier = data["risk_classifier"]
        model.miss_regressor = data["miss_regressor"]
        return model