# Scraped page header (not part of the module): Spaces -- Sleeping; File size: 3,844 Bytes; a4b5ecb
# Generated by Claude Code -- 2026-02-08
"""Model 2: Classical ML -- XGBoost on engineered CDM features.
Dual-head model:
- Risk classifier (binary: high-risk vs safe)
- Miss distance regressor (log-scale km)
"""
import pickle
import numpy as np
from pathlib import Path
from xgboost import XGBClassifier, XGBRegressor
from sklearn.preprocessing import StandardScaler
class XGBoostConjunctionModel:
    """XGBoost with engineered CDM features.

    Two independent gradient-boosted heads share one ``StandardScaler``:

    - ``risk_classifier``: binary classifier (high-risk vs safe).
    - ``miss_regressor``: regressor trained on log-scale miss distance.

    Args:
        scale_pos_weight: Weight applied to the positive class of the risk
            classifier to counter class imbalance. Defaults to 50.
        random_state: Seed passed to both XGBoost estimators. Defaults to 42.
    """

    # Keys of the pickled payload; each one is also the attribute name that
    # holds the corresponding component on the instance.
    _COMPONENT_KEYS = ("scaler", "risk_classifier", "miss_regressor")

    def __init__(self, *, scale_pos_weight: float = 50, random_state: int = 42):
        self.scaler = StandardScaler()
        self.risk_classifier = XGBClassifier(
            n_estimators=500,
            max_depth=8,
            learning_rate=0.05,
            scale_pos_weight=scale_pos_weight,  # severe class imbalance
            eval_metric="aucpr",
            tree_method="hist",
            random_state=random_state,
        )
        self.miss_regressor = XGBRegressor(
            n_estimators=500,
            max_depth=8,
            learning_rate=0.05,
            objective="reg:squaredlogerror",
            tree_method="hist",
            random_state=random_state,
        )

    def fit(
        self,
        X_train: np.ndarray,
        y_risk: np.ndarray,
        y_miss_log: np.ndarray,
        X_val: "np.ndarray | None" = None,
        y_risk_val: "np.ndarray | None" = None,
        y_miss_val: "np.ndarray | None" = None,
    ):
        """Train both heads.

        The scaler is fitted on ``X_train`` only; validation features are
        transformed with the already-fitted scaler.

        Args:
            X_train: Training feature matrix.
            y_risk: Binary risk labels for ``X_train``.
            y_miss_log: Log-scale miss-distance targets for ``X_train``.
            X_val: Optional validation features. When given, both
                ``y_risk_val`` and ``y_miss_val`` are required.
            y_risk_val: Validation risk labels.
            y_miss_val: Validation miss-distance targets (same scale as
                ``y_miss_log``).

        Raises:
            ValueError: If ``X_val`` is given without both validation targets.
        """
        # Fail fast: otherwise the error only surfaces after the classifier
        # has already been trained.
        if X_val is not None and (y_risk_val is None or y_miss_val is None):
            raise ValueError(
                "X_val was provided without both y_risk_val and y_miss_val"
            )

        # Scale features (validation set is transformed once and reused)
        X_scaled = self.scaler.fit_transform(X_train)
        X_val_scaled = None if X_val is None else self.scaler.transform(X_val)

        # Risk classifier
        print(f"Training risk classifier (pos_rate={np.mean(y_risk):.4f}) ...")
        eval_set = None
        if X_val_scaled is not None:
            eval_set = [(X_val_scaled, y_risk_val)]
        self.risk_classifier.fit(
            X_scaled, y_risk,
            eval_set=eval_set,
            verbose=50,
        )

        # Miss distance regressor. XGBoost's reg:squaredlogerror objective
        # requires labels greater than -1; clipping to a small positive floor
        # keeps every target safely inside that domain.
        # NOTE(review): the targets are already log-scale and this objective
        # applies its own log1p inside the loss -- confirm that is intended.
        y_miss_positive = np.clip(y_miss_log, 1e-6, None)
        print("Training miss distance regressor ...")
        eval_set_miss = None
        if X_val_scaled is not None:
            y_miss_val_pos = np.clip(y_miss_val, 1e-6, None)
            eval_set_miss = [(X_val_scaled, y_miss_val_pos)]
        self.miss_regressor.fit(
            X_scaled, y_miss_positive,
            eval_set=eval_set_miss,
            verbose=50,
        )

    def _risk_from_scaled(self, X_scaled: np.ndarray) -> np.ndarray:
        """Return the positive-class probability for already-scaled features."""
        return self.risk_classifier.predict_proba(X_scaled)[:, 1]

    def predict(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Predict risk probability and miss distance.

        Args:
            X: Unscaled feature matrix.

        Returns:
            ``(risk_probs, miss_distance_km)``. The regressor output is mapped
            back with ``expm1``, which presumes the training targets were
            ``log1p(km)`` -- verify against the feature pipeline.
        """
        X_scaled = self.scaler.transform(X)
        risk_probs = self._risk_from_scaled(X_scaled)
        miss_km = np.expm1(self.miss_regressor.predict(X_scaled))
        return risk_probs, miss_km

    def predict_risk(self, X: np.ndarray) -> np.ndarray:
        """Predict risk probability only."""
        return self._risk_from_scaled(self.scaler.transform(X))

    def save(self, path: "Path | str"):
        """Save all components to a single pickle file.

        Args:
            path: Destination file (``Path`` or ``str``). Missing parent
                directories are created.
        """
        path = Path(path)  # accept plain strings as well as Path objects
        path.parent.mkdir(parents=True, exist_ok=True)
        payload = {key: getattr(self, key) for key in self._COMPONENT_KEYS}
        with open(path, "wb") as f:
            pickle.dump(payload, f)
        print(f"XGBoost model saved to {path}")

    @classmethod
    def load(cls, path: "Path | str") -> "XGBoostConjunctionModel":
        """Load all components from a file written by :meth:`save`.

        SECURITY: this uses ``pickle.load``, which can execute arbitrary code
        embedded in the file. Only load model files from a trusted source.

        Args:
            path: Pickle file produced by :meth:`save`.

        Returns:
            A model whose scaler and both estimators come from the file.

        Raises:
            KeyError: If the file lacks one or more expected components.
        """
        with open(path, "rb") as f:
            data = pickle.load(f)

        # Report every missing component at once, before building the
        # default estimators that would be thrown away anyway.
        missing = [key for key in cls._COMPONENT_KEYS if key not in data]
        if missing:
            raise KeyError(f"{path} is missing model components: {missing}")

        model = cls()
        for key in cls._COMPONENT_KEYS:
            setattr(model, key, data[key])
        return model
|