# bhanug2026 — Initial commit (47c6cfd)
"""
src/models/train_classifier.py
================================
Trains and evaluates disruption classification models:
- Logistic Regression
- Random Forest
- Gradient Boosting (XGBoost/sklearn)
- ANN (MLPClassifier with sigmoid-equivalent)
Selects best model by AUC on held-out test set.
Saves model artifacts, metrics, and sensitivity analysis.
"""
# Silence warnings globally so third-party libraries don't flood training logs.
# NOTE(review): this also hides sklearn convergence warnings — consider
# narrowing the filter if those matter for debugging.
import warnings
warnings.filterwarnings("ignore")
import json
import numpy as np
import pandas as pd
import matplotlib
# Headless backend — must be selected before pyplot is imported so figures
# can be rendered/saved in environments with no display.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from pathlib import Path
from datetime import datetime
from src.utils.logger import get_logger
from src.utils.io_utils import save_json
from src.models.model_selector import (
    train_test_split_time, sensitivity_analysis, save_model, save_metrics,
    walk_forward_cv
)
from config.settings import (
    PROCESSED_DIR, FIGURES_DIR, METRICS_DIR, RANDOM_SEED, TEST_SIZE,
    CLASSIFIER_FEATURES, CLASSIFIER_TARGET
)
# Module-level logger named after this module.
logger = get_logger(__name__)
# ── Import ML libraries (graceful degradation) ────────────────────────────────
# SKLEARN_OK gates every sklearn-specific code path in this file; when the
# import fails, the numpy fallback classifiers defined below are used instead.
try:
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
    from sklearn.neural_network import MLPClassifier
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import (roc_auc_score, roc_curve, precision_score,
                                 recall_score, f1_score, confusion_matrix)
    from sklearn.pipeline import Pipeline
    SKLEARN_OK = True
except ImportError:
    logger.warning("scikit-learn not installed — using numpy fallback classifiers")
    SKLEARN_OK = False
# ── Numpy Fallback Implementations ───────────────────────────────────────────
class _NumpyLogisticRegression:
"""Pure numpy logistic regression (SGD, for offline environments)."""
def __init__(self, lr=0.01, n_iter=500, seed=42):
self.lr = lr; self.n_iter = n_iter; self.seed = seed
self.w = None; self.b = 0.0
def _sigmoid(self, z): return 1 / (1 + np.exp(-np.clip(z, -50, 50)))
def fit(self, X, y):
rng = np.random.default_rng(self.seed)
self.w = rng.normal(0, 0.01, X.shape[1])
for _ in range(self.n_iter):
z = X @ self.w + self.b
pred = self._sigmoid(z)
err = pred - y
self.w -= self.lr * X.T @ err / len(y)
self.b -= self.lr * err.mean()
return self
def predict_proba(self, X):
p = self._sigmoid(X @ self.w + self.b)
return np.column_stack([1 - p, p])
def predict(self, X):
return (self._sigmoid(X @ self.w + self.b) >= 0.5).astype(int)
def get_params(self, **kw): return {"lr": self.lr, "n_iter": self.n_iter}
class _NumpyRandomForest:
"""Simplified Random Forest using numpy (for offline environments)."""
def __init__(self, n_estimators=50, max_depth=5, seed=42):
self.n_estimators = n_estimators
self.max_depth = max_depth
self.seed = seed
self.trees = []
self.feature_importances_ = None
def _build_tree(self, X, y, depth=0):
if depth >= self.max_depth or len(set(y)) == 1 or len(y) < 5:
return {"leaf": True, "value": np.mean(y)}
rng = np.random.default_rng(self.seed + depth)
n_feat = max(1, int(np.sqrt(X.shape[1])))
feat_idx = rng.choice(X.shape[1], n_feat, replace=False)
best_feat, best_thresh, best_score = 0, 0, float("inf")
for fi in feat_idx:
thresholds = np.percentile(X[:, fi], [25, 50, 75])
for t in thresholds:
left_y = y[X[:, fi] <= t]
right_y = y[X[:, fi] > t]
if len(left_y) == 0 or len(right_y) == 0:
continue
score = (len(left_y) * (1 - (left_y.mean()**2 + (1-left_y.mean())**2)) +
len(right_y) * (1 - (right_y.mean()**2 + (1-right_y.mean())**2)))
if score < best_score:
best_score, best_feat, best_thresh = score, fi, t
left_mask = X[:, best_feat] <= best_thresh
return {
"leaf": False, "feat": best_feat, "thresh": best_thresh,
"left": self._build_tree(X[left_mask], y[left_mask], depth + 1),
"right": self._build_tree(X[~left_mask], y[~left_mask], depth + 1),
}
def _predict_tree(self, node, x):
if node["leaf"]:
return node["value"]
if x[node["feat"]] <= node["thresh"]:
return self._predict_tree(node["left"], x)
return self._predict_tree(node["right"], x)
def fit(self, X, y):
rng = np.random.default_rng(self.seed)
self.trees = []
feat_counts = np.zeros(X.shape[1])
for i in range(self.n_estimators):
idx = rng.choice(len(X), len(X), replace=True)
tree = self._build_tree(X[idx], y[idx])
self.trees.append(tree)
self.feature_importances_ = np.ones(X.shape[1]) / X.shape[1]
return self
def predict_proba(self, X):
preds = np.array([[self._predict_tree(t, x) for t in self.trees]
for x in X]).mean(axis=1)
preds = np.clip(preds, 0, 1)
return np.column_stack([1 - preds, preds])
def predict(self, X):
return (self.predict_proba(X)[:, 1] >= 0.5).astype(int)
def get_params(self, **kw): return {"n_estimators": self.n_estimators}
class _NumpyANN:
"""Simple 2-layer ANN for classification (sigmoid activation)."""
def __init__(self, hidden=(16, 8), lr=0.001, n_iter=200, seed=42):
self.hidden = hidden; self.lr = lr; self.n_iter = n_iter; self.seed = seed
self.weights = []; self.biases = []
def _sigmoid(self, z): return 1 / (1 + np.exp(-np.clip(z, -50, 50)))
def _dsigmoid(self, z): s = self._sigmoid(z); return s * (1 - s)
def fit(self, X, y):
rng = np.random.default_rng(self.seed)
layers = [X.shape[1]] + list(self.hidden) + [1]
self.weights = [rng.normal(0, np.sqrt(2/layers[i]), (layers[i], layers[i+1]))
for i in range(len(layers)-1)]
self.biases = [np.zeros(layers[i+1]) for i in range(len(layers)-1)]
y_ = y.reshape(-1, 1).astype(float)
for epoch in range(self.n_iter):
# Forward
acts = [X]
for W, b in zip(self.weights, self.biases):
acts.append(self._sigmoid(acts[-1] @ W + b))
# Backward
delta = (acts[-1] - y_) * self._dsigmoid(acts[-1])
for i in reversed(range(len(self.weights))):
grad_w = acts[i].T @ delta / len(y_)
grad_b = delta.mean(axis=0)
if i > 0:
delta = delta @ self.weights[i].T * self._dsigmoid(acts[i])
self.weights[i] -= self.lr * grad_w
self.biases[i] -= self.lr * grad_b
return self
def predict_proba(self, X):
a = X
for W, b in zip(self.weights, self.biases):
a = self._sigmoid(a @ W + b)
p = a.flatten()
return np.column_stack([1 - p, p])
def predict(self, X):
return (self.predict_proba(X)[:, 1] >= 0.5).astype(int)
def get_params(self, **kw): return {"hidden": self.hidden, "lr": self.lr}
# ── Metrics Helpers ───────────────────────────────────────────────────────────
def _compute_metrics(model, X_test, y_test, model_name: str) -> dict:
    """Compute classification metrics for a trained model.

    Parameters
    ----------
    model : fitted estimator exposing ``predict`` and ``predict_proba``.
    X_test, y_test : held-out feature matrix and binary labels (numpy arrays).
    model_name : label stored under the ``"model"`` key of the result.

    Returns
    -------
    dict with keys ``model``/``auc``/``precision``/``recall``/``f1``/``fpr``/
    ``tpr``. On any failure a minimal ``{"model": ..., "auc": 0.0}`` stub is
    returned instead of raising, so one broken model cannot abort the run.
    """
    try:
        proba = model.predict_proba(X_test)[:, 1]  # P(class 1) per sample
        y_pred = model.predict(X_test)
        if SKLEARN_OK:
            fpr, tpr, _ = roc_curve(y_test, proba)
            auc = roc_auc_score(y_test, proba)
        else:
            # AUC via Mann-Whitney U (numerically stable, no sklearn needed)
            pos_scores = proba[y_test == 1]
            neg_scores = proba[y_test == 0]
            n_pos, n_neg = len(pos_scores), len(neg_scores)
            if n_pos == 0 or n_neg == 0:
                auc = 0.5  # degenerate single-class test set: chance level
            else:
                # U statistic: rank-based.
                # NOTE(review): double-argsort yields ordinal ranks, so tied
                # scores get arbitrary order instead of midranks — the AUC is
                # slightly biased when many scores tie; confirm acceptable.
                all_scores = np.concatenate([pos_scores, neg_scores])
                ranks = np.argsort(np.argsort(all_scores)) + 1
                pos_rank_sum = ranks[:n_pos].sum()
                auc = float((pos_rank_sum - n_pos*(n_pos+1)/2) / (n_pos * n_neg))
                auc = np.clip(auc, 0, 1)
            # Build ROC for plotting (approximate: 100 percentile thresholds,
            # swept high→low, with (0,0) and (1,1) endpoints appended)
            thresholds = np.percentile(proba, np.linspace(0, 100, 100))[::-1]
            fprs, tprs = [0.0], [0.0]
            n_p, n_n = (y_test==1).sum(), (y_test==0).sum()
            for t in thresholds:
                p = (proba >= t).astype(int)
                # Epsilon in the denominators guards an empty class.
                fprs.append(((p==1)&(y_test==0)).sum() / (n_n+1e-9))
                tprs.append(((p==1)&(y_test==1)).sum() / (n_p+1e-9))
            fprs.append(1.0); tprs.append(1.0)
            fpr = np.array(fprs); tpr = np.array(tprs)
        if SKLEARN_OK:
            prec = precision_score(y_test, y_pred, zero_division=0)
            rec = recall_score(y_test, y_pred, zero_division=0)
            f1 = f1_score(y_test, y_pred, zero_division=0)
        else:
            # Manual precision/recall/F1 with epsilon to avoid division by zero.
            tp = ((y_pred == 1) & (y_test == 1)).sum()
            fp = ((y_pred == 1) & (y_test == 0)).sum()
            fn = ((y_pred == 0) & (y_test == 1)).sum()
            prec = tp / (tp + fp + 1e-9)
            rec = tp / (tp + fn + 1e-9)
            f1 = 2 * prec * rec / (prec + rec + 1e-9)
        return {
            "model": model_name, "auc": round(float(auc), 4),
            "precision": round(float(prec), 4),
            "recall": round(float(rec), 4), "f1": round(float(f1), 4),
            "fpr": fpr.tolist(), "tpr": tpr.tolist(),
        }
    except Exception as e:
        logger.error("Metrics error for %s: %s", model_name, e)
        # Degrade gracefully: callers treat auc=0.0 as "model unusable".
        return {"model": model_name, "auc": 0.0}
# ── Main Training Function ────────────────────────────────────────────────────
def train_classifier() -> dict:
    """
    Train all candidate classifiers, select best by AUC, save artifacts.

    Pipeline: load processed features → time-ordered train/test split →
    z-score normalisation (train statistics only) → train candidates →
    select best by 0.6*AUC + 0.4*F1 → optional GridSearchCV tuning →
    sensitivity / SHAP / walk-forward diagnostics → persist model, scaler,
    and metrics.

    Returns
    -------
    dict with best_model_name, all metrics, and selected model path
    """
    logger.info("=" * 60)
    logger.info("Training disruption classification models")
    logger.info("=" * 60)
    # 1. Load data
    df = pd.read_csv(PROCESSED_DIR / "features_classification.csv", low_memory=False)
    logger.info("Loaded %d rows (raw)", len(df))
    # Drop rows where the target is NaN (can appear from mis-joined pipeline output)
    df = df.dropna(subset=[CLASSIFIER_TARGET]).reset_index(drop=True)
    logger.info("Loaded %d rows after dropping NaN targets", len(df))
    # 2. Prepare features
    X = df[CLASSIFIER_FEATURES].fillna(0).values.astype(float)
    y = df[CLASSIFIER_TARGET].fillna(0).values.astype(int)
    # Time-ordered split: earliest rows train, latest rows test. No shuffling,
    # so no lookahead leakage — assumes the CSV is chronologically sorted
    # (TODO confirm against the feature-building step).
    n_train = int(len(X) * (1 - TEST_SIZE))
    X_train, X_test = X[:n_train], X[n_train:]
    y_train, y_test = y[:n_train], y[n_train:]
    # Normalise with train statistics only (prevents test-set leakage).
    X_mean = X_train.mean(axis=0)
    X_std = X_train.std(axis=0) + 1e-8  # epsilon guards constant columns
    X_train_n = (X_train - X_mean) / X_std
    X_test_n = (X_test - X_mean) / X_std
    logger.info("Train: %d rows | Test: %d rows | Positive rate: %.2f%%",
                len(X_train), len(X_test), y_train.mean() * 100)
    # 3. Define candidate models
    if SKLEARN_OK:
        candidates = {
            "Logistic Regression": LogisticRegression(
                random_state=RANDOM_SEED, max_iter=500, C=0.5),
            "Random Forest": RandomForestClassifier(
                n_estimators=100, max_depth=8, random_state=RANDOM_SEED),
            "Gradient Boosting": GradientBoostingClassifier(
                n_estimators=100, max_depth=4, learning_rate=0.1,
                random_state=RANDOM_SEED),
            "ANN": MLPClassifier(
                hidden_layer_sizes=(16, 8), activation="logistic",
                max_iter=500, random_state=RANDOM_SEED, learning_rate_init=0.001),
        }
    else:
        # Offline fallback: smaller numpy implementations (no Gradient Boosting).
        candidates = {
            "Logistic Regression": _NumpyLogisticRegression(lr=0.05, n_iter=300, seed=RANDOM_SEED),
            "Random Forest": _NumpyRandomForest(n_estimators=30, max_depth=5, seed=RANDOM_SEED),
            "ANN": _NumpyANN(hidden=(16, 8), lr=0.01, n_iter=100, seed=RANDOM_SEED),
        }
    # 4. Train and evaluate. A failure in one model is logged and skipped so
    #    the remaining candidates still compete.
    results = []
    trained_models = {}
    for name, model in candidates.items():
        logger.info("Training: %s ...", name)
        try:
            model.fit(X_train_n, y_train)
            metrics = _compute_metrics(model, X_test_n, y_test, name)
            results.append(metrics)
            trained_models[name] = (model, metrics)
            logger.info(" AUC=%.4f | Precision=%.4f | Recall=%.4f | F1=%.4f",
                        metrics["auc"], metrics.get("precision", 0),
                        metrics.get("recall", 0), metrics.get("f1", 0))
        except Exception as e:
            logger.error("Training failed for %s: %s", name, e)
    if not results:
        logger.error("All models failed — check data")
        return {}
    # 5. Select best model by balanced score: 60% AUC + 40% F1
    # AUC alone is misleading on imbalanced data (positive rate ~10%).
    # F1 captures whether the model actually flags any disruptions.
    def _balanced_score(r):
        # Missing keys default to 0, so a metrics-stub model can never win.
        return 0.6 * r.get("auc", 0) + 0.4 * r.get("f1", 0)
    best = max(results, key=_balanced_score)
    best_name = best["model"]
    best_model, _ = trained_models[best_name]
    logger.info("Best model: %s (AUC=%.4f | F1=%.4f | balanced=%.4f)",
                best_name, best["auc"], best.get("f1", 0), _balanced_score(best))
    # 5b. GridSearchCV — hyperparameter tuning on the best model type
    # Runs only with sklearn. Uses a targeted param grid to keep runtime ≤60s.
    grid_search_results = {}
    if SKLEARN_OK:
        _param_grids = {
            "Logistic Regression": {
                "C": [0.1, 0.5, 1.0, 5.0], "max_iter": [500],
            },
            "Random Forest": {
                "n_estimators": [50, 100, 200], "max_depth": [5, 8, 12],
            },
            "Gradient Boosting": {
                "n_estimators": [50, 100], "max_depth": [3, 4],
                "learning_rate": [0.05, 0.1],
            },
            "ANN": {
                "hidden_layer_sizes": [(16, 8), (32, 16)],
                "learning_rate_init": [0.001, 0.005],
            },
        }
        if best_name in _param_grids:
            try:
                from sklearn.model_selection import GridSearchCV as _GridSearchCV
                # Fresh unfitted estimators — the grid params overwrite defaults.
                _base_map = {
                    "Logistic Regression": LogisticRegression(
                        random_state=RANDOM_SEED, max_iter=500),
                    "Random Forest": RandomForestClassifier(
                        random_state=RANDOM_SEED),
                    "Gradient Boosting": GradientBoostingClassifier(
                        random_state=RANDOM_SEED),
                    "ANN": MLPClassifier(
                        activation="logistic", max_iter=500, random_state=RANDOM_SEED),
                }
                # NOTE(review): integer cv uses (Stratified)KFold, which mixes
                # time periods; a TimeSeriesSplit would match the time-ordered
                # holdout above — confirm intent.
                _gs = _GridSearchCV(
                    _base_map[best_name], _param_grids[best_name],
                    cv=3, scoring="roc_auc", n_jobs=-1, refit=True,
                )
                _gs.fit(X_train_n, y_train)
                _tuned = _gs.best_estimator_
                _tuned_metrics = _compute_metrics(
                    _tuned, X_test_n, y_test, f"{best_name} (Tuned)")
                grid_search_results = {
                    "best_params": _gs.best_params_,
                    "best_cv_auc": round(float(_gs.best_score_), 4),
                    "tuned_test_auc": _tuned_metrics.get("auc", 0),
                    "tuned_test_f1": _tuned_metrics.get("f1", 0),
                }
                # Adopt the tuned model only if it beats the incumbent on the
                # same balanced score used for initial selection.
                if _balanced_score(_tuned_metrics) > _balanced_score(best):
                    best_model = _tuned
                    best = _tuned_metrics
                    logger.info(
                        "GridSearchCV improved model: AUC=%.4f F1=%.4f params=%s",
                        _tuned_metrics["auc"], _tuned_metrics.get("f1", 0),
                        _gs.best_params_,
                    )
                else:
                    logger.info(
                        "GridSearchCV: existing model already optimal "
                        "(cv_auc=%.4f)", _gs.best_score_)
            except Exception as _ge:
                logger.warning("GridSearchCV failed (non-fatal): %s", _ge)
    # 6. Sensitivity analysis for best model (bug-fixed: uses predict_proba)
    feat_df = pd.DataFrame(X_test_n, columns=CLASSIFIER_FEATURES)
    sensitivity = sensitivity_analysis(best_model, feat_df)
    sens_path = METRICS_DIR / "sensitivity_classifier.csv"
    sensitivity.to_csv(sens_path, index=False)
    # 6b. SHAP explainability (best-effort; failures are logged, not raised)
    _run_shap_classifier(best_model, best_name, X_train_n, X_test_n, CLASSIFIER_FEATURES)
    # 6c. Walk-forward (expanding-window) cross-validation
    # Avoids lookahead bias: each fold trains only on data that precedes the test window
    wf_cv_results = {}
    try:
        from sklearn.base import clone as _clone
        # NOTE(review): without sklearn this import raises, so the whole
        # walk-forward CV is skipped (warning below) — confirm that is intended.
        X_wfcv = pd.DataFrame(X, columns=CLASSIFIER_FEATURES)
        y_wfcv = pd.Series(y, name=CLASSIFIER_TARGET)
        def _best_factory():
            # Fresh, unfitted copy of the winning model for each fold.
            if SKLEARN_OK:
                try:
                    return _clone(best_model)
                except Exception:
                    pass
            return _NumpyLogisticRegression(lr=0.05, n_iter=300, seed=RANDOM_SEED)
        wf_cv_results = walk_forward_cv(
            _best_factory, X_wfcv, y_wfcv, n_splits=5, task="classification")
        logger.info(
            "Walk-forward CV — mean_auc=%.4f ± %.4f | mean_f1=%.4f ± %.4f",
            wf_cv_results.get("mean_auc") or 0, wf_cv_results.get("std_auc") or 0,
            wf_cv_results.get("mean_f1") or 0, wf_cv_results.get("std_f1") or 0,
        )
    except Exception as _wfe:
        logger.warning("Walk-forward CV failed (non-fatal): %s", _wfe)
    # Compute optimal decision threshold (maximises TPR-FPR on test set)
    # This is important when class imbalance means default 0.5 is suboptimal.
    _optimal_threshold = 0.5  # fallback
    try:
        _probs_test = best_model.predict_proba(X_test_n)[:, 1]
        _prob_min = float(_probs_test.min())
        _prob_max = float(_probs_test.max())
        if SKLEARN_OK:
            from sklearn.metrics import roc_curve as _roc_curve
            # Youden's J: pick the threshold maximising TPR − FPR.
            _fpr_arr, _tpr_arr, _thresh_arr = _roc_curve(y_test, _probs_test)
            _optimal_threshold = float(_thresh_arr[np.argmax(_tpr_arr - _fpr_arr)])
        logger.info("Optimal decision threshold: %.4f (positive rate %.1f%%)",
                    _optimal_threshold, y_train.mean() * 100)
    except Exception as _te:
        logger.warning("Could not compute optimal threshold: %s", _te)
        # Fallback prob range if predict_proba / roc_curve failed.
        _prob_min, _prob_max = 0.0, 1.0
    # Feature importance (if available)
    fi_data = {}
    if hasattr(best_model, "feature_importances_"):
        fi_data = dict(zip(CLASSIFIER_FEATURES, best_model.feature_importances_.tolist()))
    elif hasattr(best_model, "coef_"):
        # Linear models: use |coefficient| as an importance proxy.
        fi_data = dict(zip(CLASSIFIER_FEATURES, np.abs(best_model.coef_[0]).tolist()))
    # 7. Save model and metrics
    model_meta = {
        "best_model": best_name,
        "scaler_mean": X_mean.tolist(),
        "scaler_std": X_std.tolist(),
        "features": CLASSIFIER_FEATURES,
        "target": CLASSIFIER_TARGET,
        "all_models": results,
        "feature_importance": fi_data,
        "optimal_threshold": _optimal_threshold,
        "prob_range": [_prob_min, _prob_max],
        # NOTE(review): datetime.utcnow() is deprecated (3.12+) and naive;
        # datetime.now(timezone.utc) would be tz-aware but changes the
        # serialized format — confirm downstream consumers before switching.
        "trained_at": datetime.utcnow().isoformat(),
        "n_train": int(len(X_train)),
        "n_test": int(len(X_test)),
        "positive_rate": float(y_train.mean()),
        "grid_search": grid_search_results,
        "walk_forward_cv": wf_cv_results,
    }
    save_metrics(model_meta, "classifier")
    save_model(best_model, "best_classifier")
    save_model({"mean": X_mean, "std": X_std}, "classifier_scaler")
    # 8. Plots
    _plot_roc_curves(results)
    _plot_feature_importance(sensitivity, best_name)
    logger.info("✓ Classifier training complete. Best: %s AUC=%.4f",
                best_name, best["auc"])
    return model_meta
def _plot_roc_curves(results: list):
    """Plot ROC curves for all models.

    Each entry in ``results`` should carry ``fpr``/``tpr`` lists plus
    ``model`` and ``auc`` for the legend; entries without ROC data are
    skipped. The figure is saved to FIGURES_DIR/roc_curves_classifier.png.
    """
    palette = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728"]
    fig, ax = plt.subplots(figsize=(8, 6))
    for idx, entry in enumerate(results):
        if "fpr" not in entry or "tpr" not in entry:
            continue
        ax.plot(entry["fpr"], entry["tpr"],
                color=palette[idx % len(palette)], linewidth=2,
                label=f"{entry['model']} (AUC={entry['auc']:.3f})")
    # Chance diagonal for reference.
    ax.plot([0, 1], [0, 1], "k--", alpha=0.5, label="Random")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("ROC Curves — Disruption Classifier")
    ax.legend(loc="lower right")
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    fig.savefig(FIGURES_DIR / "roc_curves_classifier.png", dpi=150)
    plt.close(fig)
    logger.info("Saved ROC curves plot")
def _plot_feature_importance(sensitivity_df: pd.DataFrame, model_name: str):
    """Plot the top-10 feature sensitivities as a horizontal bar chart.

    No-op when ``sensitivity_df`` is empty. Writes the figure to
    FIGURES_DIR/feature_sensitivity_classifier.png.

    Fix: removed the dead ``bars`` local that captured the barh return
    value without ever using it.
    """
    if sensitivity_df.empty:
        return
    fig, ax = plt.subplots(figsize=(8, 6))
    top = sensitivity_df.head(10)
    # Reversed so the highest-sensitivity feature renders at the top.
    ax.barh(top["feature"][::-1], top["sensitivity"][::-1], color="#1f77b4")
    ax.set_xlabel("Sensitivity (avg output change)")
    ax.set_title(f"Feature Sensitivity Analysis — {model_name}")
    ax.grid(True, alpha=0.3, axis="x")
    plt.tight_layout()
    fig.savefig(FIGURES_DIR / "feature_sensitivity_classifier.png", dpi=150)
    plt.close(fig)
    logger.info("Saved feature sensitivity plot")
def _run_shap_classifier(model, model_name: str, X_train_n, X_test_n, feature_names):
    """Compute SHAP values for the best classifier and save outputs.

    Writes ``shap_classifier.csv`` (mean |SHAP| per feature) to METRICS_DIR
    and a bar chart ``shap_classifier.png`` to FIGURES_DIR. Any failure —
    including ``shap`` not being installed — is logged and swallowed, so
    explainability is best-effort and never breaks training.
    """
    try:
        import shap  # optional dependency; ImportError lands in except below
        X_train_df = pd.DataFrame(X_train_n, columns=feature_names)
        X_test_df = pd.DataFrame(X_test_n, columns=feature_names)
        # Choose explainer based on model type
        if hasattr(model, "feature_importances_"):
            # Tree-based (RF, GB): fast TreeExplainer
            explainer = shap.TreeExplainer(model)
            shap_values = explainer.shap_values(X_test_df)
            # For binary classifiers TreeExplainer returns list[class0, class1]
            # NOTE(review): newer shap versions may return a 3-D array instead
            # of a list here, which falls through to the raw-array branch —
            # verify against the installed shap version.
            if isinstance(shap_values, list) and len(shap_values) == 2:
                shap_vals = shap_values[1]  # class-1 (disruption)
            else:
                shap_vals = shap_values
        else:
            # Linear / ANN: use LinearExplainer or KernelExplainer (sample 50 rows)
            background = shap.sample(X_train_df, min(50, len(X_train_df)), random_state=42)
            if hasattr(model, "coef_"):
                explainer = shap.LinearExplainer(model, background)
                shap_vals = explainer.shap_values(X_test_df)
            else:
                # KernelExplainer needs a plain f(X) -> scores callable;
                # expose only the positive-class probability.
                def _predict_proba_pos(X):
                    return model.predict_proba(X)[:, 1]
                explainer = shap.KernelExplainer(_predict_proba_pos, background)
                shap_vals = explainer.shap_values(X_test_df, nsamples=100)
        # Mean absolute SHAP per feature
        mean_shap = np.abs(shap_vals).mean(axis=0)
        shap_df = pd.DataFrame({
            "feature": feature_names,
            "mean_abs_shap": mean_shap.tolist()
        }).sort_values("mean_abs_shap", ascending=False)
        shap_df.to_csv(METRICS_DIR / "shap_classifier.csv", index=False)
        # SHAP summary bar chart
        fig, ax = plt.subplots(figsize=(8, 5))
        # Grey out near-zero-impact features for readability.
        colors = ["#1A2B5E" if v > 0.001 else "#CBD5E1" for v in shap_df["mean_abs_shap"]]
        ax.barh(shap_df["feature"][::-1], shap_df["mean_abs_shap"][::-1], color=colors[::-1])
        ax.set_xlabel("Mean |SHAP value| (impact on disruption probability)")
        ax.set_title(f"SHAP Feature Importance — {model_name} (Classifier)")
        ax.grid(True, alpha=0.3, axis="x")
        plt.tight_layout()
        fig.savefig(FIGURES_DIR / "shap_classifier.png", dpi=150)
        plt.close(fig)
        logger.info("✓ SHAP classifier: saved shap_classifier.csv + shap_classifier.png")
    except Exception as e:
        logger.warning("SHAP classifier failed (non-fatal): %s", e)
if __name__ == "__main__":
    # Manual entry point: run the full training pipeline and print a summary.
    metrics = train_classifier()
    chosen = metrics.get("best_model")
    print(f"\nBest model: {chosen}")
    # Look up the winning model's AUC in the per-model results list.
    chosen_auc = next(
        (entry["auc"] for entry in metrics.get("all_models", [])
         if entry["model"] == chosen),
        "N/A",
    )
    print(f"AUC: {chosen_auc}")