# headache-predictor-xgboost / run_training.py
# (Hub upload metadata — uploader: emp-admin, "Upload 9 files", commit 56f192b verified)
"""
═══════════════════════════════════════════════════════════════════════════
PHOEBE HEADACHE PREDICTOR v3.0 β€” Production Training Pipeline
EmpedocLabs Β© 2025
═══════════════════════════════════════════════════════════════════════════
Clinical-grade synthetic data with user archetypes:
- Chronic migraineur (high baseline, medication dependent)
- Episodic tension-type (stress/sleep driven)
- Menstrual migraine (hormonal cycle dominant)
- Weather-sensitive (barometric pressure dominant)
- Mixed/general (moderate baseline)
Leak-free: predicts day T headache using day T weather + day T-1 health/diary.
38 features matching the iOS DailySnapshotDTO:
WeatherKit forecast (6) β€” pressure, Ξ”p, |Ξ”p|, humidity, temp, drop flag
HealthKit yesterday (7) β€” sleep h/deep/rem, rhr, hrv, workout, menstrual
Diary yesterday (6) β€” headache, severity, duration, mood, #symptoms, #triggers
Diary 2-days-ago (3) β€” headache, severity, duration
Temporal (7) β€” dow sin/cos, month sin/cos, doy sin/cos, weekend
User context (3) β€” age, is_europe, is_tropical
Interactions (6) β€” sleepΓ—pressure, low_hrv, sleep_deficit,
high_humidity, streak_2d, consecutive_days
"""
import os, sys, math, random, pickle, json, warnings
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from sklearn.model_selection import GroupShuffleSplit
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import (
classification_report, roc_auc_score, f1_score,
precision_recall_curve, average_precision_score, confusion_matrix,
)
warnings.filterwarnings("ignore")
# ═══════════════════════════════════════════════════════════════════════
# FEATURE SCHEMA
# ═══════════════════════════════════════════════════════════════════════
# Ordered input-feature schema.  Positions matter: generate_user() appends
# feature values positionally, so this order is the serialization contract
# with the iOS DailySnapshotDTO — do not reorder or rename without updating
# both sides.
FEATURE_NAMES = [
    # WeatherKit forecast for the target day (6)
    "pressure_mb", "pressure_change_24h", "pressure_volatility",
    "humidity_pct", "temperature_c", "is_pressure_drop",
    # HealthKit, yesterday (7)
    "sleep_total_hours", "deep_sleep_min", "rem_sleep_min",
    "resting_hr", "hrv_avg_ms", "workout_min", "menstrual_flow_flag",
    # Diary, yesterday (6)
    "had_headache_1d", "severity_1d", "duration_1d",
    "mood_1d", "symptom_count_1d", "trigger_count_1d",
    # Diary, two days ago (3)
    "had_headache_2d", "severity_2d", "duration_2d",
    # Cyclic temporal encodings plus weekend flag (7)
    "dow_sin", "dow_cos", "month_sin", "month_cos",
    "doy_sin", "doy_cos", "is_weekend",
    # Static user context (3)
    "age_midpoint", "is_europe", "is_tropical",
    # Hand-crafted interaction / derived features (6)
    "sleep_x_pressure", "low_hrv_flag", "sleep_deficit",
    "high_humidity_flag", "headache_streak_2d", "consecutive_headache_days",
]
NUM_FEATURES = len(FEATURE_NAMES) # 38
# ═══════════════════════════════════════════════════════════════════════
# USER ARCHETYPES (based on migraine clinical literature)
# ═══════════════════════════════════════════════════════════════════════
# Synthetic-population parameter table.  Per archetype:
#   weight       — sampling probability when constructing a User (sums to 1.0)
#   base_rate    — unconditional daily headache probability (log-odds baseline
#                  in generate_user)
#   *_coeff      — multiplier applied to that trigger's graded log-odds
#                  contribution inside generate_user()
# NOTE(review): "sensitivity" is defined for every archetype but is never
# read anywhere in this file (generate_user uses every other key) — confirm
# whether it was meant to feed the log-odds, or remove it.
ARCHETYPES = {
    "chronic_migraine": {
        "weight": 0.20,
        "base_rate": 0.40, # ~12 headache days/month
        "sensitivity": 0.3,
        "pressure_coeff": 0.5,
        "sleep_coeff": 0.6,
        "hrv_coeff": 0.3,
        "menstrual_coeff": 0.4,
        "humidity_coeff": 0.2,
        "rebound_coeff": 0.7, # strong rebound/cluster effect
        "weekend_coeff": 0.1,
        "temp_coeff": 0.15,
    },
    "episodic_tension": {
        "weight": 0.25,
        "base_rate": 0.12,
        "sensitivity": 0.0,
        "pressure_coeff": 0.2,
        "sleep_coeff": 0.9, # very sleep-dependent
        "hrv_coeff": 0.7, # very stress-dependent
        "menstrual_coeff": 0.1,
        "humidity_coeff": 0.1,
        "rebound_coeff": 0.2,
        "weekend_coeff": 0.25, # "weekend headache" pattern
        "temp_coeff": 0.1,
    },
    "menstrual_migraine": {
        "weight": 0.20,
        "base_rate": 0.15,
        "sensitivity": 0.1,
        "pressure_coeff": 0.3,
        "sleep_coeff": 0.4,
        "hrv_coeff": 0.3,
        "menstrual_coeff": 1.2, # dominant factor
        "humidity_coeff": 0.15,
        "rebound_coeff": 0.4,
        "weekend_coeff": 0.05,
        "temp_coeff": 0.1,
    },
    "weather_sensitive": {
        "weight": 0.15,
        "base_rate": 0.15,
        "sensitivity": 0.1,
        "pressure_coeff": 1.0, # dominant factor
        "sleep_coeff": 0.3,
        "hrv_coeff": 0.2,
        "menstrual_coeff": 0.2,
        "humidity_coeff": 0.6, # also weather
        "rebound_coeff": 0.3,
        "weekend_coeff": 0.05,
        "temp_coeff": 0.4, # temperature sensitive too
    },
    "mixed_general": {
        "weight": 0.20,
        "base_rate": 0.18,
        "sensitivity": 0.0,
        "pressure_coeff": 0.4,
        "sleep_coeff": 0.5,
        "hrv_coeff": 0.4,
        "menstrual_coeff": 0.3,
        "humidity_coeff": 0.2,
        "rebound_coeff": 0.35,
        "weekend_coeff": 0.12,
        "temp_coeff": 0.15,
    },
}
def _logistic(x):
return 1.0 / (1.0 + math.exp(-max(-20, min(20, x))))
def _cyclic(val, period):
a = 2 * math.pi * val / period
return math.sin(a), math.cos(a)
# ═══════════════════════════════════════════════════════════════════════
# USER CLASS
# ═══════════════════════════════════════════════════════════════════════
class User:
    """A synthetic app user: archetype, demographics, and personal baselines.

    All attributes are sampled from the module-level `random` generator, so
    the draw ORDER in __init__ is part of the seeded-reproducibility
    contract — do not reorder the random calls.
    """

    def __init__(self, uid):
        self.uid = uid
        # Sample the archetype proportionally to its configured weight.
        pool = list(ARCHETYPES.keys())
        probs = [ARCHETYPES[name]["weight"] for name in pool]
        (self.archetype_name,) = random.choices(pool, weights=probs)
        self.arch = ARCHETYPES[self.archetype_name]
        # Age bucket: a lower bound is drawn, the midpoint is stored.
        lower = random.choice([18, 20, 25, 30, 35, 40, 45, 50, 55, 60])
        self.age_mid = lower + 4.5
        (self.region,) = random.choices(
            ["europe", "americas", "asia", "tropical"],
            weights=[40, 30, 20, 10],
        )
        # Menstrual-migraine archetype is female by definition; otherwise
        # skew female (~65%).  The conditional expression only touches the
        # RNG in the else branch, exactly like the original if/else.
        self.is_female = (
            True
            if self.archetype_name == "menstrual_migraine"
            else random.random() < 0.65
        )
        # Personal physiological baselines (Gaussian individual variation).
        self.base_hr = random.gauss(65, 8)
        self.base_hrv = random.gauss(45, 15)
        self.base_sleep = random.gauss(7.0, 0.8)
        self.personal_noise = random.gauss(0, 0.2)
        # Menstrual-cycle parameters (cycle_len == 0 disables the cycle).
        self.cycle_len = random.randint(26, 32) if self.is_female else 0
        self.cycle_off = random.randint(0, 30)
# ═══════════════════════════════════════════════════════════════════════
# DATA GENERATION β€” per user
# ═══════════════════════════════════════════════════════════════════════
def generate_user(user: User, n_days: int, start: datetime):
    """Generate n_days of raw data, return leak-free (features, labels).

    Day T's label is paired with day T weather (forecastable ahead of time)
    plus day T-1 and T-2 health/diary data only, so no same-day health or
    diary information leaks into the features.  Returns a tuple
    (X, y) where X has shape (n_days - 2, 38) float32 and y is int32 {0, 1}.
    """
    a = user.arch
    # Weather random walk — state carried day-to-day for autocorrelation.
    pressure = random.gauss(1013.25, 8)
    temp = random.gauss(15, 10)
    humidity = random.gauss(60, 15)
    raw = []
    for d in range(n_days):
        dt = start + timedelta(days=d)
        month = dt.month
        # ── Weather with realistic autocorrelation ───────────────────
        # Seasonal set-points the walk mean-reverts toward
        # (northern-hemisphere phase).
        seasonal_t = 15 + 14 * math.sin(2 * math.pi * (month - 4) / 12)
        seasonal_h = 55 + 20 * math.sin(2 * math.pi * (month - 7) / 12)
        # Pressure: occasional fronts (sudden drops)
        if random.random() < 0.08: # ~3x/month cold front
            p_change = random.gauss(-8, 3)
        elif random.random() < 0.05: # occasional rapid rise
            p_change = random.gauss(6, 2)
        else:
            # Small noise plus mean reversion toward standard sea-level
            # pressure (1013.25 mb).
            p_change = random.gauss(0, 2.5) + 0.12 * (1013.25 - pressure)
        pressure += p_change
        pressure = max(970, min(1050, pressure))
        temp += random.gauss(0, 1.8) + 0.15 * (seasonal_t - temp)
        humidity += random.gauss(0, 4) + 0.08 * (seasonal_h - humidity)
        humidity = max(15, min(98, humidity))
        # ── HealthKit ────────────────────────────────────────────────
        # Sleep varies: weekends slightly longer, bad days shorter
        base_s = user.base_sleep + (0.5 if dt.weekday() >= 5 else 0)
        sleep_h = max(2.5, random.gauss(base_s, 1.0))
        # If had headache yesterday β†’ worse sleep tonight
        if d > 0 and raw[d-1]["headache"]:
            sleep_h = max(2.5, sleep_h - random.gauss(0.8, 0.5))
        deep = max(0, random.gauss(75 + sleep_h * 3, 18))
        rem = max(0, random.gauss(80 + sleep_h * 5, 22))
        rhr = max(45, random.gauss(user.base_hr, 4))
        # HRV: stress lowers it, good sleep raises it
        hrv_base = user.base_hrv + (sleep_h - 7) * 3
        hrv = max(8, random.gauss(hrv_base, 8))
        workout = max(0, int(random.gauss(25, 18)))
        # Less workout on headache days
        if d > 0 and raw[d-1]["headache"]:
            workout = max(0, workout - 15)
        cycle_day = 0
        menstrual = False
        if user.is_female and user.cycle_len > 0:
            # cycle_day is 1-based; flow flagged on days 1-4.
            cycle_day = ((d + user.cycle_off) % user.cycle_len) + 1
            menstrual = cycle_day <= 4
        # ── Headache probability ─────────────────────────────────────
        # Accumulate log-odds: archetype baseline + per-trigger graded
        # contributions weighted by the archetype's coefficients.
        base_rate = a["base_rate"]
        lo = math.log(base_rate / (1 - base_rate))
        lo += user.personal_noise
        # Barometric pressure: graded response
        if p_change < -8:
            lo += a["pressure_coeff"] * 1.2
        elif p_change < -5:
            lo += a["pressure_coeff"] * 0.8
        elif p_change < -3:
            lo += a["pressure_coeff"] * 0.4
        elif p_change > 8:
            lo += a["pressure_coeff"] * 0.5 # rapid rise also triggers
        # Sleep: graded response
        if sleep_h < 4:
            lo += a["sleep_coeff"] * 1.2
        elif sleep_h < 5:
            lo += a["sleep_coeff"] * 0.8
        elif sleep_h < 6:
            lo += a["sleep_coeff"] * 0.4
        elif sleep_h > 9:
            lo += a["sleep_coeff"] * 0.3 # oversleep trigger
        # HRV (stress proxy): graded
        if hrv < 20:
            lo += a["hrv_coeff"] * 1.0
        elif hrv < 30:
            lo += a["hrv_coeff"] * 0.6
        elif hrv < 35:
            lo += a["hrv_coeff"] * 0.2
        # Menstrual: perimenstrual window (days 1-3, 26-28)
        if user.is_female and user.cycle_len > 0:
            if cycle_day <= 3:
                lo += a["menstrual_coeff"] * 1.0
            elif cycle_day <= 5:
                lo += a["menstrual_coeff"] * 0.4
            elif cycle_day >= user.cycle_len - 2:
                lo += a["menstrual_coeff"] * 0.7 # premenstrual
        # Humidity
        if humidity > 85:
            lo += a["humidity_coeff"] * 0.8
        elif humidity > 75:
            lo += a["humidity_coeff"] * 0.3
        # Temperature extremes
        if temp > 32 or temp < -8:
            lo += a["temp_coeff"] * 0.8
        elif temp > 28 or temp < -3:
            lo += a["temp_coeff"] * 0.3
        # Rebound / cluster effect
        if d > 0 and raw[d-1]["headache"]:
            lo += a["rebound_coeff"] * 0.6
        if d > 1 and raw[d-2]["headache"]:
            lo += a["rebound_coeff"] * 0.3 # 2-day streak
        # Weekend "let-down" headache
        if dt.weekday() == 5: # Saturday
            lo += a["weekend_coeff"]
        elif dt.weekday() == 6:
            lo += a["weekend_coeff"] * 0.6
        # Small random noise (less than before β€” let signal dominate)
        lo += random.gauss(0, 0.15)
        prob = _logistic(lo)
        headache = random.random() < prob
        # ── Diary details ────────────────────────────────────────────
        # Severity-conditioned diary fields; mood distribution shifts low
        # on headache days.
        if headache:
            severity = random.choices([1,2,3,4,5], weights=[8,22,38,22,10])[0]
            duration = round(max(0.5, random.gauss(2.5 + severity * 1.0, 1.5)), 1)
            n_symp = random.randint(1, min(5, 1 + severity))
            n_trig = random.randint(0, min(4, severity))
            mood = random.choices([1,2,3,4,5], weights=[30,35,25,8,2])[0]
        else:
            severity, duration, n_symp, n_trig = 0, 0.0, 0, 0
            mood = random.choices([1,2,3,4,5], weights=[3,8,25,38,26])[0]
        raw.append({
            "dt": dt, "pressure": pressure, "p_change": p_change,
            "humidity": humidity, "temp": temp,
            "sleep_h": round(sleep_h, 1), "deep": round(deep, 0),
            "rem": round(rem, 0), "rhr": round(rhr, 0),
            "hrv": round(hrv, 1), "workout": workout,
            "menstrual": menstrual,
            "headache": headache, "severity": severity,
            "duration": duration, "mood": mood,
            "n_symp": n_symp, "n_trig": n_trig,
        })
    # ── Build feature vectors (leak-free) ────────────────────────────
    # Start at i=2 so both 1-day and 2-day lags exist for every row.
    rows, labels = [], []
    consec = 0
    for i in range(2, n_days):
        t = raw[i] # target day
        y = raw[i - 1] # yesterday
        p = raw[i - 2] # 2 days ago
        dt = t["dt"]
        f = []
        # Weather target (6) — order must match FEATURE_NAMES.
        f.append(t["pressure"])
        f.append(t["p_change"])
        f.append(abs(t["p_change"]))
        f.append(t["humidity"])
        f.append(t["temp"])
        f.append(1.0 if t["p_change"] < -5 else 0.0)
        # HealthKit yesterday (7)
        f.append(y["sleep_h"])
        f.append(y["deep"])
        f.append(y["rem"])
        f.append(y["rhr"])
        f.append(y["hrv"])
        f.append(float(y["workout"]))
        f.append(1.0 if y["menstrual"] else 0.0)
        # Diary yesterday (6)
        f.append(1.0 if y["headache"] else 0.0)
        f.append(float(y["severity"]))
        f.append(float(y["duration"]))
        f.append(float(y["mood"]))
        f.append(float(y["n_symp"]))
        f.append(float(y["n_trig"]))
        # Diary 2d ago (3)
        f.append(1.0 if p["headache"] else 0.0)
        f.append(float(p["severity"]))
        f.append(float(p["duration"]))
        # Temporal (7)
        dw_s, dw_c = _cyclic(dt.weekday(), 7)
        mn_s, mn_c = _cyclic(dt.month - 1, 12)
        dy_s, dy_c = _cyclic(dt.timetuple().tm_yday, 365)
        f.extend([dw_s, dw_c, mn_s, mn_c, dy_s, dy_c])
        f.append(1.0 if dt.weekday() >= 5 else 0.0)
        # User context (3)
        f.append(user.age_mid)
        f.append(1.0 if "europe" in user.region else 0.0)
        f.append(1.0 if "tropical" in user.region else 0.0)
        # Interactions (6)
        f.append(y["sleep_h"] * abs(t["p_change"]))
        f.append(1.0 if y["hrv"] < 25 else 0.0)
        f.append(max(0.0, 6.0 - y["sleep_h"]))
        f.append(1.0 if t["humidity"] > 80 else 0.0)
        streak = (1.0 if y["headache"] else 0.0) + (1.0 if p["headache"] else 0.0)
        f.append(streak)
        # NOTE(review): consec starts at 0 at i == 2, so a headache run that
        # already included raw[0] is undercounted by one on the first feature
        # day — confirm this is the intended warm-up behavior.
        consec = (consec + 1) if y["headache"] else 0
        f.append(float(min(consec, 7)))
        rows.append(f)
        labels.append(1 if t["headache"] else 0)
    return np.array(rows, dtype=np.float32), np.array(labels, dtype=np.int32)
# ═══════════════════════════════════════════════════════════════════════
# DATASET ASSEMBLY
# ═══════════════════════════════════════════════════════════════════════
def generate_dataset(n_users=2000, days=365, seed=42):
    """Simulate a cohort of synthetic users and assemble one DataFrame.

    Seeds both `random` and `np.random`, generates `days` of data per user
    via generate_user(), and returns the stacked feature frame with
    `headache`, `user_id`, and `archetype` columns attached.
    """
    random.seed(seed)
    np.random.seed(seed)
    feature_blocks, label_blocks = [], []
    user_ids, archetype_col = [], []
    start = datetime(2023, 6, 1)
    arch_counts = {}
    for uid in range(n_users):
        user = User(uid)
        name = user.archetype_name
        arch_counts[name] = arch_counts.get(name, 0) + 1
        X_u, y_u = generate_user(user, days, start)
        feature_blocks.append(X_u)
        label_blocks.append(y_u)
        n_rows = len(y_u)
        user_ids.extend([uid] * n_rows)
        archetype_col.extend([name] * n_rows)
        if (uid + 1) % 200 == 0:
            print(f" {uid + 1}/{n_users} users generated")
    X = np.vstack(feature_blocks)
    y = np.concatenate(label_blocks)
    df = pd.DataFrame(X, columns=FEATURE_NAMES)
    df["headache"] = y
    df["user_id"] = user_ids
    df["archetype"] = archetype_col
    print(f"\nβœ… Dataset: {df.shape[0]:,} rows Γ— {NUM_FEATURES} features")
    print(f" Headache rate: {y.mean():.1%}")
    print(f" Archetypes: {arch_counts}")
    return df
# ═══════════════════════════════════════════════════════════════════════
# TRAINING
# ═══════════════════════════════════════════════════════════════════════
def group_split(df, test_f=0.12, val_f=0.12, seed=42):
    """Three-way split grouped by user_id, so no user spans partitions.

    Returns (train, val, test) DataFrames; val_f is rescaled relative to
    the rows remaining after the test split is removed.
    """
    outer = GroupShuffleSplit(n_splits=1, test_size=test_f, random_state=seed)
    trainval_idx, test_idx = next(outer.split(df, groups=df["user_id"]))
    trainval, test = df.iloc[trainval_idx], df.iloc[test_idx]
    # Rescale the validation fraction to the remaining (1 - test_f) rows.
    inner = GroupShuffleSplit(
        n_splits=1, test_size=val_f / (1 - test_f), random_state=seed
    )
    train_idx, val_idx = next(inner.split(trainval, groups=trainval["user_id"]))
    return trainval.iloc[train_idx], trainval.iloc[val_idx], test
def tune_threshold(y_true, y_prob):
    """Return (threshold, f1) maximizing F1 along the precision-recall curve."""
    precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
    f1_scores = 2 * precision * recall / (precision + recall + 1e-8)
    best = np.argmax(f1_scores)
    # precision_recall_curve returns one fewer threshold than PR points,
    # so clamp the index in case the final (recall=0) point wins.
    idx = min(best, len(thresholds) - 1)
    return float(thresholds[idx]), float(f1_scores[best])
def evaluate(y_true, y_prob, thr, label):
    """Threshold y_prob at `thr`, print a labelled report, and return
    a summary dict with roc_auc / pr_auc / f1 rounded to 4 decimals."""
    y_pred = (y_prob >= thr).astype(int)
    rule = "═" * 60
    print(f"\n{rule}")
    print(f" {label} (threshold={thr:.3f})")
    print(rule)
    report = classification_report(
        y_true, y_pred,
        target_names=["No headache", "Headache"], zero_division=0,
    )
    print(report)
    metrics = {
        "roc_auc": roc_auc_score(y_true, y_prob),
        "pr_auc": average_precision_score(y_true, y_prob),
        "f1": f1_score(y_true, y_pred, zero_division=0),
    }
    print(f" ROC-AUC : {metrics['roc_auc']:.4f}")
    print(f" PR-AUC : {metrics['pr_auc']:.4f}")
    print(f" F1 : {metrics['f1']:.4f}")
    print(f" Confusion:\n{confusion_matrix(y_true, y_pred)}")
    return {name: round(value, 4) for name, value in metrics.items()}
def main():
    """End-to-end training pipeline.

    Steps: generate synthetic cohort β†’ grouped train/val/test split β†’
    fit HistGradientBoosting β†’ isotonic calibration β†’ tune decision
    threshold on validation β†’ evaluate (overall + per archetype +
    permutation importance) β†’ persist artifacts under model/ and data/.
    """
    print("=" * 62)
    print(" PHOEBE HEADACHE PREDICTOR v3.0 β€” Production Training")
    print(" EmpedocLabs | Beta Release Build")
    print("=" * 62)
    # ── Generate ─────────────────────────────────────────────────────
    print("\nπŸ“Š Generating clinical-grade synthetic data (2000 users Γ— 365 days)...")
    df = generate_dataset(n_users=2000, days=365, seed=42)
    # ── Split (grouped by user_id — no user leaks across partitions) ─
    df_train, df_val, df_test = group_split(df)
    print(f"\nπŸ“‚ Split: Train={len(df_train):,} Val={len(df_val):,} Test={len(df_test):,}")
    X_tr = df_train[FEATURE_NAMES].values.astype(np.float32)
    y_tr = df_train["headache"].values.astype(np.int32)
    X_va = df_val[FEATURE_NAMES].values.astype(np.float32)
    y_va = df_val["headache"].values.astype(np.int32)
    X_te = df_test[FEATURE_NAMES].values.astype(np.float32)
    y_te = df_test["headache"].values.astype(np.int32)
    neg, pos = np.bincount(y_tr)
    print(f" Class: neg={neg:,} pos={pos:,} ratio={neg/pos:.2f}")
    # ── Train ────────────────────────────────────────────────────────
    print("\nπŸš€ Training HistGradientBoosting (production config)...")
    model = HistGradientBoostingClassifier(
        max_iter=800,
        max_depth=6,
        learning_rate=0.03,
        min_samples_leaf=25,
        max_leaf_nodes=48,
        l2_regularization=0.8,
        max_features=0.85,
        early_stopping=True,
        validation_fraction=0.08,
        n_iter_no_change=50,
        scoring="loss",
        class_weight="balanced",
        random_state=42,
    )
    model.fit(X_tr, y_tr)
    print(f" Iterations: {model.n_iter_}")
    # ── Calibrate ────────────────────────────────────────────────────
    # NOTE: with cv=5, CalibratedClassifierCV clones and re-fits the base
    # estimator per fold; the directly-fitted `model` above is kept only
    # for the iteration report and the raw_model artifact.
    print("\nπŸ“ Probability calibration (isotonic, 5-fold)...")
    calibrated = CalibratedClassifierCV(model, method="isotonic", cv=5)
    calibrated.fit(X_tr, y_tr)
    # ── Threshold (tuned on validation, applied unchanged to test) ───
    vp = calibrated.predict_proba(X_va)[:, 1]
    opt_thr, vf1 = tune_threshold(y_va, vp)
    print(f"\n🎯 Optimal threshold: {opt_thr:.3f} (val F1={vf1:.4f})")
    # ── Evaluate ─────────────────────────────────────────────────────
    val_m = evaluate(y_va, vp, opt_thr, "VALIDATION")
    tp = calibrated.predict_proba(X_te)[:, 1]
    test_m = evaluate(y_te, tp, opt_thr, "TEST")
    # ── Per-archetype eval ───────────────────────────────────────────
    print("\nπŸ“Š Per-archetype performance:")
    for arch in ARCHETYPES:
        mask = df_test["archetype"] == arch
        if mask.sum() < 50:
            continue  # too few rows for a stable estimate
        a_y = y_te[mask.values]
        a_p = tp[mask.values]
        # roc_auc_score raises ValueError when a slice contains only one
        # class; skip that archetype rather than crash.  (Was a bare
        # `except:` which also swallowed SystemExit/KeyboardInterrupt.)
        try:
            a_auc = roc_auc_score(a_y, a_p)
            a_f1 = f1_score(a_y, (a_p >= opt_thr).astype(int), zero_division=0)
            rate = a_y.mean()
            print(f" {arch:25s} n={mask.sum():6,} rate={rate:.1%} AUC={a_auc:.3f} F1={a_f1:.3f}")
        except ValueError:
            pass
    # ── Feature importance (permutation, on the test set) ────────────
    print("\nπŸ“Š Permutation feature importance...")
    base_auc = roc_auc_score(y_te, tp)
    imps = np.zeros(NUM_FEATURES)
    rng = np.random.RandomState(42)
    for fi in range(NUM_FEATURES):
        Xp = X_te.copy()
        Xp[:, fi] = rng.permutation(Xp[:, fi])
        pp = calibrated.predict_proba(Xp)[:, 1]
        imps[fi] = base_auc - roc_auc_score(y_te, pp)  # AUC drop = importance
    top_idx = np.argsort(imps)[-15:][::-1]
    print("\n Top features:")
    mx = max(imps.max(), 1e-6)  # guard against all-zero importances
    for r, i in enumerate(top_idx, 1):
        bar = "β–ˆ" * max(1, int(imps[i] / mx * 35)) if imps[i] > 0 else "Β·"
        print(f" {r:2d}. {FEATURE_NAMES[i]:30s} Ξ”AUC={imps[i]:+.4f} {bar}")
    # ── Save ─────────────────────────────────────────────────────────
    os.makedirs("model", exist_ok=True)
    model_data = {
        "model": calibrated,
        "raw_model": model,
        "optimal_threshold": opt_thr,
        "feature_names": FEATURE_NAMES,
        "num_features": NUM_FEATURES,
        "model_version": "3.0.0",
        "trained_at": datetime.now().isoformat(),
        "test_metrics": test_m,
        "val_metrics": val_m,
        "training_rows": len(df_train),
        "total_users": 2000,
        "feature_importances": {
            FEATURE_NAMES[i]: round(float(imps[i]), 6)
            for i in top_idx if imps[i] > 0
        },
    }
    with open("model/model.pkl", "wb") as f:
        pickle.dump(model_data, f)
    sz = os.path.getsize("model/model.pkl") / 1024
    print(f"\nπŸ’Ύ model/model.pkl ({sz:.0f} KB)")
    # JSON metadata: drop the unpicklable-to-JSON estimators; default=str
    # stringifies anything json can't encode natively (e.g. numpy scalars).
    meta = {k: v for k, v in model_data.items() if k not in ("model", "raw_model")}
    with open("model/metadata.json", "w") as f:
        json.dump(meta, f, indent=2, default=str)
    print("πŸ“‹ model/metadata.json")
    os.makedirs("data", exist_ok=True)
    df.to_parquet("data/training_data.parquet", index=False)
    print("πŸ“ data/training_data.parquet")
    print(f"\n{'═'*62}")
    print(" βœ… PRODUCTION MODEL READY β€” v3.0.0")
    print(f" Test ROC-AUC: {test_m['roc_auc']}")
    print(f" Test F1: {test_m['f1']}")
    print(f" Threshold: {opt_thr:.3f}")
    print(f"{'═'*62}")
# Script entry guard: run the full pipeline only when executed directly,
# not when this module is imported (e.g. for its feature schema).
if __name__ == "__main__":
    main()