"""
Bioweather Model Training v2.0
EmpedocLabs © 2025

Trains:
  1. Risk regressor (0-100 score)
  2. Advice classifier (15 weather conditions)

Both use HistGradientBoosting (sklearn) — no XGBoost dependency needed.
"""

import json
import os
import pickle
from datetime import datetime

import numpy as np
import pandas as pd  # noqa: F401  (not used below; import retained from the original file)
from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split

from generate_data import generate_production_data

# Columns of the generated DataFrame that are fed to both models, in order.
FEATURE_COLS = [
    "temp_c",
    "pressure_hpa",
    "humidity",
    "wind_kph",
    "uv_index",
    "pressure_drop",
    "temp_change",
]

# Display name for each advice class label.
CONDITION_NAMES = {
    0: "Clear Skies",
    1: "Rapid Pressure Drop",
    2: "Pressure Squeeze",
    3: "Sauna Effect",
    4: "High Wind",
    5: "High UV Glare",
    6: "Bitter Cold",
    7: "Drastic Temp Drop",
    8: "Heat Shock",
    9: "Heavy Dampness",
    10: "Mild Pressure Dip",
    11: "Mild Pressure Rise",
    12: "Breezy Pollen",
    13: "Dry Air",
    14: "Stagnant & Gloomy",
}

# Output locations. Kept as literal forward-slash paths so the printed
# summary is identical on every platform.
MODEL_DIR = "model"
RISK_MODEL_PATH = "model/risk_model.pkl"
ADVICE_MODEL_PATH = "model/advice_model.pkl"
METADATA_PATH = "model/metadata.json"


def _train_risk_model(X_train, X_test, y_train, y_test):
    """Fit the risk regressor and print its hold-out metrics.

    Returns:
        Tuple ``(model, mae, rmse, r2)`` where the three metrics are plain
        Python floats computed on the test split.
    """
    print("\n🚀 Training risk regressor...")
    model = HistGradientBoostingRegressor(
        max_iter=400,
        max_depth=6,
        learning_rate=0.05,
        min_samples_leaf=15,
        l2_regularization=0.5,
        early_stopping=True,
        validation_fraction=0.1,
        n_iter_no_change=30,
        random_state=42,
    )
    model.fit(X_train, y_train)
    print(f" Iterations: {model.n_iter_}")

    # The risk score is a 0-100 value, so clamp predictions before scoring.
    y_pred = np.clip(model.predict(X_test), 0, 100)

    # Cast to built-in float so the values are always JSON-serialisable,
    # whatever NumPy scalar type the metric functions return.
    mae = float(mean_absolute_error(y_test, y_pred))
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
    r2 = float(r2_score(y_test, y_pred))
    print(f" MAE: {mae:.2f}")
    print(f" RMSE: {rmse:.2f}")
    print(f" R²: {r2:.4f}")
    return model, mae, rmse, r2


def _train_advice_model(X_train, X_test, y_train, y_test):
    """Fit the advice classifier and print accuracy, macro F1 and a report.

    Returns:
        Tuple ``(model, accuracy, f1_macro)`` with both metrics as plain
        Python floats computed on the test split.
    """
    print("\n🚀 Training advice classifier (15 conditions)...")
    model = HistGradientBoostingClassifier(
        max_iter=400,
        max_depth=6,
        learning_rate=0.05,
        min_samples_leaf=10,
        l2_regularization=0.3,
        early_stopping=True,
        validation_fraction=0.1,
        n_iter_no_change=30,
        random_state=42,
    )
    model.fit(X_train, y_train)
    print(f" Iterations: {model.n_iter_}")

    y_pred = model.predict(X_test)
    acc = float(accuracy_score(y_test, y_pred))
    f1_macro = float(f1_score(y_test, y_pred, average="macro", zero_division=0))
    print(f" Accuracy: {acc:.4f}")
    print(f" F1 macro: {f1_macro:.4f}")

    print("\n Per-condition report:")
    # Pass the label list explicitly so each display name is tied to its
    # class label rather than relying on the report inferring the same order.
    labels = sorted(set(y_test) | set(y_pred))
    target_names = [CONDITION_NAMES.get(label, f"Cond_{label}") for label in labels]
    print(
        classification_report(
            y_test,
            y_pred,
            labels=labels,
            target_names=target_names,
            zero_division=0,
        )
    )
    return model, acc, f1_macro


def _save_artifacts(risk_model, advice_model, metadata):
    """Pickle both models and write the metadata JSON under ``model/``."""
    os.makedirs(MODEL_DIR, exist_ok=True)

    with open(RISK_MODEL_PATH, "wb") as f:
        pickle.dump(risk_model, f)

    with open(ADVICE_MODEL_PATH, "wb") as f:
        pickle.dump(advice_model, f)

    with open(METADATA_PATH, "w") as f:
        json.dump(metadata, f, indent=2)

    print(f"\n💾 {RISK_MODEL_PATH} ({os.path.getsize(RISK_MODEL_PATH) // 1024} KB)")
    print(f"💾 {ADVICE_MODEL_PATH} ({os.path.getsize(ADVICE_MODEL_PATH) // 1024} KB)")
    print(f"📋 {METADATA_PATH}")


def main() -> None:
    """Generate data, train both models, and save them with their metrics.

    Side effects: prints progress to stdout and writes
    ``model/risk_model.pkl``, ``model/advice_model.pkl`` and
    ``model/metadata.json`` relative to the current working directory.
    """
    print("=" * 60)
    print(" BIOWEATHER v2.0 — Production Training")
    print(" EmpedocLabs")
    print("=" * 60)

    # ── 1. Generate data ─────────────────────────────────────────
    print("\n📊 Generating training data...")
    df = generate_production_data(n=25000, seed=42)

    X = df[FEATURE_COLS].values
    y_risk = df["risk_score"].values
    y_advice = df["advice_label"].values

    # ── 2. Split ─────────────────────────────────────────────────
    # One call splits features and both targets so all rows stay aligned.
    X_train, X_test, yr_train, yr_test, ya_train, ya_test = train_test_split(
        X, y_risk, y_advice, test_size=0.15, random_state=42,
    )
    print(f"\n📂 Split: Train={len(X_train):,} Test={len(X_test):,}")

    # ── 3. Train risk regressor ──────────────────────────────────
    risk_model, mae, rmse, r2 = _train_risk_model(X_train, X_test, yr_train, yr_test)

    # ── 4. Train advice classifier ───────────────────────────────
    advice_model, acc, f1_macro = _train_advice_model(X_train, X_test, ya_train, ya_test)

    # ── 5. Save models ───────────────────────────────────────────
    metadata = {
        "version": "2.0.0",
        # NOTE(review): naive local timestamp — consider a timezone-aware
        # value if consumers of metadata.json can accept an offset suffix.
        "trained_at": datetime.now().isoformat(),
        "training_samples": len(X_train),
        "features": FEATURE_COLS,
        # Derived from the mapping so the two cannot drift apart.
        "num_conditions": len(CONDITION_NAMES),
        "risk_metrics": {"mae": round(mae, 2), "rmse": round(rmse, 2), "r2": round(r2, 4)},
        "advice_metrics": {"accuracy": round(acc, 4), "f1_macro": round(f1_macro, 4)},
    }
    _save_artifacts(risk_model, advice_model, metadata)

    print(f"\n{'=' * 60}")
    print(" ✅ BIOWEATHER v2.0 READY")
    print(f" Risk: MAE={mae:.2f}, R²={r2:.4f}")
    print(f" Advice: Acc={acc:.4f}, F1={f1_macro:.4f}")
    print(f"{'=' * 60}")


if __name__ == "__main__":
    main()