# bioweather / train.py
# emp-admin's picture
# Upload 9 files
# 5f98f88 verified
"""
Bioweather Model Training v2.0
EmpedocLabs © 2025

Trains:
  1. Risk regressor (0-100 score)
  2. Advice classifier (15 weather conditions)

Both use HistGradientBoosting (sklearn) — no XGBoost dependency needed.
"""
import os
import pickle
import json
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.ensemble import HistGradientBoostingRegressor, HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
mean_absolute_error, mean_squared_error, r2_score,
classification_report, accuracy_score, f1_score,
)
from generate_data import generate_production_data
# Input columns selected from the generated DataFrame, in model feature order.
# Kept as a list because it is used directly for DataFrame column selection.
FEATURE_COLS = [
    "temp_c",
    "pressure_hpa",
    "humidity",
    "wind_kph",
    "uv_index",
    "pressure_drop",
    "temp_change",
]
# Human-readable name for each integer advice label; the position in the
# tuple below is the label id (0-14).
CONDITION_NAMES = dict(
    enumerate(
        (
            "Clear Skies",
            "Rapid Pressure Drop",
            "Pressure Squeeze",
            "Sauna Effect",
            "High Wind",
            "High UV Glare",
            "Bitter Cold",
            "Drastic Temp Drop",
            "Heat Shock",
            "Heavy Dampness",
            "Mild Pressure Dip",
            "Mild Pressure Rise",
            "Breezy Pollen",
            "Dry Air",
            "Stagnant & Gloomy",
        )
    )
)
def _fit_risk_regressor(X_train, y_train, X_test, y_test):
    """Fit the risk regressor and print its metrics on the test split.

    Returns:
        tuple: ``(model, metrics)`` where ``metrics`` maps ``"mae"``,
        ``"rmse"`` and ``"r2"`` to plain floats.
    """
    print("\n🚀 Training risk regressor...")
    model = HistGradientBoostingRegressor(
        max_iter=400,
        max_depth=6,
        learning_rate=0.05,
        min_samples_leaf=15,
        l2_regularization=0.5,
        early_stopping=True,
        validation_fraction=0.1,
        n_iter_no_change=30,
        random_state=42,
    )
    model.fit(X_train, y_train)
    print(f" Iterations: {model.n_iter_}")

    # The risk score lives on a 0-100 scale, so clamp before scoring.
    y_pred = np.clip(model.predict(X_test), 0, 100)
    metrics = {
        "mae": float(mean_absolute_error(y_test, y_pred)),
        "rmse": float(np.sqrt(mean_squared_error(y_test, y_pred))),
        "r2": float(r2_score(y_test, y_pred)),
    }
    print(f" MAE: {metrics['mae']:.2f}")
    print(f" RMSE: {metrics['rmse']:.2f}")
    print(f" R²: {metrics['r2']:.4f}")
    return model, metrics


def _fit_advice_classifier(X_train, y_train, X_test, y_test):
    """Fit the advice classifier and print its metrics on the test split.

    Returns:
        tuple: ``(model, metrics)`` where ``metrics`` maps ``"accuracy"``
        and ``"f1_macro"`` to plain floats.
    """
    print(f"\n🚀 Training advice classifier ({len(CONDITION_NAMES)} conditions)...")
    model = HistGradientBoostingClassifier(
        max_iter=400,
        max_depth=6,
        learning_rate=0.05,
        min_samples_leaf=10,
        l2_regularization=0.3,
        early_stopping=True,
        validation_fraction=0.1,
        n_iter_no_change=30,
        random_state=42,
    )
    model.fit(X_train, y_train)
    print(f" Iterations: {model.n_iter_}")

    y_pred = model.predict(X_test)
    metrics = {
        "accuracy": float(accuracy_score(y_test, y_pred)),
        "f1_macro": float(
            f1_score(y_test, y_pred, average="macro", zero_division=0)
        ),
    }
    print(f" Accuracy: {metrics['accuracy']:.4f}")
    print(f" F1 macro: {metrics['f1_macro']:.4f}")

    print("\n Per-condition report:")
    # Pass the label list explicitly so each row of the report is tied to the
    # matching entry of target_names rather than to an implicit ordering.
    labels = sorted(set(y_test) | set(y_pred))
    target_names = [CONDITION_NAMES.get(label, f"Cond_{label}") for label in labels]
    print(
        classification_report(
            y_test,
            y_pred,
            labels=labels,
            target_names=target_names,
            zero_division=0,
        )
    )
    return model, metrics


def _save_artifacts(risk_model, advice_model, metadata):
    """Pickle both models and write the metadata JSON under ``model/``."""
    model_dir = "model"
    risk_path = f"{model_dir}/risk_model.pkl"
    advice_path = f"{model_dir}/advice_model.pkl"
    metadata_path = f"{model_dir}/metadata.json"

    os.makedirs(model_dir, exist_ok=True)
    for path, model in ((risk_path, risk_model), (advice_path, advice_model)):
        with open(path, "wb") as f:
            pickle.dump(model, f)
    with open(metadata_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print(f"\n💾 {risk_path} ({os.path.getsize(risk_path) // 1024} KB)")
    print(f"💾 {advice_path} ({os.path.getsize(advice_path) // 1024} KB)")
    print(f"📋 {metadata_path}")


def main():
    """Run the full training pipeline and write the artifacts to ``model/``.

    Steps:
      1. Generate 25,000 rows with ``generate_production_data`` (seed 42).
      2. Hold out 15% of the rows as a test split.
      3. Fit and evaluate the risk regressor.
      4. Fit and evaluate the advice classifier.
      5. Save ``risk_model.pkl``, ``advice_model.pkl`` and ``metadata.json``.

    Progress and metrics are printed to stdout; nothing is returned.
    """
    # Local import: the file header only imports ``datetime`` itself.
    from datetime import timezone

    banner = "=" * 60
    print(banner)
    print(" BIOWEATHER v2.0 — Production Training")
    print(" EmpedocLabs")
    print(banner)

    # ── 1. Generate data ─────────────────────────────────────────
    print("\n📊 Generating training data...")
    df = generate_production_data(n=25000, seed=42)
    X = df[FEATURE_COLS].values
    y_risk = df["risk_score"].values
    y_advice = df["advice_label"].values

    # ── 2. Split ─────────────────────────────────────────────────
    # One call splits features and both targets so the rows stay aligned.
    X_train, X_test, yr_train, yr_test, ya_train, ya_test = train_test_split(
        X, y_risk, y_advice, test_size=0.15, random_state=42,
    )
    print(f"\n📂 Split: Train={len(X_train):,} Test={len(X_test):,}")

    # ── 3. Train risk regressor ──────────────────────────────────
    risk_model, risk = _fit_risk_regressor(X_train, yr_train, X_test, yr_test)

    # ── 4. Train advice classifier ───────────────────────────────
    advice_model, advice = _fit_advice_classifier(
        X_train, ya_train, X_test, ya_test
    )

    # ── 5. Save models ───────────────────────────────────────────
    metadata = {
        "version": "2.0.0",
        # Timezone-aware UTC timestamp so the value is unambiguous.
        "trained_at": datetime.now(timezone.utc).isoformat(),
        "training_samples": len(X_train),
        "features": FEATURE_COLS,
        # Derived from the label table instead of a hard-coded 15.
        "num_conditions": len(CONDITION_NAMES),
        "risk_metrics": {
            "mae": round(risk["mae"], 2),
            "rmse": round(risk["rmse"], 2),
            "r2": round(risk["r2"], 4),
        },
        "advice_metrics": {
            "accuracy": round(advice["accuracy"], 4),
            "f1_macro": round(advice["f1_macro"], 4),
        },
    }
    _save_artifacts(risk_model, advice_model, metadata)

    print(f"\n{banner}")
    print(" ✅ BIOWEATHER v2.0 READY")
    print(f" Risk: MAE={risk['mae']:.2f}, R²={risk['r2']:.4f}")
    print(f" Advice: Acc={advice['accuracy']:.4f}, F1={advice['f1_macro']:.4f}")
    print(banner)
# Script entry point: run the training pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()