"""
Bioweather Model Training v2.0
EmpedocLabs © 2025
Trains:
1. Risk regressor (0-100 score)
2. Advice classifier (15 weather conditions)
Both use HistGradientBoosting (sklearn) — no XGBoost dependency needed.
"""
import os
import pickle
import json
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.ensemble import HistGradientBoostingRegressor, HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
mean_absolute_error, mean_squared_error, r2_score,
classification_report, accuracy_score, f1_score,
)
from generate_data import generate_production_data
# Input feature columns, in the exact order both models are trained on.
# Inference code must build its feature matrix in this same order.
FEATURE_COLS: list[str] = [
    "temp_c", "pressure_hpa", "humidity", "wind_kph",
    "uv_index", "pressure_drop", "temp_change",
]
# Human-readable names for the advice classifier's 15 labels (0-14);
# used only for the per-condition classification report printout.
CONDITION_NAMES: dict[int, str] = {
    0: "Clear Skies", 1: "Rapid Pressure Drop", 2: "Pressure Squeeze",
    3: "Sauna Effect", 4: "High Wind", 5: "High UV Glare",
    6: "Bitter Cold", 7: "Drastic Temp Drop", 8: "Heat Shock",
    9: "Heavy Dampness", 10: "Mild Pressure Dip", 11: "Mild Pressure Rise",
    12: "Breezy Pollen", 13: "Dry Air", 14: "Stagnant & Gloomy",
}
def main():
    """Train and persist the two bioweather models.

    Pipeline:
      1. Generate 25k synthetic samples via ``generate_production_data``.
      2. Split train/test 85/15 in a single call so the risk and advice
         targets stay row-aligned with the features.
      3. Fit a HistGradientBoostingRegressor for the 0-100 risk score.
      4. Fit a HistGradientBoostingClassifier for the 15 advice conditions.
      5. Pickle both fitted models and write a JSON metadata manifest
         under ``model/``.

    Side effects: prints progress/metrics to stdout and writes
    ``model/risk_model.pkl``, ``model/advice_model.pkl``,
    ``model/metadata.json``.
    """
    print("=" * 60)
    print(" BIOWEATHER v2.0 - Production Training")
    print(" EmpedocLabs")
    print("=" * 60)

    # -- 1. Generate data ---------------------------------------------------
    print("\n[1/5] Generating training data...")
    df = generate_production_data(n=25000, seed=42)
    X = df[FEATURE_COLS].values
    y_risk = df["risk_score"].values
    y_advice = df["advice_label"].values

    # -- 2. Split -------------------------------------------------------------
    # One train_test_split call with both targets keeps all arrays aligned.
    X_train, X_test, yr_train, yr_test, ya_train, ya_test = train_test_split(
        X, y_risk, y_advice, test_size=0.15, random_state=42,
    )
    print(f"\n[2/5] Split: Train={len(X_train):,} Test={len(X_test):,}")

    # -- 3. Train risk regressor ----------------------------------------------
    print("\n[3/5] Training risk regressor...")
    risk_model = HistGradientBoostingRegressor(
        max_iter=400,
        max_depth=6,
        learning_rate=0.05,
        min_samples_leaf=15,
        l2_regularization=0.5,
        early_stopping=True,          # stop on the internal validation split
        validation_fraction=0.1,
        n_iter_no_change=30,
        random_state=42,
    )
    risk_model.fit(X_train, yr_train)
    print(f" Iterations: {risk_model.n_iter_}")
    # Clip to the valid 0-100 score range before computing metrics, since
    # gradient boosting can extrapolate slightly outside the target range.
    yr_pred = np.clip(risk_model.predict(X_test), 0, 100)
    mae = mean_absolute_error(yr_test, yr_pred)
    rmse = np.sqrt(mean_squared_error(yr_test, yr_pred))
    r2 = r2_score(yr_test, yr_pred)
    print(f" MAE: {mae:.2f}")
    print(f" RMSE: {rmse:.2f}")
    print(f" R²: {r2:.4f}")

    # -- 4. Train advice classifier -------------------------------------------
    print("\n[4/5] Training advice classifier (15 conditions)...")
    advice_model = HistGradientBoostingClassifier(
        max_iter=400,
        max_depth=6,
        learning_rate=0.05,
        min_samples_leaf=10,
        l2_regularization=0.3,
        early_stopping=True,
        validation_fraction=0.1,
        n_iter_no_change=30,
        random_state=42,
    )
    advice_model.fit(X_train, ya_train)
    print(f" Iterations: {advice_model.n_iter_}")
    ya_pred = advice_model.predict(X_test)
    acc = accuracy_score(ya_test, ya_pred)
    f1_macro = f1_score(ya_test, ya_pred, average="macro", zero_division=0)
    print(f" Accuracy: {acc:.4f}")
    print(f" F1 macro: {f1_macro:.4f}")
    print("\n Per-condition report:")
    # Only name the labels actually present in test/pred, or the report's
    # target_names length would mismatch its label count and raise.
    target_names = [
        CONDITION_NAMES.get(i, f"Cond_{i}")
        for i in sorted(set(ya_test) | set(ya_pred))
    ]
    print(classification_report(ya_test, ya_pred, target_names=target_names, zero_division=0))

    # -- 5. Save models -------------------------------------------------------
    os.makedirs("model", exist_ok=True)
    with open("model/risk_model.pkl", "wb") as f:
        pickle.dump(risk_model, f)
    with open("model/advice_model.pkl", "wb") as f:
        pickle.dump(advice_model, f)
    metadata = {
        "version": "2.0.0",
        "trained_at": datetime.now().isoformat(),
        "training_samples": len(X_train),
        "features": FEATURE_COLS,
        "num_conditions": 15,
        "risk_metrics": {"mae": round(mae, 2), "rmse": round(rmse, 2), "r2": round(r2, 4)},
        "advice_metrics": {"accuracy": round(acc, 4), "f1_macro": round(f1_macro, 4)},
    }
    with open("model/metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)
    print(f"\n[5/5] model/risk_model.pkl ({os.path.getsize('model/risk_model.pkl') // 1024} KB)")
    print(f" model/advice_model.pkl ({os.path.getsize('model/advice_model.pkl') // 1024} KB)")
    print(" model/metadata.json")

    # Final summary. NOTE(review): the original source had this print split
    # across two lines by a mangled character (a syntax error); reconstructed.
    print(f"\n{'=' * 60}")
    print(" BIOWEATHER v2.0 READY")
    print(f" Risk: MAE={mae:.2f}, R²={r2:.4f}")
    print(f" Advice: Acc={acc:.4f}, F1={f1_macro:.4f}")
    print(f"{'=' * 60}")
# Script entry point: train both models and write artifacts to ./model/.
if __name__ == "__main__":
    main()