Spaces:
Running
Running
"""
Bioweather Model Training v2.0
EmpedocLabs © 2025
Trains:
1. Risk regressor (0-100 score)
2. Advice classifier (15 weather conditions)
Both use HistGradientBoosting (sklearn) — no XGBoost dependency needed.
"""
| import os | |
| import pickle | |
| import json | |
| import numpy as np | |
| import pandas as pd | |
| from datetime import datetime | |
| from sklearn.ensemble import HistGradientBoostingRegressor, HistGradientBoostingClassifier | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import ( | |
| mean_absolute_error, mean_squared_error, r2_score, | |
| classification_report, accuracy_score, f1_score, | |
| ) | |
| from generate_data import generate_production_data | |
# Input feature order shared by both models. Must match the column names
# emitted by generate_production_data(), since main() selects df[FEATURE_COLS].
FEATURE_COLS = [
    "temp_c", "pressure_hpa", "humidity", "wind_kph",
    "uv_index", "pressure_drop", "temp_change",
]
# Human-readable names for the 15 advice-classifier labels (0-14).
# Used only for display in the per-condition classification report.
CONDITION_NAMES = {
    0: "Clear Skies", 1: "Rapid Pressure Drop", 2: "Pressure Squeeze",
    3: "Sauna Effect", 4: "High Wind", 5: "High UV Glare",
    6: "Bitter Cold", 7: "Drastic Temp Drop", 8: "Heat Shock",
    9: "Heavy Dampness", 10: "Mild Pressure Dip", 11: "Mild Pressure Rise",
    12: "Breezy Pollen", 13: "Dry Air", 14: "Stagnant & Gloomy",
}
def _train_risk_regressor(X_train, y_train):
    """Fit and return the 0-100 risk-score regressor."""
    model = HistGradientBoostingRegressor(
        max_iter=400,
        max_depth=6,
        learning_rate=0.05,
        min_samples_leaf=15,
        l2_regularization=0.5,
        early_stopping=True,
        validation_fraction=0.1,
        n_iter_no_change=30,
        random_state=42,
    )
    model.fit(X_train, y_train)
    return model


def _train_advice_classifier(X_train, y_train):
    """Fit and return the 15-way advice-condition classifier."""
    model = HistGradientBoostingClassifier(
        max_iter=400,
        max_depth=6,
        learning_rate=0.05,
        min_samples_leaf=10,
        l2_regularization=0.3,
        early_stopping=True,
        validation_fraction=0.1,
        n_iter_no_change=30,
        random_state=42,
    )
    model.fit(X_train, y_train)
    return model


def main():
    """Train, evaluate, and persist both bioweather models.

    Pipeline: generate synthetic training data, split 85/15, fit the
    risk regressor and the advice classifier, print evaluation metrics,
    then write the pickled models plus a metadata JSON under ./model.
    """
    print("=" * 60)
    print(" BIOWEATHER v2.0 — Production Training")
    print(" EmpedocLabs")
    print("=" * 60)

    # ── 1. Generate data ─────────────────────────────────────────
    print("\nGenerating training data...")
    df = generate_production_data(n=25000, seed=42)
    X = df[FEATURE_COLS].values
    y_risk = df["risk_score"].values
    y_advice = df["advice_label"].values

    # ── 2. Split ─────────────────────────────────────────────────
    # Single split keeps risk/advice targets row-aligned with X.
    X_train, X_test, yr_train, yr_test, ya_train, ya_test = train_test_split(
        X, y_risk, y_advice, test_size=0.15, random_state=42,
    )
    print(f"\nSplit: Train={len(X_train):,} Test={len(X_test):,}")

    # ── 3. Train risk regressor ──────────────────────────────────
    print("\nTraining risk regressor...")
    risk_model = _train_risk_regressor(X_train, yr_train)
    print(f" Iterations: {risk_model.n_iter_}")
    # Clip to the valid 0-100 score range before computing metrics,
    # matching how predictions are consumed downstream.
    yr_pred = np.clip(risk_model.predict(X_test), 0, 100)
    mae = mean_absolute_error(yr_test, yr_pred)
    rmse = np.sqrt(mean_squared_error(yr_test, yr_pred))
    r2 = r2_score(yr_test, yr_pred)
    print(f" MAE:  {mae:.2f}")
    print(f" RMSE: {rmse:.2f}")
    print(f" R²:   {r2:.4f}")

    # ── 4. Train advice classifier ───────────────────────────────
    print("\nTraining advice classifier (15 conditions)...")
    advice_model = _train_advice_classifier(X_train, ya_train)
    print(f" Iterations: {advice_model.n_iter_}")
    ya_pred = advice_model.predict(X_test)
    acc = accuracy_score(ya_test, ya_pred)
    f1_macro = f1_score(ya_test, ya_pred, average="macro", zero_division=0)
    print(f" Accuracy: {acc:.4f}")
    print(f" F1 macro: {f1_macro:.4f}")
    print("\n Per-condition report:")
    # Pass labels= explicitly so target_names stays aligned with the
    # report rows instead of relying on sklearn's implicit ordering.
    labels = sorted(set(ya_test) | set(ya_pred))
    target_names = [CONDITION_NAMES.get(i, f"Cond_{i}") for i in labels]
    print(classification_report(
        ya_test, ya_pred, labels=labels, target_names=target_names, zero_division=0,
    ))

    # ── 5. Save models ───────────────────────────────────────────
    os.makedirs("model", exist_ok=True)
    with open("model/risk_model.pkl", "wb") as f:
        pickle.dump(risk_model, f)
    with open("model/advice_model.pkl", "wb") as f:
        pickle.dump(advice_model, f)
    metadata = {
        "version": "2.0.0",
        "trained_at": datetime.now().isoformat(),
        "training_samples": len(X_train),
        "features": FEATURE_COLS,
        "num_conditions": 15,
        "risk_metrics": {"mae": round(mae, 2), "rmse": round(rmse, 2), "r2": round(r2, 4)},
        "advice_metrics": {"accuracy": round(acc, 4), "f1_macro": round(f1_macro, 4)},
    }
    with open("model/metadata.json", "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2, ensure_ascii=False)

    print(f"\nSaved model/risk_model.pkl ({os.path.getsize('model/risk_model.pkl') // 1024} KB)")
    print(f"Saved model/advice_model.pkl ({os.path.getsize('model/advice_model.pkl') // 1024} KB)")
    print("Saved model/metadata.json")
    print(f"\n{'=' * 60}")
    print(" BIOWEATHER v2.0 READY")
    print(f" Risk:   MAE={mae:.2f}, R²={r2:.4f}")
    print(f" Advice: Acc={acc:.4f}, F1={f1_macro:.4f}")
    print(f"{'=' * 60}")


if __name__ == "__main__":
    main()