Spaces:

emp-admin
/

bioweather

Running

App Files Files Community

emp-admin committed on Mar 12

Commit

5f98f88

verified ·

1 Parent(s): 3ea4062

Upload 9 files

Browse files

Files changed (9) hide show

Dockerfile +6 -13
README.md +81 -5
advice_model.pkl +2 -2
app.py +225 -182
generate_data.py +197 -0
metadata.json +24 -0
requirements.txt +6 -7
risk_model.pkl +2 -2
train.py +145 -0

Dockerfile CHANGED Viewed

@@ -1,14 +1,7 @@
-FROM python:3.9
-WORKDIR /code
-COPY ./requirements.txt /code/requirements.txt
-RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 COPY . .
-# Grant permissions to models
-RUN chmod 777 risk_model.pkl advice_model.pkl
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

+FROM python:3.11-slim
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,86 @@
 ---
-title: Bioweather
-emoji: 📊
-colorFrom: gray
-colorTo: gray
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Phoebe Bioweather API v2
+emoji: 🌤️
+colorFrom: blue
+colorTo: green
 sdk: docker
 pinned: false
+license: mit
+app_port: 7860
 ---
+# 🌤️ Phoebe Bioweather API v2.0
+**Weather-driven headache risk scoring** for the [Phoebe](https://empedoclabs.com) iOS app by **EmpedocLabs**.
+## What It Does
+Takes 7 weather parameters → returns a 0-100 risk score, one of 15 biometeo conditions, and personalized actionable advice with 3 severity tiers.
+## Endpoints
+| Method | Path | Description |
+|---|---|---|
+| `GET` | `/` | Status |
+| `GET` | `/health` | Model health |
+| `POST` | `/predict` | Risk score + condition + actions |
+## Request
+```json
+{
+  "temp_c": 28.5,
+  "pressure_hpa": 1005.3,
+  "humidity": 88,
+  "wind_kph": 12,
+  "uv_index": 7,
+  "pressure_drop": -7.2,
+  "temp_change": 3.5
+}
+```
+## Response
+```json
+{
+  "risk_score": 72,
+  "risk_level": "High",
+  "condition": {
+    "id": 1,
+    "title": "Rapid Pressure Drop",
+    "emoji": "📉",
+    "text": "A sharp pressure drop is one of today's main headache drivers...",
+    "actions": [
+      "Reduce stimulation for the next few hours...",
+      "Lower sensory load: dim lights, shorter screen blocks...",
+      "Keep hydration steady and avoid skipped meals."
+    ]
+  }
+}
+```
+## 15 Biometeo Conditions
+| ID | Condition | Primary Trigger |
+|---|---|---|
+| 0 | Clear Skies | No weather trigger |
+| 1 | Rapid Pressure Drop | Barometric drop ≥ 8 hPa |
+| 2 | Pressure Squeeze | Barometric rise ≥ 8 hPa |
+| 3 | Sauna Effect | Heat + humidity |
+| 4 | High Wind | Wind ≥ 40 km/h |
+| 5 | High UV Glare | UV index ≥ 7 |
+| 6 | Bitter Cold | Temperature < 0°C |
+| 7 | Drastic Temp Drop | 24h temp change < -7°C |
+| 8 | Heat Shock | 24h temp change > +7°C |
+| 9 | Heavy Dampness | Humidity > 88% + calm |
+| 10 | Mild Barometric Dip | Pressure drop 2–8 hPa |
+| 11 | Mild Pressure Squeeze | Pressure rise 2–8 hPa |
+| 12 | Breezy Pollen Risk | Moderate wind + warm |
+| 13 | Dry Air Warning | Humidity < 30% |
+| 14 | Stagnant & Gloomy | Low UV + high humidity + calm |
+## Model Details
+- Risk regressor: HistGradientBoosting, MAE=2.52, R²=0.977
+- Advice classifier: HistGradientBoosting, Accuracy=98.6%, F1=0.971
+- Rule-based coherence layer ensures physically impossible outputs never reach the user

advice_model.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:263d9593bab57d91a586d9adadad063eb77f1dd017684154ed5568d3dc77c783
-size 3006513

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc520fad21be7a84e2f10c835fb57bb7e1afc2fcebfb5246c25c528489cf3a5c
+size 5642415

app.py CHANGED Viewed

@@ -1,20 +1,87 @@
-from fastapi import FastAPI
-from pydantic import BaseModel
 import pickle
 import pandas as pd
-# Load Models
-with open("risk_model.pkl", "rb") as f:
-    risk_model = pickle.load(f)
-with open("advice_model.pkl", "rb") as f:
-    advice_model = pickle.load(f)
-app = FastAPI()
-# Deterministic, risk-aware advice library.
-# The classifier predicts the condition ID.
-# The risk model determines which text severity to use.
 ADVICE_LIBRARY = {
     0: {
         "title": "Clear Skies, Clear Head",
@@ -36,7 +103,7 @@ ADVICE_LIBRARY = {
         "texts": {
             "Low": "Pressure is dipping, but the signal is still mild. Very sensitive users may notice slight heaviness behind the eyes or a drop in energy.",
             "Moderate": "Pressure is falling fast enough to lower your migraine threshold. This is a day to reduce other triggers and keep hydration steady.",
-            "High": "A sharp pressure drop is one of today’s main headache drivers. Lower sensory load, keep rescue medication accessible if prescribed, and avoid overexertion."
         },
         "actions": [
             "Lower sensory load: dim lights, shorter screen blocks, less noise.",
@@ -190,7 +257,7 @@ ADVICE_LIBRARY = {
         "texts": {
             "Low": "Air movement may be stirring light environmental irritation, especially if you already have mild allergy sensitivity.",
             "Moderate": "Breezy conditions can carry pollen and dust that push sinus and histamine-related headaches. Keep indoor air cleaner and limit exposure if needed.",
-            "High": "Wind-driven allergen exposure is likely one of today’s main triggers. Protect your airways, keep windows controlled, and manage the histamine load early."
         },
         "actions": [
             "Keep windows closed if pollen is a usual issue.",
@@ -225,225 +292,201 @@ ADVICE_LIBRARY = {
             "Improve indoor lighting if screens feel heavy on the eyes.",
             "Watch posture and avoid collapsing into the desk."
         ]
-    }
 }
 class WeatherInput(BaseModel):
-    temp_c: float
-    pressure_hpa: float
-    humidity: float
-    wind_kph: float
-    uv_index: int
-    pressure_drop: float
-    temp_change: float
-def clamp_risk_score(value) -> int:
     try:
         return int(max(0, min(100, round(float(value)))))
     except Exception:
         return 0
-def get_risk_level(risk_score: int) -> str:
-    if risk_score > 55:
-        return "High"
-    if risk_score > 30:
-        return "Moderate"
     return "Low"
-def infer_rule_based_condition(row: dict) -> tuple[int, int]:
-    """
-    Rule-based coherence layer.
-    This does NOT replace the ML classifier.
-    It only corrects obviously weak/misaligned condition picks.
-    Returns: (condition_id, strength)
-    """
-    temp_c = float(row["temp_c"])
-    humidity = float(row["humidity"])
-    wind_kph = float(row["wind_kph"])
-    uv_index = int(row["uv_index"])
-    pressure_delta = float(row["pressure_drop"])
-    temp_change = float(row["temp_change"])
-    candidates = []
-    def add(condition_id: int, strength: int):
-        candidates.append((condition_id, strength))
-    # Pressure movement
-    if pressure_delta <= -8:
-        add(1, 95)   # Rapid drop
-    elif pressure_delta <= -4:
-        add(10, 72)  # Mild dip
-    if pressure_delta >= 8:
-        add(2, 95)   # Rapid rise
-    elif pressure_delta >= 4:
-        add(11, 72)  # Mild rise
-    # Heat / humidity
-    if temp_c >= 29 and humidity >= 70:
-        add(3, 92)
-    # Wind / pollen
-    if wind_kph >= 40:
-        add(4, 90)
-    elif wind_kph >= 20:
-        add(12, 66)
-    # UV
-    if uv_index >= 8:
-        add(5, 88)
-    # Cold
-    if temp_c <= 0:
-        add(6, 84)
-    # Temp shock
-    if temp_change <= -7:
-        add(7, 89)
-    elif temp_change >= 7:
-        add(8, 89)
-    # Dampness / stagnation
-    if humidity >= 92 and wind_kph <= 12:
-        add(9, 76)
-    # Dryness
-    if humidity <= 30:
-        add(13, 78)
-    # Gloomy / stagnant
-    if uv_index <= 2 and humidity >= 75 and wind_kph <= 10:
-        add(14, 64)
-    if not candidates:
         return 0, 0
-    return max(candidates, key=lambda x: x[1])
-def select_condition_id(model_condition_id: int, row: dict, risk_score: int) -> int:
-    """
-    Use ML first, then correct obvious nonsense deterministically.
-    Example: model says 'clear skies' while risk is high and pressure is crashing.
-    """
-    rule_condition_id, rule_strength = infer_rule_based_condition(row)
-    if model_condition_id not in ADVICE_LIBRARY:
-        return rule_condition_id if rule_strength > 0 else 0
-    # If the rule signal is very strong, trust the weather pattern.
-    if rule_strength >= 90 and model_condition_id != rule_condition_id:
-        return rule_condition_id
-    # If model says "all clear" but risk is elevated and rules see a meaningful trigger, override.
-    if model_condition_id == 0 and risk_score >= 45 and rule_strength >= 65:
-        return rule_condition_id
-    return model_condition_id
-def dedupe_keep_order(items: list[str]) -> list[str]:
-    seen = set()
-    result = []
-    for item in items:
-        if item and item not in seen:
-            seen.add(item)
-            result.append(item)
-    return result
-def build_actions(condition_id: int, risk_score: int, row: dict) -> list[str]:
-    risk_level = get_risk_level(risk_score)
-    actions = []
-    # Risk-level actions first
-    if risk_level == "High":
-        actions.extend([
             "Reduce stimulation for the next few hours: dim lights, lower audio, and shorten screen sessions.",
             "Keep hydration, food intake, and routine stable today.",
-            "If you have a clinician-approved rescue plan, keep it accessible."
         ])
-    elif risk_level == "Moderate":
-        actions.extend([
             "Protect the basics early: hydration, meals, and shorter screen blocks.",
-            "Avoid stacking other triggers like dehydration, long fasting, or poor posture."
         ])
     else:
-        actions.extend([
-            "No need to overreact, but stay consistent with hydration and meals.",
-        ])
-    # Condition-specific actions
-    actions.extend(ADVICE_LIBRARY[condition_id]["actions"])
-    # Feature-derived actions
     if row["uv_index"] >= 7:
-        actions.append("Use sunglasses outdoors and reduce glare indoors.")
     if row["humidity"] >= 70 and row["temp_c"] >= 27:
-        actions.append("Prioritize electrolytes and cooler environments.")
     if row["humidity"] <= 30:
-        actions.append("Support dry sinuses with humidity or saline if needed.")
     if row["wind_kph"] >= 25:
-        actions.append("Protect your ears and neck when outside.")
     if abs(row["temp_change"]) >= 7:
-        actions.append("Avoid abrupt indoor/outdoor temperature swings; transition gradually.")
     if abs(row["pressure_drop"]) >= 4:
-        actions.append("Keep the rest of the day trigger-light: no skipped meals, no dehydration, no unnecessary strain.")
-    actions = dedupe_keep_order(actions)
-    return actions[:5]
 @app.get("/")
 def home():
-    return {"status": "Biometeorology AI is Active"}
-@app.post("/predict")
-def predict(input_data: WeatherInput):
-    data_dict = input_data.model_dump() if hasattr(input_data, "model_dump") else input_data.dict()
-    df = pd.DataFrame([data_dict])
-    if hasattr(risk_model, "feature_names_in_"):
-        expected_cols = list(risk_model.feature_names_in_)
-        missing = set(expected_cols) - set(df.columns)
-        if missing:
-            return {"error": f"Missing features required by model: {sorted(missing)}"}
-        df = df[expected_cols]
-    row = df.iloc[0].to_dict()
-    # 1. Predict risk
     risk_pred = risk_model.predict(df)[0]
-    risk_score = clamp_risk_score(risk_pred)
     risk_level = get_risk_level(risk_score)
-    # 2. Predict condition from ML
-    model_condition_id = int(advice_model.predict(df)[0])
-    # 3. Deterministic coherence layer
-    final_condition_id = select_condition_id(model_condition_id, row, risk_score)
-    content = ADVICE_LIBRARY.get(final_condition_id, ADVICE_LIBRARY[0])
-    # 4. Deterministic text selection by risk level
     text = content["texts"][risk_level]
-    # 5. Deterministic action list
-    actions = build_actions(final_condition_id, risk_score, row)
-    return {
-        "risk_score": risk_score,
-        "risk_level": risk_level,
-        "condition": {
-            "id": final_condition_id,
-            "title": content["title"],
-            "emoji": content["emoji"],
-            "text": text,
-            "actions": actions
-        }
-    }

+"""
+═══════════════════════════════════════════════════════════════════════
+  Phoebe Bioweather API v2.0
+  EmpedocLabs © 2025
+  Weather-driven headache risk scoring + actionable clinical advice.
+  Designed for the Phoebe iOS app.
+  GET  /         → Status
+  GET  /health   → Model status
+  POST /predict  → Risk score + condition + personalized actions
+═══════════════════════════════════════════════════════════════════════
+"""
+import logging
+import os
 import pickle
+import numpy as np
 import pandas as pd
+from typing import List
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
+# ── Logging ──────────────────────────────────────────────────────────
+logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
+logger = logging.getLogger("bioweather")
+# ── App ──────────────────────────────────────────────────────────────
+app = FastAPI(
+    title="Phoebe Bioweather API",
+    version="2.0.0",
+    description="Weather-driven headache risk scoring for the Phoebe iOS app by EmpedocLabs.",
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    # Wildcard origins must not be combined with credentialed requests.
+    # No endpoint in this file reads cookies or auth headers, so credentials
+    # are disabled rather than enumerating origins.
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ── Models ───────────────────────────────────────────────────────────
+# Model handles; both stay None until the startup hook has unpickled them.
+risk_model = None
+advice_model = None
+# Column selection and order applied to the request frame before predicting.
+FEATURE_COLS = ["temp_c", "pressure_hpa", "humidity", "wind_kph",
+                "uv_index", "pressure_drop", "temp_change"]
+@app.on_event("startup")
+async def load_models():
+    """Load the pickled risk and advice models into the module-level globals.
+
+    Each file is looked up in the working directory, then in ``model/``, then
+    next to this source file. A model that fails to load is logged and left
+    unset; startup still completes so ``/health`` can report the degraded state.
+    """
+    global risk_model, advice_model
+    for name, filename in [("risk", "risk_model.pkl"), ("advice", "advice_model.pkl")]:
+        candidates = [
+            filename,
+            os.path.join("model", filename),
+            os.path.join(os.path.dirname(__file__), filename),
+        ]
+        # Fall back to the last candidate so a missing file fails inside the
+        # try block below and is logged like any other load error.
+        path = next((p for p in candidates if os.path.exists(p)), candidates[-1])
+        try:
+            # NOTE: unpickling executes arbitrary code; only ship trusted model files.
+            with open(path, "rb") as f:
+                model = pickle.load(f)
+        except Exception:
+            # Startup boundary: keep serving, but record the traceback.
+            logger.exception(f"❌ Failed to load {name}_model from {path}")
+            continue
+        if name == "risk":
+            risk_model = model
+        else:
+            advice_model = model
+        logger.info(f"✅ {name}_model loaded from {path}")
+    if risk_model is not None and advice_model is not None:
+        logger.info("✅ Bioweather v2.0 ready")
+    else:
+        logger.warning("⚠️ Bioweather v2.0 started without all models; /predict will return 503")
+# ═══════════════════════════════════════════════════════════════════════
+# ADVICE LIBRARY — 15 biometeo conditions with 3 severity tiers each
+# ═══════════════════════════════════════════════════════════════════════
 ADVICE_LIBRARY = {
     0: {
         "title": "Clear Skies, Clear Head",
         "texts": {
             "Low": "Pressure is dipping, but the signal is still mild. Very sensitive users may notice slight heaviness behind the eyes or a drop in energy.",
             "Moderate": "Pressure is falling fast enough to lower your migraine threshold. This is a day to reduce other triggers and keep hydration steady.",
+            "High": "A sharp pressure drop is one of today's main headache drivers. Lower sensory load, keep rescue medication accessible if prescribed, and avoid overexertion."
         },
         "actions": [
             "Lower sensory load: dim lights, shorter screen blocks, less noise.",
         "texts": {
             "Low": "Air movement may be stirring light environmental irritation, especially if you already have mild allergy sensitivity.",
             "Moderate": "Breezy conditions can carry pollen and dust that push sinus and histamine-related headaches. Keep indoor air cleaner and limit exposure if needed.",
+            "High": "Wind-driven allergen exposure is likely one of today's main triggers. Protect your airways, keep windows controlled, and manage the histamine load early."
         },
         "actions": [
             "Keep windows closed if pollen is a usual issue.",
             "Improve indoor lighting if screens feel heavy on the eyes.",
             "Watch posture and avoid collapsing into the desk."
         ]
+    },
 }
+# ═══════════════════════════════════════════════════════════════════════
+# REQUEST / RESPONSE
+# ═══════════════════════════════════════════════════════════════════════
 class WeatherInput(BaseModel):
+    """Request body for POST /predict: current readings plus 24h pressure and temperature deltas."""
+    # All seven fields are required; only uv_index carries a range constraint (0-11).
+    temp_c: float = Field(..., description="Temperature in Celsius")
+    pressure_hpa: float = Field(..., description="Barometric pressure in hPa/mbar")
+    humidity: float = Field(..., description="Relative humidity %")
+    wind_kph: float = Field(..., description="Wind speed km/h")
+    uv_index: int = Field(..., ge=0, le=11, description="UV index 0-11")
+    pressure_drop: float = Field(..., description="24h pressure change in hPa (negative = drop)")
+    temp_change: float = Field(..., description="24h temperature change in °C")
+class ConditionResponse(BaseModel):
+    """Condition part of a prediction: library id, display fields, severity text and action tips."""
+    id: int
+    title: str
+    emoji: str
+    text: str
+    actions: List[str]
+class PredictResponse(BaseModel):
+    """Response body of POST /predict: integer risk score, its level label and the chosen condition."""
+    risk_score: int
+    risk_level: str
+    condition: ConditionResponse
+# ═══════════════════════════════════════════════════════════════════════
+# LOGIC
+# ═══════════════════════════════════════════════════════════════════════
+def clamp_risk(value) -> int:
+    """Round *value* to the nearest integer and clamp it to the 0-100 range.
+
+    Returns 0 when the value cannot be converted to a finite number
+    (non-numeric input, ``None``, NaN or infinity).
+    """
+    try:
+        score = round(float(value))
+    except (TypeError, ValueError, OverflowError):
+        # float() rejects non-numeric input; round() rejects NaN and infinity.
+        return 0
+    return int(max(0, min(100, score)))
+def get_risk_level(score: int) -> str:
+    """Map a risk score to its label: above 55 is High, above 30 is Moderate, otherwise Low."""
+    if score > 55:
+        level = "High"
+    elif score > 30:
+        level = "Moderate"
+    else:
+        level = "Low"
+    return level
+def infer_rule_condition(row: dict) -> tuple:
+    """Return ``(condition_id, strength)`` for the strongest threshold rule that fires.
+
+    Rule-based coherence check used to correct the ML pick when the weather
+    signal is obvious. Returns ``(0, 0)`` when no rule matches.
+    """
+    temperature = float(row["temp_c"])
+    humidity = float(row["humidity"])
+    wind_speed = float(row["wind_kph"])
+    uv_index = int(row["uv_index"])
+    pressure_delta = float(row["pressure_drop"])
+    temp_delta = float(row["temp_change"])
+    # (fired, condition id, strength), listed in evaluation order.
+    rules = (
+        (pressure_delta <= -8, 1, 95),
+        (-8 < pressure_delta <= -4, 10, 72),
+        (pressure_delta >= 8, 2, 95),
+        (4 <= pressure_delta < 8, 11, 72),
+        (temperature >= 29 and humidity >= 70, 3, 92),
+        (wind_speed >= 40, 4, 90),
+        (20 <= wind_speed < 40, 12, 66),
+        (uv_index >= 8, 5, 88),
+        (temperature <= 0, 6, 84),
+        (temp_delta <= -7, 7, 89),
+        (temp_delta >= 7, 8, 89),
+        (humidity >= 92 and wind_speed <= 12, 9, 76),
+        (humidity <= 30, 13, 78),
+        (uv_index <= 2 and humidity >= 75 and wind_speed <= 10, 14, 64),
+    )
+    strongest = (0, 0)
+    for fired, condition_id, strength in rules:
+        # Strict comparison keeps the earliest rule when strengths tie.
+        if fired and strength > strongest[1]:
+            strongest = (condition_id, strength)
+    return strongest
+def select_condition(ml_id: int, row: dict, risk: int) -> int:
+    """Choose the final condition id: keep the classifier's pick unless the rules clearly disagree.
+
+    The rule result replaces the ML id when the ML id is not in the advice
+    library, when a rule of strength 90+ points to a different condition, or
+    when the ML says "clear" (0) while risk is at least 45 and a rule of
+    strength 65+ fired.
+    """
+    rule_id, strength = infer_rule_condition(row)
+    if ml_id in ADVICE_LIBRARY:
+        strong_rule_disagrees = strength >= 90 and ml_id != rule_id
+        clear_despite_risk = ml_id == 0 and risk >= 45 and strength >= 65
+        return rule_id if (strong_rule_disagrees or clear_despite_risk) else ml_id
+    # Unknown ML label: use the rule result if any rule fired, else "clear".
+    return rule_id if strength > 0 else 0
+def build_actions(cond_id: int, risk: int, row: dict) -> List[str]:
+    """Assemble up to six de-duplicated action tips for a prediction.
+
+    Tips are collected in priority order: risk-level tips first, then the
+    condition's own actions from ``ADVICE_LIBRARY``, then tips triggered by
+    individual weather features in ``row``. An unknown ``cond_id`` falls back
+    to condition 0 instead of raising ``KeyError``.
+    """
+    level = get_risk_level(risk)
+    if level == "High":
+        acts = [
+            "Reduce stimulation for the next few hours: dim lights, lower audio, and shorten screen sessions.",
+            "Keep hydration, food intake, and routine stable today.",
+            "If you have a clinician-approved rescue plan, keep it accessible.",
+        ]
+    elif level == "Moderate":
+        acts = [
+            "Protect the basics early: hydration, meals, and shorter screen blocks.",
+            "Avoid stacking other triggers like dehydration, long fasting, or poor posture.",
+        ]
+    else:
+        acts = ["No need to overreact, but stay consistent with hydration and meals."]
+    # Same fallback predict() applies when it looks up the condition content.
+    condition = ADVICE_LIBRARY.get(cond_id, ADVICE_LIBRARY[0])
+    acts.extend(condition["actions"])
+    feature_tips = (
+        (row["uv_index"] >= 7,
+         "Use sunglasses outdoors and reduce glare indoors."),
+        (row["humidity"] >= 70 and row["temp_c"] >= 27,
+         "Prioritize electrolytes and cooler environments."),
+        (row["humidity"] <= 30,
+         "Support dry sinuses with humidity or saline if needed."),
+        (row["wind_kph"] >= 25,
+         "Protect your ears and neck when outside."),
+        (abs(row["temp_change"]) >= 7,
+         "Avoid abrupt indoor/outdoor temperature swings; transition gradually."),
+        (abs(row["pressure_drop"]) >= 4,
+         "Keep the rest of the day trigger-light: no skipped meals, no dehydration, no unnecessary strain."),
+    )
+    acts.extend(tip for triggered, tip in feature_tips if triggered)
+    # dict.fromkeys drops repeats while keeping first-seen order.
+    return list(dict.fromkeys(acts))[:6]
+# ═══════════════════════════════════════════════════════════════════════
+# ENDPOINTS
+# ═══════════════════════════════════════════════════════════════════════
 @app.get("/")
 def home():
+    """Return service identity and whether both models are loaded."""
+    # Explicit None checks: "loaded" should not depend on an estimator's truthiness.
+    models_ready = risk_model is not None and advice_model is not None
+    return {
+        "service": "Phoebe Bioweather API",
+        "version": "2.0.0",
+        "by": "EmpedocLabs",
+        "status": "running" if models_ready else "models_not_loaded",
+    }
+@app.get("/health")
+def health():
+    """Report per-model load flags and an overall healthy/degraded status."""
+    # Derive the status from the same flags that are reported, so they cannot disagree.
+    risk_loaded = risk_model is not None
+    advice_loaded = advice_model is not None
+    return {
+        "status": "healthy" if risk_loaded and advice_loaded else "degraded",
+        "risk_model_loaded": risk_loaded,
+        "advice_model_loaded": advice_loaded,
+    }
+@app.post("/predict", response_model=PredictResponse)
+def predict(input_data: WeatherInput):
+    """Score headache risk for one weather reading and pick the matching advice.
+
+    Steps: predict the risk score, predict the condition with the classifier,
+    let the rule layer correct obvious mismatches, then select the text for
+    the risk level and build the action list.
+
+    Raises:
+        HTTPException: 503 when either model has not been loaded.
+    """
+    if risk_model is None or advice_model is None:
+        raise HTTPException(503, "Models not loaded")
+    row = input_data.model_dump()
+    df = pd.DataFrame([row])[FEATURE_COLS]
+    # 1. Risk score
+    risk_score = clamp_risk(risk_model.predict(df)[0])
+    risk_level = get_risk_level(risk_score)
+    # 2. Condition from ML
+    ml_condition = int(advice_model.predict(df)[0])
+    # 3. Deterministic coherence
+    condition_id = select_condition(ml_condition, row, risk_score)
+    if condition_id not in ADVICE_LIBRARY:
+        # Report the id of the content actually served, not an unknown id.
+        condition_id = 0
+    content = ADVICE_LIBRARY[condition_id]
+    # 4. Text by risk level
+    text = content["texts"][risk_level]
+    # 5. Actions
+    actions = build_actions(condition_id, risk_score, row)
+    logger.info(
+        "Predict: risk=%s (%s), cond=%s (%s)",
+        risk_score, risk_level, condition_id, content["title"],
+    )
+    return PredictResponse(
+        risk_score=risk_score,
+        risk_level=risk_level,
+        condition=ConditionResponse(
+            id=condition_id,
+            title=content["title"],
+            emoji=content["emoji"],
+            text=text,
+            actions=actions,
+        ),
+    )

generate_data.py ADDED Viewed

	@@ -0,0 +1,197 @@

+"""
+Bioweather Production Data Generator v2.0
+EmpedocLabs © 2025
+Generates clinically-plausible weather → headache risk data with:
+  - 15 distinct biometeo conditions
+  - Seasonal/geographic variation
+  - Multi-trigger overlap scoring
+  - Graded risk (not just if/else buckets)
+  - 20,000+ samples for robust training
+"""
+import numpy as np
+import pandas as pd
+def generate_production_data(n: int = 25000, seed: int = 42) -> pd.DataFrame:
+    """Generate a synthetic weather → headache-risk dataset.
+
+    Each sample draws a season, season-dependent weather, a 24h pressure
+    change and a 24h temperature change, then adds one contribution per
+    triggered condition to a baseline risk of 5. The label is the condition
+    with the largest single contribution (0 when no trigger fired).
+
+    Args:
+        n: Number of samples (rows) to generate.
+        seed: Seed passed to ``np.random.default_rng``.
+
+    Returns:
+        DataFrame with the seven feature columns plus ``risk_score``
+        (integer clipped to 0-100) and ``advice_label``.
+
+    Also prints a short summary of the generated data.
+    """
+    rng = np.random.default_rng(seed)
+    rows = []
+    for _ in range(n):
+        # ── Base weather with seasonal coherence ─────────────────────
+        season = rng.choice(["winter", "spring", "summer", "autumn"],
+                            p=[0.25, 0.25, 0.25, 0.25])
+        # NOTE(review): rng.integers excludes its upper bound by default, so the
+        # UV draws below are 0-3, 2-7, 5-10 and 1-5; a UV of 11 is never
+        # generated. Confirm this is intended.
+        if season == "winter":
+            temp = rng.normal(-2, 8)
+            humidity = rng.normal(70, 15)
+            uv = rng.integers(0, 4)
+            wind = abs(rng.normal(15, 12))
+        elif season == "spring":
+            temp = rng.normal(14, 7)
+            humidity = rng.normal(55, 18)
+            uv = rng.integers(2, 8)
+            wind = abs(rng.normal(18, 10))
+        elif season == "summer":
+            temp = rng.normal(28, 6)
+            humidity = rng.normal(55, 20)
+            uv = rng.integers(5, 11)
+            wind = abs(rng.normal(12, 8))
+        else:  # autumn
+            temp = rng.normal(12, 8)
+            humidity = rng.normal(65, 15)
+            uv = rng.integers(1, 6)
+            wind = abs(rng.normal(16, 10))
+        temp = np.clip(temp, -15, 45)
+        humidity = np.clip(humidity, 8, 99)
+        uv = int(np.clip(uv, 0, 11))
+        wind = np.clip(wind, 0, 70)
+        pressure = rng.normal(1013, 12)
+        pressure = np.clip(pressure, 970, 1050)
+        # Pressure change: occasional fronts
+        # The elif makes a second, independent draw; it is only reached when
+        # the first draw was >= 0.10.
+        if rng.random() < 0.10:
+            p_drop = rng.normal(-8, 3)       # cold front
+        elif rng.random() < 0.08:
+            p_drop = rng.normal(7, 2.5)      # high pressure ridge
+        else:
+            p_drop = rng.normal(0, 2.5)
+        p_drop = np.clip(p_drop, -15, 15)
+        # Temp change: some days have big swings
+        if rng.random() < 0.07:
+            t_change = rng.choice([-1, 1]) * abs(rng.normal(10, 3))
+        else:
+            t_change = rng.normal(0, 3)
+        t_change = np.clip(t_change, -15, 15)
+        # ── Additive risk scoring (multiple triggers stack) ──────────
+        risk = 5.0  # baseline
+        condition_scores = {}  # condition_id → contribution
+        # 1. Pressure drop (strongest weather trigger per literature)
+        if p_drop <= -8:
+            contribution = 35 + abs(p_drop) * 1.5
+            condition_scores[1] = contribution
+            risk += contribution
+        elif p_drop <= -4:
+            contribution = 15 + abs(p_drop) * 1.2
+            condition_scores[10] = contribution
+            risk += contribution
+        elif p_drop <= -2:
+            contribution = 8 + abs(p_drop) * 0.8
+            condition_scores[10] = contribution
+            risk += contribution
+        # 2. Pressure rise
+        if p_drop >= 8:
+            contribution = 25 + p_drop * 1.0
+            condition_scores[2] = contribution
+            risk += contribution
+        elif p_drop >= 4:
+            contribution = 12 + p_drop * 0.7
+            condition_scores[11] = contribution
+            risk += contribution
+        elif p_drop >= 2:
+            contribution = 6 + p_drop * 0.5
+            condition_scores[11] = contribution
+            risk += contribution
+        # 3. Sauna effect (heat + humidity)
+        # NOTE(review): strength is 0 at exactly 28 °C / 65 %, so condition 3
+        # can be recorded (and win the label) without adding any risk.
+        if temp >= 28 and humidity >= 65:
+            strength = (temp - 28) * 2 + (humidity - 65) * 0.5
+            condition_scores[3] = strength
+            risk += strength
+        # 4. Wind
+        if wind >= 40:
+            condition_scores[4] = 25 + (wind - 40) * 0.8
+            risk += condition_scores[4]
+        elif wind >= 20:
+            condition_scores[12] = 10 + (wind - 20) * 0.3
+            risk += condition_scores[12]
+        # 5. UV glare
+        if uv >= 8:
+            condition_scores[5] = 20 + (uv - 8) * 3
+            risk += condition_scores[5]
+        elif uv >= 6 and temp > 15:
+            condition_scores[5] = 8 + (uv - 6) * 2
+            risk += condition_scores[5]
+        # 6. Bitter cold
+        if temp <= -5:
+            condition_scores[6] = 25 + abs(temp + 5) * 2
+            risk += condition_scores[6]
+        elif temp <= 2:
+            condition_scores[6] = 10 + abs(temp - 2) * 1.5
+            risk += condition_scores[6]
+        # 7. Drastic temp drop
+        if t_change <= -8:
+            condition_scores[7] = 30 + abs(t_change) * 1.5
+            risk += condition_scores[7]
+        elif t_change <= -5:
+            condition_scores[7] = 12 + abs(t_change) * 0.8
+            risk += condition_scores[7]
+        # 8. Heat shock
+        if t_change >= 8:
+            condition_scores[8] = 28 + t_change * 1.2
+            risk += condition_scores[8]
+        elif t_change >= 5:
+            condition_scores[8] = 10 + t_change * 0.7
+            risk += condition_scores[8]
+        # 9. Heavy dampness
+        if humidity >= 88 and wind <= 12:
+            condition_scores[9] = 15 + (humidity - 88) * 0.8
+            risk += condition_scores[9]
+        # 13. Dry air
+        if humidity <= 25:
+            condition_scores[13] = 18 + (25 - humidity) * 0.8
+            risk += condition_scores[13]
+        elif humidity <= 32:
+            condition_scores[13] = 8 + (32 - humidity) * 0.5
+            risk += condition_scores[13]
+        # 14. Stagnant & gloomy
+        if uv <= 2 and humidity >= 72 and wind <= 10 and temp < 18:
+            condition_scores[14] = 10 + (humidity - 72) * 0.3
+            risk += condition_scores[14]
+        # ── Determine primary condition ──────────────────────────────
+        # max() keeps the first key among equal contributions, i.e. the
+        # trigger that was evaluated earliest above.
+        if condition_scores:
+            label = max(condition_scores, key=condition_scores.get)
+        else:
+            label = 0  # clear skies
+        # ── Add realistic noise ──────────────────────────────────────
+        risk += rng.normal(0, 2.5)
+        risk = int(np.clip(round(risk), 0, 100))
+        rows.append([
+            round(temp, 1), round(pressure, 1), round(humidity, 1),
+            round(wind, 1), uv, round(p_drop, 2), round(t_change, 2),
+            risk, label,
+        ])
+    df = pd.DataFrame(rows, columns=[
+        "temp_c", "pressure_hpa", "humidity", "wind_kph", "uv_index",
+        "pressure_drop", "temp_change", "risk_score", "advice_label",
+    ])
+    print(f"✅ Generated {len(df):,} samples")
+    print(f"   Risk: mean={df['risk_score'].mean():.1f}, std={df['risk_score'].std():.1f}")
+    print(f"   Conditions: {df['advice_label'].value_counts().sort_index().to_dict()}")
+    return df
+if __name__ == "__main__":
+    # Script entry point: generate the default dataset and write it as CSV.
+    # The output name lives in one place so the write and the message agree.
+    output_path = "smart_weather_data.csv"
+    df = generate_production_data()
+    df.to_csv(output_path, index=False)
+    print(f"💾 Saved → {output_path}")

metadata.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "version": "2.0.0",
+  "trained_at": "2026-03-12T12:07:07.026616",
+  "training_samples": 21250,
+  "features": [
+    "temp_c",
+    "pressure_hpa",
+    "humidity",
+    "wind_kph",
+    "uv_index",
+    "pressure_drop",
+    "temp_change"
+  ],
+  "num_conditions": 15,
+  "risk_metrics": {
+    "mae": 2.52,
+    "rmse": 3.28,
+    "r2": 0.9773
+  },
+  "advice_metrics": {
+    "accuracy": 0.9859,
+    "f1_macro": 0.9714
+  }
+}

requirements.txt CHANGED Viewed

@@ -1,7 +1,6 @@
-fastapi
-uvicorn
-pydantic
-pandas
-numpy
-xgboost
-scikit-learn

+fastapi>=0.104.0
+uvicorn[standard]>=0.24.0
+pydantic>=2.5.0
+numpy>=1.24.0
+pandas>=2.0.0
+scikit-learn>=1.3.0

risk_model.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7c9b2874de4ad95960419b3f234f6637708840f8cafccb4c49c76591a68631b
-size 1122479

 version https://git-lfs.github.com/spec/v1
+oid sha256:b916115815b51272b6e47b185ba99bfbc2dcb2f9c0c456c6cdec11ad0150e44b
+size 1398959

train.py ADDED Viewed

	@@ -0,0 +1,145 @@

+"""
+Bioweather Model Training v2.0
+EmpedocLabs © 2025
+Trains:
+  1. Risk regressor (0-100 score)
+  2. Advice classifier (15 weather conditions)
+Both use HistGradientBoosting (sklearn) — no XGBoost dependency needed.
+"""
+import os
+import pickle
+import json
+import numpy as np
+import pandas as pd
+from datetime import datetime
+from sklearn.ensemble import HistGradientBoostingRegressor, HistGradientBoostingClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import (
+    mean_absolute_error, mean_squared_error, r2_score,
+    classification_report, accuracy_score, f1_score,
+)
+from generate_data import generate_production_data
+FEATURE_COLS = [  # model input columns, in the order main() uses to slice X from the DataFrame
+    "temp_c", "pressure_hpa", "humidity", "wind_kph",
+    "uv_index", "pressure_drop", "temp_change",
+]
+CONDITION_NAMES = {  # advice_label id -> display name; main() uses it to label the per-condition report
+    0: "Clear Skies", 1: "Rapid Pressure Drop", 2: "Pressure Squeeze",
+    3: "Sauna Effect", 4: "High Wind", 5: "High UV Glare",
+    6: "Bitter Cold", 7: "Drastic Temp Drop", 8: "Heat Shock",
+    9: "Heavy Dampness", 10: "Mild Pressure Dip", 11: "Mild Pressure Rise",
+    12: "Breezy Pollen", 13: "Dry Air", 14: "Stagnant & Gloomy",
+}
+def main():
+    print("=" * 60)
+    print("  BIOWEATHER v2.0 — Production Training")
+    print("  EmpedocLabs")
+    print("=" * 60)
+    # ── 1. Generate data ─────────────────────────────────────────
+    print("\n📊 Generating training data...")
+    df = generate_production_data(n=25000, seed=42)
+    X = df[FEATURE_COLS].values
+    y_risk = df["risk_score"].values
+    y_advice = df["advice_label"].values
+    # ── 2. Split ─────────────────────────────────────────────────
+    X_train, X_test, yr_train, yr_test, ya_train, ya_test = train_test_split(
+        X, y_risk, y_advice, test_size=0.15, random_state=42,
+    )
+    print(f"\n📂 Split: Train={len(X_train):,}  Test={len(X_test):,}")
+    # ── 3. Train risk regressor ──────────────────────────────────
+    print("\n🚀 Training risk regressor...")
+    risk_model = HistGradientBoostingRegressor(
+        max_iter=400,
+        max_depth=6,
+        learning_rate=0.05,
+        min_samples_leaf=15,
+        l2_regularization=0.5,
+        early_stopping=True,
+        validation_fraction=0.1,
+        n_iter_no_change=30,
+        random_state=42,
+    )
+    risk_model.fit(X_train, yr_train)
+    print(f"   Iterations: {risk_model.n_iter_}")
+    yr_pred = risk_model.predict(X_test)
+    yr_pred = np.clip(yr_pred, 0, 100)
+    mae = mean_absolute_error(yr_test, yr_pred)
+    rmse = np.sqrt(mean_squared_error(yr_test, yr_pred))
+    r2 = r2_score(yr_test, yr_pred)
+    print(f"   MAE:  {mae:.2f}")
+    print(f"   RMSE: {rmse:.2f}")
+    print(f"   R²:   {r2:.4f}")
+    # ── 4. Train advice classifier ───────────────────────────────
+    print("\n🚀 Training advice classifier (15 conditions)...")
+    advice_model = HistGradientBoostingClassifier(
+        max_iter=400,
+        max_depth=6,
+        learning_rate=0.05,
+        min_samples_leaf=10,
+        l2_regularization=0.3,
+        early_stopping=True,
+        validation_fraction=0.1,
+        n_iter_no_change=30,
+        random_state=42,
+    )
+    advice_model.fit(X_train, ya_train)
+    print(f"   Iterations: {advice_model.n_iter_}")
+    ya_pred = advice_model.predict(X_test)
+    acc = accuracy_score(ya_test, ya_pred)
+    f1_macro = f1_score(ya_test, ya_pred, average="macro", zero_division=0)
+    print(f"   Accuracy: {acc:.4f}")
+    print(f"   F1 macro: {f1_macro:.4f}")
+    print("\n   Per-condition report:")
+    target_names = [CONDITION_NAMES.get(i, f"Cond_{i}") for i in sorted(set(ya_test) | set(ya_pred))]
+    print(classification_report(ya_test, ya_pred, target_names=target_names, zero_division=0))
+    # ── 5. Save models ───────────────────────────────────────────
+    os.makedirs("model", exist_ok=True)
+    with open("model/risk_model.pkl", "wb") as f:
+        pickle.dump(risk_model, f)
+    with open("model/advice_model.pkl", "wb") as f:
+        pickle.dump(advice_model, f)
+    metadata = {
+        "version": "2.0.0",
+        "trained_at": datetime.now().isoformat(),
+        "training_samples": len(X_train),
+        "features": FEATURE_COLS,
+        "num_conditions": 15,
+        "risk_metrics": {"mae": round(mae, 2), "rmse": round(rmse, 2), "r2": round(r2, 4)},
+        "advice_metrics": {"accuracy": round(acc, 4), "f1_macro": round(f1_macro, 4)},
+    }
+    with open("model/metadata.json", "w") as f:
+        json.dump(metadata, f, indent=2)
+    print(f"\n💾 model/risk_model.pkl ({os.path.getsize('model/risk_model.pkl') // 1024} KB)")
+    print(f"💾 model/advice_model.pkl ({os.path.getsize('model/advice_model.pkl') // 1024} KB)")
+    print(f"📋 model/metadata.json")
+    print(f"\n{'=' * 60}")
+    print(f"  ✅ BIOWEATHER v2.0 READY")
+    print(f"  Risk:   MAE={mae:.2f}, R²={r2:.4f}")
+    print(f"  Advice: Acc={acc:.4f}, F1={f1_macro:.4f}")
+    print(f"{'=' * 60}")
+if __name__ == "__main__":  # run the training pipeline only when executed as a script, not on import
+    main()