Spaces:
Running
Running
"""
Bioweather Production Data Generator v2.0
EmpedocLabs © 2025
Generates clinically-plausible weather → headache risk data with:
- 15 distinct biometeo conditions
- Seasonal/geographic variation
- Multi-trigger overlap scoring
- Graded risk (not just if/else buckets)
- 20,000+ samples for robust training
"""
| import numpy as np | |
| import pandas as pd | |
def _score_conditions(temp, humidity, wind, uv, p_drop, t_change):
    """Return ``{condition_id: contribution}`` for every trigger that fires.

    Entries are inserted in a fixed order (pressure fall, pressure rise,
    sauna, wind, UV, cold, temperature drop, heat shock, dampness, dry air,
    stagnant air).  The caller relies on that order both when accumulating
    the risk score and when breaking ties for the primary condition.
    """
    scores = {}

    # 1 / 10. Falling pressure: id 1 for a strong drop, id 10 for milder ones.
    if p_drop <= -8:
        scores[1] = 35 + abs(p_drop) * 1.5
    elif p_drop <= -4:
        scores[10] = 15 + abs(p_drop) * 1.2
    elif p_drop <= -2:
        scores[10] = 8 + abs(p_drop) * 0.8

    # 2 / 11. Rising pressure: id 2 for a strong rise, id 11 for milder ones.
    if p_drop >= 8:
        scores[2] = 25 + p_drop * 1.0
    elif p_drop >= 4:
        scores[11] = 12 + p_drop * 0.7
    elif p_drop >= 2:
        scores[11] = 6 + p_drop * 0.5

    # 3. Sauna effect (heat + humidity).  May contribute exactly 0 at the
    # threshold (28 degrees, 65 % humidity) while still being recorded.
    if temp >= 28 and humidity >= 65:
        scores[3] = (temp - 28) * 2 + (humidity - 65) * 0.5

    # 4 / 12. Wind: id 4 for strong wind, id 12 for moderate wind.
    if wind >= 40:
        scores[4] = 25 + (wind - 40) * 0.8
    elif wind >= 20:
        scores[12] = 10 + (wind - 20) * 0.3

    # 5. UV glare; the moderate band only counts when it is also warm.
    if uv >= 8:
        scores[5] = 20 + (uv - 8) * 3
    elif uv >= 6 and temp > 15:
        scores[5] = 8 + (uv - 6) * 2

    # 6. Bitter cold.
    if temp <= -5:
        scores[6] = 25 + abs(temp + 5) * 2
    elif temp <= 2:
        scores[6] = 10 + abs(temp - 2) * 1.5

    # 7. Drastic temperature drop.
    if t_change <= -8:
        scores[7] = 30 + abs(t_change) * 1.5
    elif t_change <= -5:
        scores[7] = 12 + abs(t_change) * 0.8

    # 8. Heat shock (sharp temperature rise).
    if t_change >= 8:
        scores[8] = 28 + t_change * 1.2
    elif t_change >= 5:
        scores[8] = 10 + t_change * 0.7

    # 9. Heavy dampness: very humid and little wind.
    if humidity >= 88 and wind <= 12:
        scores[9] = 15 + (humidity - 88) * 0.8

    # 13. Dry air.
    if humidity <= 25:
        scores[13] = 18 + (25 - humidity) * 0.8
    elif humidity <= 32:
        scores[13] = 8 + (32 - humidity) * 0.5

    # 14. Stagnant & gloomy.
    if uv <= 2 and humidity >= 72 and wind <= 10 and temp < 18:
        scores[14] = 10 + (humidity - 72) * 0.3

    return scores


def generate_production_data(n: int = 25000, seed: int = 42) -> pd.DataFrame:
    """Generate a synthetic weather -> headache-risk dataset.

    Each sample draws a season, season-dependent weather values, a pressure
    change and a temperature change, then scores every trigger condition
    that fires.  Contributions are added on top of a baseline of 5, Gaussian
    noise (sd 2.5) is added, and the result is rounded and clipped to 0-100.
    The label is the id of the largest single contribution, or 0 when no
    trigger fired.

    Args:
        n: Number of samples (rows) to generate.
        seed: Seed for ``numpy.random.default_rng``; the same ``n`` and
            ``seed`` reproduce the same frame.

    Returns:
        DataFrame with columns ``temp_c``, ``pressure_hpa``, ``humidity``,
        ``wind_kph``, ``uv_index``, ``pressure_drop``, ``temp_change``,
        ``risk_score`` (int, 0-100) and ``advice_label`` (int, 0-14).

    Side effects:
        Prints a three-line summary of the generated data to stdout.
    """
    rng = np.random.default_rng(seed)

    season_names = ["winter", "spring", "summer", "autumn"]
    season_probs = [0.25, 0.25, 0.25, 0.25]
    # season -> (temp mean, temp sd, humidity mean, humidity sd,
    #            uv low, uv high (exclusive), wind mean, wind sd)
    climate = {
        "winter": (-2, 8, 70, 15, 0, 4, 15, 12),
        "spring": (14, 7, 55, 18, 2, 8, 18, 10),
        "summer": (28, 6, 55, 20, 5, 11, 12, 8),
        "autumn": (12, 8, 65, 15, 1, 6, 16, 10),
    }

    rows = []
    for _ in range(n):
        # ── Base weather with seasonal coherence ──
        # NOTE: the order of RNG calls below is part of the contract; changing
        # it changes the data produced for a given seed.
        season = str(rng.choice(season_names, p=season_probs))
        t_mu, t_sd, h_mu, h_sd, uv_lo, uv_hi, w_mu, w_sd = climate[season]
        temp = np.clip(rng.normal(t_mu, t_sd), -15, 45)
        humidity = np.clip(rng.normal(h_mu, h_sd), 8, 99)
        uv = int(np.clip(rng.integers(uv_lo, uv_hi), 0, 11))
        wind = np.clip(abs(rng.normal(w_mu, w_sd)), 0, 70)
        pressure = np.clip(rng.normal(1013, 12), 970, 1050)

        # Pressure change: occasional fronts.  The ridge branch makes its own
        # draw, so it is only reached when the cold-front draw failed.
        if rng.random() < 0.10:
            p_drop = rng.normal(-8, 3)  # cold front
        elif rng.random() < 0.08:
            p_drop = rng.normal(7, 2.5)  # high pressure ridge
        else:
            p_drop = rng.normal(0, 2.5)
        p_drop = np.clip(p_drop, -15, 15)

        # Temp change: some days have big swings in either direction.
        if rng.random() < 0.07:
            t_change = rng.choice([-1, 1]) * abs(rng.normal(10, 3))
        else:
            t_change = rng.normal(0, 3)
        t_change = np.clip(t_change, -15, 15)

        # ── Additive risk scoring (multiple triggers stack) ──
        condition_scores = _score_conditions(
            temp, humidity, wind, uv, p_drop, t_change
        )
        risk = 5.0  # baseline
        # Accumulate sequentially (not via sum()) so the floating-point
        # result is the same as adding each contribution as it is scored.
        for contribution in condition_scores.values():
            risk += contribution

        # ── Determine primary condition ──
        # Largest contribution wins; ties go to the earliest-scored one.
        # Label 0 means "clear skies" (no trigger fired).
        if condition_scores:
            label = max(condition_scores, key=condition_scores.get)
        else:
            label = 0

        # ── Add realistic noise ──
        risk += rng.normal(0, 2.5)
        risk = int(np.clip(round(risk), 0, 100))

        rows.append([
            round(temp, 1), round(pressure, 1), round(humidity, 1),
            round(wind, 1), uv, round(p_drop, 2), round(t_change, 2),
            risk, label,
        ])

    df = pd.DataFrame(rows, columns=[
        "temp_c", "pressure_hpa", "humidity", "wind_kph", "uv_index",
        "pressure_drop", "temp_change", "risk_score", "advice_label",
    ])
    print(f"✅ Generated {len(df):,} samples")
    print(f" Risk: mean={df['risk_score'].mean():.1f}, std={df['risk_score'].std():.1f}")
    print(f" Conditions: {df['advice_label'].value_counts().sort_index().to_dict()}")
    return df
if __name__ == "__main__":
    # Script entry point: build the dataset with the default size and seed,
    # then write it as CSV (without the index column) to the working directory.
    df = generate_production_data()
    df.to_csv("smart_weather_data.csv", index=False)
    print("💾 Saved → smart_weather_data.csv")