Spaces:
Running
Running
File size: 7,107 Bytes
5f98f88 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | """
Bioweather Production Data Generator v2.0
EmpedocLabs © 2025
Generates clinically-plausible weather → headache risk data with:
- 15 distinct biometeo conditions
- Seasonal/geographic variation
- Multi-trigger overlap scoring
- Graded risk (not just if/else buckets)
- 20,000+ samples for robust training
"""
import numpy as np
import pandas as pd
# Seasons are sampled uniformly; the explicit weights are kept so the call to
# ``rng.choice`` (and therefore the random stream) stays exactly as before.
_SEASONS = ["winter", "spring", "summer", "autumn"]
_SEASON_WEIGHTS = [0.25, 0.25, 0.25, 0.25]

# season -> ((temp mean, sd), (humidity mean, sd),
#            (uv low, uv high [exclusive]), (wind mean, sd))
_SEASON_PARAMS = {
    "winter": ((-2, 8), (70, 15), (0, 4), (15, 12)),
    "spring": ((14, 7), (55, 18), (2, 8), (18, 10)),
    "summer": ((28, 6), (55, 20), (5, 11), (12, 8)),
    "autumn": ((12, 8), (65, 15), (1, 6), (16, 10)),
}

# Risk assigned to every sample before any trigger contributes.
_BASELINE_RISK = 5.0

_COLUMNS = [
    "temp_c", "pressure_hpa", "humidity", "wind_kph", "uv_index",
    "pressure_drop", "temp_change", "risk_score", "advice_label",
]


def _sample_weather(rng: np.random.Generator) -> tuple:
    """Draw one weather observation from *rng*.

    Returns ``(temp, pressure, humidity, wind, uv, p_drop, t_change)``.
    The order of the random draws below is part of the contract: changing it
    changes the dataset produced for a given seed.
    """
    # ── Base weather with seasonal coherence ─────────────────────
    season = str(rng.choice(_SEASONS, p=_SEASON_WEIGHTS))
    (t_mu, t_sd), (h_mu, h_sd), (uv_lo, uv_hi), (w_mu, w_sd) = _SEASON_PARAMS[season]

    temp = rng.normal(t_mu, t_sd)
    humidity = rng.normal(h_mu, h_sd)
    uv = rng.integers(uv_lo, uv_hi)  # upper bound is exclusive
    wind = abs(rng.normal(w_mu, w_sd))

    # np.clip (rather than min/max) keeps the values as NumPy scalars, so the
    # later round() calls use NumPy's rounding exactly as before.
    temp = np.clip(temp, -15, 45)
    humidity = np.clip(humidity, 8, 99)
    uv = int(np.clip(uv, 0, 11))
    wind = np.clip(wind, 0, 70)

    pressure = np.clip(rng.normal(1013, 12), 970, 1050)

    # Pressure change: occasional fronts.
    # NOTE: the second check draws a *new* random number, so a ridge occurs
    # with probability 0.9 * 0.08 = 7.2%, not 8%. Kept as-is so that the
    # generated data for a given seed does not change.
    if rng.random() < 0.10:
        p_drop = rng.normal(-8, 3)  # cold front
    elif rng.random() < 0.08:
        p_drop = rng.normal(7, 2.5)  # high pressure ridge
    else:
        p_drop = rng.normal(0, 2.5)
    p_drop = np.clip(p_drop, -15, 15)

    # Temp change: some days have big swings (random sign, magnitude ~10).
    if rng.random() < 0.07:
        t_change = rng.choice([-1, 1]) * abs(rng.normal(10, 3))
    else:
        t_change = rng.normal(0, 3)
    t_change = np.clip(t_change, -15, 15)

    return temp, pressure, humidity, wind, uv, p_drop, t_change


def _score_conditions(temp, humidity, wind, uv, p_drop, t_change) -> dict:
    """Return ``{condition_id: risk contribution}`` for every trigger that fires.

    Each condition id is set by at most one branch, and entries are inserted
    in a fixed order (pressure, sauna, wind, UV, cold, temp swings, dampness,
    dry air, stagnant), which also decides ties when picking the primary
    condition.
    """
    scores = {}  # condition_id → contribution

    # 1. Pressure drop (strongest weather trigger per literature)
    if p_drop <= -8:
        scores[1] = 35 + abs(p_drop) * 1.5
    elif p_drop <= -4:
        scores[10] = 15 + abs(p_drop) * 1.2
    elif p_drop <= -2:
        scores[10] = 8 + abs(p_drop) * 0.8

    # 2. Pressure rise
    if p_drop >= 8:
        scores[2] = 25 + p_drop * 1.0
    elif p_drop >= 4:
        scores[11] = 12 + p_drop * 0.7
    elif p_drop >= 2:
        scores[11] = 6 + p_drop * 0.5

    # 3. Sauna effect (heat + humidity)
    if temp >= 28 and humidity >= 65:
        scores[3] = (temp - 28) * 2 + (humidity - 65) * 0.5

    # 4. Wind (strong → 4, moderate → 12)
    if wind >= 40:
        scores[4] = 25 + (wind - 40) * 0.8
    elif wind >= 20:
        scores[12] = 10 + (wind - 20) * 0.3

    # 5. UV glare (the milder tier only counts on days above 15 degrees)
    if uv >= 8:
        scores[5] = 20 + (uv - 8) * 3
    elif uv >= 6 and temp > 15:
        scores[5] = 8 + (uv - 6) * 2

    # 6. Bitter cold
    if temp <= -5:
        scores[6] = 25 + abs(temp + 5) * 2
    elif temp <= 2:
        scores[6] = 10 + abs(temp - 2) * 1.5

    # 7. Drastic temp drop
    if t_change <= -8:
        scores[7] = 30 + abs(t_change) * 1.5
    elif t_change <= -5:
        scores[7] = 12 + abs(t_change) * 0.8

    # 8. Heat shock
    if t_change >= 8:
        scores[8] = 28 + t_change * 1.2
    elif t_change >= 5:
        scores[8] = 10 + t_change * 0.7

    # 9. Heavy dampness
    if humidity >= 88 and wind <= 12:
        scores[9] = 15 + (humidity - 88) * 0.8

    # 13. Dry air
    if humidity <= 25:
        scores[13] = 18 + (25 - humidity) * 0.8
    elif humidity <= 32:
        scores[13] = 8 + (32 - humidity) * 0.5

    # 14. Stagnant & gloomy
    if uv <= 2 and humidity >= 72 and wind <= 10 and temp < 18:
        scores[14] = 10 + (humidity - 72) * 0.3

    return scores


def generate_production_data(n: int = 25000, seed: int = 42) -> pd.DataFrame:
    """Generate a synthetic weather → headache-risk dataset.

    Each of the *n* rows is one simulated observation: seasonal base weather,
    a pressure change and a temperature change, scored additively against a
    set of triggers that may stack.

    Args:
        n: Number of rows to generate.
        seed: Seed for ``numpy.random.default_rng``; the same ``(n, seed)``
            always yields the same frame.

    Returns:
        A DataFrame with columns ``temp_c``, ``pressure_hpa``, ``humidity``,
        ``wind_kph``, ``uv_index``, ``pressure_drop``, ``temp_change``,
        ``risk_score`` (integer clipped to 0-100) and ``advice_label`` (id of
        the highest-contributing condition, or 0 when no trigger fired).

    Side effects:
        Prints a three-line summary of the generated data to stdout.
    """
    rng = np.random.default_rng(seed)
    rows = []

    for _ in range(n):
        temp, pressure, humidity, wind, uv, p_drop, t_change = _sample_weather(rng)

        # ── Additive risk scoring (multiple triggers stack) ──────────
        scores = _score_conditions(temp, humidity, wind, uv, p_drop, t_change)
        risk = _BASELINE_RISK
        # Explicit left-to-right accumulation: built-in sum() may use
        # compensated summation for floats, which could shift a rounding.
        for contribution in scores.values():
            risk += contribution

        # ── Determine primary condition ──────────────────────────────
        # Largest contribution wins; 0 means "clear skies" (nothing fired).
        label = max(scores, key=scores.get, default=0)

        # ── Add realistic noise ──────────────────────────────────────
        risk += rng.normal(0, 2.5)
        risk = int(np.clip(round(risk), 0, 100))

        rows.append([
            round(temp, 1), round(pressure, 1), round(humidity, 1),
            round(wind, 1), uv, round(p_drop, 2), round(t_change, 2),
            risk, label,
        ])

    df = pd.DataFrame(rows, columns=_COLUMNS)

    print(f"✅ Generated {len(df):,} samples")
    print(f" Risk: mean={df['risk_score'].mean():.1f}, std={df['risk_score'].std():.1f}")
    print(f" Conditions: {df['advice_label'].value_counts().sort_index().to_dict()}")
    return df
if __name__ == "__main__":
    # Script entry point: generate the default dataset and write it as CSV
    # (without the index column) into the current working directory.
    output_path = "smart_weather_data.csv"
    df = generate_production_data()
    df.to_csv(output_path, index=False)
    print(f"💾 Saved → {output_path}")
|