Jompatron commited on
Commit
ee00e7c
·
1 Parent(s): e10336a

new models

Browse files
Files changed (1) hide show
  1. app.py +95 -189
app.py CHANGED
@@ -34,8 +34,7 @@ DISPLAY_TO_INTERNAL = {v: k for k, v in SENSOR_LABELS.items()}
34
  # HOPSWORKS + MODEL LOADING (LAZY)
35
  # -------------------------
36
 
37
- _resources_v1 = None
38
- _resources_v2 = None
39
 
40
 
41
  def _login_hopsworks():
@@ -49,44 +48,22 @@ def _login_hopsworks():
49
  return project
50
 
51
 
52
- def load_resources_v1():
53
- """Connect to Hopsworks and load model + feature view for v1."""
54
- project = _login_hopsworks()
55
- fs = project.get_feature_store()
56
-
57
- fv = fs.get_feature_view("air_quality_fv", version=1)
58
- fv.init_batch_scoring(1)
59
-
60
- weather_fg = fs.get_feature_group("weather", version=1)
61
-
62
- mr = project.get_model_registry()
63
- model_obj = mr.get_model("air_quality_xgboost_model", version=1)
64
- model_dir = model_obj.download()
65
-
66
- model = XGBRegressor()
67
- model.load_model(model_dir + "/model.json")
68
-
69
- return {
70
- "project": project,
71
- "fs": fs,
72
- "fv": fv,
73
- "weather_fg": weather_fg,
74
- "model": model,
75
- }
76
-
77
 
78
- def load_resources_v2():
79
- """Connect to Hopsworks and load model + feature view for v2."""
80
  project = _login_hopsworks()
81
  fs = project.get_feature_store()
82
 
83
- fv = fs.get_feature_view("air_quality_fv", version=2)
84
  fv.init_batch_scoring(1)
85
 
86
- weather_fg = fs.get_feature_group("weather", version=1)
87
 
88
  mr = project.get_model_registry()
89
- model_obj = mr.get_model("air_quality_xgboost_model", version=2)
90
  model_dir = model_obj.download()
91
 
92
  model = XGBRegressor()
@@ -101,25 +78,16 @@ def load_resources_v2():
101
  }
102
 
103
 
104
- def get_resources(model_version: str):
105
- """Lazy loader for resources, cached per model version."""
106
- global _resources_v1, _resources_v2
107
-
108
- if model_version == "v1":
109
- if _resources_v1 is None:
110
- _resources_v1 = load_resources_v1()
111
- return _resources_v1
112
-
113
- if model_version == "v2":
114
- if _resources_v2 is None:
115
- _resources_v2 = load_resources_v2()
116
- return _resources_v2
117
-
118
- raise ValueError(f"Unknown model version: {model_version}")
119
 
120
 
121
  # -------------------------
122
- # HELPER: AQI CATEGORY
123
  # -------------------------
124
 
125
  def pm25_to_aqi_category(pm25: float) -> str:
@@ -138,15 +106,15 @@ def pm25_to_aqi_category(pm25: float) -> str:
138
 
139
 
140
  # -------------------------
141
- # FORECAST LOGIC
142
  # -------------------------
143
 
144
- def generate_forecast(model_version: str, sensor_internal: str, days: int) -> str:
145
  """
146
- Generate forecast PNG path for given model + sensor.
147
- Returns path to saved PNG.
148
  """
149
- resources = get_resources(model_version)
150
  model = resources["model"]
151
  weather_fg = resources["weather_fg"]
152
  project = resources["project"]
@@ -157,63 +125,48 @@ def generate_forecast(model_version: str, sensor_internal: str, days: int) -> st
157
  df_future = weather_fg.read().sort_values("date")
158
  df_future["date"] = pd.to_datetime(df_future["date"]).dt.date
159
 
160
- # For v2 we also need PM2.5 history for this sensor
161
  aq_fg = project.get_feature_store().get_feature_group("air_quality", version=1)
162
  aq_df = aq_fg.read()
163
  aq_df["date"] = pd.to_datetime(aq_df["date"]).dt.date
164
  aq_df = aq_df[aq_df["sensor"] == sensor_internal].sort_values("date")
165
 
 
 
 
 
 
 
 
 
166
  preds = []
167
 
168
- if model_version == "v1":
169
- for offset in range(1, days + 1):
170
- target_date = today + timedelta(days=offset)
171
- row = df_future[df_future["date"] == target_date]
172
- if len(row) == 0:
173
- continue
174
-
175
- X = row[[
176
- "temperature_2m_mean",
177
- "precipitation_sum",
178
- "wind_speed_10m_max",
179
- "wind_direction_10m_dominant",
180
- ]]
181
- pred = float(model.predict(X)[0])
182
- preds.append({"date": target_date, "predicted_pm25": pred})
183
-
184
- elif model_version == "v2":
185
- pm25_history = list(aq_df["pm25"].values[-3:])
186
- if len(pm25_history) < 3:
187
- # Not enough history, bail gracefully
188
- return None
189
-
190
- for offset in range(1, days + 1):
191
- target_date = today + timedelta(days=offset)
192
- row = df_future[df_future["date"] == target_date]
193
- if len(row) == 0:
194
- continue
195
-
196
- lag1, lag2, lag3 = pm25_history[-1], pm25_history[-2], pm25_history[-3]
197
- roll_mean = float(np.mean(pm25_history[-3:]))
198
- roll_std = float(np.std(pm25_history[-3:]))
199
-
200
- X = pd.DataFrame({
201
- "temperature_2m_mean": [row.iloc[0]["temperature_2m_mean"]],
202
- "precipitation_sum": [row.iloc[0]["precipitation_sum"]],
203
- "wind_speed_10m_max": [row.iloc[0]["wind_speed_10m_max"]],
204
- "wind_direction_10m_dominant": [row.iloc[0]["wind_direction_10m_dominant"]],
205
- "pm25_lag1": [lag1],
206
- "pm25_lag2": [lag2],
207
- "pm25_lag3": [lag3],
208
- "pm25_roll3_mean": [roll_mean],
209
- "pm25_roll3_std": [roll_std],
210
- })
211
-
212
- pred = float(model.predict(X)[0])
213
- preds.append({"date": target_date, "predicted_pm25": pred})
214
- pm25_history.append(pred)
215
- else:
216
- raise ValueError("Unknown model version")
217
 
218
  if not preds:
219
  return None
@@ -235,17 +188,16 @@ def generate_forecast(model_version: str, sensor_internal: str, days: int) -> st
235
 
236
 
237
  # -------------------------
238
- # HINDCAST LOGIC
239
  # -------------------------
240
 
241
- def generate_hindcast(model_version: str, sensor_internal: str, days: int) -> str:
242
  """
243
- Generate hindcast PNG path for given model + sensor.
244
- Returns path to saved PNG.
245
  """
246
- resources = get_resources(model_version)
247
  model = resources["model"]
248
- fv = resources["fv"]
249
  weather_fg = resources["weather_fg"]
250
  project = resources["project"]
251
 
@@ -273,72 +225,35 @@ def generate_hindcast(model_version: str, sensor_internal: str, days: int) -> st
273
  if aq_df.empty:
274
  return None
275
 
276
- if model_version == "v1":
277
- # Use feature view training data + actual pm25
278
- features_df, _ = fv.training_data(
279
- start_time=start_date,
280
- end_time=end_date,
281
- statistics_config=False,
282
- )
283
- features_df["date"] = pd.to_datetime(features_df["date"]).dt.date
284
 
285
- # Filter by sensor if it exists in fv
286
- if "sensor" in features_df.columns:
287
- features_df = features_df[features_df["sensor"] == sensor_internal]
 
 
288
 
289
- merged = pd.merge(
290
- features_df,
291
- aq_df[["date", "pm25"]],
292
- on="date",
293
- how="inner",
294
- )
295
 
296
- if merged.empty:
297
- return None
298
-
299
- X = merged[[
300
- "temperature_2m_mean",
301
- "precipitation_sum",
302
- "wind_speed_10m_max",
303
- "wind_direction_10m_dominant",
304
- ]]
305
- merged["predicted_pm25"] = model.predict(X)
306
-
307
- df_hind = merged
308
-
309
- elif model_version == "v2":
310
- # Merge weather + actual pm25
311
- df = pd.merge(weather_df, aq_df, on="date")
312
- if df.empty:
313
- return None
314
-
315
- df["pm25_lag1"] = df["pm25"].shift(1)
316
- df["pm25_lag2"] = df["pm25"].shift(2)
317
- df["pm25_lag3"] = df["pm25"].shift(3)
318
- df["pm25_roll3_mean"] = df["pm25"].rolling(3).mean()
319
- df["pm25_roll3_std"] = df["pm25"].rolling(3).std()
320
-
321
- df = df.dropna().tail(days)
322
- if df.empty:
323
- return None
324
-
325
- X = df[[
326
- "temperature_2m_mean",
327
- "precipitation_sum",
328
- "wind_speed_10m_max",
329
- "wind_direction_10m_dominant",
330
- "pm25_lag1",
331
- "pm25_lag2",
332
- "pm25_lag3",
333
- "pm25_roll3_mean",
334
- "pm25_roll3_std",
335
- ]]
336
-
337
- df["predicted_pm25"] = model.predict(X)
338
- df_hind = df
339
-
340
- else:
341
- raise ValueError("Unknown model version")
342
 
343
  tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
344
  sensor_label = SENSOR_LABELS[sensor_internal]
@@ -355,20 +270,18 @@ def generate_hindcast(model_version: str, sensor_internal: str, days: int) -> st
355
 
356
 
357
  # -------------------------
358
- # GRADIO UI (MAX VIBES)
359
  # -------------------------
360
 
361
- def run_dashboard(sensor_display: str, model_choice: str, forecast_days: int, hindcast_days: int):
362
  try:
363
  sensor_internal = DISPLAY_TO_INTERNAL[sensor_display]
364
  except KeyError:
365
  return None, None, f"Unknown sensor: {sensor_display}"
366
 
367
- model_version = "v1" if "v1" in model_choice else "v2"
368
-
369
  try:
370
- forecast_path = generate_forecast(model_version, sensor_internal, forecast_days)
371
- hindcast_path = generate_hindcast(model_version, sensor_internal, hindcast_days)
372
  except Exception as e:
373
  # Don't explode the UI; show error text and empty images
374
  return None, None, f"⚠️ Something went wrong: {str(e)}"
@@ -376,12 +289,10 @@ def run_dashboard(sensor_display: str, model_choice: str, forecast_days: int, hi
376
  if forecast_path is None and hindcast_path is None:
377
  return None, None, "No data available for this sensor/time range yet."
378
 
379
- # Simple AQI summary from latest forecast
380
  summary_text = ""
381
  if forecast_path is not None:
382
- # We don't have the df here anymore, so just show generic text
383
  summary_text = (
384
- f"✅ Forecast generated for **{sensor_display}** using **{model_choice}**.\n\n"
385
  f"City: **{CITY_NAME}** \n"
386
  f"Last updated: **{datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}**"
387
  )
@@ -393,9 +304,9 @@ with gr.Blocks(theme="soft") as demo:
393
  gr.Markdown(
394
  """
395
  # 🌤️ Dundee Air Quality Dashboard
396
- Compare air quality forecasts and hindcasts for different sensors across Dundee.
397
 
398
- Select a **sensor**, choose a **model**, set your horizons, and hit **Update**.
399
  """
400
  )
401
 
@@ -406,11 +317,6 @@ with gr.Blocks(theme="soft") as demo:
406
  label="Sensor",
407
  info="Choose which sensor in Dundee to analyze.",
408
  )
409
- model_radio = gr.Radio(
410
- choices=["Model v1 (no lag features)", "Model v2 (lag-aware)"],
411
- value="Model v2 (lag-aware)",
412
- label="Model",
413
- )
414
 
415
  with gr.Row():
416
  forecast_days_slider = gr.Slider(
@@ -438,7 +344,7 @@ with gr.Blocks(theme="soft") as demo:
438
 
439
  update_btn.click(
440
  fn=run_dashboard,
441
- inputs=[sensor_dropdown, model_radio, forecast_days_slider, hindcast_days_slider],
442
  outputs=[forecast_img, hindcast_img, summary_box],
443
  )
444
 
 
34
  # HOPSWORKS + MODEL LOADING (LAZY)
35
  # -------------------------
36
 
37
+ _resources = None
 
38
 
39
 
40
  def _login_hopsworks():
 
48
  return project
49
 
50
 
51
+ def load_resources():
52
+ """
53
+ Connect to Hopsworks and load model + feature view + weather FG.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ This is our SINGLE lag-aware model setup.
56
+ """
57
  project = _login_hopsworks()
58
  fs = project.get_feature_store()
59
 
60
+ fv = fs.get_feature_view("dundee_fv", version=3) # lag-feature FeatureView
61
  fv.init_batch_scoring(1)
62
 
63
+ weather_fg = fs.get_feature_group("dundee_weather_fg", version=1)
64
 
65
  mr = project.get_model_registry()
66
+ model_obj = mr.get_model("dundee_pm25_xgboostl", version=2)
67
  model_dir = model_obj.download()
68
 
69
  model = XGBRegressor()
 
78
  }
79
 
80
 
81
+ def get_resources():
82
+ """Lazy loader for the single model configuration."""
83
+ global _resources
84
+ if _resources is None:
85
+ _resources = load_resources()
86
+ return _resources
 
 
 
 
 
 
 
 
 
87
 
88
 
89
  # -------------------------
90
+ # HELPER: AQI CATEGORY (optional, not yet used in UI)
91
  # -------------------------
92
 
93
  def pm25_to_aqi_category(pm25: float) -> str:
 
106
 
107
 
108
  # -------------------------
109
+ # FORECAST LOGIC (lag-aware model)
110
  # -------------------------
111
 
112
+ def generate_forecast(sensor_internal: str, days: int) -> str | None:
113
  """
114
+ Generate forecast PNG path for given sensor.
115
+ Returns path to saved PNG or None if no data.
116
  """
117
+ resources = get_resources()
118
  model = resources["model"]
119
  weather_fg = resources["weather_fg"]
120
  project = resources["project"]
 
125
  df_future = weather_fg.read().sort_values("date")
126
  df_future["date"] = pd.to_datetime(df_future["date"]).dt.date
127
 
128
+ # PM2.5 history for this sensor
129
  aq_fg = project.get_feature_store().get_feature_group("air_quality", version=1)
130
  aq_df = aq_fg.read()
131
  aq_df["date"] = pd.to_datetime(aq_df["date"]).dt.date
132
  aq_df = aq_df[aq_df["sensor"] == sensor_internal].sort_values("date")
133
 
134
+ if aq_df.empty:
135
+ return None
136
+
137
+ pm25_history = list(aq_df["pm25"].values[-3:])
138
+ if len(pm25_history) < 3:
139
+ # Not enough history, bail gracefully
140
+ return None
141
+
142
  preds = []
143
 
144
+ for offset in range(1, days + 1):
145
+ target_date = today + timedelta(days=offset)
146
+ row = df_future[df_future["date"] == target_date]
147
+ if len(row) == 0:
148
+ # no weather data for that date
149
+ continue
150
+
151
+ lag1, lag2, lag3 = pm25_history[-1], pm25_history[-2], pm25_history[-3]
152
+ roll_mean = float(np.mean(pm25_history[-3:]))
153
+ roll_std = float(np.std(pm25_history[-3:]))
154
+
155
+ X = pd.DataFrame({
156
+ "temperature_2m_mean": [row.iloc[0]["temperature_2m_mean"]],
157
+ "precipitation_sum": [row.iloc[0]["precipitation_sum"]],
158
+ "wind_speed_10m_max": [row.iloc[0]["wind_speed_10m_max"]],
159
+ "wind_direction_10m_dominant": [row.iloc[0]["wind_direction_10m_dominant"]],
160
+ "pm25_lag1": [lag1],
161
+ "pm25_lag2": [lag2],
162
+ "pm25_lag3": [lag3],
163
+ "pm25_roll3_mean": [roll_mean],
164
+ "pm25_roll3_std": [roll_std],
165
+ })
166
+
167
+ pred = float(model.predict(X)[0])
168
+ preds.append({"date": target_date, "predicted_pm25": pred})
169
+ pm25_history.append(pred)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  if not preds:
172
  return None
 
188
 
189
 
190
  # -------------------------
191
+ # HINDCAST LOGIC (lag-aware model)
192
  # -------------------------
193
 
194
+ def generate_hindcast(sensor_internal: str, days: int) -> str | None:
195
  """
196
+ Generate hindcast PNG path for given sensor.
197
+ Returns path to saved PNG or None if no data.
198
  """
199
+ resources = get_resources()
200
  model = resources["model"]
 
201
  weather_fg = resources["weather_fg"]
202
  project = resources["project"]
203
 
 
225
  if aq_df.empty:
226
  return None
227
 
228
+ # Merge weather + actual pm25
229
+ df = pd.merge(weather_df, aq_df, on="date")
230
+ if df.empty:
231
+ return None
 
 
 
 
232
 
233
+ df["pm25_lag1"] = df["pm25"].shift(1)
234
+ df["pm25_lag2"] = df["pm25"].shift(2)
235
+ df["pm25_lag3"] = df["pm25"].shift(3)
236
+ df["pm25_roll3_mean"] = df["pm25"].rolling(3).mean()
237
+ df["pm25_roll3_std"] = df["pm25"].rolling(3).std()
238
 
239
+ df = df.dropna().tail(days)
240
+ if df.empty:
241
+ return None
 
 
 
242
 
243
+ X = df[[
244
+ "temperature_2m_mean",
245
+ "precipitation_sum",
246
+ "wind_speed_10m_max",
247
+ "wind_direction_10m_dominant",
248
+ "pm25_lag1",
249
+ "pm25_lag2",
250
+ "pm25_lag3",
251
+ "pm25_roll3_mean",
252
+ "pm25_roll3_std",
253
+ ]]
254
+
255
+ df["predicted_pm25"] = model.predict(X)
256
+ df_hind = df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
  tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
259
  sensor_label = SENSOR_LABELS[sensor_internal]
 
270
 
271
 
272
  # -------------------------
273
+ # GRADIO UI (MAX VIBES, SINGLE MODEL)
274
  # -------------------------
275
 
276
+ def run_dashboard(sensor_display: str, forecast_days: int, hindcast_days: int):
277
  try:
278
  sensor_internal = DISPLAY_TO_INTERNAL[sensor_display]
279
  except KeyError:
280
  return None, None, f"Unknown sensor: {sensor_display}"
281
 
 
 
282
  try:
283
+ forecast_path = generate_forecast(sensor_internal, forecast_days)
284
+ hindcast_path = generate_hindcast(sensor_internal, hindcast_days)
285
  except Exception as e:
286
  # Don't explode the UI; show error text and empty images
287
  return None, None, f"⚠️ Something went wrong: {str(e)}"
 
289
  if forecast_path is None and hindcast_path is None:
290
  return None, None, "No data available for this sensor/time range yet."
291
 
 
292
  summary_text = ""
293
  if forecast_path is not None:
 
294
  summary_text = (
295
+ f"✅ Forecast generated for **{sensor_display}** using the lag-aware Dundee PM2.5 model.\n\n"
296
  f"City: **{CITY_NAME}** \n"
297
  f"Last updated: **{datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}**"
298
  )
 
304
  gr.Markdown(
305
  """
306
  # 🌤️ Dundee Air Quality Dashboard
307
+ Lag-aware PM2.5 forecasts and hindcasts for Dundee’s air quality sensors.
308
 
309
+ Select a **sensor**, set your horizons, and hit **Update**.
310
  """
311
  )
312
 
 
317
  label="Sensor",
318
  info="Choose which sensor in Dundee to analyze.",
319
  )
 
 
 
 
 
320
 
321
  with gr.Row():
322
  forecast_days_slider = gr.Slider(
 
344
 
345
  update_btn.click(
346
  fn=run_dashboard,
347
+ inputs=[sensor_dropdown, forecast_days_slider, hindcast_days_slider],
348
  outputs=[forecast_img, hindcast_img, summary_box],
349
  )
350