Jompatron commited on
Commit
f376eb0
·
1 Parent(s): 1d8d9e9

C-part done

Browse files
Files changed (1) hide show
  1. app.py +122 -61
app.py CHANGED
@@ -12,23 +12,17 @@ from airquality.util import plot_air_quality_forecast
12
  # -------------------------
13
  # HOPSWORKS LOGIN + MODEL LOAD
14
  # -------------------------
15
- def load_resources():
16
  project = hopsworks.login()
17
  fs = project.get_feature_store()
18
 
19
- # Load Feature View
20
- fv = fs.get_feature_view(
21
- name="air_quality_fv",
22
- version=2
23
- )
24
  fv.init_batch_scoring(1)
25
 
26
- # Load Weather Feature Group (for future predictions)
27
  weather_fg = fs.get_feature_group("weather", 1)
28
 
29
- # Load Model from Registry
30
  mr = project.get_model_registry()
31
- model_obj = mr.get_model("air_quality_xgboost_model", version=2)
32
  model_dir = model_obj.download()
33
 
34
  model = XGBRegressor()
@@ -37,103 +31,170 @@ def load_resources():
37
  return model, fv, weather_fg, project
38
 
39
 
40
- model, feature_view, weather_fg, project = load_resources()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
 
43
  # -------------------------
44
  # FORECAST LOGIC (NEXT 7 DAYS)
45
  # -------------------------
46
- def generate_forecast(days):
47
- today = datetime.utcnow().date()
48
- future_dates = [(today + timedelta(days=i)).strftime("%Y-%m-%d") for i in range(1, days + 1)]
49
 
 
50
  df_future = weather_fg.read()
51
  df_future["date"] = pd.to_datetime(df_future["date"]).dt.date
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  preds = []
54
- for d in future_dates:
55
- dt = datetime.strptime(d, "%Y-%m-%d").date()
56
- row = df_future[df_future["date"] == dt]
57
 
 
 
58
  if len(row) == 0:
59
  continue
60
 
61
- input_features = row.drop(columns=["date", "city"])
62
- pm25_pred = model.predict(input_features)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- preds.append({"date": d, "predicted_pm25": pm25_pred})
 
65
 
66
- if len(preds) == 0:
67
- return None
68
 
69
  df_preds = pd.DataFrame(preds)
70
 
71
- # Save plot
72
  tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
73
- plot_air_quality_forecast("linkoping", "hamngatan-10", df_preds, tmp_path, hindcast=False)
74
 
75
  return tmp_path
76
 
77
-
78
  # -------------------------
79
  # HINDCAST LOGIC (LAST 7 DAYS)
80
  # -------------------------
81
- def generate_hindcast(days):
82
- # Read actual + predicted from Feature View
83
- features_df, labels_df = feature_view.training_data(
 
84
  start_time=datetime.utcnow().date() - timedelta(days=days),
85
  end_time=datetime.utcnow().date(),
86
  statistics_config=False
87
  )
88
 
89
  features_df["date"] = pd.to_datetime(features_df["date"]).dt.date
90
- labels_df["pm25"] = labels_df["pm25"]
91
 
92
- df = features_df.copy()
93
- df["pm25"] = labels_df["pm25"]
94
 
95
- # Predict using model
96
- df["predicted_pm25"] = model.predict(
97
- df[["temperature_2m_mean", "precipitation_sum", "wind_speed_10m_max", "wind_direction_10m_dominant"]]
 
 
 
 
 
 
 
 
 
 
 
 
98
  )
99
 
100
- # Keep last 7 days
101
- df = df.sort_values("date").tail(days)
 
 
 
 
102
 
103
- # Save plot
104
  tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
105
- plot_air_quality_forecast("linkoping", "hamngatan-10", df, tmp_path, hindcast=True)
106
 
107
  return tmp_path
108
 
109
 
110
- # -------------------------
111
- # GRADIO UI
112
- # -------------------------
113
- def run_dashboard(forecast_days, hindcast_days):
114
- forecast_plot = generate_forecast(forecast_days)
115
- hindcast_plot = generate_hindcast(hindcast_days)
116
- return forecast_plot, hindcast_plot
117
-
118
 
119
  with gr.Blocks() as iface:
120
-
121
  gr.Markdown("# Air Quality Forecast Dashboard")
122
- gr.Markdown("Forecast and hindcast PM2.5 using XGBoost + Hopsworks")
123
-
124
  with gr.Row():
125
- forecast_days = gr.Slider(3, 10, value=7, step=1, label="Forecast Days (future)")
126
- hindcast_days = gr.Slider(3, 10, value=7, step=1, label="Hindcast Days (past)")
127
-
128
- generate_button = gr.Button("Generate")
129
-
130
- forecast_img = gr.Image(label="PM2.5 Forecast")
131
- hindcast_img = gr.Image(label="PM2.5 Hindcast")
132
-
133
- generate_button.click(
134
- fn=run_dashboard,
135
- inputs=[forecast_days, hindcast_days],
136
- outputs=[forecast_img, hindcast_img]
137
- )
138
 
139
  iface.launch()
 
12
  # -------------------------
13
  # HOPSWORKS LOGIN + MODEL LOAD
14
  # -------------------------
15
+ def load_resources_v1():
16
  project = hopsworks.login()
17
  fs = project.get_feature_store()
18
 
19
+ fv = fs.get_feature_view("air_quality_fv", version=1)
 
 
 
 
20
  fv.init_batch_scoring(1)
21
 
 
22
  weather_fg = fs.get_feature_group("weather", 1)
23
 
 
24
  mr = project.get_model_registry()
25
+ model_obj = mr.get_model("air_quality_xgboost_model", version=1)
26
  model_dir = model_obj.download()
27
 
28
  model = XGBRegressor()
 
31
  return model, fv, weather_fg, project
32
 
33
 
34
+ def load_resources_v2():
35
+ project = hopsworks.login()
36
+ fs = project.get_feature_store()
37
+
38
+ fv = fs.get_feature_view("air_quality_fv", version=2)
39
+ fv.init_batch_scoring(1)
40
+
41
+ weather_fg = fs.get_feature_group("weather", 1)
42
+
43
+ mr = project.get_model_registry()
44
+ model_obj = mr.get_model("air_quality_xgboost_model", version=2)
45
+ model_dir = model_obj.download()
46
+
47
+ model = XGBRegressor()
48
+ model.load_model(model_dir + "/model.json")
49
+
50
+ return model, fv, weather_fg, project
51
+
52
 
53
 
54
  # -------------------------
55
  # FORECAST LOGIC (NEXT 7 DAYS)
56
  # -------------------------
57
+ def generate_forecast_v1(days):
58
+ model, fv, weather_fg, project = load_resources_v1()
 
59
 
60
+ today = datetime.utcnow().date()
61
  df_future = weather_fg.read()
62
  df_future["date"] = pd.to_datetime(df_future["date"]).dt.date
63
 
64
+ predictions = []
65
+ for offset in range(1, days + 1):
66
+ target_date = today + timedelta(days=offset)
67
+ row = df_future[df_future["date"] == target_date]
68
+
69
+ if len(row) == 0:
70
+ continue
71
+
72
+ X = row[["temperature_2m_mean", "precipitation_sum",
73
+ "wind_speed_10m_max", "wind_direction_10m_dominant"]]
74
+
75
+ pred = float(model.predict(X)[0])
76
+
77
+ predictions.append({"date": target_date, "predicted_pm25": pred})
78
+
79
+ df_preds = pd.DataFrame(predictions)
80
+
81
+ tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
82
+ plot_air_quality_forecast("linkoping", "hamngatan-10", df_preds, tmp_path)
83
+
84
+
85
+ def generate_forecast_v2(days):
86
+ model, fv, weather_fg, project = load_resources_v2()
87
+
88
+ today = datetime.utcnow().date()
89
+ df_future = weather_fg.read().sort_values("date")
90
+ df_future["date"] = pd.to_datetime(df_future["date"]).dt.date
91
+
92
+ # Load real PM2.5 history
93
+ aq_fg = project.get_feature_store().get_feature_group("air_quality", version=1)
94
+ hist_pm25 = aq_fg.read().sort_values("date")
95
+ pm25_history = list(hist_pm25["pm25"].values[-3:])
96
+
97
  preds = []
98
+ for offset in range(1, days + 1):
 
 
99
 
100
+ target_date = today + timedelta(days=offset)
101
+ row = df_future[df_future["date"] == target_date]
102
  if len(row) == 0:
103
  continue
104
 
105
+ lag1, lag2, lag3 = pm25_history[-1], pm25_history[-2], pm25_history[-3]
106
+ roll_mean = np.mean(pm25_history[-3:])
107
+ roll_std = np.std(pm25_history[-3:])
108
+
109
+ X = pd.DataFrame({
110
+ "temperature_2m_mean": [row.iloc[0]["temperature_2m_mean"]],
111
+ "precipitation_sum": [row.iloc[0]["precipitation_sum"]],
112
+ "wind_speed_10m_max": [row.iloc[0]["wind_speed_10m_max"]],
113
+ "wind_direction_10m_dominant": [row.iloc[0]["wind_direction_10m_dominant"]],
114
+ "pm25_lag1": [lag1],
115
+ "pm25_lag2": [lag2],
116
+ "pm25_lag3": [lag3],
117
+ "pm25_roll3_mean": [roll_mean],
118
+ "pm25_roll3_std": [roll_std],
119
+ })
120
 
121
+ pred = float(model.predict(X)[0])
122
+ preds.append({"date": target_date, "predicted_pm25": pred})
123
 
124
+ pm25_history.append(pred)
 
125
 
126
  df_preds = pd.DataFrame(preds)
127
 
 
128
  tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
129
+ plot_air_quality_forecast("linkoping", "hamngatan-10", df_preds, tmp_path)
130
 
131
  return tmp_path
132
 
 
133
  # -------------------------
134
  # HINDCAST LOGIC (LAST 7 DAYS)
135
  # -------------------------
136
+ def generate_hindcast_v1(days):
137
+ model, fv, weather_fg, project = load_resources_v1()
138
+
139
+ features_df, labels_df = fv.training_data(
140
  start_time=datetime.utcnow().date() - timedelta(days=days),
141
  end_time=datetime.utcnow().date(),
142
  statistics_config=False
143
  )
144
 
145
  features_df["date"] = pd.to_datetime(features_df["date"]).dt.date
 
146
 
147
+ X = features_df[["temperature_2m_mean", "precipitation_sum",
148
+ "wind_speed_10m_max", "wind_direction_10m_dominant"]]
149
 
150
+ features_df["predicted_pm25"] = model.predict(X)
151
+
152
+ tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
153
+ plot_air_quality_forecast("linkoping", "hamngatan-10", features_df, tmp_path, hindcast=True)
154
+
155
+ return tmp_path
156
+
157
+
158
+ def generate_hindcast_v2(days):
159
+ model, fv, weather_fg, project = load_resources_v2()
160
+
161
+ features_df, labels_df = fv.training_data(
162
+ start_time=datetime.utcnow().date() - timedelta(days=days),
163
+ end_time=datetime.utcnow().date(),
164
+ statistics_config=False
165
  )
166
 
167
+ features_df["date"] = pd.to_datetime(features_df["date"]).dt.date
168
+
169
+ # Full lag-feature dataset already inside feature view v2
170
+ X = features_df.drop(columns=["date"])
171
+
172
+ features_df["predicted_pm25"] = model.predict(X)
173
 
 
174
  tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
175
+ plot_air_quality_forecast("linkoping", "hamngatan-10", features_df, tmp_path, hindcast=True)
176
 
177
  return tmp_path
178
 
179
 
 
 
 
 
 
 
 
 
180
 
181
  with gr.Blocks() as iface:
182
+
183
  gr.Markdown("# Air Quality Forecast Dashboard")
184
+ gr.Markdown("Choose between Model v1 (no lags) and Model v2 (lag-aware).")
185
+
186
  with gr.Row():
187
+ forecast_days_v1 = gr.Slider(3, 10, value=7, label="Forecast Days (v1)")
188
+ forecast_days_v2 = gr.Slider(3, 10, value=7, label="Forecast Days (v2)")
189
+
190
+ with gr.Row():
191
+ btn_v1 = gr.Button("Generate Forecast (Model v1)")
192
+ btn_v2 = gr.Button("Generate Forecast (Model v2)")
193
+
194
+ out_v1 = gr.Image(label="Model v1 Forecast")
195
+ out_v2 = gr.Image(label="Model v2 Forecast")
196
+
197
+ btn_v1.click(fn=generate_forecast_v1, inputs=forecast_days_v1, outputs=out_v1)
198
+ btn_v2.click(fn=generate_forecast_v2, inputs=forecast_days_v2, outputs=out_v2)
 
199
 
200
  iface.launch()