Jompatron commited on
Commit
2c1b19a
·
1 Parent(s): 6f5a567
Files changed (2) hide show
  1. airquality/util.py +1 -0
  2. app.py +69 -16
airquality/util.py CHANGED
@@ -14,6 +14,7 @@ from retry_requests import retry
14
  import hopsworks
15
  import hsfs
16
  from pathlib import Path
 
17
 
18
  def get_historical_weather(city, start_date, end_date, latitude, longitude):
19
  # latitude, longitude = get_city_coordinates(city)
 
14
  import hopsworks
15
  import hsfs
16
  from pathlib import Path
17
+ import numpy as np
18
 
19
  def get_historical_weather(city, start_date, end_date, latitude, longitude):
20
  # latitude, longitude = get_city_coordinates(city)
app.py CHANGED
@@ -142,21 +142,35 @@ def generate_forecast_v2(days):
142
  def generate_hindcast_v1(days):
143
  model, fv, weather_fg, project = resources_v1
144
 
 
 
 
 
145
  features_df, labels_df = fv.training_data(
146
- start_time=datetime.utcnow().date() - timedelta(days=days),
147
- end_time=datetime.utcnow().date(),
148
  statistics_config=False
149
  )
150
 
151
  features_df["date"] = pd.to_datetime(features_df["date"]).dt.date
152
 
153
- X = features_df[["temperature_2m_mean", "precipitation_sum",
154
- "wind_speed_10m_max", "wind_direction_10m_dominant"]]
 
 
 
 
 
155
 
156
  features_df["predicted_pm25"] = model.predict(X)
157
 
158
  tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
159
- plot_air_quality_forecast("linkoping", "hamngatan-10", features_df, tmp_path, hindcast=True)
 
 
 
 
 
160
 
161
  return tmp_path
162
 
@@ -164,21 +178,60 @@ def generate_hindcast_v1(days):
164
  def generate_hindcast_v2(days):
165
  model, fv, weather_fg, project = resources_v2
166
 
167
- features_df, labels_df = fv.training_data(
168
- start_time=datetime.utcnow().date() - timedelta(days=days),
169
- end_time=datetime.utcnow().date(),
170
- statistics_config=False
171
- )
172
-
173
- features_df["date"] = pd.to_datetime(features_df["date"]).dt.date
174
 
175
- # Full lag-feature dataset already inside feature view v2
176
- X = features_df.drop(columns=["date"])
 
 
 
 
177
 
178
- features_df["predicted_pm25"] = model.predict(X)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
181
- plot_air_quality_forecast("linkoping", "hamngatan-10", features_df, tmp_path, hindcast=True)
 
 
 
 
 
182
 
183
  return tmp_path
184
 
 
142
  def generate_hindcast_v1(days):
143
  model, fv, weather_fg, project = resources_v1
144
 
145
+ start_date = datetime.utcnow().date() - timedelta(days=days)
146
+ end_date = datetime.utcnow().date()
147
+
148
+ # Load training data from Feature View
149
  features_df, labels_df = fv.training_data(
150
+ start_time=start_date,
151
+ end_time=end_date,
152
  statistics_config=False
153
  )
154
 
155
  features_df["date"] = pd.to_datetime(features_df["date"]).dt.date
156
 
157
+ # The model for v1 ONLY uses weather features
158
+ X = features_df[[
159
+ "temperature_2m_mean",
160
+ "precipitation_sum",
161
+ "wind_speed_10m_max",
162
+ "wind_direction_10m_dominant"
163
+ ]]
164
 
165
  features_df["predicted_pm25"] = model.predict(X)
166
 
167
  tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
168
+ plot_air_quality_forecast(
169
+ "linkoping", "hamngatan-10",
170
+ features_df,
171
+ tmp_path,
172
+ hindcast=True
173
+ )
174
 
175
  return tmp_path
176
 
 
178
  def generate_hindcast_v2(days):
179
  model, fv, weather_fg, project = resources_v2
180
 
181
+ # Time window
182
+ start_date = datetime.utcnow().date() - timedelta(days=days + 3)
183
+ end_date = datetime.utcnow().date()
 
 
 
 
184
 
185
+ # Load weather history
186
+ weather_df = weather_fg.read()
187
+ weather_df["date"] = pd.to_datetime(weather_df["date"]).dt.date
188
+ weather_df = weather_df[(weather_df["date"] >= start_date) &
189
+ (weather_df["date"] <= end_date)]
190
+ weather_df = weather_df.sort_values("date")
191
 
192
+ # Load PM2.5 history (actual values, not predictions)
193
+ aq_fg = project.get_feature_store().get_feature_group("air_quality", version=1)
194
+ aq_df = aq_fg.read()
195
+ aq_df["date"] = pd.to_datetime(aq_df["date"]).dt.date
196
+ aq_df = aq_df[(aq_df["date"] >= start_date) &
197
+ (aq_df["date"] <= end_date)]
198
+ aq_df = aq_df.sort_values("date")
199
+
200
+ # Merge actual historical PM2.5 + weather
201
+ df = pd.merge(weather_df, aq_df, on="date")
202
+
203
+ # Build lag features
204
+ df["pm25_lag1"] = df["pm25"].shift(1)
205
+ df["pm25_lag2"] = df["pm25"].shift(2)
206
+ df["pm25_lag3"] = df["pm25"].shift(3)
207
+ df["pm25_roll3_mean"] = df["pm25"].rolling(3).mean()
208
+ df["pm25_roll3_std"] = df["pm25"].rolling(3).std()
209
+
210
+ # Only keep the last N days (where all lags exist)
211
+ df = df.dropna().tail(days)
212
+
213
+ # Features for model v2
214
+ X = df[[
215
+ "temperature_2m_mean",
216
+ "precipitation_sum",
217
+ "wind_speed_10m_max",
218
+ "wind_direction_10m_dominant",
219
+ "pm25_lag1",
220
+ "pm25_lag2",
221
+ "pm25_lag3",
222
+ "pm25_roll3_mean",
223
+ "pm25_roll3_std",
224
+ ]]
225
+
226
+ df["predicted_pm25"] = model.predict(X)
227
 
228
  tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
229
+ plot_air_quality_forecast(
230
+ "linkoping", "hamngatan-10",
231
+ df,
232
+ tmp_path,
233
+ hindcast=True
234
+ )
235
 
236
  return tmp_path
237