Spaces:
Sleeping
Sleeping
Jompatron
commited on
Commit
·
2c1b19a
1
Parent(s):
6f5a567
hindcast
Browse files- airquality/util.py +1 -0
- app.py +69 -16
airquality/util.py
CHANGED
|
@@ -14,6 +14,7 @@ from retry_requests import retry
|
|
| 14 |
import hopsworks
|
| 15 |
import hsfs
|
| 16 |
from pathlib import Path
|
|
|
|
| 17 |
|
| 18 |
def get_historical_weather(city, start_date, end_date, latitude, longitude):
|
| 19 |
# latitude, longitude = get_city_coordinates(city)
|
|
|
|
| 14 |
import hopsworks
|
| 15 |
import hsfs
|
| 16 |
from pathlib import Path
|
| 17 |
+
import numpy as np
|
| 18 |
|
| 19 |
def get_historical_weather(city, start_date, end_date, latitude, longitude):
|
| 20 |
# latitude, longitude = get_city_coordinates(city)
|
app.py
CHANGED
|
@@ -142,21 +142,35 @@ def generate_forecast_v2(days):
|
|
| 142 |
def generate_hindcast_v1(days):
|
| 143 |
model, fv, weather_fg, project = resources_v1
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
features_df, labels_df = fv.training_data(
|
| 146 |
-
start_time=
|
| 147 |
-
end_time=
|
| 148 |
statistics_config=False
|
| 149 |
)
|
| 150 |
|
| 151 |
features_df["date"] = pd.to_datetime(features_df["date"]).dt.date
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
features_df["predicted_pm25"] = model.predict(X)
|
| 157 |
|
| 158 |
tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
|
| 159 |
-
plot_air_quality_forecast(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
return tmp_path
|
| 162 |
|
|
@@ -164,21 +178,60 @@ def generate_hindcast_v1(days):
|
|
| 164 |
def generate_hindcast_v2(days):
|
| 165 |
model, fv, weather_fg, project = resources_v2
|
| 166 |
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
statistics_config=False
|
| 171 |
-
)
|
| 172 |
-
|
| 173 |
-
features_df["date"] = pd.to_datetime(features_df["date"]).dt.date
|
| 174 |
|
| 175 |
-
#
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
|
| 181 |
-
plot_air_quality_forecast(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
return tmp_path
|
| 184 |
|
|
|
|
| 142 |
def generate_hindcast_v1(days):
|
| 143 |
model, fv, weather_fg, project = resources_v1
|
| 144 |
|
| 145 |
+
start_date = datetime.utcnow().date() - timedelta(days=days)
|
| 146 |
+
end_date = datetime.utcnow().date()
|
| 147 |
+
|
| 148 |
+
# Load training data from Feature View
|
| 149 |
features_df, labels_df = fv.training_data(
|
| 150 |
+
start_time=start_date,
|
| 151 |
+
end_time=end_date,
|
| 152 |
statistics_config=False
|
| 153 |
)
|
| 154 |
|
| 155 |
features_df["date"] = pd.to_datetime(features_df["date"]).dt.date
|
| 156 |
|
| 157 |
+
# The model for v1 ONLY uses weather features
|
| 158 |
+
X = features_df[[
|
| 159 |
+
"temperature_2m_mean",
|
| 160 |
+
"precipitation_sum",
|
| 161 |
+
"wind_speed_10m_max",
|
| 162 |
+
"wind_direction_10m_dominant"
|
| 163 |
+
]]
|
| 164 |
|
| 165 |
features_df["predicted_pm25"] = model.predict(X)
|
| 166 |
|
| 167 |
tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
|
| 168 |
+
plot_air_quality_forecast(
|
| 169 |
+
"linkoping", "hamngatan-10",
|
| 170 |
+
features_df,
|
| 171 |
+
tmp_path,
|
| 172 |
+
hindcast=True
|
| 173 |
+
)
|
| 174 |
|
| 175 |
return tmp_path
|
| 176 |
|
|
|
|
| 178 |
def generate_hindcast_v2(days):
|
| 179 |
model, fv, weather_fg, project = resources_v2
|
| 180 |
|
| 181 |
+
# Time window
|
| 182 |
+
start_date = datetime.utcnow().date() - timedelta(days=days + 3)
|
| 183 |
+
end_date = datetime.utcnow().date()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
+
# Load weather history
|
| 186 |
+
weather_df = weather_fg.read()
|
| 187 |
+
weather_df["date"] = pd.to_datetime(weather_df["date"]).dt.date
|
| 188 |
+
weather_df = weather_df[(weather_df["date"] >= start_date) &
|
| 189 |
+
(weather_df["date"] <= end_date)]
|
| 190 |
+
weather_df = weather_df.sort_values("date")
|
| 191 |
|
| 192 |
+
# Load PM2.5 history (actual values, not predictions)
|
| 193 |
+
aq_fg = project.get_feature_store().get_feature_group("air_quality", version=1)
|
| 194 |
+
aq_df = aq_fg.read()
|
| 195 |
+
aq_df["date"] = pd.to_datetime(aq_df["date"]).dt.date
|
| 196 |
+
aq_df = aq_df[(aq_df["date"] >= start_date) &
|
| 197 |
+
(aq_df["date"] <= end_date)]
|
| 198 |
+
aq_df = aq_df.sort_values("date")
|
| 199 |
+
|
| 200 |
+
# Merge actual historical PM2.5 + weather
|
| 201 |
+
df = pd.merge(weather_df, aq_df, on="date")
|
| 202 |
+
|
| 203 |
+
# Build lag features
|
| 204 |
+
df["pm25_lag1"] = df["pm25"].shift(1)
|
| 205 |
+
df["pm25_lag2"] = df["pm25"].shift(2)
|
| 206 |
+
df["pm25_lag3"] = df["pm25"].shift(3)
|
| 207 |
+
df["pm25_roll3_mean"] = df["pm25"].rolling(3).mean()
|
| 208 |
+
df["pm25_roll3_std"] = df["pm25"].rolling(3).std()
|
| 209 |
+
|
| 210 |
+
# Only keep the last N days (where all lags exist)
|
| 211 |
+
df = df.dropna().tail(days)
|
| 212 |
+
|
| 213 |
+
# Features for model v2
|
| 214 |
+
X = df[[
|
| 215 |
+
"temperature_2m_mean",
|
| 216 |
+
"precipitation_sum",
|
| 217 |
+
"wind_speed_10m_max",
|
| 218 |
+
"wind_direction_10m_dominant",
|
| 219 |
+
"pm25_lag1",
|
| 220 |
+
"pm25_lag2",
|
| 221 |
+
"pm25_lag3",
|
| 222 |
+
"pm25_roll3_mean",
|
| 223 |
+
"pm25_roll3_std",
|
| 224 |
+
]]
|
| 225 |
+
|
| 226 |
+
df["predicted_pm25"] = model.predict(X)
|
| 227 |
|
| 228 |
tmp_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
|
| 229 |
+
plot_air_quality_forecast(
|
| 230 |
+
"linkoping", "hamngatan-10",
|
| 231 |
+
df,
|
| 232 |
+
tmp_path,
|
| 233 |
+
hindcast=True
|
| 234 |
+
)
|
| 235 |
|
| 236 |
return tmp_path
|
| 237 |
|