Jompatron committed on
Commit
8f77f9f
·
1 Parent(s): 334d234
Files changed (1) hide show
  1. app.py +104 -79
app.py CHANGED
@@ -1,106 +1,131 @@
1
  import gradio as gr
2
- import hopsworks
3
  import pandas as pd
4
  import matplotlib.pyplot as plt
5
- import os
 
6
  from xgboost import XGBRegressor
 
7
 
8
- # IMPORTANT: HuggingFace builds need non-interactive backend
9
- import matplotlib
10
- matplotlib.use("Agg")
11
 
12
- FEATURE_COLUMNS = [
13
- "temperature_2m_mean",
14
- "precipitation_sum",
15
- "wind_speed_10m_max",
16
- "wind_direction_10m_dominant"
17
- ]
18
 
 
 
 
19
  def load_resources():
20
- """Connect to Hopsworks, load model + feature view."""
21
  project = hopsworks.login()
22
-
23
  fs = project.get_feature_store()
 
 
 
 
 
 
 
 
 
 
 
 
24
  mr = project.get_model_registry()
 
 
25
 
26
- # Load model
27
- model_meta = mr.get_model("air_quality_xgboost_model", version=1)
28
- model_dir = model_meta.download()
29
  model = XGBRegressor()
30
  model.load_model(model_dir + "/model.json")
31
 
32
- # Load feature view
33
- fv = fs.get_feature_view("air_quality_fv", version=1)
34
 
35
- return model, fv
36
 
37
- # Load on startup
38
- model, feature_view = load_resources()
39
 
 
 
 
 
40
  def generate_forecast():
41
- """Fetch latest feature data, generate PM25 forecast plot."""
42
- df = feature_view.get_batch_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # Convert timestamp to datetime
45
- df["date"] = pd.to_datetime(df["date"], unit="us")
46
 
47
- # Predict PM2.5
48
- df["predicted_pm25"] = model.predict(df[FEATURE_COLUMNS])
49
 
50
- # Plot forecast
51
- plt.figure(figsize=(10, 4))
52
- plt.plot(df["date"], df["predicted_pm25"], marker="o")
53
- plt.title("PM2.5 Forecast (Next Days)")
54
- plt.xlabel("Date")
55
- plt.ylabel("Predicted PM2.5")
56
- plt.grid(True)
57
- plt.tight_layout()
58
- plt.savefig("forecast.png")
59
- plt.close()
60
 
61
- return "forecast.png"
62
 
 
 
 
 
63
  def generate_hindcast():
64
- """Generate hindcast accuracy plot (past days)."""
65
- df = feature_view.get_batch_data()
66
- df["date"] = pd.to_datetime(df["date"], unit="us")
67
- df["predicted_pm25"] = model.predict(df[FEATURE_COLUMNS])
68
-
69
- # For hindcast: show difference between predicted & actual (most recent available data)
70
- # NOTE: Your data may not include true pm25 for recent dates;
71
- # we'll plot model signal only.
72
-
73
- plt.figure(figsize=(10, 4))
74
- plt.plot(df["date"], df["predicted_pm25"], label="Predicted", marker="o")
75
- plt.title("PM2.5 Hindcast (Recent Days)")
76
- plt.xlabel("Date")
77
- plt.ylabel("PM2.5")
78
- plt.grid(True)
79
- plt.legend()
80
- plt.tight_layout()
81
- plt.savefig("hindcast.png")
82
- plt.close()
83
-
84
- return "hindcast.png"
85
-
86
-
87
- def run_dashboard():
88
- forecast_img = generate_forecast()
89
- hindcast_img = generate_hindcast()
90
- return forecast_img, hindcast_img
91
-
92
-
93
- with gr.Blocks() as demo:
94
- gr.Markdown("# 🌤️ PM2.5 Air Quality Dashboard")
95
- gr.Markdown("Powered by Hopsworks Feature Store + XGBoost Model")
96
-
97
- btn = gr.Button("Generate Forecast")
98
- output_forecast = gr.Image(label="Forecast (Next Days)")
99
- output_hindcast = gr.Image(label="Hindcast (Past Days)")
100
-
101
- btn.click(
102
- run_dashboard,
103
- outputs=[output_forecast, output_hindcast]
104
  )
105
 
106
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
+ import tempfile
5
+ import hopsworks
6
  from xgboost import XGBRegressor
7
+ from datetime import datetime, timedelta
8
 
9
+ from airquality.util import plot_air_quality_forecast
 
 
10
 
 
 
 
 
 
 
11
 
12
+ # -------------------------
13
+ # HOPSWORKS LOGIN + MODEL LOAD
14
+ # -------------------------
def load_resources():
    """Log in to Hopsworks and fetch everything the dashboard needs.

    Returns:
        A ``(model, feature_view, weather_fg, project)`` tuple: the XGBoost
        regressor loaded from the downloaded ``model.json``, version 1 of the
        ``air_quality_fv`` feature view (initialised for batch scoring),
        version 1 of the ``weather`` feature group, and the Hopsworks
        project handle.
    """
    project = hopsworks.login()
    feature_store = project.get_feature_store()

    # Feature view backing the hindcast.
    air_quality_fv = feature_store.get_feature_view(name="air_quality_fv", version=1)
    air_quality_fv.init_batch_scoring(1)

    # Weather feature group backing the forecast.
    weather = feature_store.get_feature_group("weather", 1)

    # Download the registered model artefacts and load them into a regressor.
    registry = project.get_model_registry()
    registered_model = registry.get_model("air_quality_xgboost_model", version=1)
    model_dir = registered_model.download()

    regressor = XGBRegressor()
    regressor.load_model(model_dir + "/model.json")

    return regressor, air_quality_fv, weather, project


# Resolved once at import time; the functions below read these module globals.
model, feature_view, weather_fg, project = load_resources()
 
41
 
42
+
43
+ # -------------------------
44
+ # FORECAST LOGIC (NEXT 7 DAYS)
45
+ # -------------------------
def generate_forecast():
    """Plot the PM2.5 forecast for the next seven days.

    Reads the weather feature group, predicts PM2.5 for each of the next
    seven calendar days (relative to today's UTC date) that has a weather
    row, and renders the predictions with ``plot_air_quality_forecast``.

    Returns:
        Path to the generated PNG file, or ``None`` when the feature group
        has no weather rows for any of the upcoming seven days.
    """
    # The same four inputs the hindcast feeds to this model. Selecting them
    # explicitly keeps the prediction independent of any extra columns in
    # the weather feature group.
    feature_columns = [
        "temperature_2m_mean",
        "precipitation_sum",
        "wind_speed_10m_max",
        "wind_direction_10m_dominant",
    ]

    today = datetime.utcnow().date()
    upcoming_days = [today + timedelta(days=offset) for offset in range(1, 8)]

    df_future = weather_fg.read()
    df_future["date"] = pd.to_datetime(df_future["date"]).dt.date

    preds = []
    for day in upcoming_days:
        rows = df_future[df_future["date"] == day]
        if rows.empty:
            # No weather forecast stored for this day yet.
            continue

        # If several rows share a date, only the first prediction is used.
        pm25_pred = model.predict(rows[feature_columns])[0]
        preds.append({"date": day.isoformat(), "predicted_pm25": pm25_pred})

    if not preds:
        return None

    df_preds = pd.DataFrame(preds)

    # Reserve a file name and close the handle right away, so the descriptor
    # is not leaked and the plotting helper can open the path itself.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_file:
        tmp_path = tmp_file.name
    plot_air_quality_forecast("linkoping", "hamngatan-10", df_preds, tmp_path, hindcast=False)

    return tmp_path
76
 
77
+
78
+ # -------------------------
79
+ # HINDCAST LOGIC (LAST 7 DAYS)
80
+ # -------------------------
def generate_hindcast():
    """Plot predicted PM2.5 against measured PM2.5 for the past seven days.

    Pulls features and ``pm25`` labels for the last seven days from the
    feature view, runs the model on the features, and renders the result
    with ``plot_air_quality_forecast`` in hindcast mode.

    Returns:
        Path to the generated PNG file, or ``None`` when the feature view
        returns no rows for the window.
    """
    feature_columns = [
        "temperature_2m_mean",
        "precipitation_sum",
        "wind_speed_10m_max",
        "wind_direction_10m_dominant",
    ]

    # Take "today" once so both window bounds refer to the same day even if
    # the call happens around midnight UTC.
    today = datetime.utcnow().date()

    # Measured values (labels) come from the feature view; the predictions
    # are computed below.
    features_df, labels_df = feature_view.training_data(
        start_time=today - timedelta(days=7),
        end_time=today,
        statistics_config=False,
    )

    if features_df.empty:
        # Nothing to plot; mirrors the forecast path's "no data" result.
        return None

    df = features_df.copy()
    df["date"] = pd.to_datetime(df["date"]).dt.date
    df["pm25"] = labels_df["pm25"]
    df["predicted_pm25"] = model.predict(df[feature_columns])

    # Keep the seven most recent rows.
    df = df.sort_values("date").tail(7)

    # Reserve a file name and close the handle right away, so the descriptor
    # is not leaked and the plotting helper can open the path itself.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_file:
        tmp_path = tmp_file.name
    plot_air_quality_forecast("linkoping", "hamngatan-10", df, tmp_path, hindcast=True)

    return tmp_path
108
+
109
+
110
+ # -------------------------
111
+ # GRADIO UI
112
+ # -------------------------
def run_dashboard(_=None):
    """Generate both dashboard plots.

    Args:
        _: Ignored. Kept, with a default, only so callers that still pass a
            positional value keep working.

    Returns:
        A ``(forecast_plot, hindcast_plot)`` tuple holding whatever
        ``generate_forecast`` and ``generate_hindcast`` return, in that order.
    """
    forecast_plot = generate_forecast()
    hindcast_plot = generate_hindcast()

    return forecast_plot, hindcast_plot


# The dashboard takes no user input. A Button is an event trigger rather than
# a data input, so the interface is declared output-only and relies on the
# run button that gr.Interface provides itself.
iface = gr.Interface(
    fn=run_dashboard,
    inputs=None,
    outputs=[
        gr.Image(label="PM2.5 Forecast (Next 7 Days)"),
        gr.Image(label="PM2.5 Hindcast (Past 7 Days)"),
    ],
    title="Air Quality Forecast Dashboard",
    description="Forecast and Hindcast PM2.5 for Linköping using XGBoost + Hopsworks",
)

iface.launch()