"""Gradio dashboard comparing two PM2.5 models stored in Hopsworks.

Model v1 predicts from weather features only; model v2 additionally uses
lagged and rolling PM2.5 features. For each model the dashboard offers a
forecast (future days) and a hindcast (past days) plot.
"""

import os
import tempfile
from datetime import datetime, timedelta, timezone

import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt  # noqa: F401  (kept: file-level import)
import hopsworks
from xgboost import XGBRegressor
import numpy as np

from airquality.util import plot_air_quality_forecast

# Location labels passed to the plotting helper.
CITY = "linkoping"
STREET = "hamngatan-10"

# Columns fed to model v1 (and the weather part of model v2).
WEATHER_FEATURES = [
    "temperature_2m_mean",
    "precipitation_sum",
    "wind_speed_10m_max",
    "wind_direction_10m_dominant",
]

# Extra PM2.5-derived columns fed to model v2, in the order the model expects.
LAG_FEATURES = [
    "pm25_lag1",
    "pm25_lag2",
    "pm25_lag3",
    "pm25_roll3_mean",
    "pm25_roll3_std",
]


# -------------------------
# HOPSWORKS LOGIN + MODEL LOAD
# -------------------------
def _load_resources(version):
    """Log in to Hopsworks and load the model / feature view for *version*.

    Returns:
        Tuple ``(model, feature_view, weather_feature_group, project)``.
    """
    project = hopsworks.login()
    fs = project.get_feature_store()

    fv = fs.get_feature_view("air_quality_fv", version=version)
    fv.init_batch_scoring(1)

    weather_fg = fs.get_feature_group("weather", 1)

    mr = project.get_model_registry()
    model_obj = mr.get_model("air_quality_xgboost_model", version=version)
    model_dir = model_obj.download()

    model = XGBRegressor()
    model.load_model(os.path.join(model_dir, "model.json"))

    return model, fv, weather_fg, project


def load_resources_v1():
    """Load model v1, feature view v1, the weather feature group and project."""
    return _load_resources(1)


def load_resources_v2():
    """Load model v2, feature view v2, the weather feature group and project."""
    return _load_resources(2)


# Loaded once at start-up so button clicks do not re-download the models.
resources_v1 = load_resources_v1()
resources_v2 = load_resources_v2()


# -------------------------
# SHARED HELPERS
# -------------------------
def _utc_today():
    """Return today's date in UTC (non-deprecated form of ``utcnow().date()``)."""
    return datetime.now(timezone.utc).date()


def _with_plain_dates(df):
    """Convert the ``date`` column of *df* to ``datetime.date`` in place."""
    df["date"] = pd.to_datetime(df["date"]).dt.date
    return df


def _between(df, start_date, end_date):
    """Return the rows of *df* whose ``date`` lies in ``[start_date, end_date]``."""
    return df[(df["date"] >= start_date) & (df["date"] <= end_date)]


def _new_png_path():
    """Create an empty ``.png`` temp file and return its path.

    The handle is closed before returning so it is not leaked; the file is
    kept on disk (``delete=False``) for the plotting helper to write to.
    """
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        return tmp.name


def _render_plot(df, hindcast=False):
    """Plot *df* via ``plot_air_quality_forecast`` and return the image path."""
    tmp_path = _new_png_path()
    if hindcast:
        plot_air_quality_forecast(CITY, STREET, df, tmp_path, hindcast=True)
    else:
        plot_air_quality_forecast(CITY, STREET, df, tmp_path)
    return tmp_path


def _read_air_quality(project):
    """Read the ``air_quality`` feature group (version 1) as a DataFrame."""
    aq_fg = project.get_feature_store().get_feature_group(
        "air_quality", version=1
    )
    return aq_fg.read()


# -------------------------
# FORECAST LOGIC (NEXT N DAYS)
# -------------------------
def generate_forecast_v1(days):
    """Forecast PM2.5 with model v1 for the next *days* days.

    Days without a row in the weather feature group are skipped.

    Returns:
        Path of the PNG produced by the plotting helper.

    Raises:
        gr.Error: if no weather rows exist for any requested day.
    """
    days = int(days)  # slider values may not arrive as int
    model, fv, weather_fg, project = resources_v1

    today = _utc_today()
    df_future = _with_plain_dates(weather_fg.read())

    predictions = []
    for offset in range(1, days + 1):
        target_date = today + timedelta(days=offset)
        row = df_future[df_future["date"] == target_date]
        if row.empty:
            continue

        pred = float(model.predict(row[WEATHER_FEATURES])[0])
        predictions.append({"date": target_date, "predicted_pm25": pred})

    if not predictions:
        # An empty frame has no columns for the plotting helper to use.
        raise gr.Error("No weather forecast data available for the requested days.")

    return _render_plot(pd.DataFrame(predictions))


def generate_forecast_v2(days):
    """Forecast PM2.5 with model v2 for the next *days* days.

    Lag features start from the last three stored PM2.5 values; each
    prediction is appended to that history and feeds the next day's lags.

    Returns:
        Path of the PNG produced by the plotting helper.

    Raises:
        gr.Error: if fewer than three PM2.5 values are stored, or if no
            weather rows exist for any requested day.
    """
    days = int(days)  # slider values may not arrive as int
    model, fv, weather_fg, project = resources_v2

    today = _utc_today()
    df_future = _with_plain_dates(weather_fg.read().sort_values("date"))

    # Load real PM2.5 history
    hist_pm25 = _read_air_quality(project).sort_values("date")
    pm25_history = list(hist_pm25["pm25"].values[-3:])
    if len(pm25_history) < 3:
        raise gr.Error("At least 3 historical PM2.5 values are required for model v2.")

    preds = []
    for offset in range(1, days + 1):
        target_date = today + timedelta(days=offset)
        row = df_future[df_future["date"] == target_date]
        if row.empty:
            continue

        weather = row.iloc[0]
        recent = pm25_history[-3:]

        # NOTE(review): the rolling stats here cover the three lag values and
        # use np.std (ddof=0), whereas generate_hindcast_v2 uses pandas
        # rolling(3) over the current day plus two previous days with .std()
        # (ddof=1). Confirm which definition matches the training pipeline.
        features = {name: [weather[name]] for name in WEATHER_FEATURES}
        features.update(
            {
                "pm25_lag1": [recent[-1]],
                "pm25_lag2": [recent[-2]],
                "pm25_lag3": [recent[-3]],
                "pm25_roll3_mean": [np.mean(recent)],
                "pm25_roll3_std": [np.std(recent)],
            }
        )
        X = pd.DataFrame(features)

        pred = float(model.predict(X)[0])
        preds.append({"date": target_date, "predicted_pm25": pred})
        pm25_history.append(pred)

    if not preds:
        # An empty frame has no columns for the plotting helper to use.
        raise gr.Error("No weather forecast data available for the requested days.")

    return _render_plot(pd.DataFrame(preds))


# -------------------------
# HINDCAST LOGIC (LAST N DAYS)
# -------------------------
def generate_hindcast_v1(days):
    """Plot model v1 predictions against actual PM2.5 for the last *days* days.

    Returns:
        Path of the PNG produced by the plotting helper.

    Raises:
        gr.Error: if no rows remain after joining features with actual PM2.5.
    """
    days = int(days)  # slider values may not arrive as int
    model, fv, weather_fg, project = resources_v1

    # Evaluate "today" once so both ends of the window use the same day.
    end_date = _utc_today()
    start_date = end_date - timedelta(days=days)

    # 1. Read weather + feature view data (for prediction)
    features_df, labels_df = fv.training_data(
        start_time=start_date,
        end_time=end_date,
        statistics_config=False,
    )
    features_df = _with_plain_dates(features_df)

    # 2. Load ACTUAL PM2.5 values for the same time range
    aq_df = _with_plain_dates(_read_air_quality(project))
    aq_df = _between(aq_df, start_date, end_date)

    # 3. Merge actual pm25 onto features_df
    merged = pd.merge(features_df, aq_df[["date", "pm25"]], on="date", how="inner")
    if merged.empty:
        raise gr.Error("No overlapping feature and PM2.5 data for the requested period.")

    # 4. Predict using v1 model
    merged["predicted_pm25"] = model.predict(merged[WEATHER_FEATURES])

    # 5. Plot
    return _render_plot(merged, hindcast=True)


def generate_hindcast_v2(days):
    """Plot model v2 predictions against actual PM2.5 for the last *days* days.

    Lag and rolling features are built from actual PM2.5 values, so the
    window is widened by three days to have lags for the first plotted day.

    Returns:
        Path of the PNG produced by the plotting helper.

    Raises:
        gr.Error: if no complete rows remain after building the features.
    """
    days = int(days)  # slider values may not arrive as int
    model, fv, weather_fg, project = resources_v2

    # Time window
    end_date = _utc_today()
    start_date = end_date - timedelta(days=days + 3)

    # Load weather history
    weather_df = _with_plain_dates(weather_fg.read())
    weather_df = _between(weather_df, start_date, end_date).sort_values("date")

    # Load PM2.5 history (actual values, not predictions)
    aq_df = _with_plain_dates(_read_air_quality(project))
    aq_df = _between(aq_df, start_date, end_date).sort_values("date")

    # Merge actual historical PM2.5 + weather
    df = pd.merge(weather_df, aq_df, on="date")

    # Build lag features
    df["pm25_lag1"] = df["pm25"].shift(1)
    df["pm25_lag2"] = df["pm25"].shift(2)
    df["pm25_lag3"] = df["pm25"].shift(3)
    df["pm25_roll3_mean"] = df["pm25"].rolling(3).mean()
    df["pm25_roll3_std"] = df["pm25"].rolling(3).std()

    # Only keep the last N days (where all lags exist). Copy so the column
    # assignment below writes to an independent frame, not a slice.
    df = df.dropna().tail(days).copy()
    if df.empty:
        raise gr.Error("Not enough historical data to build lag features for the requested period.")

    # Features for model v2
    df["predicted_pm25"] = model.predict(df[WEATHER_FEATURES + LAG_FEATURES])

    return _render_plot(df, hindcast=True)


# -------------------------
# GRADIO UI
# -------------------------
# NOTE(review): the original indentation was lost, so the nesting of the
# gr.Image outputs is ambiguous; they are placed directly under the Blocks
# container here. Confirm against the intended layout.
with gr.Blocks() as iface:
    gr.Markdown("# Air Quality Dashboard (Model v1 & Model v2)")

    with gr.Row():
        gr.Markdown("### **Model v1 (No lag features)**")
        gr.Markdown("### **Model v2 (Lag-aware)**")

    with gr.Row():
        days_v1_f = gr.Slider(3, 10, value=7, step=1, label="Forecast Days (v1)")
        days_v2_f = gr.Slider(3, 10, value=7, step=1, label="Forecast Days (v2)")

    with gr.Row():
        btn_v1_f = gr.Button("Generate Forecast (v1)")
        btn_v2_f = gr.Button("Generate Forecast (v2)")

    out_v1_f = gr.Image()
    out_v2_f = gr.Image()

    btn_v1_f.click(generate_forecast_v1, inputs=days_v1_f, outputs=out_v1_f)
    btn_v2_f.click(generate_forecast_v2, inputs=days_v2_f, outputs=out_v2_f)

    # HINDCAST
    with gr.Row():
        days_v1_h = gr.Slider(3, 10, value=7, step=1, label="Hindcast Days (v1)")
        days_v2_h = gr.Slider(3, 10, value=7, step=1, label="Hindcast Days (v2)")

    with gr.Row():
        btn_v1_h = gr.Button("Generate Hindcast (v1)")
        btn_v2_h = gr.Button("Generate Hindcast (v2)")

    out_v1_h = gr.Image()
    out_v2_h = gr.Image()

    btn_v1_h.click(generate_hindcast_v1, inputs=days_v1_h, outputs=out_v1_h)
    btn_v2_h.click(generate_hindcast_v2, inputs=days_v2_h, outputs=out_v2_h)

iface.launch()