Spaces:
Sleeping
Sleeping
import tempfile
from datetime import datetime, timedelta, timezone

import gradio as gr
import hopsworks
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from xgboost import XGBRegressor

from airquality.util import plot_air_quality_forecast
| # ------------------------- | |
| # HOPSWORKS LOGIN + MODEL LOAD | |
| # ------------------------- | |
def load_resources_v1():
    """Log in to Hopsworks and fetch everything model v1 needs.

    Returns:
        A 4-tuple ``(model, feature_view, weather_feature_group, project)``:
        the XGBoost regressor loaded from the registry's version-1
        ``model.json``, the ``air_quality_fv`` feature view (version 1,
        initialised for batch scoring), the ``weather`` feature group
        (version 1) and the Hopsworks project handle.
    """
    hw_project = hopsworks.login()

    # Feature-store side: feature view + the weather feature group.
    feature_store = hw_project.get_feature_store()
    feature_view = feature_store.get_feature_view("air_quality_fv", version=1)
    feature_view.init_batch_scoring(1)
    weather_group = feature_store.get_feature_group("weather", 1)

    # Model-registry side: download the artefact and load it into XGBoost.
    registry = hw_project.get_model_registry()
    registered_model = registry.get_model("air_quality_xgboost_model", version=1)
    local_dir = registered_model.download()
    regressor = XGBRegressor()
    regressor.load_model(local_dir + "/model.json")

    return regressor, feature_view, weather_group, hw_project
def load_resources_v2():
    """Log in to Hopsworks and fetch everything model v2 needs.

    Returns:
        A 4-tuple ``(model, feature_view, weather_feature_group, project)``:
        the XGBoost regressor loaded from the registry's version-2
        ``model.json``, the ``air_quality_fv`` feature view (version 2,
        initialised for batch scoring with training dataset version 1), the
        ``weather`` feature group (version 1) and the Hopsworks project
        handle.
    """
    hw_project = hopsworks.login()

    # Feature-store side: feature view + the weather feature group.
    feature_store = hw_project.get_feature_store()
    feature_view = feature_store.get_feature_view("air_quality_fv", version=2)
    feature_view.init_batch_scoring(1)
    weather_group = feature_store.get_feature_group("weather", 1)

    # Model-registry side: download the artefact and load it into XGBoost.
    registry = hw_project.get_model_registry()
    registered_model = registry.get_model("air_quality_xgboost_model", version=2)
    local_dir = registered_model.download()
    regressor = XGBRegressor()
    regressor.load_model(local_dir + "/model.json")

    return regressor, feature_view, weather_group, hw_project
# Load both model bundles once, at import time; the forecast and hindcast
# handlers below unpack these tuples on every request instead of logging in
# again. v1 is loaded before v2, as before.
resources_v1, resources_v2 = load_resources_v1(), load_resources_v2()
# -------------------------
# FORECAST LOGIC (NEXT N DAYS; N IS CHOSEN WITH THE UI SLIDER, DEFAULT 7)
# -------------------------
def generate_forecast_v1(days):
    """Plot the model-v1 PM2.5 forecast for the next ``days`` days.

    Reads the weather feature group, selects the row(s) for each of the next
    ``days`` calendar dates (counted from today in UTC), predicts PM2.5 from
    the four weather features and passes the predictions to
    ``plot_air_quality_forecast``.

    Args:
        days: Number of future days to forecast (the UI slider value).

    Returns:
        Path of the temporary ``.png`` file handed to
        ``plot_air_quality_forecast`` (presumably where the figure is saved).

    Raises:
        ValueError: If the weather feature group contains no row for any of
            the requested dates, so there is nothing to plot.
    """
    feature_cols = [
        "temperature_2m_mean",
        "precipitation_sum",
        "wind_speed_10m_max",
        "wind_direction_10m_dominant",
    ]
    model, _fv, weather_fg, _project = resources_v1

    # The slider may deliver a float (assumption - confirm for the installed
    # Gradio version); range() below requires an int.
    days = int(days)
    # datetime.utcnow() is deprecated; an aware UTC "now" yields the same date.
    today = datetime.now(timezone.utc).date()

    df_future = weather_fg.read()
    df_future["date"] = pd.to_datetime(df_future["date"]).dt.date

    predictions = []
    for offset in range(1, days + 1):
        target_date = today + timedelta(days=offset)
        X = df_future.loc[df_future["date"] == target_date, feature_cols]
        if X.empty:
            # No weather row stored for this date: skip it.
            continue
        # If several rows share the date, only the first prediction is used.
        pred = float(model.predict(X)[0])
        predictions.append({"date": target_date, "predicted_pm25": pred})

    if not predictions:
        # Fail with a clear message instead of handing an empty, column-less
        # DataFrame to the plotting helper.
        raise ValueError(
            f"No weather forecast rows found for the next {days} day(s); "
            "nothing to plot."
        )

    df_preds = pd.DataFrame(predictions)

    # Reserve a file name and close the handle right away; delete=False keeps
    # the (empty) file on disk for the plotting helper to use.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        tmp_path = tmp.name
    plot_air_quality_forecast("linkoping", "hamngatan-10", df_preds, tmp_path)
    return tmp_path
def generate_forecast_v2(days):
    """Plot the model-v2 (lag-aware) PM2.5 forecast for the next ``days`` days.

    The last three PM2.5 values of the ``air_quality`` feature group seed the
    lag features. Each prediction is appended to that history, so later days
    are predicted from earlier predictions rather than from measurements.

    Args:
        days: Number of future days to forecast (the UI slider value).

    Returns:
        Path of the temporary ``.png`` file handed to
        ``plot_air_quality_forecast`` (presumably where the figure is saved).

    Raises:
        ValueError: If fewer than three historical PM2.5 values are
            available, or if the weather feature group contains no row for
            any of the requested dates.
    """
    model, _fv, weather_fg, project = resources_v2

    # The slider may deliver a float (assumption - confirm for the installed
    # Gradio version); range() below requires an int.
    days = int(days)
    # datetime.utcnow() is deprecated; an aware UTC "now" yields the same date.
    today = datetime.now(timezone.utc).date()

    df_future = weather_fg.read().sort_values("date")
    df_future["date"] = pd.to_datetime(df_future["date"]).dt.date

    # Load real PM2.5 history; the three most recent values seed the lags.
    aq_fg = project.get_feature_store().get_feature_group("air_quality", version=1)
    hist_pm25 = aq_fg.read().sort_values("date")
    pm25_history = list(hist_pm25["pm25"].values[-3:])
    if len(pm25_history) < 3:
        # The lag indexing below would otherwise fail with an IndexError.
        raise ValueError(
            "At least 3 historical PM2.5 values are required to build the "
            f"lag features; found {len(pm25_history)}."
        )

    preds = []
    for offset in range(1, days + 1):
        target_date = today + timedelta(days=offset)
        row = df_future[df_future["date"] == target_date]
        if row.empty:
            # No weather row stored for this date: skip it (the PM2.5 history
            # is not advanced either).
            continue

        weather = row.iloc[0]
        window = pm25_history[-3:]
        lag1, lag2, lag3 = window[2], window[1], window[0]
        # NOTE(review): the rolling statistics here cover the three values
        # *before* the target day and use the population std (np.std,
        # ddof=0), whereas generate_hindcast_v2 uses pandas rolling(3), which
        # includes the target day and uses the sample std (ddof=1). Confirm
        # which definition the model was trained with.
        X = pd.DataFrame({
            "temperature_2m_mean": [weather["temperature_2m_mean"]],
            "precipitation_sum": [weather["precipitation_sum"]],
            "wind_speed_10m_max": [weather["wind_speed_10m_max"]],
            "wind_direction_10m_dominant": [weather["wind_direction_10m_dominant"]],
            "pm25_lag1": [lag1],
            "pm25_lag2": [lag2],
            "pm25_lag3": [lag3],
            "pm25_roll3_mean": [np.mean(window)],
            "pm25_roll3_std": [np.std(window)],
        })
        pred = float(model.predict(X)[0])
        preds.append({"date": target_date, "predicted_pm25": pred})
        # Feed the prediction back so the next day's lags can use it.
        pm25_history.append(pred)

    if not preds:
        # Fail with a clear message instead of handing an empty, column-less
        # DataFrame to the plotting helper.
        raise ValueError(
            f"No weather forecast rows found for the next {days} day(s); "
            "nothing to plot."
        )

    df_preds = pd.DataFrame(preds)

    # Reserve a file name and close the handle right away; delete=False keeps
    # the (empty) file on disk for the plotting helper to use.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        tmp_path = tmp.name
    plot_air_quality_forecast("linkoping", "hamngatan-10", df_preds, tmp_path)
    return tmp_path
# -------------------------
# HINDCAST LOGIC (LAST N DAYS; N IS CHOSEN WITH THE UI SLIDER, DEFAULT 7)
# -------------------------
def generate_hindcast_v1(days):
    """Plot model-v1 predictions against measured PM2.5 for the last ``days`` days.

    Fetches feature-view rows for the window ``[today - days, today]`` (UTC
    dates), inner-joins the measured ``pm25`` values from the ``air_quality``
    feature group on ``date``, predicts PM2.5 from the four weather features
    and passes the merged frame to ``plot_air_quality_forecast`` with
    ``hindcast=True``.

    Args:
        days: Length of the look-back window in days (the UI slider value).

    Returns:
        Path of the temporary ``.png`` file handed to
        ``plot_air_quality_forecast`` (presumably where the figure is saved).

    Raises:
        ValueError: If no date in the window has both feature rows and a
            measured PM2.5 value.
    """
    model, fv, _weather_fg, project = resources_v1

    # Take "today" once so both window bounds come from the same instant
    # (datetime.utcnow() is deprecated; an aware UTC "now" yields the same date).
    end_date = datetime.now(timezone.utc).date()
    start_date = end_date - timedelta(days=int(days))

    # 1. Read feature view data for the window (the labels are not used).
    # NOTE(review): per the Hopsworks docs, training_data() also registers
    # training-dataset metadata on each call; get_batch_data() may be the
    # better fit for a read-only hindcast - confirm before switching.
    features_df, _labels_df = fv.training_data(
        start_time=start_date,
        end_time=end_date,
        statistics_config=False,
    )
    features_df["date"] = pd.to_datetime(features_df["date"]).dt.date

    # 2. Load ACTUAL PM2.5 values and reduce them to the same window.
    aq_fg = project.get_feature_store().get_feature_group("air_quality", version=1)
    aq_df = aq_fg.read()
    aq_df["date"] = pd.to_datetime(aq_df["date"]).dt.date
    aq_df = aq_df[(aq_df["date"] >= start_date) & (aq_df["date"] <= end_date)]

    # 3. Merge actual pm25 onto the features (dates present in both only).
    merged = pd.merge(features_df, aq_df[["date", "pm25"]], on="date", how="inner")
    if merged.empty:
        # Fail with a clear message instead of predicting on / plotting an
        # empty frame.
        raise ValueError(
            f"No overlapping feature and PM2.5 rows between {start_date} and "
            f"{end_date}; nothing to plot."
        )

    # 4. Predict using the v1 model.
    X = merged[[
        "temperature_2m_mean",
        "precipitation_sum",
        "wind_speed_10m_max",
        "wind_direction_10m_dominant",
    ]]
    merged["predicted_pm25"] = model.predict(X)

    # 5. Plot. Reserve a file name and close the handle right away;
    # delete=False keeps the (empty) file on disk for the plotting helper.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        tmp_path = tmp.name
    plot_air_quality_forecast(
        "linkoping", "hamngatan-10",
        merged,
        tmp_path,
        hindcast=True,
    )
    return tmp_path
def generate_hindcast_v2(days):
    """Plot model-v2 predictions against measured PM2.5 for the last ``days`` days.

    Joins the ``weather`` and ``air_quality`` feature groups on ``date`` over
    the window ``[today - (days + 3), today]`` (UTC dates), derives the lag
    and rolling PM2.5 features from the measured values, keeps the last
    ``days`` rows whose model inputs are all present, predicts PM2.5 and
    passes the frame to ``plot_air_quality_forecast`` with ``hindcast=True``.

    Args:
        days: Number of most recent days to plot (the UI slider value).

    Returns:
        Path of the temporary ``.png`` file handed to
        ``plot_air_quality_forecast`` (presumably where the figure is saved).

    Raises:
        ValueError: If no row in the window has a complete set of model
            inputs.
    """
    feature_cols = [
        "temperature_2m_mean",
        "precipitation_sum",
        "wind_speed_10m_max",
        "wind_direction_10m_dominant",
        "pm25_lag1",
        "pm25_lag2",
        "pm25_lag3",
        "pm25_roll3_mean",
        "pm25_roll3_std",
    ]
    model, _fv, weather_fg, project = resources_v2
    days = int(days)

    # Time window: three extra days so the earliest kept row has its lags.
    # "Today" is taken once so both bounds come from the same instant
    # (datetime.utcnow() is deprecated; an aware UTC "now" yields the same date).
    end_date = datetime.now(timezone.utc).date()
    start_date = end_date - timedelta(days=days + 3)

    # Load weather history for the window.
    weather_df = weather_fg.read()
    weather_df["date"] = pd.to_datetime(weather_df["date"]).dt.date
    in_window = (weather_df["date"] >= start_date) & (weather_df["date"] <= end_date)
    weather_df = weather_df[in_window].sort_values("date")

    # Load PM2.5 history (actual values, not predictions) for the window.
    aq_fg = project.get_feature_store().get_feature_group("air_quality", version=1)
    aq_df = aq_fg.read()
    aq_df["date"] = pd.to_datetime(aq_df["date"]).dt.date
    in_window = (aq_df["date"] >= start_date) & (aq_df["date"] <= end_date)
    aq_df = aq_df[in_window].sort_values("date")

    # Merge actual historical PM2.5 + weather.
    df = pd.merge(weather_df, aq_df, on="date")

    # Build lag features.
    # NOTE(review): rolling(3) includes the current row, i.e. the PM2.5 value
    # being predicted, whereas generate_forecast_v2 builds its rolling
    # statistics from the three preceding values only (and with np.std's
    # ddof=0 instead of pandas' ddof=1). Confirm which definition the model
    # was trained with.
    df["pm25_lag1"] = df["pm25"].shift(1)
    df["pm25_lag2"] = df["pm25"].shift(2)
    df["pm25_lag3"] = df["pm25"].shift(3)
    df["pm25_roll3_mean"] = df["pm25"].rolling(3).mean()
    df["pm25_roll3_std"] = df["pm25"].rolling(3).std()

    # Only keep the last N days where every model input exists. Restricting
    # dropna to the input columns stops NaNs in unrelated feature-group
    # columns from discarding rows; .copy() makes the column assignment below
    # write to an independent frame rather than a slice.
    df = df.dropna(subset=feature_cols).tail(days).copy()
    if df.empty:
        # Fail with a clear message instead of predicting on / plotting an
        # empty frame.
        raise ValueError(
            f"No rows with complete weather and PM2.5 lag features between "
            f"{start_date} and {end_date}; nothing to plot."
        )

    # Features for model v2.
    df["predicted_pm25"] = model.predict(df[feature_cols])

    # Reserve a file name and close the handle right away; delete=False keeps
    # the (empty) file on disk for the plotting helper to use.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        tmp_path = tmp.name
    plot_air_quality_forecast(
        "linkoping", "hamngatan-10",
        df,
        tmp_path,
        hindcast=True,
    )
    return tmp_path
# Gradio UI: a forecast section and a hindcast section, each with one slider,
# one button and one image output per model version.
# NOTE(review): the original indentation was lost, so the nesting below is a
# reconstruction. The gr.Image outputs and the click bindings are assumed to
# sit directly inside gr.Blocks (not inside the button rows) - confirm
# against the deployed layout.
with gr.Blocks() as iface:
    gr.Markdown("# Air Quality Dashboard (Model v1 & Model v2)")
    # Column headings for the two models.
    with gr.Row():
        gr.Markdown("### **Model v1 (No lag features)**")
        gr.Markdown("### **Model v2 (Lag-aware)**")
    # FORECAST: horizon sliders (3-10 days, default 7).
    with gr.Row():
        days_v1_f = gr.Slider(3, 10, value=7, step=1, label="Forecast Days (v1)")
        days_v2_f = gr.Slider(3, 10, value=7, step=1, label="Forecast Days (v2)")
    with gr.Row():
        btn_v1_f = gr.Button("Generate Forecast (v1)")
        btn_v2_f = gr.Button("Generate Forecast (v2)")
    out_v1_f = gr.Image()
    out_v2_f = gr.Image()
    # Each handler returns the path of a PNG file, shown in the image output.
    btn_v1_f.click(generate_forecast_v1, inputs=days_v1_f, outputs=out_v1_f)
    btn_v2_f.click(generate_forecast_v2, inputs=days_v2_f, outputs=out_v2_f)
    # HINDCAST: look-back sliders (3-10 days, default 7).
    with gr.Row():
        days_v1_h = gr.Slider(3, 10, value=7, step=1, label="Hindcast Days (v1)")
        days_v2_h = gr.Slider(3, 10, value=7, step=1, label="Hindcast Days (v2)")
    with gr.Row():
        btn_v1_h = gr.Button("Generate Hindcast (v1)")
        btn_v2_h = gr.Button("Generate Hindcast (v2)")
    out_v1_h = gr.Image()
    out_v2_h = gr.Image()
    btn_v1_h.click(generate_hindcast_v1, inputs=days_v1_h, outputs=out_v1_h)
    btn_v2_h.click(generate_hindcast_v2, inputs=days_v2_h, outputs=out_v2_h)
# Start the web server at import time (module-level side effect).
iface.launch()