# Hugging Face Spaces page-header residue from export, preserved as a comment:
# Spaces: Running
import importlib
import os
import threading
import time
import traceback
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from pathlib import Path
from zoneinfo import ZoneInfo

import gradio as gr
from huggingface_hub import hf_hub_download
# --- Configuration -----------------------------------------------------------
# HF dataset holding the training matrix and the pickled model bundles.
DATASET_NAME = "Ciroc0/dmi-aarhus-weather-data"
# HF dataset holding the live prediction parquet files.
PREDICTIONS_DATASET = "Ciroc0/dmi-aarhus-predictions"
# Optional HF access token; None means anonymous downloads.
HF_TOKEN = os.environ.get("HF_TOKEN")
# All user-facing timestamps are rendered in Danish local time.
COPENHAGEN_TZ = ZoneInfo("Europe/Copenhagen")
APP_NAME = "dmi-vs-ml-dashboard"        # prefix for structured log lines
CACHE_TTL_SECONDS = 300                 # dashboard payload cache lifetime
WARMUP_DELAY_SECONDS = 15               # presumably delay before background warmup — TODO confirm usage below
HISTORY_WINDOW_DAYS = 7                 # backtest window length
FUTURE_WINDOW_HOURS = 48                # forecast window length
# Per-target pickle filename inside DATASET_NAME.
MODEL_FILES = {
    "temperature": "temperature_models.pkl",
    "wind_speed": "wind_speed_models.pkl",
    "wind_gust": "wind_gust_models.pkl",
    "rain_event": "rain_event_models.pkl",
    "rain_amount": "rain_amount_models.pkl",
}
class LazyModule:
    """Proxy that defers importing a module until first attribute access.

    Keeps app start-up fast by postponing heavy imports (pandas, numpy,
    joblib) until they are actually needed.
    """

    def __init__(self, module_name):
        self.module_name = module_name
        self._module = None

    def _load(self):
        # Import exactly once, then reuse the cached module object.
        module = self._module
        if module is None:
            module = importlib.import_module(self.module_name)
            self._module = module
        return module

    def __getattr__(self, item):
        # Only called for attributes not found on the proxy itself,
        # so real attribute lookups are forwarded to the target module.
        return getattr(self._load(), item)
# Heavy third-party libraries, resolved lazily on first attribute access.
pd = LazyModule("pandas")
np = LazyModule("numpy")
joblib = LazyModule("joblib")
| class AppState: | |
| lock: threading.Lock = field(default_factory=threading.Lock) | |
| warming: bool = True | |
| ready: bool = False | |
| last_error: str | None = None | |
| last_warning: str | None = None | |
| cache_loaded_at: datetime | None = None | |
| cache_expires_at: datetime | None = None | |
| cached_payload: dict | None = None | |
| APP_STATE = AppState() | |
def log_event(message, **fields):
    """Print one structured log line with a UTC timestamp.

    Extra keyword arguments are rendered as sorted ``key=value!r`` pairs
    after the message.
    """
    # datetime.utcnow() is deprecated (Python 3.12+); take an aware UTC time
    # and strip the offset so the rendered form keeps its trailing "Z".
    timestamp = (
        datetime.now(timezone.utc).replace(tzinfo=None).isoformat(timespec="seconds") + "Z"
    )
    details = " ".join(f"{key}={fields[key]!r}" for key in sorted(fields))
    if details:
        print(f"[{APP_NAME}] {timestamp} {message} {details}", flush=True)
    else:
        print(f"[{APP_NAME}] {timestamp} {message}", flush=True)
def log_exception(context, exc):
    """Log a failed operation (name, message, type) plus its full traceback."""
    log_event(
        f"{context} failed",
        error=str(exc),
        error_type=type(exc).__name__,
    )
    print(traceback.format_exc(), flush=True)
def now_cph():
    """Return the current timezone-aware datetime in Europe/Copenhagen."""
    current = datetime.now(tz=COPENHAGEN_TZ)
    return current
def build_status_text():
    """Render a one-line status string from APP_STATE (checked under lock).

    Priority order: failure > partial warning > warming > never-loaded > ready.
    """
    with APP_STATE.lock:
        if APP_STATE.last_error:
            status = f"Status: failed. {APP_STATE.last_error}"
        elif APP_STATE.last_warning:
            status = f"Status: partial. {APP_STATE.last_warning}"
        elif APP_STATE.warming:
            status = "Status: loading forecast and backtest data."
        elif APP_STATE.cache_loaded_at is None:
            status = "Status: ready. No data loaded yet."
        else:
            loaded = APP_STATE.cache_loaded_at.strftime("%Y-%m-%d %H:%M:%S")
            status = f"Status: ready. Cache loaded at {loaded}."
    return status
def placeholder_table(message):
    """Build a one-row DataFrame whose single ``status`` cell shows *message*."""
    return pd.DataFrame({"status": [message]})
def ensure_copenhagen_time(df, column_name):
    """Coerce *column_name* in *df* (in place) to tz-aware Copenhagen time.

    Naive timestamps are localized (DST-ambiguous values inferred, nonexistent
    values shifted forward); aware timestamps are converted. Returns *df*
    unchanged when it is None or lacks the column.
    """
    if df is None or column_name not in df.columns:
        return df
    parsed = pd.to_datetime(df[column_name], errors="coerce")
    if getattr(parsed.dt, "tz", None) is not None:
        converted = parsed.dt.tz_convert(COPENHAGEN_TZ)
    else:
        converted = parsed.dt.tz_localize(
            COPENHAGEN_TZ,
            ambiguous="infer",
            nonexistent="shift_forward",
        )
    df[column_name] = converted
    return df
def dataset_local_candidates(dataset_slug, filename):
    """List local paths to probe for *filename* before hitting the HF Hub.

    Probe order: app directory, then (when the tree is deep enough) the
    sibling ``datasets/<slug>`` checkout, then the working-directory layouts.
    """
    app_file = Path(__file__).resolve()
    cwd = Path.cwd()
    paths = [app_file.parent / filename]
    # Only reachable when the app lives at least three directories deep.
    if len(app_file.parents) >= 3:
        paths.append(app_file.parents[2] / "datasets" / dataset_slug / filename)
    paths.append(cwd / filename)
    paths.append(cwd / "hf" / "datasets" / dataset_slug / filename)
    return paths
def resolve_dataset_file(repo_id, dataset_slug, filename):
    """Return a local path for *filename*, downloading from the Hub if needed."""
    local_hit = next(
        (p for p in dataset_local_candidates(dataset_slug, filename) if p.exists()),
        None,
    )
    if local_hit is not None:
        return str(local_hit)
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="dataset",
        token=HF_TOKEN,
    )
def summarize_attempt_errors(filename_errors):
    """Join (filename, exception) pairs into one human-readable summary.

    BUG FIX: the original hard-coded "(unknown)" and never used the unpacked
    filename, so multi-file failure messages could not name which file failed.
    """
    return "; ".join(
        f"{filename}: {type(exc).__name__}: {exc}" for filename, exc in filename_errors
    )
def load_first_available_prediction_file():
    """Return a local path to the first resolvable predictions parquet.

    Tries known filenames in priority order; raises FileNotFoundError with a
    per-attempt summary when none can be resolved.
    """
    attempts = []
    for candidate in ("predictions_latest.parquet", "predictions.parquet"):
        try:
            return resolve_dataset_file(
                PREDICTIONS_DATASET, "dmi-aarhus-predictions", candidate
            )
        except Exception as exc:
            attempts.append((candidate, exc))
    raise FileNotFoundError(
        "No predictions parquet found in dataset. Attempts: "
        + summarize_attempt_errors(attempts)
    )
def normalize_predictions(df):
    """Clean a raw predictions frame: rename, tz-normalize, dedupe, sort.

    Returns None for an empty/missing frame. Keeps the last row per target
    timestamp after sorting, i.e. the most recently made prediction.
    """
    if df is None or len(df) == 0:
        return None
    if "timestamp" in df.columns and "target_timestamp" not in df.columns:
        df = df.rename(columns={"timestamp": "target_timestamp"})
    for time_column in ("target_timestamp", "reference_time", "prediction_made_at"):
        df = ensure_copenhagen_time(df, time_column)
    # Guarantee a boolean "verified" column even on older files.
    if "verified" not in df.columns:
        df["verified"] = False
    df["verified"] = df["verified"].fillna(False).astype(bool)
    preferred_order = ["target_timestamp", "prediction_made_at", "reference_time"]
    sort_keys = [key for key in preferred_order if key in df.columns]
    if sort_keys:
        df = df.sort_values(sort_keys)
    if "target_timestamp" in df.columns:
        df = df.drop_duplicates(subset=["target_timestamp"], keep="last")
    return df.reset_index(drop=True)
def load_prediction_frame():
    """Fetch and normalize the live predictions frame."""
    parquet_path = load_first_available_prediction_file()
    return normalize_predictions(pd.read_parquet(parquet_path))
def normalize_training_matrix(df):
    """Normalize legacy column names and timezones in the training matrix.

    Returns None for an empty/missing frame; otherwise returns the frame
    sorted by (target_timestamp, reference_time) with a fresh index.
    """
    if df is None or len(df) == 0:
        return None
    # Map legacy short names onto the canonical names, but never clobber a
    # canonical column that is already present.
    legacy_aliases = {
        "timestamp": "target_timestamp",
        "dmi_temp_pred": "dmi_temperature_2m_pred",
        "dmi_wind_pred": "dmi_windspeed_10m_pred",
        "dmi_pressure_pred": "dmi_pressure_msl_pred",
        "dmi_humidity_pred": "dmi_relative_humidity_2m_pred",
        "actual_wind": "actual_wind_speed",
    }
    rename_map = {
        old: new
        for old, new in legacy_aliases.items()
        if old in df.columns and new not in df.columns
    }
    if rename_map:
        df = df.rename(columns=rename_map)
    df = ensure_copenhagen_time(df, "target_timestamp")
    df = ensure_copenhagen_time(df, "reference_time")
    return df.sort_values(["target_timestamp", "reference_time"]).reset_index(drop=True)
def load_training_matrix():
    """Load and normalize the training matrix, trying known filenames in order.

    Raises FileNotFoundError with a per-attempt summary when nothing loads.
    """
    attempts = []
    for candidate in ("training_matrix.parquet", "data.parquet"):
        try:
            parquet_path = resolve_dataset_file(
                DATASET_NAME, "dmi-aarhus-weather-data", candidate
            )
            return normalize_training_matrix(pd.read_parquet(parquet_path))
        except Exception as exc:
            attempts.append((candidate, exc))
    raise FileNotFoundError(
        "No training matrix parquet found in dataset. Attempts: "
        + summarize_attempt_errors(attempts)
    )
def load_model_bundle(target_name):
    """Load the pickled model bundle for *target_name* (see MODEL_FILES)."""
    bundle_path = resolve_dataset_file(
        DATASET_NAME, "dmi-aarhus-weather-data", MODEL_FILES[target_name]
    )
    return joblib.load(bundle_path)
def predict_with_bundle(bundle, df):
    """Run per-lead-bucket models over *df* and return an array of predictions.

    Rows whose bucket has no usable model (or whose features are missing)
    stay NaN. Models exposing ``predict_proba`` contribute the positive-class
    probability; others contribute raw ``predict`` output.
    """
    if bundle is None or df is None or len(df) == 0 or "lead_bucket" not in df.columns:
        return None
    output = np.full(len(df), np.nan)
    bucket_models = bundle.get("models", {})
    for bucket in df["lead_bucket"].dropna().unique():
        model_info = bucket_models.get(bucket)
        if model_info is None:
            continue
        model = model_info.get("model")
        # Per-bucket feature list wins; fall back to the bundle-wide list.
        features = model_info.get("feature_columns") or bundle.get("feature_columns", [])
        if model is None or not features:
            continue
        absent = [name for name in features if name not in df.columns]
        if absent:
            log_event("predict_with_bundle missing_features", bucket=bucket, missing_columns=absent)
            continue
        row_mask = df["lead_bucket"] == bucket
        inputs = df.loc[row_mask, features].fillna(0.0)
        if hasattr(model, "predict_proba"):
            output[row_mask] = model.predict_proba(inputs)[:, 1]
        else:
            output[row_mask] = model.predict(inputs)
    return output
def build_historical_backtest(training_df):
    """Build the last-HISTORY_WINDOW_DAYS backtest frame with ML columns added.

    Filters the training matrix to the history window with a positive lead
    time, seeds the ml_* columns from the DMI baseline columns, then
    overwrites them with per-target model predictions where available.
    Returns None when no usable rows remain.
    """
    if training_df is None or len(training_df) == 0 or "target_timestamp" not in training_df.columns:
        return None
    current_time = now_cph()
    # Never extend past "now", even if the matrix contains future rows.
    window_end = min(current_time, training_df["target_timestamp"].max())
    window_start = window_end - timedelta(days=HISTORY_WINDOW_DAYS)
    history = training_df[
        (training_df["target_timestamp"] >= window_start)
        & (training_df["target_timestamp"] <= window_end)
    ].copy()
    if len(history) == 0:
        return None
    if "lead_time_hours" in history.columns:
        # Keep strictly positive lead times only (0.0001 excludes zero or
        # negative leads) up to the forecast horizon.
        history = history[
            history["lead_time_hours"].fillna(0).between(0.0001, FUTURE_WINDOW_HOURS, inclusive="both")
        ].copy()
        if len(history) == 0:
            return None
    # Seed ML columns from the DMI baseline so rows without model coverage
    # still carry a sensible value.
    history["ml_temp"] = history["dmi_temperature_2m_pred"] if "dmi_temperature_2m_pred" in history.columns else np.nan
    history["ml_wind_speed"] = history["dmi_windspeed_10m_pred"] if "dmi_windspeed_10m_pred" in history.columns else np.nan
    history["ml_wind_gust"] = history["dmi_windgusts_10m_pred"] if "dmi_windgusts_10m_pred" in history.columns else np.nan
    if "dmi_precipitation_probability_pred" in history.columns:
        # DMI probability is in percent; store it as a 0..1 fraction.
        history["ml_rain_prob"] = (
            history["dmi_precipitation_probability_pred"].fillna(0.0).clip(0.0, 100.0) / 100.0
        )
    else:
        history["ml_rain_prob"] = 0.0
    if "dmi_precipitation_pred" in history.columns:
        history["ml_rain_amount"] = history["dmi_precipitation_pred"].fillna(0.0).clip(0.0, None)
    else:
        history["ml_rain_amount"] = 0.0
    # (bundle name, output column, DMI baseline column, prediction kind).
    # "correction" bundles predict a delta added to the baseline;
    # "probability" bundles a 0..1 probability; "absolute" the quantity itself.
    bundle_specs = [
        ("temperature", "ml_temp", "dmi_temperature_2m_pred", "correction"),
        ("wind_speed", "ml_wind_speed", "dmi_windspeed_10m_pred", "correction"),
        ("wind_gust", "ml_wind_gust", "dmi_windgusts_10m_pred", "correction"),
        ("rain_event", "ml_rain_prob", None, "probability"),
        ("rain_amount", "ml_rain_amount", None, "absolute"),
    ]
    for target_name, output_column, baseline_column, prediction_kind in bundle_specs:
        try:
            bundle = load_model_bundle(target_name)
        except Exception as exc:
            # A missing/broken bundle disables this target only.
            log_event("load_model_bundle skipped", target=target_name, error=str(exc))
            continue
        predictions = predict_with_bundle(bundle, history)
        if predictions is None:
            continue
        prediction_series = pd.Series(predictions, index=history.index, dtype="float64")
        prediction_mask = prediction_series.notna()
        if not prediction_mask.any():
            continue
        if prediction_kind == "correction":
            history.loc[prediction_mask, output_column] = (
                history.loc[prediction_mask, baseline_column] + prediction_series[prediction_mask]
            )
        elif prediction_kind == "probability":
            history.loc[prediction_mask, output_column] = prediction_series[prediction_mask].clip(0.0, 1.0)
        else:
            history.loc[prediction_mask, output_column] = prediction_series[prediction_mask].clip(0.0, None)
    # Keep one row per target hour: descending sort + keep "first" selects the
    # largest lead time, ties broken by the latest reference time.
    # NOTE(review): confirm "largest lead" (rather than smallest) is intended.
    sort_columns = ["target_timestamp"]
    ascending = [True]
    if "lead_time_hours" in history.columns:
        sort_columns.append("lead_time_hours")
        ascending.append(False)
    if "reference_time" in history.columns:
        sort_columns.append("reference_time")
        ascending.append(False)
    history = history.sort_values(sort_columns, ascending=ascending)
    history = history.drop_duplicates(subset=["target_timestamp"], keep="first").reset_index(drop=True)
    return history
def load_dashboard_payload(force=False):
    """Return the cached dashboard payload, rebuilding it when stale.

    The payload is a dict: {"future": next-48h predictions frame or None,
    "history": backtest frame or None}. ``force=True`` bypasses the TTL cache.

    Live-prediction failures are downgraded to a warning. ROBUSTNESS FIX:
    failures while loading the training matrix / backtest are now recorded on
    APP_STATE and re-raised — the original left ``warming=True`` with no
    ``last_error``, so build_status_text() reported "loading" forever.
    """
    current_time = now_cph()
    with APP_STATE.lock:
        cache_valid = (
            not force
            and APP_STATE.cached_payload is not None
            and APP_STATE.cache_expires_at is not None
            and APP_STATE.cache_expires_at > current_time
        )
        if cache_valid:
            return APP_STATE.cached_payload
        APP_STATE.warming = True
        APP_STATE.last_error = None
        APP_STATE.last_warning = None
    log_event("load_dashboard_payload started", force=force)
    try:
        training_df = load_training_matrix()
        history_df = build_historical_backtest(training_df)
    except Exception as exc:
        # Record the failure so the UI can surface it, then propagate.
        with APP_STATE.lock:
            APP_STATE.warming = False
            APP_STATE.last_error = str(exc)
        log_exception("load_dashboard_payload", exc)
        raise
    predictions_df = None
    prediction_warning = None
    try:
        predictions_df = load_prediction_frame()
    except Exception as exc:
        # Best effort: the dashboard still works from the backtest alone.
        prediction_warning = f"Live predictions unavailable. {exc}"
        log_exception("load_prediction_frame", exc)
    future_df = None
    if predictions_df is not None and len(predictions_df) > 0:
        future_df = predictions_df[
            (predictions_df["target_timestamp"] > current_time)
            & (predictions_df["target_timestamp"] <= current_time + timedelta(hours=FUTURE_WINDOW_HOURS))
        ].copy()
        future_df = future_df.sort_values("target_timestamp").reset_index(drop=True)
    payload = {"future": future_df, "history": history_df}
    with APP_STATE.lock:
        APP_STATE.cached_payload = payload
        APP_STATE.cache_loaded_at = current_time
        APP_STATE.cache_expires_at = current_time + timedelta(seconds=CACHE_TTL_SECONDS)
        APP_STATE.warming = False
        APP_STATE.ready = True
        APP_STATE.last_error = None
        APP_STATE.last_warning = prediction_warning
    log_event(
        "load_dashboard_payload completed",
        future_rows=0 if future_df is None else len(future_df),
        history_rows=0 if history_df is None else len(history_df),
    )
    return payload
def add_now_marker(fig):
    """Draw a dotted gray vertical line at the current Copenhagen time."""
    fig.add_vline(x=now_cph(), line_color="gray", line_dash="dot", line_width=1)
def create_temperature_plot(history_df, future_df):
    """Build the temperature figure: solid backtest lines, dashed forecast.

    Returns None when both frames are empty/missing.
    """
    history_empty = history_df is None or len(history_df) == 0
    future_empty = future_df is None or len(future_df) == 0
    if history_empty and future_empty:
        return None
    go = importlib.import_module("plotly.graph_objects")
    fig = go.Figure()
    # (frame, column, trace name, line style, mode, require non-NaN values)
    trace_specs = [
        (history_df, "actual_temp", "Actual Temperature", dict(color="black", width=2), "lines", False),
        (history_df, "dmi_temperature_2m_pred", "DMI Backtest", dict(color="red", width=2), "lines", False),
        (history_df, "ml_temp", "ML Backtest", dict(color="green", width=2), "lines", True),
        (future_df, "dmi_temperature_2m_pred", "DMI Forecast", dict(color="red", width=2, dash="dash"), "lines+markers", False),
        (future_df, "ml_temp", "ML Forecast", dict(color="green", width=2, dash="dash"), "lines+markers", True),
    ]
    for frame, column, label, line_style, mode, needs_values in trace_specs:
        if frame is None or len(frame) == 0 or column not in frame.columns:
            continue
        if needs_values and not frame[column].notna().any():
            continue
        fig.add_trace(
            go.Scatter(
                x=frame["target_timestamp"],
                y=frame[column],
                name=label,
                line=line_style,
                mode=mode,
            )
        )
    fig.update_layout(
        title="Temperature - Last 7 days backtest and next 48 hours forecast",
        xaxis_title="Time (Danish)",
        yaxis_title="Temperature (C)",
        height=420,
        template="plotly_white",
        hovermode="x unified",
    )
    add_now_marker(fig)
    return fig
def create_wind_plot(history_df, future_df):
    """Build the wind figure: speed and gust, backtest plus forecast traces.

    Returns None when both frames are empty/missing.
    """
    history_empty = history_df is None or len(history_df) == 0
    future_empty = future_df is None or len(future_df) == 0
    if history_empty and future_empty:
        return None
    go = importlib.import_module("plotly.graph_objects")
    fig = go.Figure()
    # (frame, column, trace name, line style, mode, require non-NaN values)
    trace_specs = [
        (history_df, "actual_wind_speed", "Actual Wind Speed", dict(color="black", width=2), "lines", False),
        (history_df, "actual_wind_gust", "Actual Wind Gust", dict(color="gray", width=1, dash="dot"), "lines", False),
        (history_df, "dmi_windspeed_10m_pred", "DMI Wind Speed Backtest", dict(color="blue", width=2), "lines", False),
        (history_df, "ml_wind_speed", "ML Wind Speed Backtest", dict(color="green", width=2), "lines", True),
        (history_df, "dmi_windgusts_10m_pred", "DMI Wind Gust Backtest", dict(color="orange", width=2, dash="dash"), "lines", False),
        (history_df, "ml_wind_gust", "ML Wind Gust Backtest", dict(color="darkgreen", width=2, dash="dash"), "lines", True),
        (future_df, "dmi_windspeed_10m_pred", "DMI Wind Speed Forecast", dict(color="blue", width=2, dash="dot"), "lines+markers", False),
        (future_df, "ml_wind_speed", "ML Wind Speed Forecast", dict(color="green", width=2, dash="dot"), "lines+markers", True),
        (future_df, "dmi_windgusts_10m_pred", "DMI Wind Gust Forecast", dict(color="orange", width=2, dash="dashdot"), "lines+markers", False),
        (future_df, "ml_wind_gust", "ML Wind Gust Forecast", dict(color="darkgreen", width=2, dash="dashdot"), "lines+markers", True),
    ]
    for frame, column, label, line_style, mode, needs_values in trace_specs:
        if frame is None or len(frame) == 0 or column not in frame.columns:
            continue
        if needs_values and not frame[column].notna().any():
            continue
        fig.add_trace(
            go.Scatter(
                x=frame["target_timestamp"],
                y=frame[column],
                name=label,
                line=line_style,
                mode=mode,
            )
        )
    fig.update_layout(
        title="Wind - Last 7 days backtest and next 48 hours forecast",
        xaxis_title="Time (Danish)",
        yaxis_title="Wind Speed / Gust (m/s)",
        height=460,
        template="plotly_white",
        hovermode="x unified",
    )
    add_now_marker(fig)
    return fig
def create_rain_plot(history_df, future_df):
    """Build the rain figure: probability (left axis) and amount (right axis).

    Returns None when both frames are empty/missing. Observed rain is drawn
    first as translucent bars on the amount axis.
    """
    history_empty = history_df is None or len(history_df) == 0
    future_empty = future_df is None or len(future_df) == 0
    if history_empty and future_empty:
        return None
    go = importlib.import_module("plotly.graph_objects")
    make_subplots = importlib.import_module("plotly.subplots").make_subplots
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    if not history_empty and "actual_precipitation" in history_df.columns:
        fig.add_trace(
            go.Bar(
                x=history_df["target_timestamp"],
                y=history_df["actual_precipitation"].fillna(0.0),
                name="Actual Rain Amount",
                marker_color="lightgray",
                opacity=0.45,
            ),
            secondary_y=True,
        )
    # (frame, column, name, line style, mode, transform, right axis, require non-NaN)
    # transform: "raw" as-is, "pct" ×100 (fraction → percent), "fill" NaN → 0.
    scatter_specs = [
        (history_df, "dmi_precipitation_probability_pred", "DMI Rain Probability Backtest", dict(color="blue", width=2), "lines", "raw", False, False),
        (history_df, "ml_rain_prob", "ML Rain Probability Backtest", dict(color="green", width=2), "lines", "pct", False, True),
        (history_df, "dmi_precipitation_pred", "DMI Rain Amount Backtest", dict(color="orange", width=2), "lines", "fill", True, False),
        (history_df, "ml_rain_amount", "ML Rain Amount Backtest", dict(color="darkgreen", width=2), "lines", "fill", True, True),
        (future_df, "dmi_precipitation_probability_pred", "DMI Rain Probability Forecast", dict(color="blue", width=2, dash="dash"), "lines+markers", "raw", False, False),
        (future_df, "ml_rain_prob", "ML Rain Probability Forecast", dict(color="green", width=2, dash="dash"), "lines+markers", "pct", False, True),
        (future_df, "dmi_precipitation_pred", "DMI Rain Amount Forecast", dict(color="orange", width=2, dash="dot"), "lines+markers", "fill", True, False),
        (future_df, "ml_rain_amount", "ML Rain Amount Forecast", dict(color="darkgreen", width=2, dash="dot"), "lines+markers", "fill", True, True),
    ]
    for frame, column, label, line_style, mode, transform, on_right, needs_values in scatter_specs:
        if frame is None or len(frame) == 0 or column not in frame.columns:
            continue
        if needs_values and not frame[column].notna().any():
            continue
        values = frame[column]
        if transform == "pct":
            values = values * 100
        elif transform == "fill":
            values = values.fillna(0.0)
        fig.add_trace(
            go.Scatter(
                x=frame["target_timestamp"],
                y=values,
                name=label,
                line=line_style,
                mode=mode,
            ),
            secondary_y=on_right,
        )
    fig.update_layout(
        title="Rain - Last 7 days backtest and next 48 hours forecast",
        xaxis_title="Time (Danish)",
        template="plotly_white",
        height=460,
        hovermode="x unified",
    )
    fig.update_yaxes(title_text="Probability (%)", secondary_y=False, range=[0, 100])
    fig.update_yaxes(title_text="Amount (mm)", secondary_y=True)
    add_now_marker(fig)
    return fig
def _paired_error_stats(history_df, actual_column, dmi_column, ml_column):
    """Return (dmi_error, ml_error) aligned on rows with a non-null actual, or None."""
    actual = history_df[actual_column].dropna()
    if len(actual) == 0:
        return None
    aligned = history_df.loc[actual.index]
    dmi_error = aligned[actual_column] - aligned[dmi_column]
    ml_error = aligned[actual_column] - aligned[ml_column]
    return dmi_error, ml_error


def _mae(errors):
    """Mean absolute error as a plain float."""
    return float(np.mean(np.abs(errors)))


def _rmse(errors):
    """Root mean squared error as a plain float."""
    return float(np.sqrt(np.mean(errors**2)))


def _improvement(dmi_score, ml_score):
    """Relative improvement of ML over DMI in percent (0.0 when DMI score is 0)."""
    return ((dmi_score - ml_score) / dmi_score) * 100 if dmi_score > 0 else 0.0


def calculate_metrics(history_df):
    """Compute DMI-vs-ML skill metrics from the backtest frame.

    Returns a dict keyed by target ("temp", "wind_speed", "wind_gust",
    "rain_event", "rain_amount"); each entry appears only when the needed
    columns exist and have usable data. Refactored: the triplicated
    MAE/RMSE/improvement computation now goes through private helpers.
    """
    if history_df is None or len(history_df) == 0:
        return {}
    metrics = {}
    if {"actual_temp", "dmi_temperature_2m_pred", "ml_temp"}.issubset(history_df.columns):
        errors = _paired_error_stats(history_df, "actual_temp", "dmi_temperature_2m_pred", "ml_temp")
        if errors is not None:
            dmi_error, ml_error = errors
            dmi_rmse = _rmse(dmi_error)
            ml_rmse = _rmse(ml_error)
            metrics["temp"] = {
                "dmi_rmse": dmi_rmse,
                "ml_rmse": ml_rmse,
                "dmi_mae": _mae(dmi_error),
                "ml_mae": _mae(ml_error),
                # Temperature improvement is scored on RMSE.
                "improvement": _improvement(dmi_rmse, ml_rmse),
            }
    for key, actual_column, dmi_column, ml_column in (
        ("wind_speed", "actual_wind_speed", "dmi_windspeed_10m_pred", "ml_wind_speed"),
        ("wind_gust", "actual_wind_gust", "dmi_windgusts_10m_pred", "ml_wind_gust"),
    ):
        if not {actual_column, dmi_column, ml_column}.issubset(history_df.columns):
            continue
        errors = _paired_error_stats(history_df, actual_column, dmi_column, ml_column)
        if errors is None:
            continue
        dmi_error, ml_error = errors
        dmi_mae = _mae(dmi_error)
        ml_mae = _mae(ml_error)
        metrics[key] = {
            "dmi_rmse": _rmse(dmi_error),
            "ml_rmse": _rmse(ml_error),
            "dmi_mae": dmi_mae,
            "ml_mae": ml_mae,
            # Wind improvement is scored on MAE.
            "improvement": _improvement(dmi_mae, ml_mae),
        }
    if "actual_precipitation" in history_df.columns:
        actual_amount = history_df["actual_precipitation"].fillna(0.0)
        # A "rain event" means more than 0.1 mm observed.
        actual_event = (actual_amount > 0.1).astype(int)
        if {"dmi_precipitation_probability_pred", "ml_rain_prob"}.issubset(history_df.columns):
            # DMI probability arrives in percent; ML is already a fraction.
            dmi_prob = history_df["dmi_precipitation_probability_pred"].fillna(0.0).clip(0.0, 100.0) / 100.0
            ml_prob = history_df["ml_rain_prob"].fillna(0.0).clip(0.0, 1.0)
            metrics["rain_event"] = {
                "dmi_brier": float(np.mean((actual_event - dmi_prob) ** 2)),
                "ml_brier": float(np.mean((actual_event - ml_prob) ** 2)),
                "dmi_accuracy": float(np.mean((dmi_prob >= 0.5).astype(int) == actual_event)),
                "ml_accuracy": float(np.mean((ml_prob >= 0.5).astype(int) == actual_event)),
            }
        if {"dmi_precipitation_pred", "ml_rain_amount"}.issubset(history_df.columns):
            dmi_amount = history_df["dmi_precipitation_pred"].fillna(0.0).clip(0.0, None)
            ml_amount = history_df["ml_rain_amount"].fillna(0.0).clip(0.0, None)
            dmi_mae = _mae(actual_amount - dmi_amount)
            ml_mae = _mae(actual_amount - ml_amount)
            metrics["rain_amount"] = {
                "dmi_mae": dmi_mae,
                "ml_mae": ml_mae,
                "improvement": _improvement(dmi_mae, ml_mae),
            }
    return metrics
def build_metrics_text(metrics):
    """Render the metrics dict as Markdown, one bold summary line per target."""
    if not metrics:
        return "No historical backtest data available yet."
    lines = []
    if "temp" in metrics:
        temp = metrics["temp"]
        lines.append(
            f"**Temperature:** DMI RMSE={temp['dmi_rmse']:.2f}C, "
            f"ML RMSE={temp['ml_rmse']:.2f}C, Improvement={temp['improvement']:+.1f}%"
        )
    if "wind_speed" in metrics:
        wind = metrics["wind_speed"]
        lines.append(
            f"**Wind Speed:** DMI MAE={wind['dmi_mae']:.2f}m/s, "
            f"ML MAE={wind['ml_mae']:.2f}m/s, Improvement={wind['improvement']:+.1f}%"
        )
    if "wind_gust" in metrics:
        gust = metrics["wind_gust"]
        lines.append(
            f"**Wind Gust:** DMI MAE={gust['dmi_mae']:.2f}m/s, "
            f"ML MAE={gust['ml_mae']:.2f}m/s, Improvement={gust['improvement']:+.1f}%"
        )
    if "rain_event" in metrics:
        event = metrics["rain_event"]
        lines.append(
            f"**Rain Event:** DMI Brier={event['dmi_brier']:.3f}, ML Brier={event['ml_brier']:.3f}, "
            f"DMI accuracy={event['dmi_accuracy']:.1%}, ML accuracy={event['ml_accuracy']:.1%}"
        )
    if "rain_amount" in metrics:
        amount = metrics["rain_amount"]
        lines.append(
            f"**Rain Amount:** DMI MAE={amount['dmi_mae']:.2f}mm, "
            f"ML MAE={amount['ml_mae']:.2f}mm, Improvement={amount['improvement']:+.1f}%"
        )
    return "\n\n".join(lines)
| def create_performance_plot(history_df, metrics): | |
| if history_df is None or len(history_df) == 0: | |
| return None | |
| go = importlib.import_module("plotly.graph_objects") | |
| make_subplots = importlib.import_module("plotly.subplots").make_subplots | |
| fig = make_subplots( | |
| rows=2, | |
| cols=2, | |
| subplot_titles=("Temperature Error", "Wind Error", "Rain Event Probability", "Overall Metrics"), | |
| specs=[[{}, {}], [{}, {}]], | |
| ) | |
| if {"actual_temp", "dmi_temperature_2m_pred", "ml_temp"}.issubset(history_df.columns): | |
| fig.add_trace( | |
| go.Scatter( | |
| x=history_df["target_timestamp"], | |
| y=np.abs(history_df["actual_temp"] - history_df["dmi_temperature_2m_pred"]), | |
| name="DMI Temp Error", | |
| line=dict(color="red"), | |
| ), | |
| row=1, | |
| col=1, | |
| ) | |
| fig.add_trace( | |
| go.Scatter( | |
| x=history_df["target_timestamp"], | |
| y=np.abs(history_df["actual_temp"] - history_df["ml_temp"]), | |
| name="ML Temp Error", | |
| line=dict(color="green"), | |
| ), | |
| row=1, | |
| col=1, | |
| ) | |
| if {"actual_wind_speed", "dmi_windspeed_10m_pred", "ml_wind_speed"}.issubset(history_df.columns): | |
| fig.add_trace( | |
| go.Scatter( | |
| x=history_df["target_timestamp"], | |
| y=np.abs(history_df["actual_wind_speed"] - history_df["dmi_windspeed_10m_pred"]), | |
| name="DMI Wind Speed Error", | |
| line=dict(color="blue"), | |
| ), | |
| row=1, | |
| col=2, | |
| ) | |
| fig.add_trace( | |
| go.Scatter( | |
| x=history_df["target_timestamp"], | |
| y=np.abs(history_df["actual_wind_speed"] - history_df["ml_wind_speed"]), | |
| name="ML Wind Speed Error", | |
| line=dict(color="green"), | |
| ), | |
| row=1, | |
| col=2, | |
| ) | |
| if {"actual_wind_gust", "dmi_windgusts_10m_pred", "ml_wind_gust"}.issubset(history_df.columns): | |
| fig.add_trace( | |
| go.Scatter( | |
| x=history_df["target_timestamp"], | |
| y=np.abs(history_df["actual_wind_gust"] - history_df["dmi_windgusts_10m_pred"]), | |
| name="DMI Wind Gust Error", | |
| line=dict(color="orange", dash="dash"), | |
| ), | |
| row=1, | |
| col=2, | |
| ) | |
| fig.add_trace( | |
| go.Scatter( | |
| x=history_df["target_timestamp"], | |
| y=np.abs(history_df["actual_wind_gust"] - history_df["ml_wind_gust"]), | |
| name="ML Wind Gust Error", | |
| line=dict(color="darkgreen", dash="dash"), | |
| ), | |
| row=1, | |
| col=2, | |
| ) | |
| if "actual_precipitation" in history_df.columns: | |
| actual_event = (history_df["actual_precipitation"].fillna(0.0) > 0.1).astype(int) | |
| fig.add_trace( | |
| go.Scatter( | |
| x=history_df["target_timestamp"], | |
| y=actual_event, | |
| name="Actual Rain Event", | |
| line=dict(color="black"), | |
| ), | |
| row=2, | |
| col=1, | |
| ) | |
| if "dmi_precipitation_probability_pred" in history_df.columns: | |
| fig.add_trace( | |
| go.Scatter( | |
| x=history_df["target_timestamp"], | |
| y=history_df["dmi_precipitation_probability_pred"].fillna(0.0).clip(0.0, 100.0) / 100.0, | |
| name="DMI Rain Probability", | |
| line=dict(color="blue"), | |
| ), | |
| row=2, | |
| col=1, | |
| ) | |
| if "ml_rain_prob" in history_df.columns: | |
| fig.add_trace( | |
| go.Scatter( | |
| x=history_df["target_timestamp"], | |
| y=history_df["ml_rain_prob"].fillna(0.0).clip(0.0, 1.0), | |
| name="ML Rain Probability", | |
| line=dict(color="green"), | |
| ), | |
| row=2, | |
| col=1, | |
| ) | |
| labels = [] | |
| dmi_values = [] | |
| ml_values = [] | |
| if "temp" in metrics: | |
| labels.append("Temp RMSE") | |
| dmi_values.append(metrics["temp"]["dmi_rmse"]) | |
| ml_values.append(metrics["temp"]["ml_rmse"]) | |
| if "wind_speed" in metrics: | |
| labels.append("Wind Speed MAE") | |
| dmi_values.append(metrics["wind_speed"]["dmi_mae"]) | |
| ml_values.append(metrics["wind_speed"]["ml_mae"]) | |
| if "wind_gust" in metrics: | |
| labels.append("Wind Gust MAE") | |
| dmi_values.append(metrics["wind_gust"]["dmi_mae"]) | |
| ml_values.append(metrics["wind_gust"]["ml_mae"]) | |
| if "rain_event" in metrics: | |
| labels.append("Rain Brier") | |
| dmi_values.append(metrics["rain_event"]["dmi_brier"]) | |
| ml_values.append(metrics["rain_event"]["ml_brier"]) | |
| if "rain_amount" in metrics: | |
| labels.append("Rain Amount MAE") | |
| dmi_values.append(metrics["rain_amount"]["dmi_mae"]) | |
| ml_values.append(metrics["rain_amount"]["ml_mae"]) | |
| if labels: | |
| fig.add_trace(go.Bar(x=labels, y=dmi_values, name="DMI", marker_color="red"), row=2, col=2) | |
| fig.add_trace(go.Bar(x=labels, y=ml_values, name="ML", marker_color="green"), row=2, col=2) | |
| fig.update_yaxes(title_text="Absolute Error", row=1, col=1) | |
| fig.update_yaxes(title_text="Absolute Error", row=1, col=2) | |
| fig.update_yaxes(title_text="Probability", row=2, col=1, range=[-0.05, 1.05]) | |
| fig.update_layout(height=680, template="plotly_white", hovermode="x unified") | |
| add_now_marker(fig) | |
| return fig | |
def build_future_table(future_df):
    """Format the next-48h predictions frame for display in the Gradio table.

    Timestamps are rendered as strings, rain probabilities are presented as
    percentages (ML is stored 0-1, DMI already 0-100), and only the known
    display columns are kept, in a fixed order, rounded to two decimals.
    Returns a placeholder table when there are no future rows.
    """
    if future_df is None or len(future_df) == 0:
        return placeholder_table("No future predictions available.")
    table = future_df.copy()
    table["target_timestamp"] = table["target_timestamp"].dt.strftime("%Y-%m-%d %H:%M")
    # Normalise both probability sources to a 0-100 percent scale.
    if "ml_rain_prob" in table.columns:
        table["ml_rain_prob_pct"] = table["ml_rain_prob"] * 100
    if "dmi_precipitation_probability_pred" in table.columns:
        table["dmi_rain_prob_pct"] = table["dmi_precipitation_probability_pred"]
    preferred_order = (
        "target_timestamp",
        "lead_time_hours",
        "dmi_temperature_2m_pred",
        "ml_temp",
        "dmi_windspeed_10m_pred",
        "ml_wind_speed",
        "dmi_windgusts_10m_pred",
        "ml_wind_gust",
        "dmi_rain_prob_pct",
        "ml_rain_prob_pct",
        "dmi_precipitation_pred",
        "ml_rain_amount",
    )
    # Keep only columns that actually exist, preserving the preferred order.
    kept = [name for name in preferred_order if name in table.columns]
    return table[kept].round(2)
def refresh_dashboard(force=False):
    """Produce the full set of dashboard outputs.

    Parameters
    ----------
    force : bool
        When True, bypass the payload cache and reload from source.

    Returns a 7-tuple in the exact order of the Gradio ``outputs`` list:
    status markdown, temperature/wind/rain/performance plots, metrics
    markdown, and the 48-hour predictions table.  Any exception is caught
    at this UI boundary, recorded on APP_STATE, and replaced by placeholder
    outputs so the page still renders.
    """
    try:
        payload = load_dashboard_payload(force=force)
        upcoming = payload["future"]
        past = payload["history"]
        scores = calculate_metrics(past)
        status = build_status_text()
        return (
            status,
            create_temperature_plot(past, upcoming),
            create_wind_plot(past, upcoming),
            create_rain_plot(past, upcoming),
            create_performance_plot(past, scores),
            build_metrics_text(scores),
            build_future_table(upcoming),
        )
    except Exception as problem:
        # Boundary handler: record the failure and degrade gracefully.
        log_exception("refresh_dashboard", problem)
        with APP_STATE.lock:
            APP_STATE.last_error = str(problem)
            APP_STATE.warming = False
        return (
            build_status_text(),
            None,
            None,
            None,
            None,
            f"Loading failed: {problem}",
            placeholder_table("Refresh failed."),
        )
def warm_cache_after_startup():
    """Pre-populate the dashboard payload cache from a background thread.

    Sleeps WARMUP_DELAY_SECONDS first (letting the web server finish
    booting), then forces a fresh payload load.  Failures are logged and
    recorded on APP_STATE rather than allowed to escape the thread.
    """
    time.sleep(WARMUP_DELAY_SECONDS)
    try:
        load_dashboard_payload(force=True)
        log_event("warm_cache_after_startup completed")
    except Exception as problem:
        log_exception("warm_cache_after_startup", problem)
        with APP_STATE.lock:
            APP_STATE.last_error = str(problem)
            APP_STATE.warming = False
# --- UI construction (top level, runs at module import time) ---
log_event("bootstrap_begin")
with gr.Blocks(title="Aarhus Weather Dashboard") as demo:
    gr.Markdown(
        """
        # DMI vs ML Dashboard - Aarhus
        Each tab shows the latest 7 days of backtest data plus the next 48 hours of live forecast.
        Historical charts use holdout data with actual observations so model performance is visible directly on the graphs.
        """
    )
    # Status line refreshed together with the plots on every update.
    dashboard_status = gr.Markdown(build_status_text())
    with gr.Tabs():
        with gr.Tab("Temperature"):
            temp_plot = gr.Plot(label="Temperature")
        with gr.Tab("Wind"):
            wind_plot = gr.Plot(label="Wind")
        with gr.Tab("Rain"):
            rain_plot = gr.Plot(label="Rain")
        with gr.Tab("Performance"):
            metrics_text = gr.Markdown("No data loaded yet.")
            perf_plot = gr.Plot(label="Performance Analysis")
    future_table = gr.DataFrame(label="Next 48 Hours")
    global_refresh = gr.Button("Refresh All", variant="primary")
    # Order must match the 7-tuple returned by refresh_dashboard().
    outputs = [dashboard_status, temp_plot, wind_plot, rain_plot, perf_plot, metrics_text, future_table]
    # Manual refresh bypasses the payload cache; the initial page load uses it.
    global_refresh.click(lambda: refresh_dashboard(force=True), outputs=outputs)
    demo.load(lambda: refresh_dashboard(force=False), outputs=outputs)
log_event("ui_constructed")
if __name__ == "__main__":
    # Kick off cache warm-up on a daemon thread so the first page view is not
    # blocked by the initial data load (load_dashboard_payload(force=True)).
    threading.Thread(target=warm_cache_after_startup, daemon=True, name="dashboard-warmup").start()
    log_event("gradio_launch_called", server_name="0.0.0.0", server_port=7860)
    # 0.0.0.0:7860 is the conventional bind address/port for a hosted Space.
    demo.launch(server_name="0.0.0.0", server_port=7860)