# Fix dataset path resolution on HF Space (commit 2cb99f9, author: Ciroc0)
import importlib
import os
import threading
import time
import traceback
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from pathlib import Path
from zoneinfo import ZoneInfo

import gradio as gr
from huggingface_hub import hf_hub_download
# HF Hub dataset repos: raw weather/training data and precomputed predictions.
DATASET_NAME = "Ciroc0/dmi-aarhus-weather-data"
PREDICTIONS_DATASET = "Ciroc0/dmi-aarhus-predictions"
# Optional token for private-repo access; None falls back to anonymous access.
HF_TOKEN = os.environ.get("HF_TOKEN")
# All user-facing timestamps are rendered in Danish local time.
COPENHAGEN_TZ = ZoneInfo("Europe/Copenhagen")
APP_NAME = "dmi-vs-ml-dashboard"
# How long a loaded payload is served from cache before a reload is triggered.
CACHE_TTL_SECONDS = 300
# Delay before the background warmup thread starts its heavy downloads.
WARMUP_DELAY_SECONDS = 15
# Backtest window (days back) and forecast window (hours ahead) shown in the UI.
HISTORY_WINDOW_DAYS = 7
FUTURE_WINDOW_HOURS = 48
# Per-target model bundle filenames inside the weather-data dataset repo.
MODEL_FILES = {
    "temperature": "temperature_models.pkl",
    "wind_speed": "wind_speed_models.pkl",
    "wind_gust": "wind_gust_models.pkl",
    "rain_event": "rain_event_models.pkl",
    "rain_amount": "rain_amount_models.pkl",
}
class LazyModule:
    """Import-on-first-use proxy for a heavyweight module.

    The real module is imported the first time any attribute is looked up,
    which keeps process startup fast when the module is never touched.
    """

    def __init__(self, module_name):
        self.module_name = module_name
        self._module = None

    def _load(self):
        # Import lazily and memoize; subsequent calls are a cheap attribute read.
        module = self._module
        if module is None:
            module = importlib.import_module(self.module_name)
            self._module = module
        return module

    def __getattr__(self, item):
        # Only invoked for attributes missing on the proxy itself, so every
        # real module attribute is forwarded to the loaded module.
        return getattr(self._load(), item)
# Heavy libraries are loaded lazily so the app boots quickly; the first
# attribute access on these proxies triggers the real import (see LazyModule).
pd = LazyModule("pandas")
np = LazyModule("numpy")
joblib = LazyModule("joblib")
@dataclass
class AppState:
    """Process-wide mutable state shared between Gradio callbacks and the
    warmup thread; all reads and writes are guarded by ``lock``."""

    lock: threading.Lock = field(default_factory=threading.Lock)
    warming: bool = True  # True while an initial or forced load is in flight
    ready: bool = False  # set once a payload has loaded successfully
    last_error: str | None = None  # fatal load error shown in the status line
    last_warning: str | None = None  # partial failure (e.g. live predictions missing)
    cache_loaded_at: datetime | None = None  # when cached_payload was built
    cache_expires_at: datetime | None = None  # cached_payload is valid until then
    cached_payload: dict | None = None  # {"future": df|None, "history": df|None}


# Single shared instance used by every handler in this module.
APP_STATE = AppState()
def log_event(message, **fields):
    """Print a structured, timestamped log line for the app.

    Extra keyword arguments are rendered as sorted ``key=value`` pairs so
    log lines are stable and grep-friendly.
    """
    # Fix: datetime.utcnow() is deprecated (Python 3.12+) and returns a naive
    # datetime; use an aware UTC time and format it explicitly with a "Z".
    # The rendered format is unchanged: YYYY-MM-DDTHH:MM:SSZ.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S") + "Z"
    details = " ".join(f"{key}={fields[key]!r}" for key in sorted(fields))
    if details:
        print(f"[{APP_NAME}] {timestamp} {message} {details}", flush=True)
    else:
        print(f"[{APP_NAME}] {timestamp} {message}", flush=True)
def log_exception(context, exc):
    """Log a structured failure event for *context*, then dump the traceback."""
    log_event(
        f"{context} failed",
        error=str(exc),
        error_type=type(exc).__name__,
    )
    # The full traceback goes out as a separate plain print for readability.
    print(traceback.format_exc(), flush=True)
def now_cph():
    """Return the current wall-clock time in Copenhagen (tz-aware)."""
    return datetime.now(tz=COPENHAGEN_TZ)
def build_status_text():
    """Render the one-line status string shown at the top of the dashboard.

    Precedence: hard failure > partial warning > warming > ready states.
    """
    state = APP_STATE
    with state.lock:
        if state.last_error:
            return f"Status: failed. {state.last_error}"
        if state.last_warning:
            return f"Status: partial. {state.last_warning}"
        if state.warming:
            return "Status: loading forecast and backtest data."
        if state.cache_loaded_at is None:
            return "Status: ready. No data loaded yet."
        loaded = state.cache_loaded_at.strftime("%Y-%m-%d %H:%M:%S")
        return f"Status: ready. Cache loaded at {loaded}."
def placeholder_table(message):
    """Return a one-row DataFrame carrying *message* in a ``status`` column."""
    return pd.DataFrame({"status": [message]})
def ensure_copenhagen_time(df, column_name):
    """Coerce *column_name* in *df* to timezone-aware Copenhagen timestamps.

    Returns *df* unchanged when it is None or lacks the column. Note: the
    column is assigned in place on the frame that was passed in.
    """
    if df is None or column_name not in df.columns:
        return df
    # errors="coerce" turns unparseable values into NaT instead of raising.
    series = pd.to_datetime(df[column_name], errors="coerce")
    if getattr(series.dt, "tz", None) is None:
        # Naive timestamps are assumed to already be Copenhagen wall-clock
        # time (TODO confirm against the dataset producer). DST transitions
        # are handled rather than raised: overlaps are inferred from
        # ordering, gaps are shifted forward.
        df[column_name] = series.dt.tz_localize(
            COPENHAGEN_TZ,
            ambiguous="infer",
            nonexistent="shift_forward",
        )
    else:
        df[column_name] = series.dt.tz_convert(COPENHAGEN_TZ)
    return df
def dataset_local_candidates(dataset_slug, filename):
    """Return the local filesystem paths probed before hitting the HF Hub.

    Checked in order: next to this file, an optional checkout two levels up
    under ``datasets/<slug>/``, the working directory, and an
    ``hf/datasets/<slug>/`` layout under the working directory.
    """
    here = Path(__file__).resolve()
    cwd = Path.cwd()
    candidates = [here.parent / filename]
    # A dataset checkout two levels above this file (typical HF Space layout);
    # skipped when the path is too shallow to have that many ancestors.
    if len(here.parents) >= 3:
        candidates.append(here.parents[2] / "datasets" / dataset_slug / filename)
    candidates.append(cwd / filename)
    candidates.append(cwd / "hf" / "datasets" / dataset_slug / filename)
    return candidates
def resolve_dataset_file(repo_id, dataset_slug, filename):
    """Return a readable path for *filename*, preferring local copies.

    Falls back to downloading from the HF dataset repo when no local
    candidate exists on disk.
    """
    local_copy = next(
        (
            candidate
            for candidate in dataset_local_candidates(dataset_slug, filename)
            if candidate.exists()
        ),
        None,
    )
    if local_copy is not None:
        return str(local_copy)
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="dataset",
        token=HF_TOKEN,
    )
def summarize_attempt_errors(filename_errors):
    """Format (filename, exception) pairs into one human-readable summary.

    Used to build informative FileNotFoundError messages after every
    candidate file has failed to load.
    """
    parts = []
    for filename, exc in filename_errors:
        # Bug fix: the filename was previously dropped and every entry was
        # rendered as "(unknown)", making the summary useless for debugging.
        parts.append(f"{filename}: {type(exc).__name__}: {exc}")
    return "; ".join(parts)
def load_first_available_prediction_file():
    """Return a path to the first predictions parquet that can be resolved.

    Tries the "latest" filename first, then the legacy one; raises
    FileNotFoundError carrying a per-file error summary when both fail.
    """
    attempts = []
    for candidate_name in ("predictions_latest.parquet", "predictions.parquet"):
        try:
            return resolve_dataset_file(PREDICTIONS_DATASET, "dmi-aarhus-predictions", candidate_name)
        except Exception as exc:
            attempts.append((candidate_name, exc))
    raise FileNotFoundError(
        "No predictions parquet found in dataset. Attempts: "
        + summarize_attempt_errors(attempts)
    )
def normalize_predictions(df):
    """Canonicalize a raw predictions frame: tz-aware times, booleans, dedupe.

    Returns ``None`` for an empty or missing frame. When several predictions
    exist for the same target hour, the most recently made one is kept.
    """
    if df is None or len(df) == 0:
        return None
    # Older prediction files used "timestamp" for the target hour.
    if "timestamp" in df.columns and "target_timestamp" not in df.columns:
        df = df.rename(columns={"timestamp": "target_timestamp"})
    for time_column in ("target_timestamp", "reference_time", "prediction_made_at"):
        df = ensure_copenhagen_time(df, time_column)
    if "verified" not in df.columns:
        df["verified"] = False
    df["verified"] = df["verified"].fillna(False).astype(bool)
    preferred_order = ["target_timestamp", "prediction_made_at", "reference_time"]
    sort_keys = [name for name in preferred_order if name in df.columns]
    if sort_keys:
        df = df.sort_values(sort_keys)
    if "target_timestamp" in df.columns:
        # keep="last" retains the newest prediction per target hour.
        df = df.drop_duplicates(subset=["target_timestamp"], keep="last")
    return df.reset_index(drop=True)
def load_prediction_frame():
    """Locate the predictions parquet, load it, and return it normalized."""
    parquet_path = load_first_available_prediction_file()
    return normalize_predictions(pd.read_parquet(parquet_path))
def normalize_training_matrix(df):
    """Canonicalize the training matrix: rename legacy columns, localize times.

    Returns ``None`` for an empty or missing frame. Legacy short column names
    (e.g. ``dmi_temp_pred``) are mapped to the current long names, but only
    when the current name is not already present.
    """
    if df is None or len(df) == 0:
        return None
    legacy_to_current = {
        "timestamp": "target_timestamp",
        "dmi_temp_pred": "dmi_temperature_2m_pred",
        "dmi_wind_pred": "dmi_windspeed_10m_pred",
        "dmi_pressure_pred": "dmi_pressure_msl_pred",
        "dmi_humidity_pred": "dmi_relative_humidity_2m_pred",
        "actual_wind": "actual_wind_speed",
    }
    rename_map = {
        old: new
        for old, new in legacy_to_current.items()
        if old in df.columns and new not in df.columns
    }
    if rename_map:
        df = df.rename(columns=rename_map)
    df = ensure_copenhagen_time(df, "target_timestamp")
    df = ensure_copenhagen_time(df, "reference_time")
    # Robustness fix: only sort by columns that actually exist. Previously a
    # frame without "reference_time" (or "target_timestamp") raised KeyError
    # here, while the sibling normalize_predictions() already guards its sort.
    sort_keys = [name for name in ("target_timestamp", "reference_time") if name in df.columns]
    if sort_keys:
        df = df.sort_values(sort_keys)
    return df.reset_index(drop=True)
def load_training_matrix():
    """Load the training matrix parquet, trying current then legacy filenames.

    Raises FileNotFoundError carrying a per-file error summary if none load.
    """
    attempts = []
    for candidate_name in ("training_matrix.parquet", "data.parquet"):
        try:
            parquet_path = resolve_dataset_file(DATASET_NAME, "dmi-aarhus-weather-data", candidate_name)
            return normalize_training_matrix(pd.read_parquet(parquet_path))
        except Exception as exc:
            attempts.append((candidate_name, exc))
    raise FileNotFoundError(
        "No training matrix parquet found in dataset. Attempts: "
        + summarize_attempt_errors(attempts)
    )
def load_model_bundle(target_name):
    """Resolve and unpickle the model bundle for *target_name*.

    The filename is looked up in MODEL_FILES; raises KeyError for an
    unknown target.
    """
    bundle_path = resolve_dataset_file(
        DATASET_NAME,
        "dmi-aarhus-weather-data",
        MODEL_FILES[target_name],
    )
    return joblib.load(bundle_path)
def predict_with_bundle(bundle, df):
    """Run per-lead-bucket models over *df*; return an array of predictions.

    Rows whose bucket has no usable model (or whose features are missing from
    *df*) stay NaN. Models exposing ``predict_proba`` are treated as binary
    classifiers and yield the positive-class probability.
    """
    if bundle is None or df is None or len(df) == 0 or "lead_bucket" not in df.columns:
        return None
    output = np.full(len(df), np.nan)
    models = bundle.get("models", {})
    for bucket in df["lead_bucket"].dropna().unique():
        if bucket not in models:
            continue
        model_info = models[bucket]
        model = model_info.get("model")
        # Per-bucket feature list wins; fall back to the bundle-wide list.
        feature_cols = model_info.get("feature_columns") or bundle.get("feature_columns", [])
        if model is None or not feature_cols:
            continue
        missing_cols = [name for name in feature_cols if name not in df.columns]
        if missing_cols:
            log_event("predict_with_bundle missing_features", bucket=bucket, missing_columns=missing_cols)
            continue
        row_mask = df["lead_bucket"] == bucket
        features = df.loc[row_mask, feature_cols].fillna(0.0)
        if hasattr(model, "predict_proba"):
            # Binary classifier: probability of the positive class.
            output[row_mask] = model.predict_proba(features)[:, 1]
        else:
            output[row_mask] = model.predict(features)
    return output
def build_historical_backtest(training_df):
    """Slice the last HISTORY_WINDOW_DAYS of the training matrix and attach
    ML predictions alongside the DMI baseline for each forecast target.

    Returns one row per target hour, or ``None`` when no usable history
    remains after windowing/filtering.
    """
    if training_df is None or len(training_df) == 0 or "target_timestamp" not in training_df.columns:
        return None
    current_time = now_cph()
    # Never look past "now" (or past the newest available row, if earlier).
    window_end = min(current_time, training_df["target_timestamp"].max())
    window_start = window_end - timedelta(days=HISTORY_WINDOW_DAYS)
    history = training_df[
        (training_df["target_timestamp"] >= window_start)
        & (training_df["target_timestamp"] <= window_end)
    ].copy()
    if len(history) == 0:
        return None
    if "lead_time_hours" in history.columns:
        # Keep genuine forecasts only: strictly positive lead time, capped
        # at the 48h forecast window (0.0001 excludes zero-lead rows).
        history = history[
            history["lead_time_hours"].fillna(0).between(0.0001, FUTURE_WINDOW_HOURS, inclusive="both")
        ].copy()
    if len(history) == 0:
        return None
    # Seed the ML columns with the DMI baseline so rows without model output
    # still plot something sensible.
    history["ml_temp"] = history["dmi_temperature_2m_pred"] if "dmi_temperature_2m_pred" in history.columns else np.nan
    history["ml_wind_speed"] = history["dmi_windspeed_10m_pred"] if "dmi_windspeed_10m_pred" in history.columns else np.nan
    history["ml_wind_gust"] = history["dmi_windgusts_10m_pred"] if "dmi_windgusts_10m_pred" in history.columns else np.nan
    if "dmi_precipitation_probability_pred" in history.columns:
        # DMI publishes percent; internal rain probability is 0..1.
        history["ml_rain_prob"] = (
            history["dmi_precipitation_probability_pred"].fillna(0.0).clip(0.0, 100.0) / 100.0
        )
    else:
        history["ml_rain_prob"] = 0.0
    if "dmi_precipitation_pred" in history.columns:
        history["ml_rain_amount"] = history["dmi_precipitation_pred"].fillna(0.0).clip(0.0, None)
    else:
        history["ml_rain_amount"] = 0.0
    # (target, output column, DMI baseline column, how model output is applied)
    bundle_specs = [
        ("temperature", "ml_temp", "dmi_temperature_2m_pred", "correction"),
        ("wind_speed", "ml_wind_speed", "dmi_windspeed_10m_pred", "correction"),
        ("wind_gust", "ml_wind_gust", "dmi_windgusts_10m_pred", "correction"),
        ("rain_event", "ml_rain_prob", None, "probability"),
        ("rain_amount", "ml_rain_amount", None, "absolute"),
    ]
    for target_name, output_column, baseline_column, prediction_kind in bundle_specs:
        try:
            bundle = load_model_bundle(target_name)
        except Exception as exc:
            # A missing/broken bundle downgrades gracefully to the baseline.
            log_event("load_model_bundle skipped", target=target_name, error=str(exc))
            continue
        predictions = predict_with_bundle(bundle, history)
        if predictions is None:
            continue
        prediction_series = pd.Series(predictions, index=history.index, dtype="float64")
        prediction_mask = prediction_series.notna()
        if not prediction_mask.any():
            continue
        if prediction_kind == "correction":
            # The model predicts a residual on top of the DMI forecast.
            history.loc[prediction_mask, output_column] = (
                history.loc[prediction_mask, baseline_column] + prediction_series[prediction_mask]
            )
        elif prediction_kind == "probability":
            history.loc[prediction_mask, output_column] = prediction_series[prediction_mask].clip(0.0, 1.0)
        else:
            history.loc[prediction_mask, output_column] = prediction_series[prediction_mask].clip(0.0, None)
    # Collapse to one row per target hour. NOTE(review): the descending sort
    # keeps the row with the LARGEST lead time (and latest reference time as
    # tiebreaker) per hour — confirm this pick is intended rather than the
    # shortest-lead/most-recent forecast.
    sort_columns = ["target_timestamp"]
    ascending = [True]
    if "lead_time_hours" in history.columns:
        sort_columns.append("lead_time_hours")
        ascending.append(False)
    if "reference_time" in history.columns:
        sort_columns.append("reference_time")
        ascending.append(False)
    history = history.sort_values(sort_columns, ascending=ascending)
    history = history.drop_duplicates(subset=["target_timestamp"], keep="first").reset_index(drop=True)
    return history
def load_dashboard_payload(force=False):
    """Return the cached {"future", "history"} payload, reloading when stale.

    With ``force=True`` the cache is bypassed. Missing live predictions are
    tolerated: a warning is recorded on APP_STATE and "future" stays None.
    Raises whatever load_training_matrix() raises on a hard failure.
    """
    current_time = now_cph()
    with APP_STATE.lock:
        cache_valid = (
            not force
            and APP_STATE.cached_payload is not None
            and APP_STATE.cache_expires_at is not None
            and APP_STATE.cache_expires_at > current_time
        )
        if cache_valid:
            return APP_STATE.cached_payload
        APP_STATE.warming = True
        APP_STATE.last_error = None
        APP_STATE.last_warning = None
    # NOTE(review): the heavy load below runs outside the lock, so two
    # concurrent callers past the cache check may both rebuild the payload
    # (last writer wins) — confirm this is acceptable.
    log_event("load_dashboard_payload started", force=force)
    training_df = load_training_matrix()
    history_df = build_historical_backtest(training_df)
    predictions_df = None
    prediction_warning = None
    try:
        predictions_df = load_prediction_frame()
    except Exception as exc:
        # Live predictions are optional; surface as a warning, not a failure.
        prediction_warning = f"Live predictions unavailable. {exc}"
        log_exception("load_prediction_frame", exc)
    future_df = None
    if predictions_df is not None and len(predictions_df) > 0:
        # Keep only the next FUTURE_WINDOW_HOURS of predictions, ordered.
        future_df = predictions_df[
            (predictions_df["target_timestamp"] > current_time)
            & (predictions_df["target_timestamp"] <= current_time + timedelta(hours=FUTURE_WINDOW_HOURS))
        ].copy()
        future_df = future_df.sort_values("target_timestamp").reset_index(drop=True)
    payload = {"future": future_df, "history": history_df}
    with APP_STATE.lock:
        APP_STATE.cached_payload = payload
        APP_STATE.cache_loaded_at = current_time
        APP_STATE.cache_expires_at = current_time + timedelta(seconds=CACHE_TTL_SECONDS)
        APP_STATE.warming = False
        APP_STATE.ready = True
        APP_STATE.last_error = None
        APP_STATE.last_warning = prediction_warning
    log_event(
        "load_dashboard_payload completed",
        future_rows=0 if future_df is None else len(future_df),
        history_rows=0 if history_df is None else len(history_df),
    )
    return payload
def add_now_marker(fig):
    """Draw a dotted gray vertical "now" line on *fig* at current CPH time."""
    fig.add_vline(
        x=now_cph(),
        line_width=1,
        line_dash="dot",
        line_color="gray",
    )
def create_temperature_plot(history_df, future_df):
    """Build the temperature figure: 7-day backtest plus 48-hour forecast.

    Returns ``None`` when neither frame has rows. ML traces are only drawn
    when the column holds at least one non-NaN value.
    """
    history_empty = history_df is None or len(history_df) == 0
    future_empty = future_df is None or len(future_df) == 0
    if history_empty and future_empty:
        return None
    go = importlib.import_module("plotly.graph_objects")
    fig = go.Figure()
    # (frame, column, trace name, line style, mode, require non-NaN values)
    trace_specs = [
        (history_df, "actual_temp", "Actual Temperature", dict(color="black", width=2), "lines", False),
        (history_df, "dmi_temperature_2m_pred", "DMI Backtest", dict(color="red", width=2), "lines", False),
        (history_df, "ml_temp", "ML Backtest", dict(color="green", width=2), "lines", True),
        (future_df, "dmi_temperature_2m_pred", "DMI Forecast", dict(color="red", width=2, dash="dash"), "lines+markers", False),
        (future_df, "ml_temp", "ML Forecast", dict(color="green", width=2, dash="dash"), "lines+markers", True),
    ]
    for frame, column, label, line_style, mode, needs_values in trace_specs:
        if frame is None or len(frame) == 0 or column not in frame.columns:
            continue
        if needs_values and not frame[column].notna().any():
            continue
        fig.add_trace(
            go.Scatter(
                x=frame["target_timestamp"],
                y=frame[column],
                name=label,
                line=dict(line_style),
                mode=mode,
            )
        )
    fig.update_layout(
        title="Temperature - Last 7 days backtest and next 48 hours forecast",
        xaxis_title="Time (Danish)",
        yaxis_title="Temperature (C)",
        height=420,
        template="plotly_white",
        hovermode="x unified",
    )
    add_now_marker(fig)
    return fig
def create_wind_plot(history_df, future_df):
    """Build the wind figure: speed and gust, backtest plus forecast.

    Returns ``None`` when neither frame has rows. ML traces are only drawn
    when the column holds at least one non-NaN value.
    """
    history_empty = history_df is None or len(history_df) == 0
    future_empty = future_df is None or len(future_df) == 0
    if history_empty and future_empty:
        return None
    go = importlib.import_module("plotly.graph_objects")
    fig = go.Figure()
    # (frame, column, trace name, line style, mode, require non-NaN values)
    trace_specs = [
        (history_df, "actual_wind_speed", "Actual Wind Speed", dict(color="black", width=2), "lines", False),
        (history_df, "actual_wind_gust", "Actual Wind Gust", dict(color="gray", width=1, dash="dot"), "lines", False),
        (history_df, "dmi_windspeed_10m_pred", "DMI Wind Speed Backtest", dict(color="blue", width=2), "lines", False),
        (history_df, "ml_wind_speed", "ML Wind Speed Backtest", dict(color="green", width=2), "lines", True),
        (history_df, "dmi_windgusts_10m_pred", "DMI Wind Gust Backtest", dict(color="orange", width=2, dash="dash"), "lines", False),
        (history_df, "ml_wind_gust", "ML Wind Gust Backtest", dict(color="darkgreen", width=2, dash="dash"), "lines", True),
        (future_df, "dmi_windspeed_10m_pred", "DMI Wind Speed Forecast", dict(color="blue", width=2, dash="dot"), "lines+markers", False),
        (future_df, "ml_wind_speed", "ML Wind Speed Forecast", dict(color="green", width=2, dash="dot"), "lines+markers", True),
        (future_df, "dmi_windgusts_10m_pred", "DMI Wind Gust Forecast", dict(color="orange", width=2, dash="dashdot"), "lines+markers", False),
        (future_df, "ml_wind_gust", "ML Wind Gust Forecast", dict(color="darkgreen", width=2, dash="dashdot"), "lines+markers", True),
    ]
    for frame, column, label, line_style, mode, needs_values in trace_specs:
        if frame is None or len(frame) == 0 or column not in frame.columns:
            continue
        if needs_values and not frame[column].notna().any():
            continue
        fig.add_trace(
            go.Scatter(
                x=frame["target_timestamp"],
                y=frame[column],
                name=label,
                line=dict(line_style),
                mode=mode,
            )
        )
    fig.update_layout(
        title="Wind - Last 7 days backtest and next 48 hours forecast",
        xaxis_title="Time (Danish)",
        yaxis_title="Wind Speed / Gust (m/s)",
        height=460,
        template="plotly_white",
        hovermode="x unified",
    )
    add_now_marker(fig)
    return fig
def create_rain_plot(history_df, future_df):
    """Build the rain figure: probability (left axis) and amount (right axis).

    The observed rain amount is drawn as translucent bars; probability and
    amount traces are split across the two y-axes of a secondary-y subplot.
    """
    history_empty = history_df is None or len(history_df) == 0
    future_empty = future_df is None or len(future_df) == 0
    if history_empty and future_empty:
        return None
    go = importlib.import_module("plotly.graph_objects")
    make_subplots = importlib.import_module("plotly.subplots").make_subplots
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    if not history_empty and "actual_precipitation" in history_df.columns:
        fig.add_trace(
            go.Bar(
                x=history_df["target_timestamp"],
                y=history_df["actual_precipitation"].fillna(0.0),
                name="Actual Rain Amount",
                marker_color="lightgray",
                opacity=0.45,
            ),
            secondary_y=True,
        )

    def identity(series):
        return series

    def to_percent(series):
        # ML probability is 0..1; the probability axis runs 0..100.
        return series * 100

    def zero_filled(series):
        return series.fillna(0.0)

    # (frame, column, name, line style, mode, needs values, y-transform,
    #  plotted on the secondary (amount) axis)
    line_specs = [
        (history_df, "dmi_precipitation_probability_pred", "DMI Rain Probability Backtest", dict(color="blue", width=2), "lines", False, identity, False),
        (history_df, "ml_rain_prob", "ML Rain Probability Backtest", dict(color="green", width=2), "lines", True, to_percent, False),
        (history_df, "dmi_precipitation_pred", "DMI Rain Amount Backtest", dict(color="orange", width=2), "lines", False, zero_filled, True),
        (history_df, "ml_rain_amount", "ML Rain Amount Backtest", dict(color="darkgreen", width=2), "lines", True, zero_filled, True),
        (future_df, "dmi_precipitation_probability_pred", "DMI Rain Probability Forecast", dict(color="blue", width=2, dash="dash"), "lines+markers", False, identity, False),
        (future_df, "ml_rain_prob", "ML Rain Probability Forecast", dict(color="green", width=2, dash="dash"), "lines+markers", True, to_percent, False),
        (future_df, "dmi_precipitation_pred", "DMI Rain Amount Forecast", dict(color="orange", width=2, dash="dot"), "lines+markers", False, zero_filled, True),
        (future_df, "ml_rain_amount", "ML Rain Amount Forecast", dict(color="darkgreen", width=2, dash="dot"), "lines+markers", True, zero_filled, True),
    ]
    for frame, column, label, line_style, mode, needs_values, transform, on_secondary in line_specs:
        if frame is None or len(frame) == 0 or column not in frame.columns:
            continue
        if needs_values and not frame[column].notna().any():
            continue
        fig.add_trace(
            go.Scatter(
                x=frame["target_timestamp"],
                y=transform(frame[column]),
                name=label,
                line=dict(line_style),
                mode=mode,
            ),
            secondary_y=on_secondary,
        )
    fig.update_layout(
        title="Rain - Last 7 days backtest and next 48 hours forecast",
        xaxis_title="Time (Danish)",
        template="plotly_white",
        height=460,
        hovermode="x unified",
    )
    fig.update_yaxes(title_text="Probability (%)", secondary_y=False, range=[0, 100])
    fig.update_yaxes(title_text="Amount (mm)", secondary_y=True)
    add_now_marker(fig)
    return fig
def calculate_metrics(history_df):
    """Compute DMI-vs-ML error metrics over the historical backtest frame.

    Returns a dict keyed by target ("temp", "wind_speed", "wind_gust",
    "rain_event", "rain_amount"). Each entry holds RMSE/MAE or Brier/accuracy
    numbers plus, where applicable, the percentage improvement of ML over DMI.
    """
    if history_df is None or len(history_df) == 0:
        return {}

    def paired_errors(actual_column, dmi_column, ml_column):
        # Align DMI and ML errors on rows where the observation exists.
        actual = history_df[actual_column].dropna()
        if len(actual) == 0:
            return None, None
        aligned = history_df.loc[actual.index]
        return (
            aligned[actual_column] - aligned[dmi_column],
            aligned[actual_column] - aligned[ml_column],
        )

    def improvement_pct(dmi_value, ml_value):
        # Positive means ML beat DMI; guarded against division by zero.
        return ((dmi_value - ml_value) / dmi_value) * 100 if dmi_value > 0 else 0.0

    metrics = {}
    if {"actual_temp", "dmi_temperature_2m_pred", "ml_temp"}.issubset(history_df.columns):
        dmi_error, ml_error = paired_errors("actual_temp", "dmi_temperature_2m_pred", "ml_temp")
        if dmi_error is not None:
            dmi_rmse = float(np.sqrt(np.mean(dmi_error**2)))
            ml_rmse = float(np.sqrt(np.mean(ml_error**2)))
            # Temperature improvement is judged on RMSE.
            metrics["temp"] = {
                "dmi_rmse": dmi_rmse,
                "ml_rmse": ml_rmse,
                "dmi_mae": float(np.mean(np.abs(dmi_error))),
                "ml_mae": float(np.mean(np.abs(ml_error))),
                "improvement": improvement_pct(dmi_rmse, ml_rmse),
            }
    wind_targets = (
        ("wind_speed", ("actual_wind_speed", "dmi_windspeed_10m_pred", "ml_wind_speed")),
        ("wind_gust", ("actual_wind_gust", "dmi_windgusts_10m_pred", "ml_wind_gust")),
    )
    for metric_key, columns in wind_targets:
        if not set(columns).issubset(history_df.columns):
            continue
        dmi_error, ml_error = paired_errors(*columns)
        if dmi_error is None:
            continue
        dmi_mae = float(np.mean(np.abs(dmi_error)))
        ml_mae = float(np.mean(np.abs(ml_error)))
        # Wind improvements are judged on MAE.
        metrics[metric_key] = {
            "dmi_rmse": float(np.sqrt(np.mean(dmi_error**2))),
            "ml_rmse": float(np.sqrt(np.mean(ml_error**2))),
            "dmi_mae": dmi_mae,
            "ml_mae": ml_mae,
            "improvement": improvement_pct(dmi_mae, ml_mae),
        }
    if "actual_precipitation" in history_df.columns:
        actual_amount = history_df["actual_precipitation"].fillna(0.0)
        # A rain "event" is more than 0.1 mm of observed precipitation.
        actual_event = (actual_amount > 0.1).astype(int)
        if {"dmi_precipitation_probability_pred", "ml_rain_prob"}.issubset(history_df.columns):
            # DMI publishes percent; ML is already a 0..1 probability.
            dmi_prob = history_df["dmi_precipitation_probability_pred"].fillna(0.0).clip(0.0, 100.0) / 100.0
            ml_prob = history_df["ml_rain_prob"].fillna(0.0).clip(0.0, 1.0)
            metrics["rain_event"] = {
                "dmi_brier": float(np.mean((actual_event - dmi_prob) ** 2)),
                "ml_brier": float(np.mean((actual_event - ml_prob) ** 2)),
                "dmi_accuracy": float(np.mean((dmi_prob >= 0.5).astype(int) == actual_event)),
                "ml_accuracy": float(np.mean((ml_prob >= 0.5).astype(int) == actual_event)),
            }
        if {"dmi_precipitation_pred", "ml_rain_amount"}.issubset(history_df.columns):
            dmi_amount = history_df["dmi_precipitation_pred"].fillna(0.0).clip(0.0, None)
            ml_amount = history_df["ml_rain_amount"].fillna(0.0).clip(0.0, None)
            dmi_mae = float(np.mean(np.abs(actual_amount - dmi_amount)))
            ml_mae = float(np.mean(np.abs(actual_amount - ml_amount)))
            metrics["rain_amount"] = {
                "dmi_mae": dmi_mae,
                "ml_mae": ml_mae,
                "improvement": improvement_pct(dmi_mae, ml_mae),
            }
    return metrics
def build_metrics_text(metrics):
    """Render the metrics dict as a Markdown summary, one paragraph per target."""
    if not metrics:
        return "No historical backtest data available yet."
    # Dict order doubles as display order; keys absent from metrics are skipped.
    formatters = {
        "temp": lambda m: (
            f"**Temperature:** DMI RMSE={m['dmi_rmse']:.2f}C, "
            f"ML RMSE={m['ml_rmse']:.2f}C, Improvement={m['improvement']:+.1f}%"
        ),
        "wind_speed": lambda m: (
            f"**Wind Speed:** DMI MAE={m['dmi_mae']:.2f}m/s, "
            f"ML MAE={m['ml_mae']:.2f}m/s, Improvement={m['improvement']:+.1f}%"
        ),
        "wind_gust": lambda m: (
            f"**Wind Gust:** DMI MAE={m['dmi_mae']:.2f}m/s, "
            f"ML MAE={m['ml_mae']:.2f}m/s, Improvement={m['improvement']:+.1f}%"
        ),
        "rain_event": lambda m: (
            f"**Rain Event:** DMI Brier={m['dmi_brier']:.3f}, ML Brier={m['ml_brier']:.3f}, "
            f"DMI accuracy={m['dmi_accuracy']:.1%}, ML accuracy={m['ml_accuracy']:.1%}"
        ),
        "rain_amount": lambda m: (
            f"**Rain Amount:** DMI MAE={m['dmi_mae']:.2f}mm, "
            f"ML MAE={m['ml_mae']:.2f}mm, Improvement={m['improvement']:+.1f}%"
        ),
    }
    parts = [render(metrics[key]) for key, render in formatters.items() if key in metrics]
    return "\n\n".join(parts)
def create_performance_plot(history_df, metrics):
    """Build the 2x2 performance figure.

    Panels: (1,1) absolute temperature error over time, (1,2) absolute wind
    speed/gust errors, (2,1) observed rain events vs predicted probabilities,
    (2,2) DMI-vs-ML summary bars built from *metrics*.
    Returns ``None`` when there is no historical backtest data.
    """
    if history_df is None or len(history_df) == 0:
        return None
    go = importlib.import_module("plotly.graph_objects")
    make_subplots = importlib.import_module("plotly.subplots").make_subplots
    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=("Temperature Error", "Wind Error", "Rain Event Probability", "Overall Metrics"),
        specs=[[{}, {}], [{}, {}]],
    )
    # Top-left: absolute temperature errors.
    if {"actual_temp", "dmi_temperature_2m_pred", "ml_temp"}.issubset(history_df.columns):
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_temp"] - history_df["dmi_temperature_2m_pred"]),
                name="DMI Temp Error",
                line=dict(color="red"),
            ),
            row=1,
            col=1,
        )
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_temp"] - history_df["ml_temp"]),
                name="ML Temp Error",
                line=dict(color="green"),
            ),
            row=1,
            col=1,
        )
    # Top-right: absolute wind speed errors.
    if {"actual_wind_speed", "dmi_windspeed_10m_pred", "ml_wind_speed"}.issubset(history_df.columns):
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_wind_speed"] - history_df["dmi_windspeed_10m_pred"]),
                name="DMI Wind Speed Error",
                line=dict(color="blue"),
            ),
            row=1,
            col=2,
        )
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_wind_speed"] - history_df["ml_wind_speed"]),
                name="ML Wind Speed Error",
                line=dict(color="green"),
            ),
            row=1,
            col=2,
        )
    # Top-right (same panel): absolute wind gust errors, dashed.
    if {"actual_wind_gust", "dmi_windgusts_10m_pred", "ml_wind_gust"}.issubset(history_df.columns):
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_wind_gust"] - history_df["dmi_windgusts_10m_pred"]),
                name="DMI Wind Gust Error",
                line=dict(color="orange", dash="dash"),
            ),
            row=1,
            col=2,
        )
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_wind_gust"] - history_df["ml_wind_gust"]),
                name="ML Wind Gust Error",
                line=dict(color="darkgreen", dash="dash"),
            ),
            row=1,
            col=2,
        )
    # Bottom-left: 0/1 observed rain events against predicted probabilities.
    if "actual_precipitation" in history_df.columns:
        actual_event = (history_df["actual_precipitation"].fillna(0.0) > 0.1).astype(int)
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=actual_event,
                name="Actual Rain Event",
                line=dict(color="black"),
            ),
            row=2,
            col=1,
        )
    if "dmi_precipitation_probability_pred" in history_df.columns:
        # DMI publishes percent; plot as a 0..1 probability.
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=history_df["dmi_precipitation_probability_pred"].fillna(0.0).clip(0.0, 100.0) / 100.0,
                name="DMI Rain Probability",
                line=dict(color="blue"),
            ),
            row=2,
            col=1,
        )
    if "ml_rain_prob" in history_df.columns:
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=history_df["ml_rain_prob"].fillna(0.0).clip(0.0, 1.0),
                name="ML Rain Probability",
                line=dict(color="green"),
            ),
            row=2,
            col=1,
        )
    # Bottom-right: one paired bar per available summary metric.
    labels = []
    dmi_values = []
    ml_values = []
    if "temp" in metrics:
        labels.append("Temp RMSE")
        dmi_values.append(metrics["temp"]["dmi_rmse"])
        ml_values.append(metrics["temp"]["ml_rmse"])
    if "wind_speed" in metrics:
        labels.append("Wind Speed MAE")
        dmi_values.append(metrics["wind_speed"]["dmi_mae"])
        ml_values.append(metrics["wind_speed"]["ml_mae"])
    if "wind_gust" in metrics:
        labels.append("Wind Gust MAE")
        dmi_values.append(metrics["wind_gust"]["dmi_mae"])
        ml_values.append(metrics["wind_gust"]["ml_mae"])
    if "rain_event" in metrics:
        labels.append("Rain Brier")
        dmi_values.append(metrics["rain_event"]["dmi_brier"])
        ml_values.append(metrics["rain_event"]["ml_brier"])
    if "rain_amount" in metrics:
        labels.append("Rain Amount MAE")
        dmi_values.append(metrics["rain_amount"]["dmi_mae"])
        ml_values.append(metrics["rain_amount"]["ml_mae"])
    if labels:
        fig.add_trace(go.Bar(x=labels, y=dmi_values, name="DMI", marker_color="red"), row=2, col=2)
        fig.add_trace(go.Bar(x=labels, y=ml_values, name="ML", marker_color="green"), row=2, col=2)
    fig.update_yaxes(title_text="Absolute Error", row=1, col=1)
    fig.update_yaxes(title_text="Absolute Error", row=1, col=2)
    fig.update_yaxes(title_text="Probability", row=2, col=1, range=[-0.05, 1.05])
    fig.update_layout(height=680, template="plotly_white", hovermode="x unified")
    add_now_marker(fig)
    return fig
def build_future_table(future_df):
    """Format the next-48h predictions frame for the Gradio data table.

    Timestamps are rendered as local wall-clock strings, the ML probability
    is converted to percent, and only the known display columns are kept,
    rounded to two decimals. Empty input yields a placeholder table.
    """
    if future_df is None or len(future_df) == 0:
        return placeholder_table("No future predictions available.")
    table = future_df.copy()
    table["target_timestamp"] = table["target_timestamp"].dt.strftime("%Y-%m-%d %H:%M")
    if "ml_rain_prob" in table.columns:
        # ML probability is 0..1; show percent like the DMI column.
        table["ml_rain_prob_pct"] = table["ml_rain_prob"] * 100
    if "dmi_precipitation_probability_pred" in table.columns:
        table["dmi_rain_prob_pct"] = table["dmi_precipitation_probability_pred"]
    preferred_columns = [
        "target_timestamp",
        "lead_time_hours",
        "dmi_temperature_2m_pred",
        "ml_temp",
        "dmi_windspeed_10m_pred",
        "ml_wind_speed",
        "dmi_windgusts_10m_pred",
        "ml_wind_gust",
        "dmi_rain_prob_pct",
        "ml_rain_prob_pct",
        "dmi_precipitation_pred",
        "ml_rain_amount",
    ]
    visible = [name for name in preferred_columns if name in table.columns]
    return table[visible].round(2)
def refresh_dashboard(force=False):
    """Load data and rebuild every dashboard output component.

    Returns a 7-tuple matching the Gradio ``outputs`` list: status text,
    four plots, the metrics markdown, and the future-predictions table.
    Any failure (loading OR rendering) is recorded on APP_STATE and
    placeholder outputs are returned instead of raising into Gradio.
    """
    try:
        payload = load_dashboard_payload(force=force)
        history_df = payload["history"]
        future_df = payload["future"]
        metrics = calculate_metrics(history_df)
        return (
            build_status_text(),
            create_temperature_plot(history_df, future_df),
            create_wind_plot(history_df, future_df),
            create_rain_plot(history_df, future_df),
            create_performance_plot(history_df, metrics),
            build_metrics_text(metrics),
            build_future_table(future_df),
        )
    except Exception as exc:
        log_exception("refresh_dashboard", exc)
        with APP_STATE.lock:
            APP_STATE.last_error = str(exc)
            APP_STATE.warming = False
        return (
            build_status_text(),
            None,
            None,
            None,
            None,
            f"Loading failed: {exc}",
            placeholder_table("Refresh failed."),
        )
def warm_cache_after_startup():
    """Background-thread target: pre-load the data cache shortly after boot.

    The initial sleep lets Gradio finish binding its port before the heavy
    downloads start; any failure is recorded so the UI can surface it.
    """
    time.sleep(WARMUP_DELAY_SECONDS)
    try:
        load_dashboard_payload(force=True)
        log_event("warm_cache_after_startup completed")
    except Exception as exc:
        log_exception("warm_cache_after_startup", exc)
        with APP_STATE.lock:
            APP_STATE.last_error = str(exc)
            APP_STATE.warming = False
log_event("bootstrap_begin")

# The UI is built at import time so HF Spaces / `gradio app.py` can serve
# the module-level `demo` object directly.
with gr.Blocks(title="Aarhus Weather Dashboard") as demo:
    gr.Markdown(
        """
# DMI vs ML Dashboard - Aarhus
Each tab shows the latest 7 days of backtest data plus the next 48 hours of live forecast.
Historical charts use holdout data with actual observations so model performance is visible directly on the graphs.
"""
    )
    dashboard_status = gr.Markdown(build_status_text())
    with gr.Tabs():
        with gr.Tab("Temperature"):
            temp_plot = gr.Plot(label="Temperature")
        with gr.Tab("Wind"):
            wind_plot = gr.Plot(label="Wind")
        with gr.Tab("Rain"):
            rain_plot = gr.Plot(label="Rain")
        with gr.Tab("Performance"):
            metrics_text = gr.Markdown("No data loaded yet.")
            perf_plot = gr.Plot(label="Performance Analysis")
    future_table = gr.DataFrame(label="Next 48 Hours")
    global_refresh = gr.Button("Refresh All", variant="primary")
    # Order must match the 7-tuple returned by refresh_dashboard().
    outputs = [dashboard_status, temp_plot, wind_plot, rain_plot, perf_plot, metrics_text, future_table]
    # Lambdas are used so Gradio invokes refresh_dashboard with no inputs;
    # the button forces a reload while the initial page load may use cache.
    global_refresh.click(lambda: refresh_dashboard(force=True), outputs=outputs)
    demo.load(lambda: refresh_dashboard(force=False), outputs=outputs)
log_event("ui_constructed")
if __name__ == "__main__":
    # Warm the cache off the main thread so launch() can bind the port
    # promptly; daemon=True lets the process exit without joining it.
    threading.Thread(target=warm_cache_after_startup, daemon=True, name="dashboard-warmup").start()
    log_event("gradio_launch_called", server_name="0.0.0.0", server_port=7860)
    # Bind on all interfaces; 7860 is the port HF Spaces expects.
    demo.launch(server_name="0.0.0.0", server_port=7860)