# Fix dataset path resolution on HF Space (commit 2cb99f9, author: Ciroc0)
import importlib
import os
import threading
import time
import traceback
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from pathlib import Path
from zoneinfo import ZoneInfo

import gradio as gr
from huggingface_hub import hf_hub_download
# HF Hub dataset repos: raw weather/training data and precomputed predictions.
DATASET_NAME = "Ciroc0/dmi-aarhus-weather-data"
PREDICTIONS_DATASET = "Ciroc0/dmi-aarhus-predictions"
# Optional token for private-repo access; None falls back to anonymous access.
HF_TOKEN = os.environ.get("HF_TOKEN")
# All user-facing timestamps are rendered in Danish local time.
COPENHAGEN_TZ = ZoneInfo("Europe/Copenhagen")
APP_NAME = "dmi-vs-ml-dashboard"
# How long a loaded payload is served from cache before a reload is triggered.
CACHE_TTL_SECONDS = 300
# Delay before the background warmup thread starts its heavy downloads.
WARMUP_DELAY_SECONDS = 15
# Backtest window (days back) and forecast window (hours ahead) shown in the UI.
HISTORY_WINDOW_DAYS = 7
FUTURE_WINDOW_HOURS = 48
# Per-target model bundle filenames inside the weather-data dataset repo.
MODEL_FILES = {
    "temperature": "temperature_models.pkl",
    "wind_speed": "wind_speed_models.pkl",
    "wind_gust": "wind_gust_models.pkl",
    "rain_event": "rain_event_models.pkl",
    "rain_amount": "rain_amount_models.pkl",
}
class LazyModule:
    """Import-on-first-use proxy for a heavyweight module.

    The real module is imported the first time any attribute is looked up,
    which keeps process startup fast when the module is never touched.
    """

    def __init__(self, module_name):
        self.module_name = module_name
        self._module = None

    def _load(self):
        # Import lazily and memoize; subsequent calls are a cheap attribute read.
        module = self._module
        if module is None:
            module = importlib.import_module(self.module_name)
            self._module = module
        return module

    def __getattr__(self, item):
        # Only invoked for attributes missing on the proxy itself, so every
        # real module attribute is forwarded to the loaded module.
        return getattr(self._load(), item)
# Heavy libraries are loaded lazily so the app boots quickly; the first
# attribute access on these proxies triggers the real import (see LazyModule).
pd = LazyModule("pandas")
np = LazyModule("numpy")
joblib = LazyModule("joblib")
@dataclass
class AppState:
    """Process-wide mutable state shared between Gradio callbacks and the
    warmup thread; all reads and writes are guarded by ``lock``."""

    lock: threading.Lock = field(default_factory=threading.Lock)
    warming: bool = True  # True while an initial or forced load is in flight
    ready: bool = False  # set once a payload has loaded successfully
    last_error: str | None = None  # fatal load error shown in the status line
    last_warning: str | None = None  # partial failure (e.g. live predictions missing)
    cache_loaded_at: datetime | None = None  # when cached_payload was built
    cache_expires_at: datetime | None = None  # cached_payload is valid until then
    cached_payload: dict | None = None  # {"future": df|None, "history": df|None}


# Single shared instance used by every handler in this module.
APP_STATE = AppState()
def log_event(message, **fields):
    """Print a structured, timestamped log line for the app.

    Extra keyword arguments are rendered as sorted ``key=value`` pairs so
    log lines are stable and grep-friendly.
    """
    # Fix: datetime.utcnow() is deprecated (Python 3.12+) and returns a naive
    # datetime; use an aware UTC time and format it explicitly with a "Z".
    # The rendered format is unchanged: YYYY-MM-DDTHH:MM:SSZ.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S") + "Z"
    details = " ".join(f"{key}={fields[key]!r}" for key in sorted(fields))
    if details:
        print(f"[{APP_NAME}] {timestamp} {message} {details}", flush=True)
    else:
        print(f"[{APP_NAME}] {timestamp} {message}", flush=True)
def log_exception(context, exc):
    """Log a structured failure event for *context*, then dump the traceback."""
    log_event(
        f"{context} failed",
        error=str(exc),
        error_type=type(exc).__name__,
    )
    # The full traceback goes out as a separate plain print for readability.
    print(traceback.format_exc(), flush=True)
def now_cph():
    """Return the current wall-clock time in Copenhagen (tz-aware)."""
    return datetime.now(tz=COPENHAGEN_TZ)
def build_status_text():
    """Render the one-line status string shown at the top of the dashboard.

    Precedence: hard failure > partial warning > warming > ready states.
    """
    state = APP_STATE
    with state.lock:
        if state.last_error:
            return f"Status: failed. {state.last_error}"
        if state.last_warning:
            return f"Status: partial. {state.last_warning}"
        if state.warming:
            return "Status: loading forecast and backtest data."
        if state.cache_loaded_at is None:
            return "Status: ready. No data loaded yet."
        loaded = state.cache_loaded_at.strftime("%Y-%m-%d %H:%M:%S")
        return f"Status: ready. Cache loaded at {loaded}."
def placeholder_table(message):
    """Return a one-row DataFrame carrying *message* in a ``status`` column."""
    return pd.DataFrame({"status": [message]})
def ensure_copenhagen_time(df, column_name):
    """Coerce *column_name* in *df* to timezone-aware Copenhagen timestamps.

    Returns *df* unchanged when it is None or lacks the column. Note: the
    column is assigned in place on the frame that was passed in.
    """
    if df is None or column_name not in df.columns:
        return df
    # errors="coerce" turns unparseable values into NaT instead of raising.
    series = pd.to_datetime(df[column_name], errors="coerce")
    if getattr(series.dt, "tz", None) is None:
        # Naive timestamps are assumed to already be Copenhagen wall-clock
        # time (TODO confirm against the dataset producer). DST transitions
        # are handled rather than raised: overlaps are inferred from
        # ordering, gaps are shifted forward.
        df[column_name] = series.dt.tz_localize(
            COPENHAGEN_TZ,
            ambiguous="infer",
            nonexistent="shift_forward",
        )
    else:
        df[column_name] = series.dt.tz_convert(COPENHAGEN_TZ)
    return df
def dataset_local_candidates(dataset_slug, filename):
    """Return the local filesystem paths probed before hitting the HF Hub.

    Checked in order: next to this file, an optional checkout two levels up
    under ``datasets/<slug>/``, the working directory, and an
    ``hf/datasets/<slug>/`` layout under the working directory.
    """
    here = Path(__file__).resolve()
    cwd = Path.cwd()
    candidates = [here.parent / filename]
    # A dataset checkout two levels above this file (typical HF Space layout);
    # skipped when the path is too shallow to have that many ancestors.
    if len(here.parents) >= 3:
        candidates.append(here.parents[2] / "datasets" / dataset_slug / filename)
    candidates.append(cwd / filename)
    candidates.append(cwd / "hf" / "datasets" / dataset_slug / filename)
    return candidates
def resolve_dataset_file(repo_id, dataset_slug, filename):
    """Return a readable path for *filename*, preferring local copies.

    Falls back to downloading from the HF dataset repo when no local
    candidate exists on disk.
    """
    local_copy = next(
        (
            candidate
            for candidate in dataset_local_candidates(dataset_slug, filename)
            if candidate.exists()
        ),
        None,
    )
    if local_copy is not None:
        return str(local_copy)
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="dataset",
        token=HF_TOKEN,
    )
def summarize_attempt_errors(filename_errors):
    """Format (filename, exception) pairs into one human-readable summary.

    Used to build informative FileNotFoundError messages after every
    candidate file has failed to load.
    """
    parts = []
    for filename, exc in filename_errors:
        # Bug fix: the filename was previously dropped and every entry was
        # rendered as "(unknown)", making the summary useless for debugging.
        parts.append(f"{filename}: {type(exc).__name__}: {exc}")
    return "; ".join(parts)
def load_first_available_prediction_file():
    """Return a path to the first predictions parquet that can be resolved.

    Tries the "latest" filename first, then the legacy one; raises
    FileNotFoundError carrying a per-file error summary when both fail.
    """
    attempts = []
    for candidate_name in ("predictions_latest.parquet", "predictions.parquet"):
        try:
            return resolve_dataset_file(PREDICTIONS_DATASET, "dmi-aarhus-predictions", candidate_name)
        except Exception as exc:
            attempts.append((candidate_name, exc))
    raise FileNotFoundError(
        "No predictions parquet found in dataset. Attempts: "
        + summarize_attempt_errors(attempts)
    )
def normalize_predictions(df):
    """Canonicalize a raw predictions frame: tz-aware times, booleans, dedupe.

    Returns ``None`` for an empty or missing frame. When several predictions
    exist for the same target hour, the most recently made one is kept.
    """
    if df is None or len(df) == 0:
        return None
    # Older prediction files used "timestamp" for the target hour.
    if "timestamp" in df.columns and "target_timestamp" not in df.columns:
        df = df.rename(columns={"timestamp": "target_timestamp"})
    for time_column in ("target_timestamp", "reference_time", "prediction_made_at"):
        df = ensure_copenhagen_time(df, time_column)
    if "verified" not in df.columns:
        df["verified"] = False
    df["verified"] = df["verified"].fillna(False).astype(bool)
    preferred_order = ["target_timestamp", "prediction_made_at", "reference_time"]
    sort_keys = [name for name in preferred_order if name in df.columns]
    if sort_keys:
        df = df.sort_values(sort_keys)
    if "target_timestamp" in df.columns:
        # keep="last" retains the newest prediction per target hour.
        df = df.drop_duplicates(subset=["target_timestamp"], keep="last")
    return df.reset_index(drop=True)
def load_prediction_frame():
    """Locate the predictions parquet, load it, and return it normalized."""
    parquet_path = load_first_available_prediction_file()
    return normalize_predictions(pd.read_parquet(parquet_path))
def normalize_training_matrix(df):
    """Canonicalize the training matrix: rename legacy columns, localize times.

    Returns ``None`` for an empty or missing frame. Legacy short column names
    (e.g. ``dmi_temp_pred``) are mapped to the current long names, but only
    when the current name is not already present.
    """
    if df is None or len(df) == 0:
        return None
    legacy_to_current = {
        "timestamp": "target_timestamp",
        "dmi_temp_pred": "dmi_temperature_2m_pred",
        "dmi_wind_pred": "dmi_windspeed_10m_pred",
        "dmi_pressure_pred": "dmi_pressure_msl_pred",
        "dmi_humidity_pred": "dmi_relative_humidity_2m_pred",
        "actual_wind": "actual_wind_speed",
    }
    rename_map = {
        old: new
        for old, new in legacy_to_current.items()
        if old in df.columns and new not in df.columns
    }
    if rename_map:
        df = df.rename(columns=rename_map)
    df = ensure_copenhagen_time(df, "target_timestamp")
    df = ensure_copenhagen_time(df, "reference_time")
    # Robustness fix: only sort by columns that actually exist. Previously a
    # frame without "reference_time" (or "target_timestamp") raised KeyError
    # here, while the sibling normalize_predictions() already guards its sort.
    sort_keys = [name for name in ("target_timestamp", "reference_time") if name in df.columns]
    if sort_keys:
        df = df.sort_values(sort_keys)
    return df.reset_index(drop=True)
def load_training_matrix():
    """Load the training matrix parquet, trying current then legacy filenames.

    Raises FileNotFoundError carrying a per-file error summary if none load.
    """
    attempts = []
    for candidate_name in ("training_matrix.parquet", "data.parquet"):
        try:
            parquet_path = resolve_dataset_file(DATASET_NAME, "dmi-aarhus-weather-data", candidate_name)
            return normalize_training_matrix(pd.read_parquet(parquet_path))
        except Exception as exc:
            attempts.append((candidate_name, exc))
    raise FileNotFoundError(
        "No training matrix parquet found in dataset. Attempts: "
        + summarize_attempt_errors(attempts)
    )
def load_model_bundle(target_name):
    """Resolve and unpickle the model bundle for *target_name*.

    The filename is looked up in MODEL_FILES; raises KeyError for an
    unknown target.
    """
    bundle_path = resolve_dataset_file(
        DATASET_NAME,
        "dmi-aarhus-weather-data",
        MODEL_FILES[target_name],
    )
    return joblib.load(bundle_path)
def predict_with_bundle(bundle, df):
    """Run per-lead-bucket models over *df*; return an array of predictions.

    Rows whose bucket has no usable model (or whose features are missing from
    *df*) stay NaN. Models exposing ``predict_proba`` are treated as binary
    classifiers and yield the positive-class probability.
    """
    if bundle is None or df is None or len(df) == 0 or "lead_bucket" not in df.columns:
        return None
    output = np.full(len(df), np.nan)
    models = bundle.get("models", {})
    for bucket in df["lead_bucket"].dropna().unique():
        if bucket not in models:
            continue
        model_info = models[bucket]
        model = model_info.get("model")
        # Per-bucket feature list wins; fall back to the bundle-wide list.
        feature_cols = model_info.get("feature_columns") or bundle.get("feature_columns", [])
        if model is None or not feature_cols:
            continue
        missing_cols = [name for name in feature_cols if name not in df.columns]
        if missing_cols:
            log_event("predict_with_bundle missing_features", bucket=bucket, missing_columns=missing_cols)
            continue
        row_mask = df["lead_bucket"] == bucket
        features = df.loc[row_mask, feature_cols].fillna(0.0)
        if hasattr(model, "predict_proba"):
            # Binary classifier: probability of the positive class.
            output[row_mask] = model.predict_proba(features)[:, 1]
        else:
            output[row_mask] = model.predict(features)
    return output
def build_historical_backtest(training_df):
    """Slice the last HISTORY_WINDOW_DAYS of the training matrix and attach
    ML predictions alongside the DMI baseline for each forecast target.

    Returns one row per target hour, or ``None`` when no usable history
    remains after windowing/filtering.
    """
    if training_df is None or len(training_df) == 0 or "target_timestamp" not in training_df.columns:
        return None
    current_time = now_cph()
    # Never look past "now" (or past the newest available row, if earlier).
    window_end = min(current_time, training_df["target_timestamp"].max())
    window_start = window_end - timedelta(days=HISTORY_WINDOW_DAYS)
    history = training_df[
        (training_df["target_timestamp"] >= window_start)
        & (training_df["target_timestamp"] <= window_end)
    ].copy()
    if len(history) == 0:
        return None
    if "lead_time_hours" in history.columns:
        # Keep genuine forecasts only: strictly positive lead time, capped
        # at the 48h forecast window (0.0001 excludes zero-lead rows).
        history = history[
            history["lead_time_hours"].fillna(0).between(0.0001, FUTURE_WINDOW_HOURS, inclusive="both")
        ].copy()
    if len(history) == 0:
        return None
    # Seed the ML columns with the DMI baseline so rows without model output
    # still plot something sensible.
    history["ml_temp"] = history["dmi_temperature_2m_pred"] if "dmi_temperature_2m_pred" in history.columns else np.nan
    history["ml_wind_speed"] = history["dmi_windspeed_10m_pred"] if "dmi_windspeed_10m_pred" in history.columns else np.nan
    history["ml_wind_gust"] = history["dmi_windgusts_10m_pred"] if "dmi_windgusts_10m_pred" in history.columns else np.nan
    if "dmi_precipitation_probability_pred" in history.columns:
        # DMI publishes percent; internal rain probability is 0..1.
        history["ml_rain_prob"] = (
            history["dmi_precipitation_probability_pred"].fillna(0.0).clip(0.0, 100.0) / 100.0
        )
    else:
        history["ml_rain_prob"] = 0.0
    if "dmi_precipitation_pred" in history.columns:
        history["ml_rain_amount"] = history["dmi_precipitation_pred"].fillna(0.0).clip(0.0, None)
    else:
        history["ml_rain_amount"] = 0.0
    # (target, output column, DMI baseline column, how model output is applied)
    bundle_specs = [
        ("temperature", "ml_temp", "dmi_temperature_2m_pred", "correction"),
        ("wind_speed", "ml_wind_speed", "dmi_windspeed_10m_pred", "correction"),
        ("wind_gust", "ml_wind_gust", "dmi_windgusts_10m_pred", "correction"),
        ("rain_event", "ml_rain_prob", None, "probability"),
        ("rain_amount", "ml_rain_amount", None, "absolute"),
    ]
    for target_name, output_column, baseline_column, prediction_kind in bundle_specs:
        try:
            bundle = load_model_bundle(target_name)
        except Exception as exc:
            # A missing/broken bundle downgrades gracefully to the baseline.
            log_event("load_model_bundle skipped", target=target_name, error=str(exc))
            continue
        predictions = predict_with_bundle(bundle, history)
        if predictions is None:
            continue
        prediction_series = pd.Series(predictions, index=history.index, dtype="float64")
        prediction_mask = prediction_series.notna()
        if not prediction_mask.any():
            continue
        if prediction_kind == "correction":
            # The model predicts a residual on top of the DMI forecast.
            history.loc[prediction_mask, output_column] = (
                history.loc[prediction_mask, baseline_column] + prediction_series[prediction_mask]
            )
        elif prediction_kind == "probability":
            history.loc[prediction_mask, output_column] = prediction_series[prediction_mask].clip(0.0, 1.0)
        else:
            history.loc[prediction_mask, output_column] = prediction_series[prediction_mask].clip(0.0, None)
    # Collapse to one row per target hour. NOTE(review): the descending sort
    # keeps the row with the LARGEST lead time (and latest reference time as
    # tiebreaker) per hour — confirm this pick is intended rather than the
    # shortest-lead/most-recent forecast.
    sort_columns = ["target_timestamp"]
    ascending = [True]
    if "lead_time_hours" in history.columns:
        sort_columns.append("lead_time_hours")
        ascending.append(False)
    if "reference_time" in history.columns:
        sort_columns.append("reference_time")
        ascending.append(False)
    history = history.sort_values(sort_columns, ascending=ascending)
    history = history.drop_duplicates(subset=["target_timestamp"], keep="first").reset_index(drop=True)
    return history
def load_dashboard_payload(force=False):
    """Return the cached {"future", "history"} payload, reloading when stale.

    With ``force=True`` the cache is bypassed. Missing live predictions are
    tolerated: a warning is recorded on APP_STATE and "future" stays None.
    Raises whatever load_training_matrix() raises on a hard failure.
    """
    current_time = now_cph()
    with APP_STATE.lock:
        cache_valid = (
            not force
            and APP_STATE.cached_payload is not None
            and APP_STATE.cache_expires_at is not None
            and APP_STATE.cache_expires_at > current_time
        )
        if cache_valid:
            return APP_STATE.cached_payload
        APP_STATE.warming = True
        APP_STATE.last_error = None
        APP_STATE.last_warning = None
    # NOTE(review): the heavy load below runs outside the lock, so two
    # concurrent callers past the cache check may both rebuild the payload
    # (last writer wins) — confirm this is acceptable.
    log_event("load_dashboard_payload started", force=force)
    training_df = load_training_matrix()
    history_df = build_historical_backtest(training_df)
    predictions_df = None
    prediction_warning = None
    try:
        predictions_df = load_prediction_frame()
    except Exception as exc:
        # Live predictions are optional; surface as a warning, not a failure.
        prediction_warning = f"Live predictions unavailable. {exc}"
        log_exception("load_prediction_frame", exc)
    future_df = None
    if predictions_df is not None and len(predictions_df) > 0:
        # Keep only the next FUTURE_WINDOW_HOURS of predictions, ordered.
        future_df = predictions_df[
            (predictions_df["target_timestamp"] > current_time)
            & (predictions_df["target_timestamp"] <= current_time + timedelta(hours=FUTURE_WINDOW_HOURS))
        ].copy()
        future_df = future_df.sort_values("target_timestamp").reset_index(drop=True)
    payload = {"future": future_df, "history": history_df}
    with APP_STATE.lock:
        APP_STATE.cached_payload = payload
        APP_STATE.cache_loaded_at = current_time
        APP_STATE.cache_expires_at = current_time + timedelta(seconds=CACHE_TTL_SECONDS)
        APP_STATE.warming = False
        APP_STATE.ready = True
        APP_STATE.last_error = None
        APP_STATE.last_warning = prediction_warning
    log_event(
        "load_dashboard_payload completed",
        future_rows=0 if future_df is None else len(future_df),
        history_rows=0 if history_df is None else len(history_df),
    )
    return payload
def add_now_marker(fig):
    """Draw a dotted gray vertical "now" line on *fig* at current CPH time."""
    fig.add_vline(
        x=now_cph(),
        line_width=1,
        line_dash="dot",
        line_color="gray",
    )
def create_temperature_plot(history_df, future_df):
    """Build the temperature figure: 7-day backtest plus 48-hour forecast.

    Returns ``None`` when neither frame has rows. ML traces are only drawn
    when the column holds at least one non-NaN value.
    """
    history_empty = history_df is None or len(history_df) == 0
    future_empty = future_df is None or len(future_df) == 0
    if history_empty and future_empty:
        return None
    go = importlib.import_module("plotly.graph_objects")
    fig = go.Figure()
    # (frame, column, trace name, line style, mode, require non-NaN values)
    trace_specs = [
        (history_df, "actual_temp", "Actual Temperature", dict(color="black", width=2), "lines", False),
        (history_df, "dmi_temperature_2m_pred", "DMI Backtest", dict(color="red", width=2), "lines", False),
        (history_df, "ml_temp", "ML Backtest", dict(color="green", width=2), "lines", True),
        (future_df, "dmi_temperature_2m_pred", "DMI Forecast", dict(color="red", width=2, dash="dash"), "lines+markers", False),
        (future_df, "ml_temp", "ML Forecast", dict(color="green", width=2, dash="dash"), "lines+markers", True),
    ]
    for frame, column, label, line_style, mode, needs_values in trace_specs:
        if frame is None or len(frame) == 0 or column not in frame.columns:
            continue
        if needs_values and not frame[column].notna().any():
            continue
        fig.add_trace(
            go.Scatter(
                x=frame["target_timestamp"],
                y=frame[column],
                name=label,
                line=dict(line_style),
                mode=mode,
            )
        )
    fig.update_layout(
        title="Temperature - Last 7 days backtest and next 48 hours forecast",
        xaxis_title="Time (Danish)",
        yaxis_title="Temperature (C)",
        height=420,
        template="plotly_white",
        hovermode="x unified",
    )
    add_now_marker(fig)
    return fig
def create_wind_plot(history_df, future_df):
    """Build the wind figure: speed and gust, backtest plus forecast.

    Returns ``None`` when neither frame has rows. ML traces are only drawn
    when the column holds at least one non-NaN value.
    """
    history_empty = history_df is None or len(history_df) == 0
    future_empty = future_df is None or len(future_df) == 0
    if history_empty and future_empty:
        return None
    go = importlib.import_module("plotly.graph_objects")
    fig = go.Figure()
    # (frame, column, trace name, line style, mode, require non-NaN values)
    trace_specs = [
        (history_df, "actual_wind_speed", "Actual Wind Speed", dict(color="black", width=2), "lines", False),
        (history_df, "actual_wind_gust", "Actual Wind Gust", dict(color="gray", width=1, dash="dot"), "lines", False),
        (history_df, "dmi_windspeed_10m_pred", "DMI Wind Speed Backtest", dict(color="blue", width=2), "lines", False),
        (history_df, "ml_wind_speed", "ML Wind Speed Backtest", dict(color="green", width=2), "lines", True),
        (history_df, "dmi_windgusts_10m_pred", "DMI Wind Gust Backtest", dict(color="orange", width=2, dash="dash"), "lines", False),
        (history_df, "ml_wind_gust", "ML Wind Gust Backtest", dict(color="darkgreen", width=2, dash="dash"), "lines", True),
        (future_df, "dmi_windspeed_10m_pred", "DMI Wind Speed Forecast", dict(color="blue", width=2, dash="dot"), "lines+markers", False),
        (future_df, "ml_wind_speed", "ML Wind Speed Forecast", dict(color="green", width=2, dash="dot"), "lines+markers", True),
        (future_df, "dmi_windgusts_10m_pred", "DMI Wind Gust Forecast", dict(color="orange", width=2, dash="dashdot"), "lines+markers", False),
        (future_df, "ml_wind_gust", "ML Wind Gust Forecast", dict(color="darkgreen", width=2, dash="dashdot"), "lines+markers", True),
    ]
    for frame, column, label, line_style, mode, needs_values in trace_specs:
        if frame is None or len(frame) == 0 or column not in frame.columns:
            continue
        if needs_values and not frame[column].notna().any():
            continue
        fig.add_trace(
            go.Scatter(
                x=frame["target_timestamp"],
                y=frame[column],
                name=label,
                line=dict(line_style),
                mode=mode,
            )
        )
    fig.update_layout(
        title="Wind - Last 7 days backtest and next 48 hours forecast",
        xaxis_title="Time (Danish)",
        yaxis_title="Wind Speed / Gust (m/s)",
        height=460,
        template="plotly_white",
        hovermode="x unified",
    )
    add_now_marker(fig)
    return fig
def create_rain_plot(history_df, future_df):
    """Build the rain figure: probability (left axis) and amount (right axis).

    The observed rain amount is drawn as translucent bars; probability and
    amount traces are split across the two y-axes of a secondary-y subplot.
    """
    history_empty = history_df is None or len(history_df) == 0
    future_empty = future_df is None or len(future_df) == 0
    if history_empty and future_empty:
        return None
    go = importlib.import_module("plotly.graph_objects")
    make_subplots = importlib.import_module("plotly.subplots").make_subplots
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    if not history_empty and "actual_precipitation" in history_df.columns:
        fig.add_trace(
            go.Bar(
                x=history_df["target_timestamp"],
                y=history_df["actual_precipitation"].fillna(0.0),
                name="Actual Rain Amount",
                marker_color="lightgray",
                opacity=0.45,
            ),
            secondary_y=True,
        )

    def identity(series):
        return series

    def to_percent(series):
        # ML probability is 0..1; the probability axis runs 0..100.
        return series * 100

    def zero_filled(series):
        return series.fillna(0.0)

    # (frame, column, name, line style, mode, needs values, y-transform,
    #  plotted on the secondary (amount) axis)
    line_specs = [
        (history_df, "dmi_precipitation_probability_pred", "DMI Rain Probability Backtest", dict(color="blue", width=2), "lines", False, identity, False),
        (history_df, "ml_rain_prob", "ML Rain Probability Backtest", dict(color="green", width=2), "lines", True, to_percent, False),
        (history_df, "dmi_precipitation_pred", "DMI Rain Amount Backtest", dict(color="orange", width=2), "lines", False, zero_filled, True),
        (history_df, "ml_rain_amount", "ML Rain Amount Backtest", dict(color="darkgreen", width=2), "lines", True, zero_filled, True),
        (future_df, "dmi_precipitation_probability_pred", "DMI Rain Probability Forecast", dict(color="blue", width=2, dash="dash"), "lines+markers", False, identity, False),
        (future_df, "ml_rain_prob", "ML Rain Probability Forecast", dict(color="green", width=2, dash="dash"), "lines+markers", True, to_percent, False),
        (future_df, "dmi_precipitation_pred", "DMI Rain Amount Forecast", dict(color="orange", width=2, dash="dot"), "lines+markers", False, zero_filled, True),
        (future_df, "ml_rain_amount", "ML Rain Amount Forecast", dict(color="darkgreen", width=2, dash="dot"), "lines+markers", True, zero_filled, True),
    ]
    for frame, column, label, line_style, mode, needs_values, transform, on_secondary in line_specs:
        if frame is None or len(frame) == 0 or column not in frame.columns:
            continue
        if needs_values and not frame[column].notna().any():
            continue
        fig.add_trace(
            go.Scatter(
                x=frame["target_timestamp"],
                y=transform(frame[column]),
                name=label,
                line=dict(line_style),
                mode=mode,
            ),
            secondary_y=on_secondary,
        )
    fig.update_layout(
        title="Rain - Last 7 days backtest and next 48 hours forecast",
        xaxis_title="Time (Danish)",
        template="plotly_white",
        height=460,
        hovermode="x unified",
    )
    fig.update_yaxes(title_text="Probability (%)", secondary_y=False, range=[0, 100])
    fig.update_yaxes(title_text="Amount (mm)", secondary_y=True)
    add_now_marker(fig)
    return fig
def calculate_metrics(history_df):
    """Compute DMI-vs-ML error metrics over the historical backtest frame.

    Returns a dict keyed by target ("temp", "wind_speed", "wind_gust",
    "rain_event", "rain_amount"). Each entry holds RMSE/MAE or Brier/accuracy
    numbers plus, where applicable, the percentage improvement of ML over DMI.
    """
    if history_df is None or len(history_df) == 0:
        return {}

    def paired_errors(actual_column, dmi_column, ml_column):
        # Align DMI and ML errors on rows where the observation exists.
        actual = history_df[actual_column].dropna()
        if len(actual) == 0:
            return None, None
        aligned = history_df.loc[actual.index]
        return (
            aligned[actual_column] - aligned[dmi_column],
            aligned[actual_column] - aligned[ml_column],
        )

    def improvement_pct(dmi_value, ml_value):
        # Positive means ML beat DMI; guarded against division by zero.
        return ((dmi_value - ml_value) / dmi_value) * 100 if dmi_value > 0 else 0.0

    metrics = {}
    if {"actual_temp", "dmi_temperature_2m_pred", "ml_temp"}.issubset(history_df.columns):
        dmi_error, ml_error = paired_errors("actual_temp", "dmi_temperature_2m_pred", "ml_temp")
        if dmi_error is not None:
            dmi_rmse = float(np.sqrt(np.mean(dmi_error**2)))
            ml_rmse = float(np.sqrt(np.mean(ml_error**2)))
            # Temperature improvement is judged on RMSE.
            metrics["temp"] = {
                "dmi_rmse": dmi_rmse,
                "ml_rmse": ml_rmse,
                "dmi_mae": float(np.mean(np.abs(dmi_error))),
                "ml_mae": float(np.mean(np.abs(ml_error))),
                "improvement": improvement_pct(dmi_rmse, ml_rmse),
            }
    wind_targets = (
        ("wind_speed", ("actual_wind_speed", "dmi_windspeed_10m_pred", "ml_wind_speed")),
        ("wind_gust", ("actual_wind_gust", "dmi_windgusts_10m_pred", "ml_wind_gust")),
    )
    for metric_key, columns in wind_targets:
        if not set(columns).issubset(history_df.columns):
            continue
        dmi_error, ml_error = paired_errors(*columns)
        if dmi_error is None:
            continue
        dmi_mae = float(np.mean(np.abs(dmi_error)))
        ml_mae = float(np.mean(np.abs(ml_error)))
        # Wind improvements are judged on MAE.
        metrics[metric_key] = {
            "dmi_rmse": float(np.sqrt(np.mean(dmi_error**2))),
            "ml_rmse": float(np.sqrt(np.mean(ml_error**2))),
            "dmi_mae": dmi_mae,
            "ml_mae": ml_mae,
            "improvement": improvement_pct(dmi_mae, ml_mae),
        }
    if "actual_precipitation" in history_df.columns:
        actual_amount = history_df["actual_precipitation"].fillna(0.0)
        # A rain "event" is more than 0.1 mm of observed precipitation.
        actual_event = (actual_amount > 0.1).astype(int)
        if {"dmi_precipitation_probability_pred", "ml_rain_prob"}.issubset(history_df.columns):
            # DMI publishes percent; ML is already a 0..1 probability.
            dmi_prob = history_df["dmi_precipitation_probability_pred"].fillna(0.0).clip(0.0, 100.0) / 100.0
            ml_prob = history_df["ml_rain_prob"].fillna(0.0).clip(0.0, 1.0)
            metrics["rain_event"] = {
                "dmi_brier": float(np.mean((actual_event - dmi_prob) ** 2)),
                "ml_brier": float(np.mean((actual_event - ml_prob) ** 2)),
                "dmi_accuracy": float(np.mean((dmi_prob >= 0.5).astype(int) == actual_event)),
                "ml_accuracy": float(np.mean((ml_prob >= 0.5).astype(int) == actual_event)),
            }
        if {"dmi_precipitation_pred", "ml_rain_amount"}.issubset(history_df.columns):
            dmi_amount = history_df["dmi_precipitation_pred"].fillna(0.0).clip(0.0, None)
            ml_amount = history_df["ml_rain_amount"].fillna(0.0).clip(0.0, None)
            dmi_mae = float(np.mean(np.abs(actual_amount - dmi_amount)))
            ml_mae = float(np.mean(np.abs(actual_amount - ml_amount)))
            metrics["rain_amount"] = {
                "dmi_mae": dmi_mae,
                "ml_mae": ml_mae,
                "improvement": improvement_pct(dmi_mae, ml_mae),
            }
    return metrics
def build_metrics_text(metrics):
    """Render the metrics dict as a Markdown summary, one paragraph per target."""
    if not metrics:
        return "No historical backtest data available yet."
    # Dict order doubles as display order; keys absent from metrics are skipped.
    formatters = {
        "temp": lambda m: (
            f"**Temperature:** DMI RMSE={m['dmi_rmse']:.2f}C, "
            f"ML RMSE={m['ml_rmse']:.2f}C, Improvement={m['improvement']:+.1f}%"
        ),
        "wind_speed": lambda m: (
            f"**Wind Speed:** DMI MAE={m['dmi_mae']:.2f}m/s, "
            f"ML MAE={m['ml_mae']:.2f}m/s, Improvement={m['improvement']:+.1f}%"
        ),
        "wind_gust": lambda m: (
            f"**Wind Gust:** DMI MAE={m['dmi_mae']:.2f}m/s, "
            f"ML MAE={m['ml_mae']:.2f}m/s, Improvement={m['improvement']:+.1f}%"
        ),
        "rain_event": lambda m: (
            f"**Rain Event:** DMI Brier={m['dmi_brier']:.3f}, ML Brier={m['ml_brier']:.3f}, "
            f"DMI accuracy={m['dmi_accuracy']:.1%}, ML accuracy={m['ml_accuracy']:.1%}"
        ),
        "rain_amount": lambda m: (
            f"**Rain Amount:** DMI MAE={m['dmi_mae']:.2f}mm, "
            f"ML MAE={m['ml_mae']:.2f}mm, Improvement={m['improvement']:+.1f}%"
        ),
    }
    parts = [render(metrics[key]) for key, render in formatters.items() if key in metrics]
    return "\n\n".join(parts)
def create_performance_plot(history_df, metrics):
    """Build the 2x2 performance figure.

    Panels: (1,1) absolute temperature error over time, (1,2) absolute wind
    speed/gust errors, (2,1) observed rain events vs predicted probabilities,
    (2,2) DMI-vs-ML summary bars built from *metrics*.
    Returns ``None`` when there is no historical backtest data.
    """
    if history_df is None or len(history_df) == 0:
        return None
    go = importlib.import_module("plotly.graph_objects")
    make_subplots = importlib.import_module("plotly.subplots").make_subplots
    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=("Temperature Error", "Wind Error", "Rain Event Probability", "Overall Metrics"),
        specs=[[{}, {}], [{}, {}]],
    )
    # Top-left: absolute temperature errors.
    if {"actual_temp", "dmi_temperature_2m_pred", "ml_temp"}.issubset(history_df.columns):
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_temp"] - history_df["dmi_temperature_2m_pred"]),
                name="DMI Temp Error",
                line=dict(color="red"),
            ),
            row=1,
            col=1,
        )
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_temp"] - history_df["ml_temp"]),
                name="ML Temp Error",
                line=dict(color="green"),
            ),
            row=1,
            col=1,
        )
    # Top-right: absolute wind speed errors.
    if {"actual_wind_speed", "dmi_windspeed_10m_pred", "ml_wind_speed"}.issubset(history_df.columns):
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_wind_speed"] - history_df["dmi_windspeed_10m_pred"]),
                name="DMI Wind Speed Error",
                line=dict(color="blue"),
            ),
            row=1,
            col=2,
        )
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_wind_speed"] - history_df["ml_wind_speed"]),
                name="ML Wind Speed Error",
                line=dict(color="green"),
            ),
            row=1,
            col=2,
        )
    # Top-right (same panel): absolute wind gust errors, dashed.
    if {"actual_wind_gust", "dmi_windgusts_10m_pred", "ml_wind_gust"}.issubset(history_df.columns):
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_wind_gust"] - history_df["dmi_windgusts_10m_pred"]),
                name="DMI Wind Gust Error",
                line=dict(color="orange", dash="dash"),
            ),
            row=1,
            col=2,
        )
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=np.abs(history_df["actual_wind_gust"] - history_df["ml_wind_gust"]),
                name="ML Wind Gust Error",
                line=dict(color="darkgreen", dash="dash"),
            ),
            row=1,
            col=2,
        )
    # Bottom-left: 0/1 observed rain events against predicted probabilities.
    if "actual_precipitation" in history_df.columns:
        actual_event = (history_df["actual_precipitation"].fillna(0.0) > 0.1).astype(int)
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=actual_event,
                name="Actual Rain Event",
                line=dict(color="black"),
            ),
            row=2,
            col=1,
        )
    if "dmi_precipitation_probability_pred" in history_df.columns:
        # DMI publishes percent; plot as a 0..1 probability.
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=history_df["dmi_precipitation_probability_pred"].fillna(0.0).clip(0.0, 100.0) / 100.0,
                name="DMI Rain Probability",
                line=dict(color="blue"),
            ),
            row=2,
            col=1,
        )
    if "ml_rain_prob" in history_df.columns:
        fig.add_trace(
            go.Scatter(
                x=history_df["target_timestamp"],
                y=history_df["ml_rain_prob"].fillna(0.0).clip(0.0, 1.0),
                name="ML Rain Probability",
                line=dict(color="green"),
            ),
            row=2,
            col=1,
        )
    # Bottom-right: one paired bar per available summary metric.
    labels = []
    dmi_values = []
    ml_values = []
    if "temp" in metrics:
        labels.append("Temp RMSE")
        dmi_values.append(metrics["temp"]["dmi_rmse"])
        ml_values.append(metrics["temp"]["ml_rmse"])
    if "wind_speed" in metrics:
        labels.append("Wind Speed MAE")
        dmi_values.append(metrics["wind_speed"]["dmi_mae"])
        ml_values.append(metrics["wind_speed"]["ml_mae"])
    if "wind_gust" in metrics:
        labels.append("Wind Gust MAE")
        dmi_values.append(metrics["wind_gust"]["dmi_mae"])
        ml_values.append(metrics["wind_gust"]["ml_mae"])
    if "rain_event" in metrics:
        labels.append("Rain Brier")
        dmi_values.append(metrics["rain_event"]["dmi_brier"])
        ml_values.append(metrics["rain_event"]["ml_brier"])
    if "rain_amount" in metrics:
        labels.append("Rain Amount MAE")
        dmi_values.append(metrics["rain_amount"]["dmi_mae"])
        ml_values.append(metrics["rain_amount"]["ml_mae"])
    if labels:
        fig.add_trace(go.Bar(x=labels, y=dmi_values, name="DMI", marker_color="red"), row=2, col=2)
        fig.add_trace(go.Bar(x=labels, y=ml_values, name="ML", marker_color="green"), row=2, col=2)
    fig.update_yaxes(title_text="Absolute Error", row=1, col=1)
    fig.update_yaxes(title_text="Absolute Error", row=1, col=2)
    fig.update_yaxes(title_text="Probability", row=2, col=1, range=[-0.05, 1.05])
    fig.update_layout(height=680, template="plotly_white", hovermode="x unified")
    add_now_marker(fig)
    return fig
def build_future_table(future_df):
    """Format the next-48h predictions frame for the Gradio data table.

    Timestamps are rendered as local wall-clock strings, the ML probability
    is converted to percent, and only the known display columns are kept,
    rounded to two decimals. Empty input yields a placeholder table.
    """
    if future_df is None or len(future_df) == 0:
        return placeholder_table("No future predictions available.")
    table = future_df.copy()
    table["target_timestamp"] = table["target_timestamp"].dt.strftime("%Y-%m-%d %H:%M")
    if "ml_rain_prob" in table.columns:
        # ML probability is 0..1; show percent like the DMI column.
        table["ml_rain_prob_pct"] = table["ml_rain_prob"] * 100
    if "dmi_precipitation_probability_pred" in table.columns:
        table["dmi_rain_prob_pct"] = table["dmi_precipitation_probability_pred"]
    preferred_columns = [
        "target_timestamp",
        "lead_time_hours",
        "dmi_temperature_2m_pred",
        "ml_temp",
        "dmi_windspeed_10m_pred",
        "ml_wind_speed",
        "dmi_windgusts_10m_pred",
        "ml_wind_gust",
        "dmi_rain_prob_pct",
        "ml_rain_prob_pct",
        "dmi_precipitation_pred",
        "ml_rain_amount",
    ]
    visible = [name for name in preferred_columns if name in table.columns]
    return table[visible].round(2)
def refresh_dashboard(force=False):
    """Load data and rebuild every dashboard output component.

    Returns a 7-tuple matching the Gradio ``outputs`` list: status text,
    four plots, the metrics markdown, and the future-predictions table.
    Any failure (loading OR rendering) is recorded on APP_STATE and
    placeholder outputs are returned instead of raising into Gradio.
    """
    try:
        payload = load_dashboard_payload(force=force)
        history_df = payload["history"]
        future_df = payload["future"]
        metrics = calculate_metrics(history_df)
        return (
            build_status_text(),
            create_temperature_plot(history_df, future_df),
            create_wind_plot(history_df, future_df),
            create_rain_plot(history_df, future_df),
            create_performance_plot(history_df, metrics),
            build_metrics_text(metrics),
            build_future_table(future_df),
        )
    except Exception as exc:
        log_exception("refresh_dashboard", exc)
        with APP_STATE.lock:
            APP_STATE.last_error = str(exc)
            APP_STATE.warming = False
        return (
            build_status_text(),
            None,
            None,
            None,
            None,
            f"Loading failed: {exc}",
            placeholder_table("Refresh failed."),
        )
def warm_cache_after_startup():
    """Background-thread target: pre-load the data cache shortly after boot.

    The initial sleep lets Gradio finish binding its port before the heavy
    downloads start; any failure is recorded so the UI can surface it.
    """
    time.sleep(WARMUP_DELAY_SECONDS)
    try:
        load_dashboard_payload(force=True)
        log_event("warm_cache_after_startup completed")
    except Exception as exc:
        log_exception("warm_cache_after_startup", exc)
        with APP_STATE.lock:
            APP_STATE.last_error = str(exc)
            APP_STATE.warming = False
log_event("bootstrap_begin")

# The UI is built at import time so HF Spaces / `gradio app.py` can serve
# the module-level `demo` object directly.
with gr.Blocks(title="Aarhus Weather Dashboard") as demo:
    gr.Markdown(
        """
# DMI vs ML Dashboard - Aarhus
Each tab shows the latest 7 days of backtest data plus the next 48 hours of live forecast.
Historical charts use holdout data with actual observations so model performance is visible directly on the graphs.
"""
    )
    dashboard_status = gr.Markdown(build_status_text())
    with gr.Tabs():
        with gr.Tab("Temperature"):
            temp_plot = gr.Plot(label="Temperature")
        with gr.Tab("Wind"):
            wind_plot = gr.Plot(label="Wind")
        with gr.Tab("Rain"):
            rain_plot = gr.Plot(label="Rain")
        with gr.Tab("Performance"):
            metrics_text = gr.Markdown("No data loaded yet.")
            perf_plot = gr.Plot(label="Performance Analysis")
    future_table = gr.DataFrame(label="Next 48 Hours")
    global_refresh = gr.Button("Refresh All", variant="primary")
    # Order must match the 7-tuple returned by refresh_dashboard().
    outputs = [dashboard_status, temp_plot, wind_plot, rain_plot, perf_plot, metrics_text, future_table]
    # Lambdas are used so Gradio invokes refresh_dashboard with no inputs;
    # the button forces a reload while the initial page load may use cache.
    global_refresh.click(lambda: refresh_dashboard(force=True), outputs=outputs)
    demo.load(lambda: refresh_dashboard(force=False), outputs=outputs)
log_event("ui_constructed")
if __name__ == "__main__":
    # Warm the cache off the main thread so launch() can bind the port
    # promptly; daemon=True lets the process exit without joining it.
    threading.Thread(target=warm_cache_after_startup, daemon=True, name="dashboard-warmup").start()
    log_event("gradio_launch_called", server_name="0.0.0.0", server_port=7860)
    # Bind on all interfaces; 7860 is the port HF Spaces expects.
    demo.launch(server_name="0.0.0.0", server_port=7860)