Spaces:

Tanoj22
/

protloc-ai

Running

protloc-ai / src /training /visualize.py

Tanoj22

Initial commit: ProtLoc-AI project setup and core app

cb6f1ba about 2 months ago

30.1 kB

	"""
	Visualization utilities for multilabel protein localization training and evaluation.

	All plot functions save PNG files to ``output_dir`` (150 DPI) and return matplotlib Figure objects.
	"""

	from __future__ import annotations

	from pathlib import Path
	import json
	from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Union

	import matplotlib.pyplot as plt
	import numpy as np

	try:
	import mlflow
	except ImportError: # pragma: no cover
	mlflow = None # type: ignore
	import seaborn as sns
	from matplotlib.figure import Figure
	from sklearn.metrics import confusion_matrix, f1_score

	# Module-wide plotting defaults, applied once at import time:
	# seaborn "whitegrid" theme with the "notebook" context.
	sns.set_theme(style="whitegrid", context="notebook")
	# Force white figure and axes backgrounds via matplotlib's rcParams.
	plt.rcParams.update({"figure.facecolor": "white", "axes.facecolor": "white"})


	def _to_numpy(x: Any) -> np.ndarray:
	    """Coerce ``x`` to a NumPy array.

	    An ``np.ndarray`` is returned unchanged (same object). An object exposing
	    both ``detach`` and ``cpu`` is converted with ``x.detach().cpu().numpy()``.
	    Anything else goes through ``np.asarray``.
	    """
	    if not isinstance(x, np.ndarray):
	        tensor_like = hasattr(x, "detach") and hasattr(x, "cpu")
	        x = x.detach().cpu().numpy() if tensor_like else np.asarray(x)
	    return x


	def _ensure_output_dir(output_dir: str | Path) -> Path:
	    """Return ``output_dir`` as a resolved path, creating the directory if needed.

	    ``~`` is expanded before resolving; missing parent directories are created
	    and an already-existing directory is accepted.
	    """
	    target = Path(output_dir).expanduser()
	    target = target.resolve()
	    target.mkdir(exist_ok=True, parents=True)
	    return target


	def _subplot_grid(n: int, max_cols: int = 5) -> Tuple[int, int]:
	    """Return ``(nrows, ncols)`` for a grid holding ``n`` panels.

	    The grid is at most ``max_cols`` wide and has just enough rows for ``n``.

	    Raises:
	        ValueError: if ``n`` is not positive.
	    """
	    if n <= 0:
	        raise ValueError("n must be positive")
	    width = min(max_cols, n)
	    height = int(np.ceil(n / width))
	    return height, width


	def plot_training_curves(
	    train_losses: Sequence[float],
	    val_losses: Sequence[float],
	    output_dir: str | Path,
	    best_epoch: Optional[int] = None,
	    filename: str = "training_curves.png",
	) -> Figure:
	    """Plot train vs validation loss per epoch and save the figure as a PNG.

	    Args:
	        train_losses: training loss per epoch.
	        val_losses: validation loss per epoch; must match ``train_losses`` in length.
	        output_dir: directory the PNG is written to (created if missing).
	        best_epoch: 1-based epoch to mark with a vertical line. When omitted,
	            the epoch with the lowest non-NaN validation loss is used.
	        filename: name of the PNG inside ``output_dir``.

	    Returns:
	        The matplotlib ``Figure`` (also saved at 150 DPI).

	    Raises:
	        ValueError: if the two loss sequences differ in length.
	    """
	    out = _ensure_output_dir(output_dir)
	    train_losses = list(train_losses)
	    val_losses = list(val_losses)
	    if len(train_losses) != len(val_losses):
	        raise ValueError("train_losses and val_losses must have the same length")
	    if best_epoch is None and val_losses:
	        val_arr = np.asarray(val_losses, dtype=np.float64)
	        # np.argmin returns the position of the first NaN, which would mark a
	        # diverged epoch as "best"; pick the minimum among non-NaN values and
	        # draw no marker at all when every value is NaN.
	        if not np.all(np.isnan(val_arr)):
	            best_epoch = int(np.nanargmin(val_arr)) + 1
	    epochs = np.arange(1, len(train_losses) + 1)

	    fig, ax = plt.subplots(figsize=(10, 5))
	    ax.plot(epochs, train_losses, label="Train loss", color="#1f77b4", linewidth=2)
	    ax.plot(epochs, val_losses, label="Validation loss", color="#ff7f0e", linewidth=2)
	    # Only draw the marker when the epoch falls inside the plotted range.
	    if best_epoch is not None and 1 <= best_epoch <= len(train_losses):
	        ax.axvline(best_epoch, color="gray", linestyle="--", linewidth=1.5, label=f"Best epoch ({best_epoch})")
	    ax.set_xlabel("Epoch", fontsize=11)
	    ax.set_ylabel("Loss", fontsize=11)
	    ax.set_title("Training & Validation Loss", fontsize=13, fontweight="bold")
	    ax.legend(loc="upper right", fontsize=10)
	    ax.grid(True, alpha=0.35)
	    fig.tight_layout()
	    fig.savefig(out / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_per_class_f1_progression(
	    epoch_metrics_list: Sequence[Mapping[str, float]],
	    label_names: Sequence[str],
	    output_dir: str | Path,
	    filename: str = "per_class_f1_progression.png",
	) -> Figure:
	    """Plot one validation-F1 line per class across epochs and save it as a PNG.

	    ``epoch_metrics_list`` holds one mapping per epoch (``label_name -> f1``).
	    A label missing from an epoch's mapping is plotted as NaN.

	    Raises:
	        ValueError: if ``epoch_metrics_list`` is empty.
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    names = list(label_names)
	    epoch_count = len(epoch_metrics_list)
	    if epoch_count == 0:
	        raise ValueError("epoch_metrics_list is empty")
	    x_epochs = np.arange(1, epoch_count + 1)

	    fig, ax = plt.subplots(figsize=(12, 6))
	    colors = sns.color_palette("husl", n_colors=max(len(names), 1))
	    n_colors = len(colors)
	    for idx, cls in enumerate(names):
	        f1_by_epoch = [float(metrics.get(cls, np.nan)) for metrics in epoch_metrics_list]
	        ax.plot(x_epochs, f1_by_epoch, label=cls, color=colors[idx % n_colors], linewidth=1.8)

	    ax.set_title("Per-Class F1 Score Across Training", fontsize=13, fontweight="bold")
	    ax.set_xlabel("Epoch", fontsize=11)
	    ax.set_ylabel("F1", fontsize=11)
	    # Legend sits outside the axes on the right so it does not cover the lines.
	    ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left", fontsize=8, frameon=True)
	    ax.grid(True, alpha=0.35)
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def _auroc_color(val: float) -> str:
	    """Map an AUROC value to a bar colour.

	    NaN -> grey, below 0.7 -> red, 0.7 to 0.85 inclusive -> amber, above 0.85 -> green.
	    """
	    if np.isnan(val):
	        colour = "#cccccc"
	    elif val < 0.7:
	        colour = "#d62728"
	    elif val <= 0.85:
	        colour = "#ffbf00"
	    else:
	        colour = "#2ca02c"
	    return colour


	def plot_auroc_bars(
	    test_metrics: Mapping[str, Any],
	    label_names: Sequence[str],
	    output_dir: str | Path,
	    filename: str = "test_auroc_bars.png",
	) -> Figure:
	    """Draw a horizontal bar chart of per-class AUROC and save it as a PNG.

	    Values are read from ``test_metrics['per_class'][name]['auroc']``; a missing
	    entry becomes NaN. Rows are ordered by descending AUROC, NaN rows last,
	    with ties broken by label name. Bars are coloured by ``_auroc_color``.
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    per_class = test_metrics.get("per_class", {})

	    def _rank(row: Tuple[str, float]) -> Tuple[int, float, str]:
	        # NaN rows sort after all real values; real values sort descending.
	        label, score = row
	        if np.isnan(score):
	            return (1, 0.0, label)
	        return (0, -score, label)

	    rows: List[Tuple[str, float]] = sorted(
	        ((name, float(per_class.get(name, {}).get("auroc", np.nan))) for name in label_names),
	        key=_rank,
	    )
	    labels = [label for label, _ in rows]
	    values = [score for _, score in rows]

	    fig, ax = plt.subplots(figsize=(10, max(4, 0.45 * len(labels))))
	    y_pos = np.arange(len(labels))
	    bars = ax.barh(
	        y_pos,
	        values,
	        color=[_auroc_color(score) for score in values],
	        edgecolor="white",
	        linewidth=0.5,
	    )
	    ax.set_yticks(y_pos)
	    ax.set_yticklabels(labels, fontsize=10)
	    ax.set_xlabel("AUROC", fontsize=11)
	    ax.set_xlim(0, 1.05)
	    ax.set_title("Test Set AUROC by Subcellular Location", fontsize=13, fontweight="bold")
	    for bar, score in zip(bars, values):
	        y_mid = bar.get_y() + bar.get_height() / 2
	        if np.isnan(score):
	            x_text, caption = 0.02, "nan"
	        else:
	            # Clamp so the caption of a near-1.0 bar stays inside the x-limits.
	            x_text, caption = min(score + 0.02, 1.0), f"{score:.3f}"
	        ax.text(x_text, y_mid, caption, va="center", fontsize=9)
	    ax.grid(True, axis="x", alpha=0.35)
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_confusion_matrices(
	    y_true: Any,
	    y_pred: Any,
	    label_names: Sequence[str],
	    output_dir: str | Path,
	    filename: str = "confusion_matrices.png",
	) -> Figure:
	    """Draw one 2×2 confusion matrix per class (binary multilabel) and save as a PNG.

	    Both inputs are converted with ``_to_numpy`` and cast to int64; column ``i``
	    is used for ``label_names[i]``.

	    Raises:
	        ValueError: if the inputs are not 2D arrays of identical shape.
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    truth = _to_numpy(y_true).astype(np.int64)
	    preds = _to_numpy(y_pred).astype(np.int64)
	    if truth.ndim != 2 or truth.shape != preds.shape:
	        raise ValueError("y_true and y_pred must be 2D arrays of the same shape")
	    n_labels = len(label_names)
	    grid_rows, grid_cols = _subplot_grid(n_labels, max_cols=5)
	    fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(3.2 * grid_cols, 3.0 * grid_rows))
	    panels = np.atleast_1d(axes).ravel()
	    for col, title in enumerate(label_names):
	        panel = panels[col]
	        # labels=[0, 1] keeps the matrix 2×2 even if a class value is absent.
	        counts = confusion_matrix(truth[:, col], preds[:, col], labels=[0, 1])
	        sns.heatmap(
	            counts,
	            annot=True,
	            fmt="d",
	            cmap="Blues",
	            cbar=False,
	            ax=panel,
	            xticklabels=["Pred 0", "Pred 1"],
	            yticklabels=["True 0", "True 1"],
	        )
	        panel.set_title(title, fontsize=10, fontweight="bold")
	    # Hide grid cells that have no class assigned.
	    for spare in panels[n_labels:]:
	        spare.set_visible(False)
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def _cooccurrence_matrix(y_bin: np.ndarray) -> np.ndarray:
	    """Return the matrix product ``y_bin.T @ y_bin`` cast to float64.

	    For a binary ``(n_samples, n_labels)`` input this is the symmetric
	    label co-occurrence count matrix.
	    """
	    counts = np.matmul(y_bin.T, y_bin)
	    return counts.astype(np.float64)


	def plot_label_cooccurrence(
	    y_true: Any,
	    y_pred: Any,
	    label_names: Sequence[str],
	    output_dir: str | Path,
	    filename: str = "label_cooccurrence.png",
	) -> Figure:
	    """Draw ground-truth and predicted label co-occurrence heatmaps side by side.

	    Both panels share one colour scale (0 to the larger of the two maxima,
	    never below 1.0), so they can be compared directly.

	    Raises:
	        ValueError: if ``y_true`` and ``y_pred`` differ in shape.
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    truth = _to_numpy(y_true).astype(np.int64)
	    preds = _to_numpy(y_pred).astype(np.int64)
	    if truth.shape != preds.shape:
	        raise ValueError("y_true and y_pred must have the same shape")
	    c_true = _cooccurrence_matrix(truth)
	    c_pred = _cooccurrence_matrix(preds)
	    vmax = max(c_true.max(), c_pred.max(), 1.0)

	    fig, (left_ax, right_ax) = plt.subplots(1, 2, figsize=(14, 6))
	    panels = (
	        (left_ax, c_true, "Ground truth co-occurrence"),
	        (right_ax, c_pred, "Predicted co-occurrence"),
	    )
	    for panel, counts, title in panels:
	        sns.heatmap(
	            counts,
	            xticklabels=label_names,
	            yticklabels=label_names,
	            annot=False,
	            fmt=".0f",
	            cmap="YlOrRd",
	            ax=panel,
	            vmin=0,
	            vmax=vmax,
	            square=True,
	        )
	        panel.set_title(title, fontsize=12, fontweight="bold")
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_probability_distributions(
	    y_true: Any,
	    y_pred_proba: Any,
	    label_names: Sequence[str],
	    output_dir: str | Path,
	    filename: str = "probability_distributions.png",
	) -> Figure:
	    """Draw per-class density histograms of predicted probability and save as a PNG.

	    For each class, samples with ``y_true > 0.5`` are the positives (blue) and
	    the rest are negatives (red). Only the first panel carries a legend.
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    truth = _to_numpy(y_true).astype(np.float64)
	    proba = _to_numpy(y_pred_proba).astype(np.float64)
	    n_labels = len(label_names)
	    grid_rows, grid_cols = _subplot_grid(n_labels, max_cols=5)
	    fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(3.2 * grid_cols, 2.8 * grid_rows))
	    panels = np.atleast_1d(axes).ravel()
	    for col, title in enumerate(label_names):
	        panel = panels[col]
	        is_positive = truth[:, col] > 0.5
	        negatives = proba[~is_positive, col]
	        positives = proba[is_positive, col]
	        # Negatives are drawn first so the positive histogram overlays them.
	        for sample, colour, tag in (
	            (negatives, "#d62728", "Negative"),
	            (positives, "#1f77b4", "Positive"),
	        ):
	            panel.hist(sample, bins=20, alpha=0.65, color=colour, label=tag, density=True)
	        panel.set_title(title, fontsize=9, fontweight="bold")
	        panel.set_xlim(0, 1)
	        panel.tick_params(labelsize=8)
	        if col == 0:
	            panel.legend(fontsize=7, loc="upper right")
	    # Hide grid cells that have no class assigned.
	    for spare in panels[n_labels:]:
	        spare.set_visible(False)
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_threshold_analysis(
	    y_true: Any,
	    y_pred_proba: Any,
	    label_names: Sequence[str],
	    output_dir: str | Path,
	    filename: str = "threshold_analysis.png",
	    thresholds: np.ndarray | None = None,
	) -> Figure:
	    """Plot per-class F1 against the decision threshold and save as a PNG.

	    A sample is predicted positive when its probability is ``>=`` the threshold.
	    The first threshold reaching the maximum F1 is marked with a red dot.
	    ``thresholds`` defaults to 99 evenly spaced values from 0.01 to 0.99.
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    truth = _to_numpy(y_true).astype(np.int64)
	    proba = _to_numpy(y_pred_proba).astype(np.float64)
	    grid = np.linspace(0.01, 0.99, 99) if thresholds is None else thresholds
	    n_labels = len(label_names)
	    grid_rows, grid_cols = _subplot_grid(n_labels, max_cols=5)
	    fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(3.2 * grid_cols, 2.6 * grid_rows))
	    panels = np.atleast_1d(axes).ravel()
	    for col, title in enumerate(label_names):
	        panel = panels[col]
	        scores = np.asarray(
	            [
	                f1_score(truth[:, col], (proba[:, col] >= cut).astype(np.int64), zero_division=0)
	                for cut in grid
	            ]
	        )
	        peak = int(np.argmax(scores))
	        panel.plot(grid, scores, color="#1f77b4", linewidth=1.5)
	        panel.scatter([float(grid[peak])], [float(scores[peak])], color="red", s=40, zorder=5)
	        panel.set_title(title, fontsize=9, fontweight="bold")
	        panel.set_xlabel("Threshold", fontsize=8)
	        panel.set_ylabel("F1", fontsize=8)
	        panel.tick_params(labelsize=7)
	        panel.grid(True, alpha=0.35)
	    # Hide grid cells that have no class assigned.
	    for spare in panels[n_labels:]:
	        spare.set_visible(False)
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def generate_all_plots(
	    *,
	    output_dir: str | Path,
	    train_losses: Sequence[float],
	    val_losses: Sequence[float],
	    epoch_metrics_list: Sequence[Mapping[str, float]],
	    label_names: Sequence[str],
	    test_metrics: Mapping[str, Any],
	    y_true: Any,
	    y_pred: Any,
	    y_pred_proba: Any,
	    best_epoch: Optional[int] = None,
	    mlflow_log: bool = True,
	) -> Dict[str, Figure]:
	    """Generate all single-run analysis plots and optionally log the PNGs to MLflow.

	    Args:
	        output_dir: directory the PNG files are written to.
	        train_losses, val_losses: per-epoch losses for the training-curve plot.
	        epoch_metrics_list: one dict per epoch mapping ``label_name -> validation F1``.
	        label_names: class names, in column order of the prediction arrays.
	        test_metrics: mapping read by ``plot_auroc_bars``.
	        y_true, y_pred, y_pred_proba: test labels, binary predictions and probabilities.
	        best_epoch: optional 1-based epoch passed to ``plot_training_curves``.
	        mlflow_log: when true, and ``mlflow`` is importable and a run is active,
	            each PNG is logged under the ``plots`` artifact path.

	    Returns:
	        Dict of figure key -> matplotlib ``Figure``.

	    MLflow logging is best-effort: a failure is logged as a warning and does
	    not prevent the figures from being returned.
	    """
	    out = _ensure_output_dir(output_dir)
	    figures: Dict[str, Figure] = {}

	    figures["training_curves"] = plot_training_curves(
	        train_losses, val_losses, out, best_epoch=best_epoch
	    )
	    figures["per_class_f1"] = plot_per_class_f1_progression(epoch_metrics_list, label_names, out)
	    figures["auroc_bars"] = plot_auroc_bars(test_metrics, label_names, out)
	    figures["confusion"] = plot_confusion_matrices(y_true, y_pred, label_names, out)
	    figures["cooccurrence"] = plot_label_cooccurrence(y_true, y_pred, label_names, out)
	    figures["proba_hist"] = plot_probability_distributions(y_true, y_pred_proba, label_names, out)
	    figures["threshold"] = plot_threshold_analysis(y_true, y_pred_proba, label_names, out)

	    if mlflow_log and mlflow is not None:
	        import logging  # local import keeps this block self-contained

	        # Default filenames of the plot functions called above.
	        artifact_files = (
	            "training_curves.png",
	            "per_class_f1_progression.png",
	            "test_auroc_bars.png",
	            "confusion_matrices.png",
	            "label_cooccurrence.png",
	            "probability_distributions.png",
	            "threshold_analysis.png",
	        )
	        try:
	            if mlflow.active_run() is not None:
	                for fname in artifact_files:
	                    p = out / fname
	                    if p.is_file():
	                        mlflow.log_artifact(str(p), artifact_path="plots")
	        except Exception:
	            # Artifact logging must never break plot generation, but a silent
	            # failure is hard to diagnose, so record it with the traceback.
	            logging.getLogger(__name__).warning(
	                "Failed to log plots to MLflow; continuing without artifacts.",
	                exc_info=True,
	            )

	    return figures


	def plot_training_curves_comparison(
	    train_losses_a: Sequence[float],
	    val_losses_a: Sequence[float],
	    train_losses_b: Sequence[float],
	    val_losses_b: Sequence[float],
	    label_a: str,
	    label_b: str,
	    output_dir: str | Path,
	    best_epoch_a: Optional[int] = None,
	    best_epoch_b: Optional[int] = None,
	    filename: str = "compare_training_val_loss.png",
	) -> Figure:
	    """Overlay train/validation loss curves of two runs and save as a PNG.

	    Run A is drawn solid in blue tones, run B dashed in red tones. A dotted
	    vertical line marks each run's best epoch when one is given and lies
	    within that run's validation-loss range (1-based).
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    fig, ax = plt.subplots(figsize=(11, 5))

	    runs = (
	        (train_losses_a, val_losses_a, label_a, "#1f77b4", "#aec7e8", "-"),
	        (train_losses_b, val_losses_b, label_b, "#d62728", "#ff9896", "--"),
	    )
	    for train, val, tag, train_colour, val_colour, dash in runs:
	        # Both curves of a run share the x-axis derived from its train losses.
	        xs = np.arange(1, len(train) + 1)
	        ax.plot(xs, train, label=f"Train ({tag})", color=train_colour, linewidth=2, linestyle=dash)
	        ax.plot(xs, val, label=f"Val ({tag})", color=val_colour, linewidth=2, linestyle=dash)

	    markers = (
	        (best_epoch_a, val_losses_a, "#1f77b4"),
	        (best_epoch_b, val_losses_b, "#d62728"),
	    )
	    for best, val, colour in markers:
	        if best is not None and 1 <= best <= len(val):
	            ax.axvline(best, color=colour, linestyle=":", linewidth=1.2, alpha=0.8)

	    ax.set_title("Training & Validation Loss — comparison", fontsize=13, fontweight="bold")
	    ax.set_xlabel("Epoch", fontsize=11)
	    ax.set_ylabel("Loss", fontsize=11)
	    ax.legend(loc="upper right", fontsize=9)
	    ax.grid(True, alpha=0.35)
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_val_macro_f1_comparison(
	    macro_f1_a: Sequence[float],
	    macro_f1_b: Sequence[float],
	    label_a: str,
	    label_b: str,
	    output_dir: str | Path,
	    filename: str = "compare_val_macro_f1.png",
	) -> Figure:
	    """Plot validation macro F1 per epoch for two runs on one axis and save as a PNG.

	    Each series gets its own 1-based epoch axis, so the runs may differ in length.
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    fig, ax = plt.subplots(figsize=(10, 4.5))
	    epochs_a = np.arange(1, len(macro_f1_a) + 1)
	    ax.plot(epochs_a, macro_f1_a, label=label_a, color="#1f77b4", linewidth=2)
	    epochs_b = np.arange(1, len(macro_f1_b) + 1)
	    ax.plot(epochs_b, macro_f1_b, label=label_b, color="#d62728", linewidth=2, linestyle="--")
	    ax.set_title("Validation macro F1 — comparison", fontsize=13, fontweight="bold")
	    ax.set_xlabel("Epoch", fontsize=11)
	    ax.set_ylabel("Validation macro F1", fontsize=11)
	    ax.legend(loc="lower right", fontsize=10)
	    ax.grid(True, alpha=0.35)
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_per_class_f1_panels(
	    epoch_metrics_a: Sequence[Mapping[str, float]],
	    epoch_metrics_b: Sequence[Mapping[str, float]],
	    label_names: Sequence[str],
	    title_a: str,
	    title_b: str,
	    output_dir: str | Path,
	    filename: str = "compare_per_class_f1_panels.png",
	) -> Figure:
	    """Draw per-class F1 vs epoch for two runs in side-by-side panels.

	    The panels share the y-axis and use the same colour per class. One legend,
	    built from the left panel, is placed to the right of the figure.
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    fig, (left_ax, right_ax) = plt.subplots(1, 2, figsize=(16, 6), sharey=True)
	    colours = sns.color_palette("husl", n_colors=max(len(label_names), 1))
	    n_colours = len(colours)
	    sides = ((left_ax, epoch_metrics_a), (right_ax, epoch_metrics_b))
	    for idx, cls in enumerate(label_names):
	        colour = colours[idx % n_colours]
	        for panel, history in sides:
	            # A class missing from an epoch's mapping is plotted as NaN.
	            series = [float(history[e].get(cls, np.nan)) for e in range(len(history))]
	            panel.plot(np.arange(1, len(history) + 1), series, label=cls, color=colour, linewidth=1.5)
	    for panel, heading in ((left_ax, title_a), (right_ax, title_b)):
	        panel.set_title(heading, fontsize=12, fontweight="bold")
	        panel.set_xlabel("Epoch")
	        panel.grid(True, alpha=0.35)
	    left_ax.set_ylabel("F1")
	    handles, texts = left_ax.get_legend_handles_labels()
	    fig.legend(handles, texts, loc="center left", bbox_to_anchor=(1.02, 0.5), fontsize=7, title="Class")
	    fig.suptitle("Per-class validation F1 — comparison", fontsize=14, fontweight="bold")
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_auroc_comparison_bars(
	    test_metrics_a: Mapping[str, Any],
	    test_metrics_b: Mapping[str, Any],
	    label_names: Sequence[str],
	    label_a: str,
	    label_b: str,
	    output_dir: str | Path,
	    filename: str = "compare_test_auroc.png",
	) -> Figure:
	    """Draw grouped horizontal bars of per-class AUROC for two runs and save as a PNG.

	    Values are read from ``metrics['per_class'][name]['auroc']``; a missing
	    entry becomes NaN. Classes keep the order given in ``label_names``.
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    names = list(label_names)

	    def _scores(metrics: Mapping[str, Any]) -> List[float]:
	        per_class = metrics.get("per_class", {})
	        return [float(per_class.get(cls, {}).get("auroc", np.nan)) for cls in names]

	    scores_a = _scores(test_metrics_a)
	    scores_b = _scores(test_metrics_b)
	    positions = np.arange(len(names))
	    bar_height = 0.35

	    fig, ax = plt.subplots(figsize=(10, max(5, 0.42 * len(names))))
	    # The two runs are offset half a bar height either side of each tick.
	    ax.barh(positions - bar_height / 2, scores_a, bar_height, label=label_a, color="#1f77b4", alpha=0.85)
	    ax.barh(positions + bar_height / 2, scores_b, bar_height, label=label_b, color="#d62728", alpha=0.85)
	    ax.set_yticks(positions)
	    ax.set_yticklabels(names, fontsize=9)
	    ax.set_xlim(0, 1.05)
	    ax.set_xlabel("AUROC", fontsize=11)
	    ax.set_title("Test AUROC by class — comparison", fontsize=13, fontweight="bold")
	    ax.legend(loc="lower right")
	    ax.grid(True, axis="x", alpha=0.35)
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_confusion_matrices_two_runs(
	    y_true_a: Any,
	    y_pred_a: Any,
	    y_true_b: Any,
	    y_pred_b: Any,
	    label_names: Sequence[str],
	    row_title_a: str,
	    row_title_b: str,
	    output_dir: str | Path,
	    filename: str = "compare_confusion_matrices.png",
	) -> Figure:
	    """Draw per-class 2×2 confusion matrices for two runs and save as a PNG.

	    The figure stacks two grids of ``nrows × ncols`` panels: run A in the top
	    half and run B in the bottom half, with the same class at the same
	    position in each half. Inputs are converted with ``_to_numpy`` and cast
	    to int64; column ``i`` is used for ``label_names[i]``.

	    Args:
	        y_true_a, y_pred_a: labels and binary predictions of run A.
	        y_true_b, y_pred_b: labels and binary predictions of run B.
	        label_names: class names in column order.
	        row_title_a, row_title_b: run names appended to each panel title.
	        output_dir: directory the PNG is written to.
	        filename: name of the PNG inside ``output_dir``.

	    Returns:
	        The matplotlib ``Figure``.
	    """
	    out = _ensure_output_dir(output_dir)
	    runs = [
	        (_to_numpy(y_true_a).astype(np.int64), _to_numpy(y_pred_a).astype(np.int64), row_title_a),
	        (_to_numpy(y_true_b).astype(np.int64), _to_numpy(y_pred_b).astype(np.int64), row_title_b),
	    ]
	    n = len(label_names)
	    nrows, ncols = _subplot_grid(n, max_cols=5)
	    # squeeze=False always yields a 2D axes array. With one label, the default
	    # squeeze returns shape (2,), which np.atleast_2d turned into (1, 2), so
	    # indexing the second run's row raised IndexError.
	    fig, axes_arr = plt.subplots(
	        2 * nrows, ncols, figsize=(3.2 * ncols, 3.0 * 2 * nrows), squeeze=False
	    )
	    for i, name in enumerate(label_names):
	        r0, c0 = divmod(i, ncols)
	        for ver, (yt, yp, rtitle) in enumerate(runs):
	            ax = axes_arr[ver * nrows + r0, c0]
	            # labels=[0, 1] keeps the matrix 2×2 even if a class value is absent.
	            cm = confusion_matrix(yt[:, i], yp[:, i], labels=[0, 1])
	            sns.heatmap(
	                cm,
	                annot=True,
	                fmt="d",
	                cmap="Blues",
	                cbar=False,
	                ax=ax,
	                xticklabels=["P0", "P1"],
	                yticklabels=["T0", "T1"],
	            )
	            ax.set_title(f"{name}\n{rtitle}", fontsize=8, fontweight="bold")
	    # Hide the trailing grid cells (in both halves) that have no class assigned.
	    for j in range(n, nrows * ncols):
	        r0, c0 = divmod(j, ncols)
	        for ver in range(2):
	            axes_arr[ver * nrows + r0, c0].set_visible(False)
	    fig.tight_layout()
	    fig.savefig(out / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_label_cooccurrence_four(
	    y_true_a: Any,
	    y_pred_a: Any,
	    y_true_b: Any,
	    y_pred_b: Any,
	    label_names: Sequence[str],
	    titles: Sequence[str],
	    output_dir: str | Path,
	    filename: str = "compare_label_cooccurrence.png",
	) -> Figure:
	    """Draw a 2×2 grid of co-occurrence heatmaps: ground truth / predictions per run.

	    Row 0 is run A and row 1 is run B; column 0 is ground truth and column 1
	    predictions. ``titles`` supplies the four panel titles in row-major order.
	    All panels share one colour scale (0 to the overall maximum, never below 1.0).
	    """
	    save_dir = _ensure_output_dir(output_dir)
	    pairs = []
	    for truth, preds in ((y_true_a, y_pred_a), (y_true_b, y_pred_b)):
	        truth_arr = _to_numpy(truth).astype(np.int64)
	        preds_arr = _to_numpy(preds).astype(np.int64)
	        pairs.append((_cooccurrence_matrix(truth_arr), _cooccurrence_matrix(preds_arr)))
	    vmax = max([1.0] + [float(mat.max()) for pair in pairs for mat in pair])
	    fig, axes = plt.subplots(2, 2, figsize=(14, 12))
	    panel_titles = [titles[k] for k in range(4)]
	    for row, pair in enumerate(pairs):
	        for col, counts in enumerate(pair):
	            panel = axes[row, col]
	            sns.heatmap(
	                counts,
	                xticklabels=label_names,
	                yticklabels=label_names,
	                cmap="YlOrRd",
	                ax=panel,
	                vmin=0,
	                vmax=vmax,
	                square=True,
	            )
	            panel.set_title(panel_titles[2 * row + col], fontsize=11, fontweight="bold")
	    fig.tight_layout()
	    fig.savefig(save_dir / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_probability_distributions_two_runs(
	    y_true_a: Any,
	    y_proba_a: Any,
	    y_true_b: Any,
	    y_proba_b: Any,
	    label_names: Sequence[str],
	    title_a: str,
	    title_b: str,
	    output_dir: str | Path,
	    filename: str = "compare_probability_distributions.png",
	) -> Figure:
	    """Draw per-class probability histograms (negatives vs positives) for two runs.

	    The figure stacks two grids of ``nrows × ncols`` panels: run A in the top
	    half and run B in the bottom half. Samples with ``y_true > 0.5`` are the
	    positives (blue); the rest are negatives (red).

	    Args:
	        y_true_a, y_proba_a: labels and predicted probabilities of run A.
	        y_true_b, y_proba_b: labels and predicted probabilities of run B.
	        label_names: class names in column order.
	        title_a, title_b: run names used in the figure title.
	        output_dir: directory the PNG is written to.
	        filename: name of the PNG inside ``output_dir``.

	    Returns:
	        The matplotlib ``Figure``.
	    """
	    out = _ensure_output_dir(output_dir)
	    n = len(label_names)
	    nrows, ncols = _subplot_grid(n, max_cols=5)
	    # squeeze=False always yields a 2D axes array. With one label, the default
	    # squeeze returns shape (2,), which np.atleast_2d turned into (1, 2), so
	    # indexing the second run's row raised IndexError.
	    fig, axes_arr = plt.subplots(
	        2 * nrows, ncols, figsize=(3.2 * ncols, 2.6 * 2 * nrows), squeeze=False
	    )

	    def _row(yt: Any, yp: Any, ver: int) -> None:
	        # Fill the half of the grid selected by ``ver`` (0 = top, 1 = bottom).
	        yt = _to_numpy(yt).astype(np.float64)
	        yp = _to_numpy(yp).astype(np.float64)
	        for i, name in enumerate(label_names):
	            r0, c0 = divmod(i, ncols)
	            ax = axes_arr[ver * nrows + r0, c0]
	            pos_mask = yt[:, i] > 0.5
	            neg_mask = ~pos_mask
	            ax.hist(yp[neg_mask, i], bins=20, alpha=0.6, color="#d62728", density=True, label="Neg")
	            ax.hist(yp[pos_mask, i], bins=20, alpha=0.6, color="#1f77b4", density=True, label="Pos")
	            ax.set_title(f"{name}", fontsize=8, fontweight="bold")
	            ax.set_xlim(0, 1)

	    _row(y_true_a, y_proba_a, 0)
	    _row(y_true_b, y_proba_b, 1)
	    # Hide the trailing grid cells (in both halves) that have no class assigned.
	    for j in range(n, nrows * ncols):
	        r0, c0 = divmod(j, ncols)
	        for ver in range(2):
	            axes_arr[ver * nrows + r0, c0].set_visible(False)
	    fig.suptitle(f"Predicted probability distributions — {title_a} (top) vs {title_b} (bottom)", fontsize=12)
	    fig.tight_layout()
	    fig.savefig(out / filename, dpi=150, bbox_inches="tight")
	    return fig


	def plot_threshold_analysis_two_runs(
	    y_true_a: Any,
	    y_proba_a: Any,
	    y_true_b: Any,
	    y_proba_b: Any,
	    label_names: Sequence[str],
	    title_a: str,
	    title_b: str,
	    output_dir: str | Path,
	    filename: str = "compare_threshold_analysis.png",
	) -> Figure:
	    """Plot per-class F1 against the decision threshold for two runs.

	    The figure stacks two grids of ``nrows × ncols`` panels: run A in the top
	    half and run B in the bottom half. Thresholds are 99 evenly spaced values
	    from 0.01 to 0.99; a sample is predicted positive when its probability is
	    ``>=`` the threshold. The first threshold reaching the maximum F1 is
	    marked with a red dot.

	    Args:
	        y_true_a, y_proba_a: labels and predicted probabilities of run A.
	        y_true_b, y_proba_b: labels and predicted probabilities of run B.
	        label_names: class names in column order.
	        title_a, title_b: run names used in the figure title.
	        output_dir: directory the PNG is written to.
	        filename: name of the PNG inside ``output_dir``.

	    Returns:
	        The matplotlib ``Figure``.
	    """
	    out = _ensure_output_dir(output_dir)
	    thresholds = np.linspace(0.01, 0.99, 99)
	    n = len(label_names)
	    nrows, ncols = _subplot_grid(n, max_cols=5)
	    # squeeze=False always yields a 2D axes array. With one label, the default
	    # squeeze returns shape (2,), which np.atleast_2d turned into (1, 2), so
	    # indexing the second run's row raised IndexError.
	    fig, axes_arr = plt.subplots(
	        2 * nrows, ncols, figsize=(3.2 * ncols, 2.4 * 2 * nrows), squeeze=False
	    )

	    def _fill(yt: Any, yp: Any, ver: int) -> None:
	        # Fill the half of the grid selected by ``ver`` (0 = top, 1 = bottom).
	        yt = _to_numpy(yt).astype(np.int64)
	        yp = _to_numpy(yp).astype(np.float64)
	        for i, name in enumerate(label_names):
	            r0, c0 = divmod(i, ncols)
	            ax = axes_arr[ver * nrows + r0, c0]
	            f1s = [
	                f1_score(yt[:, i], (yp[:, i] >= thr).astype(np.int64), zero_division=0)
	                for thr in thresholds
	            ]
	            ax.plot(thresholds, f1s, color="#1f77b4", linewidth=1.2)
	            bi = int(np.argmax(f1s))
	            ax.scatter([thresholds[bi]], [f1s[bi]], color="red", s=25, zorder=5)
	            ax.set_title(f"{name}", fontsize=7, fontweight="bold")
	            ax.set_xlabel("Thr", fontsize=7)
	            ax.set_ylabel("F1", fontsize=7)

	    _fill(y_true_a, y_proba_a, 0)
	    _fill(y_true_b, y_proba_b, 1)
	    # Hide the trailing grid cells (in both halves) that have no class assigned.
	    for j in range(n, nrows * ncols):
	        r0, c0 = divmod(j, ncols)
	        for ver in range(2):
	            axes_arr[ver * nrows + r0, c0].set_visible(False)
	    fig.suptitle(f"F1 vs threshold — {title_a} (top) vs {title_b} (bottom)", fontsize=12)
	    fig.tight_layout()
	    fig.savefig(out / filename, dpi=150, bbox_inches="tight")
	    return fig


	def _load_json(path: Path) -> Any:
	    """Read the UTF-8 file at ``path`` and return its parsed JSON content."""
	    with path.open(encoding="utf-8") as handle:
	        return json.load(handle)


	def generate_comparison_plots(
	    *,
	    v1_artifact_dir: Union[str, Path],
	    v2_artifact_dir: Union[str, Path],
	    label_names: Sequence[str],
	    output_dir: Union[str, Path],
	    label_v1: str = "baseline_v1",
	    label_v2: str = "regularized_v2",
	    mlflow_log: bool = True,
	) -> Dict[str, Figure]:
	    """Load two run artifact folders and write the comparison figures.

	    Each folder must contain ``train_history.json``, ``final_test_metrics.json``
	    and ``test_predictions.npz`` (arrays ``y_true``, ``y_pred_binary``,
	    ``y_pred_proba``).

	    Args:
	        v1_artifact_dir, v2_artifact_dir: artifact folders of the two runs.
	        label_names: class names in column order of the prediction arrays.
	        output_dir: directory the PNG files are written to.
	        label_v1, label_v2: display names of the two runs.
	        mlflow_log: when true, and ``mlflow`` is importable and a run is active,
	            each PNG is logged under the ``comparison_plots`` artifact path.

	    Returns:
	        Dict of figure key -> matplotlib ``Figure``.

	    MLflow logging is best-effort: a failure is logged as a warning and does
	    not prevent the figures from being returned.
	    """
	    d1 = Path(v1_artifact_dir).expanduser().resolve()
	    d2 = Path(v2_artifact_dir).expanduser().resolve()
	    out = _ensure_output_dir(output_dir)

	    h1 = _load_json(d1 / "train_history.json")
	    h2 = _load_json(d2 / "train_history.json")
	    m1 = _load_json(d1 / "final_test_metrics.json")
	    m2 = _load_json(d2 / "final_test_metrics.json")
	    # np.load on an .npz returns a lazy NpzFile that holds the archive open;
	    # read the arrays inside a context manager so the file handles are released.
	    with np.load(d1 / "test_predictions.npz") as z1, np.load(d2 / "test_predictions.npz") as z2:
	        yt1, yb1, yp1 = z1["y_true"], z1["y_pred_binary"], z1["y_pred_proba"]
	        yt2, yb2, yp2 = z2["y_true"], z2["y_pred_binary"], z2["y_pred_proba"]

	    def _valid_epoch(raw: Any) -> Optional[int]:
	        # Only a stored best epoch >= 1 is usable as a 1-based marker.
	        if raw is None:
	            return None
	        epoch = int(raw)
	        return epoch if epoch >= 1 else None

	    be1i = _valid_epoch(h1.get("best_epoch"))
	    be2i = _valid_epoch(h2.get("best_epoch"))

	    figures: Dict[str, Figure] = {}
	    figures["compare_loss"] = plot_training_curves_comparison(
	        h1.get("train_loss", []),
	        h1.get("val_loss", []),
	        h2.get("train_loss", []),
	        h2.get("val_loss", []),
	        label_v1,
	        label_v2,
	        out,
	        best_epoch_a=be1i,
	        best_epoch_b=be2i,
	    )
	    figures["compare_macro_f1"] = plot_val_macro_f1_comparison(
	        h1.get("val_macro_f1", []),
	        h2.get("val_macro_f1", []),
	        label_v1,
	        label_v2,
	        out,
	    )
	    figures["compare_per_class_f1"] = plot_per_class_f1_panels(
	        h1.get("val_per_class_f1", []),
	        h2.get("val_per_class_f1", []),
	        label_names,
	        label_v1,
	        label_v2,
	        out,
	    )
	    figures["compare_auroc"] = plot_auroc_comparison_bars(m1, m2, label_names, label_v1, label_v2, out)
	    figures["compare_confusion"] = plot_confusion_matrices_two_runs(
	        yt1, yb1, yt2, yb2, label_names, label_v1, label_v2, out
	    )
	    figures["compare_cooc"] = plot_label_cooccurrence_four(
	        yt1,
	        yb1,
	        yt2,
	        yb2,
	        label_names,
	        (f"{label_v1} GT", f"{label_v1} pred", f"{label_v2} GT", f"{label_v2} pred"),
	        out,
	    )
	    figures["compare_proba"] = plot_probability_distributions_two_runs(
	        yt1, yp1, yt2, yp2, label_names, label_v1, label_v2, out
	    )
	    figures["compare_thr"] = plot_threshold_analysis_two_runs(
	        yt1, yp1, yt2, yp2, label_names, label_v1, label_v2, out
	    )

	    if mlflow_log and mlflow is not None:
	        import logging  # local import keeps this block self-contained

	        # Default filenames of the comparison plot functions called above.
	        artifact_files = (
	            "compare_training_val_loss.png",
	            "compare_val_macro_f1.png",
	            "compare_per_class_f1_panels.png",
	            "compare_test_auroc.png",
	            "compare_confusion_matrices.png",
	            "compare_label_cooccurrence.png",
	            "compare_probability_distributions.png",
	            "compare_threshold_analysis.png",
	        )
	        try:
	            if mlflow.active_run() is not None:
	                for fname in artifact_files:
	                    p = out / fname
	                    if p.is_file():
	                        mlflow.log_artifact(str(p), artifact_path="comparison_plots")
	        except Exception:
	            # Artifact logging must never break plot generation, but a silent
	            # failure is hard to diagnose, so record it with the traceback.
	            logging.getLogger(__name__).warning(
	                "Failed to log comparison plots to MLflow; continuing without artifacts.",
	                exc_info=True,
	            )

	    return figures