FOXES / analysis /spatial_performance.py

refactor configuration files and update paths

0affdc2 9 days ago

11.4 kB

	"""
	Flux-Weighted Error Heatmap on Solar Disk
	==========================================
	For each matched flux map, accumulates:
	mae_sum[i,j] += flux[i,j] * \|log10 error\|
	bias_sum[i,j] += flux[i,j] * log10 error
	weight[i,j] += flux[i,j]

	Then normalizes to get flux-weighted mean error per patch.

	Usage
	-----
	python analysis/spatial_performance.py

	Outputs
	-------
	analysis/flux_weighted_errors_t0.npz — accumulation cache
	analysis/performance_heatmap_all.png
	"""

	import os
	import sys
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from concurrent.futures import ProcessPoolExecutor, as_completed

	from matplotlib.colors import LogNorm
	from tqdm import tqdm
	from pathlib import Path
	from cmap import Colormap

	PROJECT_ROOT = Path(__file__).parent.parent
	sys.path.insert(0, str(PROJECT_ROOT))
	from forecasting.inference.evaluation import setup_barlow_font

	# ---------------------------------------------------------------------------
	# Paths — override via CLI args or environment variables
	# ---------------------------------------------------------------------------
	FLUX_DIR = os.environ.get("FOXES_FLUX_DIR", "")
	PREDICTIONS_CSV = os.environ.get("FOXES_PREDICTIONS_CSV", "")
	OUT_DIR = Path(__file__).parent
	GRID_SIZE = 64 # 512px / 8px patch size
	BIN_SIZE = 1 # downsample factor (1 = full 64×64 resolution)
	CROP_FACTOR = 1.1 # AIA images cropped at 1.1 solar radii
	SOLAR_RADIUS_PATCHES = (GRID_SIZE / 2) / CROP_FACTOR # ≈ 29.1 patches

	# Patches beyond ±PATCH_CROP_RADIUS from center (in original 64×64 patch units) are masked.
	PATCH_CROP_RADIUS = 24

	# Percentile cap for colorbar scaling (applied to non-NaN values).
	# e.g. 99 clips the top 1% of values so detail in the bulk is visible.
	VMAX_PERCENTILE = 99

	# ---------------------------------------------------------------------------
	# Helpers
	# ---------------------------------------------------------------------------

	def normalize_ts(series: pd.Series) -> pd.Series:
	return pd.to_datetime(
	series.astype(str).str.replace("_", ":", regex=False), utc=False,
	).dt.floor("s")


	def _ts_key(fpath: str) -> str:
	raw = os.path.basename(fpath).replace('.npy', '').replace('_', ':')
	return pd.Timestamp(raw).floor('s').isoformat()


	def load_predictions(predictions_csv: str) -> pd.DataFrame:
	df = pd.read_csv(predictions_csv)
	df["timestamp"] = normalize_ts(df["timestamp"])
	df["log_pred"] = np.log10(df["predictions"])
	df["log_gt"] = np.log10(df["groundtruth"])
	df["log_error"] = df["log_pred"] - df["log_gt"]
	df["log_abs_error"] = df["log_error"].abs()
	print(f"Loaded {len(df)} predictions")
	return df


	# ---------------------------------------------------------------------------
	# Heatmap accumulation
	# ---------------------------------------------------------------------------

	# NOTE: module-level for ProcessPoolExecutor (spawn on macOS)
	def _accumulate_flux_map(args):
	fpath, log_abs_error, log_error, bin_size = args
	fmap = np.load(fpath).astype(np.float64)

	active = fmap[fmap > 0]
	if active.size == 0:
	return None

	fmap = np.where(fmap > 0, fmap, 0.0)

	# Spatially bin before normalization — sum preserves relative log-flux within each bin
	if bin_size > 1:
	h, w = fmap.shape
	bh, bw = h // bin_size, w // bin_size
	fmap = fmap[:bh * bin_size, :bw * bin_size].reshape(bh, bin_size, bw, bin_size).sum(axis=(1, 3))

	total = fmap.sum()
	if total == 0:
	return None
	fmap = fmap / total # normalise: each timestamp contributes equal total weight
	return fmap * log_abs_error, fmap * log_error, fmap


	def _crop_mask(shape: tuple, bin_size: int, radius: int = PATCH_CROP_RADIUS) -> np.ndarray:
	"""True for patches within ±radius original-grid patches from center (y-axis only)."""
	n = shape[0]
	r_binned = radius / bin_size
	cy = (n - 1) / 2
	y = np.ogrid[:n, :n][0]
	return np.abs(y - cy) <= r_binned

	def compute_flux_weighted_errors(flux_dir: str, df: pd.DataFrame, cache_path: Path,
	bin_size: int = BIN_SIZE) -> dict:
	cache_path = cache_path.with_stem(f"{cache_path.stem}_b{bin_size}")

	if cache_path.exists():
	print(f"Loading cached flux-weighted error maps from {cache_path}")
	data = np.load(cache_path)
	n = float(data['count'])
	w = data['flux_distribution']
	mask = _crop_mask(w.shape, bin_size)
	mae = np.where(mask, data['mae_sum'] / w, np.nan) if n > 0 else np.full_like(w, np.nan)
	bias = np.where(mask, data['bias_sum'] / w, np.nan) if n > 0 else np.full_like(w, np.nan)
	return mae, bias, w

	lookup = {}
	for _, row in df.iterrows():
	key = pd.Timestamp(row['timestamp']).floor('s').isoformat()
	lookup[key] = (float(row['log_abs_error']), float(row['log_error']))

	binned_grid = GRID_SIZE // bin_size
	shape = (binned_grid, binned_grid)
	mae_sum = np.zeros(shape)
	bias_sum = np.zeros(shape)
	flux_distribution = np.zeros(shape)
	count = 0

	files = sorted([os.path.join(flux_dir, f)
	for f in os.listdir(flux_dir) if f.endswith('.npy')])
	args_list = []
	for fpath in files:
	try:
	ts_key = _ts_key(fpath)
	except Exception:
	continue
	if ts_key not in lookup:
	continue
	abs_err, err = lookup[ts_key]
	args_list.append((fpath, abs_err, err, bin_size))

	print(f"Matched {len(args_list)} / {len(files)} flux maps")

	with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
	futures = {executor.submit(_accumulate_flux_map, a): i
	for i, a in enumerate(args_list)}
	for future in tqdm(as_completed(futures), total=len(args_list),
	desc="Accumulating flux-weighted errors"):
	result = future.result()
	if result is None:
	continue
	mae_c, bias_c, flux_c = result
	mae_sum += mae_c
	bias_sum += bias_c
	flux_distribution += flux_c
	count += 1

	np.savez(cache_path, mae_sum=mae_sum, bias_sum=bias_sum,
	flux_distribution=flux_distribution, count=np.array(count))
	print(f"Saved → {cache_path}")

	mask = _crop_mask(shape, bin_size)
	mae = np.where(mask, mae_sum / flux_distribution, np.nan) if count > 0 else np.full(shape, np.nan)
	bias = np.where(mask, bias_sum / flux_distribution, np.nan) if count > 0 else np.full(shape, np.nan)
	return mae, bias, flux_distribution


	# ---------------------------------------------------------------------------
	# Plot
	# ---------------------------------------------------------------------------

	def _bin_grid(grid: np.ndarray, bin_size: int) -> np.ndarray:
	if bin_size == 1:
	return grid
	h, w = grid.shape
	bh, bw = h // bin_size, w // bin_size
	cropped = grid[:bh * bin_size, :bw * bin_size]
	return np.nanmean(cropped.reshape(bh, bin_size, bw, bin_size), axis=(1, 3))


	def plot_flux_weighted_heatmap(mae_grid: np.ndarray, bias_grid: np.ndarray,
	weight_grid: np.ndarray, out_path: Path,
	subtitle: str = "", bin_size: int = BIN_SIZE,
	vmax_pct: int = VMAX_PERCENTILE):
	setup_barlow_font()
	text_color = "#111111"
	theta = np.linspace(0, 2 * np.pi, 300)

	# Grids are already pre-binned during accumulation — use directly
	mae_b = mae_grid
	bias_b = bias_grid
	n_bins = mae_b.shape[0]
	cy, cx = n_bins / 2, n_bins / 2

	# Solar limb radius in binned-patch units
	r_limb = SOLAR_RADIUS_PATCHES / bin_size

	mae_vmax = np.nanpercentile(mae_b, vmax_pct)
	mae_norm = plt.Normalize(vmin=0, vmax=mae_vmax)

	bias_cap = np.nanpercentile(np.abs(bias_b), vmax_pct)
	bias_norm = plt.Normalize(vmin=-bias_cap, vmax=bias_cap)

	panels = [
	(mae_b, r"Normalized Flux-Weighted MAE", Colormap('cmocean:thermal').to_mpl(), mae_norm),
	(bias_b, r"Normalized Flux-Weighted MBE", Colormap('cmasher:fusion_r').to_mpl(), bias_norm),
	# (np.log10(np.where(weight_b > 0, weight_b, np.nan)),
	# r"log$_{10}$ Accumulated Flux", "viridis", None),
	]

	fig, axes = plt.subplots(1, 2, figsize=(10, 5))
	fig.patch.set_facecolor("white")

	for ax, (grid, title, cmap, norm) in zip(axes, panels):
	im = ax.imshow(grid, origin="lower", cmap=cmap, norm=norm,
	interpolation="bicubic", extent=[0, n_bins, 0, n_bins])
	cbar = fig.colorbar(im, ax=ax, shrink=0.82,norm=LogNorm(vmin=0, vmax=mae_vmax),)
	cbar.ax.tick_params(labelsize=9, colors=text_color)
	def _fmt(x, _):
	m, e = f"{x:.2e}".split("e")
	return f"{m}e{int(e)}"
	cbar.ax.yaxis.set_major_formatter(plt.matplotlib.ticker.FuncFormatter(_fmt))
	for lbl in cbar.ax.get_yticklabels():
	lbl.set_fontfamily("Barlow")
	lbl.set_fontsize(9)
	lbl.set_color(text_color)
	#cbar.set_label(title, fontsize=9, color=text_color, fontfamily="Barlow")

	ax.plot(cx + r_limb * np.cos(theta), cy + r_limb * np.sin(theta),
	color="#4488FF", linestyle="--", linewidth=1.2, alpha=0.8,
	label=f"Solar Limb")

	tick_bins = np.linspace(0, n_bins, 7)
	tick_labels = [f"{int((t - n_bins / 2) * bin_size)}" for t in tick_bins]
	ax.set_xticks(tick_bins); ax.set_xticklabels(tick_labels)
	ax.set_yticks(tick_bins); ax.set_yticklabels(tick_labels)

	ax.set_title(title, fontsize=10, color=text_color, fontfamily="Barlow",)
	ax.set_xlabel("Solar X (ViT Patches From Center)", fontsize=9,
	color=text_color, fontfamily="Barlow")
	ax.set_ylabel("Solar Y (ViT Patches From Center)", fontsize=9,
	color=text_color, fontfamily="Barlow")
	ax.tick_params(labelsize=8, colors=text_color)
	ax.legend(fontsize=7, facecolor="white", edgecolor="grey", loc="upper right",)
	for spine in ax.spines.values():
	spine.set_color(text_color)

	plt.tight_layout()
	plt.savefig(out_path, dpi=400, bbox_inches="tight", facecolor="white")
	plt.show()
	print(f"Saved → {out_path}")


	# ---------------------------------------------------------------------------
	# Main
	# ---------------------------------------------------------------------------

	if __name__ == "__main__":
	import argparse
	parser = argparse.ArgumentParser()
	parser.add_argument("--flux_dir", default=FLUX_DIR)
	parser.add_argument("--predictions_csv", default=PREDICTIONS_CSV)
	parser.add_argument("--out_dir", default=str(OUT_DIR))
	args = parser.parse_args()

	out = Path(args.out_dir)
	out.mkdir(parents=True, exist_ok=True)

	df = load_predictions(args.predictions_csv)

	mae, bias, weight = compute_flux_weighted_errors(
	args.flux_dir, df, out / "flux_weighted_errors.npz"
	)
	plot_flux_weighted_heatmap(mae, bias, weight,
	out / "performance_heatmap_all.png",
	subtitle="All flares")