# /// script
# dependencies = ["numpy", "matplotlib"]
# ///
"""
Plot training data density in Munsell hue x value space.

Each cell shows the sample count for a (hue_bin, value_bin) combination,
aggregated across all codes and chromas. Hue is on the x-axis and value on
the y-axis; both axes are binned over 0-10 in 0.25-wide steps.

Usage:
    uv run learning_munsell/analysis/hue_density_plot.py
    uv run learning_munsell/analysis/hue_density_plot.py --data data/training_data.npz
    uv run learning_munsell/analysis/hue_density_plot.py --output docs/density.png
"""

import argparse
from pathlib import Path

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np

DEFAULT_DATA_PATH = "data/training_data_large.npz"
DEFAULT_OUTPUT_PATH = "docs/training_data_large_hue_density.png"


def main(
    data_path: str = DEFAULT_DATA_PATH,
    output: str = DEFAULT_OUTPUT_PATH,
) -> None:
    """Plot a hue x value density heatmap and save it as a PNG.

    Args:
        data_path: Path to an ``.npz`` archive containing a ``y_train`` array.
            Column 0 is read as hue and column 1 as value.
        output: Path the figure is written to. Missing parent directories
            are created.

    Raises:
        FileNotFoundError: If ``data_path`` does not exist.
        KeyError: If the archive has no ``y_train`` entry.
    """
    # NpzFile keeps the underlying file open until closed; the context
    # manager releases the handle as soon as the array has been read.
    with np.load(data_path) as data:
        y_train = data["y_train"]

    hue = y_train[:, 0]
    value = y_train[:, 1]

    hue_bins = np.linspace(0, 10, 41)  # 40 bins, 0.25 step
    value_bins = np.linspace(0, 10, 41)

    hist, _, _ = np.histogram2d(hue, value, bins=[hue_bins, value_bins])

    # Empty cells become NaN so they are left unpainted instead of being
    # passed through the log normalisation (log of 0 is undefined).
    hist_log = np.where(hist == 0, np.nan, hist)

    # Log scale to reveal structure across the full dynamic range.
    # vmax is clamped to vmin so the norm stays valid even when no sample
    # falls inside the binned range (hist.max() == 0).
    norm = mcolors.LogNorm(vmin=1, vmax=max(hist.max(), 1))

    fig, ax = plt.subplots(figsize=(14, 5))
    try:
        im = ax.pcolormesh(
            hue_bins,
            value_bins,
            hist_log.T,  # pcolormesh expects rows along y, columns along x
            cmap="inferno",
            shading="flat",
            norm=norm,
        )
        ax.set_xlabel("Hue (0-10)")
        ax.set_ylabel("Value (0-10)")
        ax.set_title(
            "Training Data Density in Munsell Space "
            f"({len(y_train):,} samples, all codes, log scale)\n"
            f"hue < 1: {(hue < 1).sum():,} "
            f"({(hue < 1).mean() * 100:.1f}%) | "
            f"hue < 2: {(hue < 2).sum():,} "
            f"({(hue < 2).mean() * 100:.1f}%)"
        )
        fig.colorbar(im, ax=ax, label="Sample count")

        output_path = Path(output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data",
        default=DEFAULT_DATA_PATH,
        help="Path to training data .npz file",
    )
    parser.add_argument(
        "--output",
        default=DEFAULT_OUTPUT_PATH,
        help="Path of the PNG file to write",
    )
    args = parser.parse_args()
    main(args.data, args.output)