learning-munsell / learning_munsell /analysis /hue_density_plot.py
KelSolaar's picture
Initial commit.
3c7db92
# /// script
# dependencies = ["numpy", "matplotlib"]
# ///
"""
Plot training data density in Munsell hue x value space.
Each cell shows the sample count for a (hue_bin, value_bin) combination,
aggregated across all codes and chromas. Hue on the x-axis (0-10),
value on the y-axis (binned over 0-10, as labelled on the plot).
Usage:
uv run learning_munsell/analysis/hue_density_plot.py
uv run learning_munsell/analysis/hue_density_plot.py --data data/training_data.npz
"""
import argparse
from pathlib import Path

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
def main(
    data_path: str = "data/training_data_large.npz",
    output_path: str = "docs/training_data_large_hue_density.png",
) -> None:
    """
    Plot a hue x value density heatmap of the training targets.

    The ``y_train`` array stored in the ``.npz`` archive is read, its
    column 0 is taken as hue and column 1 as value, and both are binned on
    a 0-10 grid with a 0.25 step. Cell counts are drawn with a logarithmic
    colour scale and the figure is written to *output_path*.

    Parameters
    ----------
    data_path
        Path to a ``.npz`` archive containing a ``y_train`` array.
    output_path
        Destination image file. Missing parent directories are created.

    Raises
    ------
    ValueError
        If no sample falls inside the 0-10 hue/value bins, in which case
        there is nothing to draw on a logarithmic scale.
    """
    # ``np.load`` on an archive returns a lazily-read ``NpzFile`` that holds
    # the file open; the context manager releases the handle once the array
    # has been pulled into memory.
    with np.load(data_path) as data:
        y_train = data["y_train"]

    hue = y_train[:, 0]
    value = y_train[:, 1]

    hue_bins = np.linspace(0, 10, 41)  # 0.25 step
    value_bins = np.linspace(0, 10, 41)  # 0.25 step

    hist, _, _ = np.histogram2d(hue, value, bins=[hue_bins, value_bins])

    # ``LogNorm(vmin=1, vmax=hist.max())`` needs at least one populated
    # cell; fail early with an explicit message instead of an obscure
    # normalisation error raised while rendering.
    if not hist.any():
        raise ValueError(
            f"No samples in {data_path!r} fall inside the 0-10 hue/value bins."
        )

    # Empty cells are masked with NaN so they are left blank; the log
    # scaling itself is applied by ``LogNorm`` below, which reveals
    # structure across the full dynamic range of the counts.
    masked_counts = np.where(hist == 0, np.nan, hist)

    hue_below_1 = hue < 1
    hue_below_2 = hue < 2

    fig, ax = plt.subplots(figsize=(14, 5))
    try:
        im = ax.pcolormesh(
            hue_bins,
            value_bins,
            # histogram2d indexes as [hue, value]; pcolormesh expects
            # rows along the y-axis, hence the transpose.
            masked_counts.T,
            cmap="inferno",
            shading="flat",
            norm=mcolors.LogNorm(vmin=1, vmax=hist.max()),
        )
        ax.set_xlabel("Hue (0-10)")
        ax.set_ylabel("Value (0-10)")
        ax.set_title(
            "Training Data Density in Munsell Space "
            f"({len(y_train):,} samples, all codes, log scale)\n"
            f"hue < 1: {hue_below_1.sum():,} "
            f"({hue_below_1.mean() * 100:.1f}%) | "
            f"hue < 2: {hue_below_2.sum():,} "
            f"({hue_below_2.mean() * 100:.1f}%)"
        )
        fig.colorbar(im, ax=ax, label="Sample count")

        destination = Path(output_path)
        # ``savefig`` does not create directories on its own.
        destination.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(destination, dpi=150, bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
if __name__ == "__main__":
    # Command-line entry point: the dataset location is the only option.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--data",
        default="data/training_data_large.npz",
        help="Path to training data .npz file",
    )
    cli_args = cli.parse_args()
    main(cli_args.data)