| |
| |
| |
| """ |
| Plot training data density in Munsell hue x value space. |
| |
| Each cell shows the sample count for a (hue_bin, value_bin) combination, |
| aggregated across all codes and chromas. Hue on the x-axis (0-10), |
| value on the y-axis (axis spans 0-10; values are expected to lie in 1-9). |
| |
| Usage: |
| uv run learning_munsell/analysis/hue_density_plot.py |
| uv run learning_munsell/analysis/hue_density_plot.py --data data/training_data.npz |
| """ |
|
|
| import argparse |
|
|
| import matplotlib.colors as mcolors |
| import matplotlib.pyplot as plt |
| import numpy as np |
|
|
|
|
def _low_hue_summary(hue: np.ndarray, threshold: int) -> str:
    """Return the ``hue < threshold`` count and percentage shown in the title."""
    below = hue < threshold
    # The mean of an empty array is NaN (with a RuntimeWarning); report 0%.
    share = below.mean() * 100 if below.size else 0.0
    return f"hue < {threshold}: {below.sum():,} ({share:.1f}%)"


def main(
    data_path: str = "data/training_data_large.npz",
    output: str = "docs/training_data_large_hue_density.png",
) -> None:
    """Plot hue x value density heatmap.

    Loads ``y_train`` from the ``.npz`` archive at *data_path*, bins column 0
    (hue) and column 1 (value) into a 40 x 40 grid spanning 0-10 on both
    axes, and saves a log-scaled heatmap of the per-cell sample counts.
    Samples outside the 0-10 range fall outside the grid and are not counted
    in the heatmap (they still count towards the totals in the title).

    Args:
        data_path: Path to an ``.npz`` file containing a ``y_train`` array.
        output: Path of the image to write. Missing parent directories are
            created.
    """
    from pathlib import Path

    # NpzFile keeps the archive open until closed; the context manager
    # releases the file handle as soon as the array has been read.
    with np.load(data_path) as data:
        y_train = data["y_train"]

    hue = y_train[:, 0]
    value = y_train[:, 1]

    # 41 edges -> 40 bins of width 0.25 on each axis.
    hue_bins = np.linspace(0, 10, 41)
    value_bins = np.linspace(0, 10, 41)

    hist, _, _ = np.histogram2d(hue, value, bins=[hue_bins, value_bins])

    # Empty cells become NaN so they are left unpainted instead of being
    # pushed through the log norm (log of 0 is undefined).
    density = np.where(hist > 0, hist, np.nan)

    # Keep vmax >= vmin even when no sample landed inside the grid;
    # LogNorm rejects vmin > vmax.
    peak = max(float(hist.max()), 1.0)

    title = (
        "Training Data Density in Munsell Space "
        f"({len(y_train):,} samples, all codes, log scale)\n"
        f"{_low_hue_summary(hue, 1)} | {_low_hue_summary(hue, 2)}"
    )

    fig, ax = plt.subplots(figsize=(14, 5))
    try:
        # histogram2d indexes as [hue_bin, value_bin]; pcolormesh expects
        # rows to run along y, hence the transpose.
        im = ax.pcolormesh(
            hue_bins,
            value_bins,
            density.T,
            cmap="inferno",
            shading="flat",
            norm=mcolors.LogNorm(vmin=1, vmax=peak),
        )
        ax.set_xlabel("Hue (0-10)")
        ax.set_ylabel("Value (0-10)")
        ax.set_title(title)
        fig.colorbar(im, ax=ax, label="Sample count")

        Path(output).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(output, dpi=150, bbox_inches="tight")
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: read the optional --data path from the command
    # line and render the density plot for that dataset.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--data",
        help="Path to training data .npz file",
        default="data/training_data_large.npz",
    )
    options = cli.parse_args()
    main(options.data)
|
|