"""K-means color quantization in the color space produced by ``rgb_to_lab``."""

from sklearn.cluster import KMeans

from .color_space import rgb_to_lab, lab_to_rgb

# Inputs with at least this many colors use the cheaper "large data" settings.
LARGE_DATASET_THRESHOLD = 10000

# Number of K-means initializations (``n_init``) per dataset size.
KMEANS_INIT_ATTEMPTS_FOR_LARGE_DATA = 3
KMEANS_INIT_ATTEMPTS_FOR_SMALL_DATA = 10

# Iteration cap (``max_iter``) per dataset size.
KMEANS_MAX_ITERATIONS_LARGE_DATA = 100
KMEANS_MAX_ITERATIONS_SMALL_DATA = 300

# Fixed seed passed to KMeans as ``random_state``.
RANDOM_SEED = 42


def quantize_colors(colors, target_color_count=256):
    """Reduce ``colors`` to a palette of at most ``target_color_count`` entries.

    The input is converted with ``rgb_to_lab``, clustered with scikit-learn's
    ``KMeans``, and the resulting cluster centers are converted back with
    ``lab_to_rgb``.

    Args:
        colors: Sized collection of colors accepted by ``rgb_to_lab``
            (presumably an ``(n, 3)`` RGB array -- TODO confirm against
            ``color_space``).
        target_color_count: Requested palette size. Fewer clusters are used
            when the input holds fewer colors than requested.

    Returns:
        The result of ``lab_to_rgb`` applied to the fitted cluster centers.

    Raises:
        ValueError: If ``colors`` is empty, or if ``target_color_count`` is
            less than 1.
    """
    # ``len`` rather than truthiness: ``colors`` may be an array type whose
    # truth value is ambiguous.
    if len(colors) == 0:
        raise ValueError("No colors to quantize")
    # Fail early with a clear message instead of an opaque parameter error
    # raised from inside KMeans.
    if target_color_count < 1:
        raise ValueError(
            f"target_color_count must be at least 1, got {target_color_count}"
        )

    perceptual_colors = rgb_to_lab(colors)
    total_input_colors = len(perceptual_colors)
    kmeans_config = determine_kmeans_parameters(total_input_colors)

    # K-means cannot use more clusters than there are samples.
    actual_clusters = min(target_color_count, total_input_colors)

    # Report the cluster count actually used, which may be below the request.
    print(
        f"Quantizing {total_input_colors} colors to {actual_clusters} palette entries..."
    )

    cluster_model = KMeans(
        n_clusters=actual_clusters,
        n_init=kmeans_config["init_attempts"],
        max_iter=kmeans_config["max_iterations"],
        random_state=RANDOM_SEED,
    )
    cluster_model.fit(perceptual_colors)

    palette_centers_in_lab = cluster_model.cluster_centers_
    palette_in_rgb = lab_to_rgb(palette_centers_in_lab)

    print(f"Created palette with {len(palette_in_rgb)} unique colors")
    return palette_in_rgb


def determine_kmeans_parameters(sample_count):
    """Pick K-means settings based on the number of samples.

    Args:
        sample_count: Number of samples that will be clustered.

    Returns:
        A dict with keys ``"init_attempts"`` and ``"max_iterations"``. The
        large-data values are used when ``sample_count`` is at least
        ``LARGE_DATASET_THRESHOLD``; otherwise the small-data values.
    """
    if sample_count >= LARGE_DATASET_THRESHOLD:
        return {
            "init_attempts": KMEANS_INIT_ATTEMPTS_FOR_LARGE_DATA,
            "max_iterations": KMEANS_MAX_ITERATIONS_LARGE_DATA,
        }
    return {
        "init_attempts": KMEANS_INIT_ATTEMPTS_FOR_SMALL_DATA,
        "max_iterations": KMEANS_MAX_ITERATIONS_SMALL_DATA,
    }