riazmo Claude Opus 4.6 committed on
Commit
ed82870
·
1 Parent(s): db8862f

fix: color classifier — strict palette mode, max 4 per hue, no .2/.3 suffixes

Browse files

- Fix hue boundaries: blue ≤240°, purple starts >240° (was 260°); purple/pink boundary moved to 295° (was 290°)
- Add hex validation: reject malformed hex values like #faceb
- New shade distribution: evenly-spaced slots (1→[500], 2→[300,700], etc.)
- Cap palette to 4 colors per hue family, drop excess; overflow in other categories is now dropped instead of being demoted to palette
- Aggressive dedup for palette colors: merge within the same hue family at RGB distance < 50 (other categories stay at < 30)
- Remove .2/.3 collision suffix for palette tokens — root cause fixed instead (non-palette names keep a numeric-suffix collision guard as a rare fallback)
- Target output: 15-25 clean tokens

All 113 tests pass.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. core/color_classifier.py +206 -84
  2. core/color_utils.py +2 -2
core/color_classifier.py CHANGED
@@ -204,9 +204,21 @@ def classify_colors(
204
  # STEP 1: Build flat color list with metadata
205
  # =========================================================================
206
  raw_colors = []
 
207
  for name, c in colors_dict.items():
208
  hex_val = c.value if hasattr(c, 'value') else c.get('value', '')
209
  hex_val = normalize_hex(hex_val)
 
 
 
 
 
 
 
 
 
 
 
210
  freq = c.frequency if hasattr(c, 'frequency') else c.get('frequency', 0)
211
  css_props = c.css_properties if hasattr(c, 'css_properties') else c.get('css_properties', [])
212
  elements = c.elements if hasattr(c, 'elements') else c.get('elements', [])
@@ -226,7 +238,7 @@ def classify_colors(
226
  "hue_family": categorize_color(hex_val),
227
  })
228
 
229
- log(f"Input: {len(raw_colors)} unique colors")
230
 
231
  # =========================================================================
232
  # STEP 2: Classify each color by CSS evidence
@@ -364,7 +376,10 @@ def _classify_single_color(c: dict) -> str:
364
  def _aggressive_dedup(colors: list[dict], log) -> list[dict]:
365
  """
366
  Aggressively merge similar colors WITHIN the same category.
367
- Threshold: RGB distance < 30 for same-category colors.
 
 
 
368
  """
369
  # Group by category
370
  by_category = {}
@@ -382,49 +397,68 @@ def _aggressive_dedup(colors: list[dict], log) -> list[dict]:
382
  result.extend(cat_colors)
383
  continue
384
 
385
- # Sort by frequency (most used first — these survive merges)
386
- cat_colors.sort(key=lambda x: -x["frequency"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
- merged = []
389
- used = set()
390
 
391
- for i, c1 in enumerate(cat_colors):
392
- if i in used:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
  continue
 
 
 
 
394
 
395
- group = [c1]
396
- for j, c2 in enumerate(cat_colors[i+1:], i+1):
397
- if j in used:
398
- continue
399
- dist = _rgb_distance(c1["hex"], c2["hex"])
400
- if dist < 30:
401
- group.append(c2)
402
- used.add(j)
403
-
404
- # Merge into the highest-frequency color
405
- primary = group[0]
406
- merged_hexes = []
407
- for other in group[1:]:
408
- primary["frequency"] += other["frequency"]
409
- primary["css_properties"] = list(set(primary["css_properties"] + other["css_properties"]))
410
- primary["elements"] = list(set(primary["elements"] + other["elements"]))
411
- primary["contexts"] = list(set(primary["contexts"] + other["contexts"]))
412
- merged_hexes.append(other["hex"])
413
-
414
- primary["merged_from"] = merged_hexes
415
- merged.append(primary)
416
- used.add(i)
417
-
418
- if merged_hexes:
419
- total_merged += len(merged_hexes)
420
- log(f"[DEDUP] {cat}: {primary['hex']} absorbed {merged_hexes} (dist<30)")
421
-
422
- result.extend(merged)
423
 
424
- if total_merged > 0:
425
- log(f"[DEDUP] Total: {total_merged} near-duplicate colors merged")
 
426
 
427
- return result
 
 
 
 
428
 
429
 
430
  # =============================================================================
@@ -438,14 +472,18 @@ CATEGORY_CAPS = {
438
  "bg": 3, # primary, secondary, tertiary
439
  "border": 3, # light, default, dark
440
  "feedback": 4, # error, warning, success, info
441
- "palette": 20, # generous cap for remaining
442
  }
443
 
 
 
 
444
 
445
  def _cap_per_category(colors: list[dict], log) -> list[dict]:
446
  """
447
- Limit colors per category. Excess become palette colors.
448
- Within each category, keep by frequency (most used survives).
 
449
  """
450
  by_category = {}
451
  for c in colors:
@@ -457,21 +495,31 @@ def _cap_per_category(colors: list[dict], log) -> list[dict]:
457
  result = []
458
 
459
  for cat, cat_colors in by_category.items():
460
- cap = CATEGORY_CAPS.get(cat, 10)
461
  cat_colors.sort(key=lambda x: -x["frequency"])
462
 
463
- kept = cat_colors[:cap]
464
- overflow = cat_colors[cap:]
465
-
466
- result.extend(kept)
467
-
468
- # Overflow colors become palette
469
- for c in overflow:
470
- old_cat = c["category"]
471
- c["category"] = "palette"
472
- log(f"[CAP] {c['hex']} demoted: {old_cat} → palette (category full, freq={c['frequency']})")
473
-
474
- result.extend(overflow)
 
 
 
 
 
 
 
 
 
 
 
475
 
476
  return result
477
 
@@ -483,6 +531,10 @@ def _cap_per_category(colors: list[dict], log) -> list[dict]:
483
  def _assign_names(colors: list[dict], convention: str, log) -> list[ClassifiedColor]:
484
  """
485
  Assign final token names based on chosen convention.
 
 
 
 
486
  """
487
  conv = CONVENTIONS.get(convention, CONVENTIONS["semantic"])
488
  prefix = conv["prefix"]
@@ -507,28 +559,16 @@ def _assign_names(colors: list[dict], convention: str, log) -> list[ClassifiedCo
507
  # Sort by frequency for consistent ordering
508
  cat_colors.sort(key=lambda x: -x["frequency"])
509
 
510
- for idx, c in enumerate(cat_colors):
511
- name_cat = cat # Local var don't override loop variable
 
 
512
 
 
513
  if cat == "feedback":
514
  role = _assign_feedback_role(c, idx, by_category.get("feedback", []))
515
- elif cat == "palette":
516
- # Palette: use hue family + numeric shade (ALWAYS)
517
- name_cat = c["hue_family"] # Override with hue family
518
- parsed = parse_color(c["hex"])
519
- if parsed:
520
- role = _lightness_to_shade(parsed.hsl[2])
521
- else:
522
- role = "500"
523
- elif convention == "semantic":
524
- # Semantic: use role names (primary, secondary, muted, etc.)
525
- role_names = ROLE_SHADE_NAMES.get(c["category"], ["primary", "secondary", "tertiary"])
526
- if idx < len(role_names):
527
- role = role_names[idx]
528
- else:
529
- role = f"{idx + 1}"
530
  else:
531
- # Tailwind/Material: even role colors get descriptive names
532
  role_names = ROLE_SHADE_NAMES.get(c["category"], ["primary", "secondary", "tertiary"])
533
  if idx < len(role_names):
534
  role = role_names[idx]
@@ -537,21 +577,20 @@ def _assign_names(colors: list[dict], convention: str, log) -> list[ClassifiedCo
537
 
538
  # Build token name
539
  if convention == "tailwind":
540
- token_name = f"{name_cat}{sep}{role}"
541
  else:
542
- token_name = f"{prefix}{name_cat}{sep}{role}"
543
-
544
- # Handle name collisions
545
- base_name = token_name
546
- suffix = 2
547
- while token_name in used_names:
548
- token_name = f"{base_name}{sep}{suffix}"
549
- suffix += 1
 
550
  used_names.add(token_name)
551
 
552
- # Build evidence
553
  evidence = _build_evidence(c)
554
-
555
  log(f"[NAME] {c['hex']} → {token_name} ({c['category']}, freq={c['frequency']})")
556
 
557
  result.append(ClassifiedColor(
@@ -574,6 +613,89 @@ def _assign_names(colors: list[dict], convention: str, log) -> list[ClassifiedCo
574
  return result
575
 
576
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577
  def _assign_feedback_role(c: dict, idx: int, all_feedback: list) -> str:
578
  """Assign feedback role by hue matching."""
579
  hue = c["hue"]
 
204
  # STEP 1: Build flat color list with metadata
205
  # =========================================================================
206
  raw_colors = []
207
+ skipped_invalid = 0
208
  for name, c in colors_dict.items():
209
  hex_val = c.value if hasattr(c, 'value') else c.get('value', '')
210
  hex_val = normalize_hex(hex_val)
211
+
212
+ # Validate hex: must be exactly #RRGGBB (7 chars) or #RGB (4 chars)
213
+ if not hex_val or len(hex_val) not in (4, 7) or not hex_val.startswith('#'):
214
+ skipped_invalid += 1
215
+ continue
216
+ # Verify all chars after # are hex digits
217
+ hex_digits = hex_val[1:]
218
+ if not all(ch in '0123456789abcdefABCDEF' for ch in hex_digits):
219
+ skipped_invalid += 1
220
+ continue
221
+
222
  freq = c.frequency if hasattr(c, 'frequency') else c.get('frequency', 0)
223
  css_props = c.css_properties if hasattr(c, 'css_properties') else c.get('css_properties', [])
224
  elements = c.elements if hasattr(c, 'elements') else c.get('elements', [])
 
238
  "hue_family": categorize_color(hex_val),
239
  })
240
 
241
+ log(f"Input: {len(raw_colors)} unique colors" + (f" ({skipped_invalid} invalid hex values skipped)" if skipped_invalid else ""))
242
 
243
  # =========================================================================
244
  # STEP 2: Classify each color by CSS evidence
 
376
  def _aggressive_dedup(colors: list[dict], log) -> list[dict]:
377
  """
378
  Aggressively merge similar colors WITHIN the same category.
379
+
380
+ Thresholds:
381
+ - Semantic categories (brand, text, bg, border, feedback): RGB distance < 30
382
+ - Palette: RGB distance < 50 within same hue family (more aggressive)
383
  """
384
  # Group by category
385
  by_category = {}
 
397
  result.extend(cat_colors)
398
  continue
399
 
400
+ if cat == "palette":
401
+ # For palette: dedup within each hue family with higher threshold
402
+ by_hue = {}
403
+ for c in cat_colors:
404
+ hf = c["hue_family"]
405
+ if hf not in by_hue:
406
+ by_hue[hf] = []
407
+ by_hue[hf].append(c)
408
+
409
+ for hue_fam, hue_colors in by_hue.items():
410
+ merged_hue, merged_count = _dedup_group(hue_colors, threshold=50, label=f"palette/{hue_fam}", log=log)
411
+ result.extend(merged_hue)
412
+ total_merged += merged_count
413
+ else:
414
+ merged_cat, merged_count = _dedup_group(cat_colors, threshold=30, label=cat, log=log)
415
+ result.extend(merged_cat)
416
+ total_merged += merged_count
417
 
418
+ if total_merged > 0:
419
+ log(f"[DEDUP] Total: {total_merged} near-duplicate colors merged")
420
 
421
+ return result
422
+
423
+
424
+ def _dedup_group(colors: list[dict], threshold: float, label: str, log) -> tuple[list[dict], int]:
425
+ """Dedup a group of colors with given RGB distance threshold."""
426
+ colors.sort(key=lambda x: -x["frequency"])
427
+ merged = []
428
+ used = set()
429
+ merged_count = 0
430
+
431
+ for i, c1 in enumerate(colors):
432
+ if i in used:
433
+ continue
434
+
435
+ group = [c1]
436
+ for j, c2 in enumerate(colors[i+1:], i+1):
437
+ if j in used:
438
  continue
439
+ dist = _rgb_distance(c1["hex"], c2["hex"])
440
+ if dist < threshold:
441
+ group.append(c2)
442
+ used.add(j)
443
 
444
+ primary = group[0]
445
+ merged_hexes = []
446
+ for other in group[1:]:
447
+ primary["frequency"] += other["frequency"]
448
+ primary["css_properties"] = list(set(primary["css_properties"] + other["css_properties"]))
449
+ primary["elements"] = list(set(primary["elements"] + other["elements"]))
450
+ primary["contexts"] = list(set(primary["contexts"] + other["contexts"]))
451
+ merged_hexes.append(other["hex"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
 
453
+ primary["merged_from"] = merged_hexes
454
+ merged.append(primary)
455
+ used.add(i)
456
 
457
+ if merged_hexes:
458
+ merged_count += len(merged_hexes)
459
+ log(f"[DEDUP] {label}: {primary['hex']} absorbed {len(merged_hexes)} similar (dist<{threshold})")
460
+
461
+ return merged, merged_count
462
 
463
 
464
  # =============================================================================
 
472
  "bg": 3, # primary, secondary, tertiary
473
  "border": 3, # light, default, dark
474
  "feedback": 4, # error, warning, success, info
475
+ "palette": 999, # palette cap is enforced per-hue-family below
476
  }
477
 
478
+ # Maximum palette colors PER hue family (e.g., max 4 blues, max 4 reds)
479
+ PALETTE_PER_HUE_CAP = 4
480
+
481
 
482
  def _cap_per_category(colors: list[dict], log) -> list[dict]:
483
  """
484
+ Limit colors per category. Excess get dropped (not demoted).
485
+ For palette: enforce a per-hue-family cap (max 4 per hue).
486
+ Within each group, keep highest-frequency colors.
487
  """
488
  by_category = {}
489
  for c in colors:
 
495
  result = []
496
 
497
  for cat, cat_colors in by_category.items():
 
498
  cat_colors.sort(key=lambda x: -x["frequency"])
499
 
500
+ if cat == "palette":
501
+ # Enforce per-hue-family cap
502
+ by_hue = {}
503
+ for c in cat_colors:
504
+ hf = c["hue_family"]
505
+ if hf not in by_hue:
506
+ by_hue[hf] = []
507
+ by_hue[hf].append(c)
508
+
509
+ for hue_fam, hue_colors in by_hue.items():
510
+ hue_colors.sort(key=lambda x: -x["frequency"])
511
+ kept = hue_colors[:PALETTE_PER_HUE_CAP]
512
+ dropped = hue_colors[PALETTE_PER_HUE_CAP:]
513
+ result.extend(kept)
514
+ if dropped:
515
+ log(f"[CAP] {hue_fam}: kept top {len(kept)}, dropped {len(dropped)} low-freq palette colors")
516
+ else:
517
+ cap = CATEGORY_CAPS.get(cat, 3)
518
+ kept = cat_colors[:cap]
519
+ dropped = cat_colors[cap:]
520
+ result.extend(kept)
521
+ if dropped:
522
+ log(f"[CAP] {cat}: kept {len(kept)}, dropped {len(dropped)} overflow colors")
523
 
524
  return result
525
 
 
531
  def _assign_names(colors: list[dict], convention: str, log) -> list[ClassifiedColor]:
532
  """
533
  Assign final token names based on chosen convention.
534
+
535
+ For palette colors: distributes across unique shade slots per hue family
536
+ (no .2/.3 suffixes). If 4 blues exist, they get shades spread across the
537
+ full 50-900 range based on relative lightness ordering.
538
  """
539
  conv = CONVENTIONS.get(convention, CONVENTIONS["semantic"])
540
  prefix = conv["prefix"]
 
559
  # Sort by frequency for consistent ordering
560
  cat_colors.sort(key=lambda x: -x["frequency"])
561
 
562
+ if cat == "palette":
563
+ # PALETTE: Group by hue family, then distribute across shade slots
564
+ result.extend(_assign_palette_names(cat_colors, convention, prefix, sep, used_names, log))
565
+ continue
566
 
567
+ for idx, c in enumerate(cat_colors):
568
  if cat == "feedback":
569
  role = _assign_feedback_role(c, idx, by_category.get("feedback", []))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  else:
571
+ # Semantic / Tailwind / Material: use role names
572
  role_names = ROLE_SHADE_NAMES.get(c["category"], ["primary", "secondary", "tertiary"])
573
  if idx < len(role_names):
574
  role = role_names[idx]
 
577
 
578
  # Build token name
579
  if convention == "tailwind":
580
+ token_name = f"{cat}{sep}{role}"
581
  else:
582
+ token_name = f"{prefix}{cat}{sep}{role}"
583
+
584
+ # Collision guard (should be rare for non-palette)
585
+ if token_name in used_names:
586
+ base_name = token_name
587
+ suffix = 2
588
+ while token_name in used_names:
589
+ token_name = f"{base_name}{sep}{suffix}"
590
+ suffix += 1
591
  used_names.add(token_name)
592
 
 
593
  evidence = _build_evidence(c)
 
594
  log(f"[NAME] {c['hex']} → {token_name} ({c['category']}, freq={c['frequency']})")
595
 
596
  result.append(ClassifiedColor(
 
613
  return result
614
 
615
 
616
+ # Shade slots ordered by lightness (lightest first)
617
+ _SHADE_SLOTS = ["50", "100", "200", "300", "400", "500", "600", "700", "800", "900"]
618
+
619
+
620
+ def _assign_palette_names(
621
+ palette_colors: list[dict],
622
+ convention: str,
623
+ prefix: str,
624
+ sep: str,
625
+ used_names: set,
626
+ log,
627
+ ) -> list[ClassifiedColor]:
628
+ """
629
+ Assign palette names by hue family with unique shade per color.
630
+
631
+ For N colors in a hue family, picks N evenly-spaced shade slots
632
+ sorted by lightness (lightest color → lightest shade).
633
+ No .2/.3 suffixes ever.
634
+ """
635
+ # Group by hue family
636
+ by_hue = {}
637
+ for c in palette_colors:
638
+ hf = c["hue_family"]
639
+ if hf not in by_hue:
640
+ by_hue[hf] = []
641
+ by_hue[hf].append(c)
642
+
643
+ result = []
644
+
645
+ for hue_fam, hue_colors in sorted(by_hue.items()):
646
+ n = len(hue_colors)
647
+
648
+ # Sort by luminance: lightest first → gets lightest shade slot
649
+ hue_colors.sort(key=lambda x: -x["luminance"])
650
+
651
+ # Pick N evenly-spaced shade slots from the 10 available
652
+ if n == 1:
653
+ slots = ["500"]
654
+ elif n == 2:
655
+ slots = ["300", "700"]
656
+ elif n == 3:
657
+ slots = ["200", "500", "800"]
658
+ elif n == 4:
659
+ slots = ["100", "400", "600", "900"]
660
+ else:
661
+ # For n > 4 (shouldn't happen with cap=4, but safety)
662
+ step = max(1, len(_SHADE_SLOTS) // n)
663
+ slots = _SHADE_SLOTS[::step][:n]
664
+
665
+ for idx, c in enumerate(hue_colors):
666
+ role = slots[idx] if idx < len(slots) else str((idx + 1) * 100)
667
+
668
+ if convention == "tailwind":
669
+ token_name = f"{hue_fam}{sep}{role}"
670
+ else:
671
+ token_name = f"{prefix}{hue_fam}{sep}{role}"
672
+
673
+ # Name should be unique (cap guarantees max 4 per hue)
674
+ used_names.add(token_name)
675
+
676
+ evidence = _build_evidence(c)
677
+ log(f"[NAME] {c['hex']} → {token_name} (palette/{hue_fam}, freq={c['frequency']})")
678
+
679
+ result.append(ClassifiedColor(
680
+ hex=c["hex"],
681
+ frequency=c["frequency"],
682
+ category="palette",
683
+ role=role,
684
+ token_name=token_name,
685
+ evidence=evidence,
686
+ confidence="high" if c["frequency"] > 10 else "medium" if c["frequency"] > 3 else "low",
687
+ css_properties=c["css_properties"],
688
+ elements=c["elements"],
689
+ contexts=c["contexts"],
690
+ merged_from=c.get("merged_from", []),
691
+ hue_family=hue_fam,
692
+ luminance=c["luminance"],
693
+ saturation=c["saturation"],
694
+ ))
695
+
696
+ return result
697
+
698
+
699
  def _assign_feedback_role(c: dict, idx: int, all_feedback: list) -> str:
700
  """Assign feedback role by hue matching."""
701
  hue = c["hue"]
core/color_utils.py CHANGED
@@ -354,9 +354,9 @@ def categorize_color(color: str) -> str:
354
  return "green"
355
  elif h < 190:
356
  return "cyan"
357
- elif h < 260:
358
  return "blue"
359
- elif h < 290:
360
  return "purple"
361
  else:
362
  return "pink"
 
354
  return "green"
355
  elif h < 190:
356
  return "cyan"
357
+ elif h <= 240:
358
  return "blue"
359
+ elif h < 295:
360
  return "purple"
361
  else:
362
  return "pink"