riazmo Claude Opus 4.6 committed on
Commit
8d1b9cb
·
1 Parent(s): 24adae3

v3.3: Benchmarking overhaul + naming preview fix + UX improvements

Browse files

Priority 2 — Make benchmarking useful:
- Add radius + shadow data to all 8 fallback benchmarks (Material,
Apple HIG, Polaris, Atlassian, Carbon, Tailwind, Ant, Chakra)
- Expand comparison from 3 metrics to 6 categories: type, spacing,
colors, radius, shadows, base size — each with match %
- Show per-category match table with ✅/🟡/🔴 indicators
- Add detailed gap analysis for top benchmark (your vs benchmark values)
- Show pros/cons and alignment changes with token-type icons
- BenchmarkData now stores radius and shadows fields

Fix: Color naming convention preview not visible:
- Add naming convention dropdown + preview inside Stage 2 Colors section
(visible BEFORE export, not hidden inside collapsed Stage 3 accordion)
- Auto-generate color classification preview when Stage 2 completes
- Sync naming convention between Stage 2 and Stage 3 dropdowns
- Return auto_color_preview as 17th output from analysis function
- Update all error return paths to match new 17-value tuple

Priority 3 — Better UX:
- Auto-open Stage 2 accordion when extraction completes
- Auto-open Stage 3 accordion when analysis completes
- Users no longer need to manually expand hidden accordions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. agents/benchmark_researcher.py +153 -42
  2. app.py +135 -25
agents/benchmark_researcher.py CHANGED
@@ -139,13 +139,19 @@ class BenchmarkData:
139
  # Extracted specifications
140
  typography: dict = field(default_factory=dict)
141
  # Expected: {scale_ratio, base_size, sizes[], font_family, line_height_body}
142
-
143
  spacing: dict = field(default_factory=dict)
144
  # Expected: {base, scale[], grid}
145
-
146
  colors: dict = field(default_factory=dict)
147
  # Expected: {palette_size, uses_ramps, ramp_steps}
148
-
 
 
 
 
 
 
149
  # Metadata
150
  fetched_at: str = ""
151
  confidence: str = "low" # high, medium, low
@@ -162,6 +168,8 @@ class BenchmarkData:
162
  "typography": self.typography,
163
  "spacing": self.spacing,
164
  "colors": self.colors,
 
 
165
  "fetched_at": self.fetched_at,
166
  "confidence": self.confidence,
167
  "best_for": self.best_for,
@@ -170,20 +178,28 @@ class BenchmarkData:
170
 
171
  @dataclass
172
  class BenchmarkComparison:
173
- """Comparison result between user's tokens and a benchmark."""
174
  benchmark: BenchmarkData
175
  similarity_score: float # Lower = more similar
176
-
177
  # Individual comparisons
178
  type_ratio_diff: float
179
  base_size_diff: int
180
  spacing_grid_diff: int
181
-
182
- # Match percentages
183
  type_match_pct: float
184
  spacing_match_pct: float
185
- overall_match_pct: float
186
-
 
 
 
 
 
 
 
 
187
  def to_dict(self) -> dict:
188
  return {
189
  "name": self.benchmark.name,
@@ -203,11 +219,17 @@ class BenchmarkComparison:
203
  "diff": self.spacing_grid_diff,
204
  "match_pct": round(self.spacing_match_pct, 1),
205
  },
 
 
 
206
  },
207
  "benchmark_values": {
208
  "type_ratio": self.benchmark.typography.get("scale_ratio"),
209
  "base_size": self.benchmark.typography.get("base_size"),
210
  "spacing_grid": self.benchmark.spacing.get("base"),
 
 
 
211
  },
212
  "best_for": self.benchmark.best_for,
213
  "confidence": self.benchmark.confidence,
@@ -274,6 +296,8 @@ class BenchmarkCache:
274
  typography=entry.get("typography", {}),
275
  spacing=entry.get("spacing", {}),
276
  colors=entry.get("colors", {}),
 
 
277
  fetched_at=entry.get("fetched_at", ""),
278
  confidence=entry.get("confidence", "low"),
279
  source_urls=entry.get("source_urls", []),
@@ -315,42 +339,58 @@ FALLBACK_BENCHMARKS = {
315
  "material_design_3": {
316
  "typography": {"scale_ratio": 1.2, "base_size": 16, "font_family": "Roboto", "line_height_body": 1.5},
317
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 24, 32, 48, 64], "grid": "8px"},
318
- "colors": {"palette_size": 13, "uses_ramps": True},
 
 
319
  },
320
  "apple_hig": {
321
  "typography": {"scale_ratio": 1.19, "base_size": 17, "font_family": "SF Pro", "line_height_body": 1.47},
322
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40], "grid": "4px"},
323
- "colors": {"palette_size": 9, "uses_ramps": True},
 
 
324
  },
325
  "shopify_polaris": {
326
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "Inter", "line_height_body": 1.5},
327
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64], "grid": "4px"},
328
- "colors": {"palette_size": 11, "uses_ramps": True},
 
 
329
  },
330
  "atlassian_design": {
331
  "typography": {"scale_ratio": 1.14, "base_size": 14, "font_family": "Inter", "line_height_body": 1.43},
332
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 24, 32, 40, 48], "grid": "8px"},
333
- "colors": {"palette_size": 15, "uses_ramps": True},
 
 
334
  },
335
  "ibm_carbon": {
336
  "typography": {"scale_ratio": 1.25, "base_size": 14, "font_family": "IBM Plex Sans", "line_height_body": 1.5},
337
  "spacing": {"base": 8, "scale": [0, 2, 4, 8, 12, 16, 24, 32, 40, 48], "grid": "8px"},
338
- "colors": {"palette_size": 12, "uses_ramps": True},
 
 
339
  },
340
  "tailwind_css": {
341
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "system-ui", "line_height_body": 1.5},
342
  "spacing": {"base": 4, "scale": [0, 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32], "grid": "4px"},
343
- "colors": {"palette_size": 22, "uses_ramps": True},
 
 
344
  },
345
  "ant_design": {
346
  "typography": {"scale_ratio": 1.14, "base_size": 14, "font_family": "system-ui", "line_height_body": 1.57},
347
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48], "grid": "8px"},
348
- "colors": {"palette_size": 13, "uses_ramps": True},
 
 
349
  },
350
  "chakra_ui": {
351
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "system-ui", "line_height_body": 1.5},
352
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 56, 64], "grid": "4px"},
353
- "colors": {"palette_size": 15, "uses_ramps": True},
 
 
354
  },
355
  }
356
 
@@ -461,6 +501,8 @@ class BenchmarkResearcher:
461
  typography=extracted.get("typography", FALLBACK_BENCHMARKS.get(system_key, {}).get("typography", {})),
462
  spacing=extracted.get("spacing", FALLBACK_BENCHMARKS.get(system_key, {}).get("spacing", {})),
463
  colors=extracted.get("colors", FALLBACK_BENCHMARKS.get(system_key, {}).get("colors", {})),
 
 
464
  fetched_at=datetime.now().isoformat(),
465
  confidence=confidence,
466
  source_urls=list(source["urls"].values()),
@@ -601,6 +643,8 @@ Return ONLY valid JSON, no explanation."""
601
  typography=fallback.get("typography", {}),
602
  spacing=fallback.get("spacing", {}),
603
  colors=fallback.get("colors", {}),
 
 
604
  fetched_at=datetime.now().isoformat(),
605
  confidence="fallback",
606
  best_for=source["best_for"],
@@ -618,53 +662,113 @@ Return ONLY valid JSON, no explanation."""
618
  your_spacing_grid: int,
619
  benchmarks: list[BenchmarkData],
620
  log_callback: Callable = None,
 
 
 
621
  ) -> list[BenchmarkComparison]:
622
  """
623
- Compare user's tokens to researched benchmarks.
624
-
625
  Args:
626
  your_ratio: Detected type scale ratio
627
  your_base_size: Detected base font size
628
  your_spacing_grid: Detected spacing grid base
629
  benchmarks: List of researched BenchmarkData
630
  log_callback: Function to log progress
631
-
 
 
 
632
  Returns:
633
  List of BenchmarkComparison sorted by similarity
634
  """
635
  def log(msg: str):
636
  if log_callback:
637
  log_callback(msg)
638
-
639
  log("")
640
- log(" πŸ“Š BENCHMARK COMPARISON")
641
  log(" " + "─" * 40)
642
- log(f" Your values: ratio={your_ratio:.2f}, base={your_base_size}px, grid={your_spacing_grid}px")
 
643
  log("")
644
-
645
  comparisons = []
646
-
647
  for b in benchmarks:
648
  b_ratio = b.typography.get("scale_ratio", 1.25)
649
  b_base = b.typography.get("base_size", 16)
650
  b_grid = b.spacing.get("base", 8)
651
-
652
- # Calculate differences
 
 
 
653
  ratio_diff = abs(your_ratio - b_ratio)
654
  base_diff = abs(your_base_size - b_base)
 
 
 
655
  grid_diff = abs(your_spacing_grid - b_grid)
656
-
657
- # Calculate match percentages
658
- type_match = max(0, 100 - (ratio_diff * 100)) # 0.1 diff = 90% match
659
- spacing_match = max(0, 100 - (grid_diff * 10)) # 4px diff = 60% match
660
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661
  # Weighted similarity score (lower = more similar)
662
- similarity = (ratio_diff * 10) + (base_diff * 0.5) + (grid_diff * 0.3)
663
-
664
- # Overall match percentage
665
- overall_match = (type_match * 0.5) + (spacing_match * 0.3) + (100 - base_diff * 5) * 0.2
 
 
 
 
 
 
 
 
666
  overall_match = max(0, min(100, overall_match))
667
-
668
  comparisons.append(BenchmarkComparison(
669
  benchmark=b,
670
  similarity_score=similarity,
@@ -673,19 +777,26 @@ Return ONLY valid JSON, no explanation."""
673
  spacing_grid_diff=grid_diff,
674
  type_match_pct=type_match,
675
  spacing_match_pct=spacing_match,
 
 
 
676
  overall_match_pct=overall_match,
 
 
 
677
  ))
678
-
679
  # Sort by similarity (lower = better)
680
  comparisons.sort(key=lambda x: x.similarity_score)
681
-
682
- # Log results
683
  medals = ["πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰"]
684
  for i, c in enumerate(comparisons[:5]):
685
  medal = medals[i] if i < 3 else " "
686
  b = c.benchmark
687
- log(f" {medal} {b.icon} {b.short_name}: {c.overall_match_pct:.0f}% match (score: {c.similarity_score:.2f})")
688
- log(f" └─ ratio={b.typography.get('scale_ratio')}, base={b.typography.get('base_size')}px, grid={b.spacing.get('base')}px")
 
689
 
690
  return comparisons
691
 
 
139
  # Extracted specifications
140
  typography: dict = field(default_factory=dict)
141
  # Expected: {scale_ratio, base_size, sizes[], font_family, line_height_body}
142
+
143
  spacing: dict = field(default_factory=dict)
144
  # Expected: {base, scale[], grid}
145
+
146
  colors: dict = field(default_factory=dict)
147
  # Expected: {palette_size, uses_ramps, ramp_steps}
148
+
149
+ radius: dict = field(default_factory=dict)
150
+ # Expected: {tiers, values[], strategy, grid}
151
+
152
+ shadows: dict = field(default_factory=dict)
153
+ # Expected: {levels, blur_range[], system}
154
+
155
  # Metadata
156
  fetched_at: str = ""
157
  confidence: str = "low" # high, medium, low
 
168
  "typography": self.typography,
169
  "spacing": self.spacing,
170
  "colors": self.colors,
171
+ "radius": self.radius,
172
+ "shadows": self.shadows,
173
  "fetched_at": self.fetched_at,
174
  "confidence": self.confidence,
175
  "best_for": self.best_for,
 
178
 
179
  @dataclass
180
  class BenchmarkComparison:
181
+ """Comparison result between user's tokens and a benchmark β€” ALL 6 categories."""
182
  benchmark: BenchmarkData
183
  similarity_score: float # Lower = more similar
184
+
185
  # Individual comparisons
186
  type_ratio_diff: float
187
  base_size_diff: int
188
  spacing_grid_diff: int
189
+
190
+ # Match percentages β€” all 6 categories
191
  type_match_pct: float
192
  spacing_match_pct: float
193
+ color_match_pct: float = 50.0
194
+ radius_match_pct: float = 50.0
195
+ shadow_match_pct: float = 50.0
196
+ overall_match_pct: float = 0.0
197
+
198
+ # Gap descriptions per category
199
+ color_gap: str = ""
200
+ radius_gap: str = ""
201
+ shadow_gap: str = ""
202
+
203
  def to_dict(self) -> dict:
204
  return {
205
  "name": self.benchmark.name,
 
219
  "diff": self.spacing_grid_diff,
220
  "match_pct": round(self.spacing_match_pct, 1),
221
  },
222
+ "colors": {"match_pct": round(self.color_match_pct, 1), "gap": self.color_gap},
223
+ "radius": {"match_pct": round(self.radius_match_pct, 1), "gap": self.radius_gap},
224
+ "shadows": {"match_pct": round(self.shadow_match_pct, 1), "gap": self.shadow_gap},
225
  },
226
  "benchmark_values": {
227
  "type_ratio": self.benchmark.typography.get("scale_ratio"),
228
  "base_size": self.benchmark.typography.get("base_size"),
229
  "spacing_grid": self.benchmark.spacing.get("base"),
230
+ "color_palette_size": self.benchmark.colors.get("palette_size"),
231
+ "radius_tiers": self.benchmark.radius.get("tiers") if hasattr(self.benchmark, 'radius') and self.benchmark.radius else None,
232
+ "shadow_levels": self.benchmark.shadows.get("levels") if hasattr(self.benchmark, 'shadows') and self.benchmark.shadows else None,
233
  },
234
  "best_for": self.benchmark.best_for,
235
  "confidence": self.benchmark.confidence,
 
296
  typography=entry.get("typography", {}),
297
  spacing=entry.get("spacing", {}),
298
  colors=entry.get("colors", {}),
299
+ radius=entry.get("radius", {}),
300
+ shadows=entry.get("shadows", {}),
301
  fetched_at=entry.get("fetched_at", ""),
302
  confidence=entry.get("confidence", "low"),
303
  source_urls=entry.get("source_urls", []),
 
339
  "material_design_3": {
340
  "typography": {"scale_ratio": 1.2, "base_size": 16, "font_family": "Roboto", "line_height_body": 1.5},
341
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 24, 32, 48, 64], "grid": "8px"},
342
+ "colors": {"palette_size": 13, "uses_ramps": True, "ramp_steps": 10},
343
+ "radius": {"tiers": 5, "values": [0, 4, 8, 12, 28], "strategy": "expressive", "grid": "base-4"},
344
+ "shadows": {"levels": 6, "blur_range": [0, 3, 6, 8, 12, 16], "system": "elevation dp (0-24dp)"},
345
  },
346
  "apple_hig": {
347
  "typography": {"scale_ratio": 1.19, "base_size": 17, "font_family": "SF Pro", "line_height_body": 1.47},
348
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40], "grid": "4px"},
349
+ "colors": {"palette_size": 9, "uses_ramps": True, "ramp_steps": 6},
350
+ "radius": {"tiers": 4, "values": [0, 6, 10, 14], "strategy": "rounded", "grid": "custom"},
351
+ "shadows": {"levels": 4, "blur_range": [2, 8, 20, 40], "system": "semantic (subtle/medium/prominent)"},
352
  },
353
  "shopify_polaris": {
354
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "Inter", "line_height_body": 1.5},
355
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64], "grid": "4px"},
356
+ "colors": {"palette_size": 11, "uses_ramps": True, "ramp_steps": 11},
357
+ "radius": {"tiers": 4, "values": [0, 4, 8, 12], "strategy": "moderate", "grid": "base-4"},
358
+ "shadows": {"levels": 5, "blur_range": [0, 4, 8, 16, 24], "system": "elevation tokens (transparent-500)"},
359
  },
360
  "atlassian_design": {
361
  "typography": {"scale_ratio": 1.14, "base_size": 14, "font_family": "Inter", "line_height_body": 1.43},
362
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 24, 32, 40, 48], "grid": "8px"},
363
+ "colors": {"palette_size": 15, "uses_ramps": True, "ramp_steps": 10},
364
+ "radius": {"tiers": 3, "values": [0, 3, 8], "strategy": "tight", "grid": "custom"},
365
+ "shadows": {"levels": 4, "blur_range": [1, 4, 12, 24], "system": "elevation (raised/overlay/floating)"},
366
  },
367
  "ibm_carbon": {
368
  "typography": {"scale_ratio": 1.25, "base_size": 14, "font_family": "IBM Plex Sans", "line_height_body": 1.5},
369
  "spacing": {"base": 8, "scale": [0, 2, 4, 8, 12, 16, 24, 32, 40, 48], "grid": "8px"},
370
+ "colors": {"palette_size": 12, "uses_ramps": True, "ramp_steps": 10},
371
+ "radius": {"tiers": 3, "values": [0, 2, 4], "strategy": "tight", "grid": "base-2"},
372
+ "shadows": {"levels": 4, "blur_range": [2, 6, 12, 24], "system": "layer tokens (sm/md/lg/xl)"},
373
  },
374
  "tailwind_css": {
375
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "system-ui", "line_height_body": 1.5},
376
  "spacing": {"base": 4, "scale": [0, 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32], "grid": "4px"},
377
+ "colors": {"palette_size": 22, "uses_ramps": True, "ramp_steps": 11},
378
+ "radius": {"tiers": 7, "values": [0, 2, 4, 6, 8, 12, 9999], "strategy": "expressive", "grid": "base-2"},
379
+ "shadows": {"levels": 6, "blur_range": [1, 3, 6, 15, 25, 50], "system": "utility (sm/DEFAULT/md/lg/xl/2xl)"},
380
  },
381
  "ant_design": {
382
  "typography": {"scale_ratio": 1.14, "base_size": 14, "font_family": "system-ui", "line_height_body": 1.57},
383
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48], "grid": "8px"},
384
+ "colors": {"palette_size": 13, "uses_ramps": True, "ramp_steps": 10},
385
+ "radius": {"tiers": 4, "values": [0, 2, 4, 8], "strategy": "moderate", "grid": "base-2"},
386
+ "shadows": {"levels": 3, "blur_range": [6, 16, 48], "system": "3-tier (low/medium/high)"},
387
  },
388
  "chakra_ui": {
389
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "system-ui", "line_height_body": 1.5},
390
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 56, 64], "grid": "4px"},
391
+ "colors": {"palette_size": 15, "uses_ramps": True, "ramp_steps": 10},
392
+ "radius": {"tiers": 6, "values": [0, 2, 4, 6, 8, 9999], "strategy": "expressive", "grid": "base-2"},
393
+ "shadows": {"levels": 6, "blur_range": [1, 3, 6, 10, 15, 25], "system": "utility (xs/sm/md/lg/xl/2xl)"},
394
  },
395
  }
396
 
 
501
  typography=extracted.get("typography", FALLBACK_BENCHMARKS.get(system_key, {}).get("typography", {})),
502
  spacing=extracted.get("spacing", FALLBACK_BENCHMARKS.get(system_key, {}).get("spacing", {})),
503
  colors=extracted.get("colors", FALLBACK_BENCHMARKS.get(system_key, {}).get("colors", {})),
504
+ radius=extracted.get("radius", FALLBACK_BENCHMARKS.get(system_key, {}).get("radius", {})),
505
+ shadows=extracted.get("shadows", FALLBACK_BENCHMARKS.get(system_key, {}).get("shadows", {})),
506
  fetched_at=datetime.now().isoformat(),
507
  confidence=confidence,
508
  source_urls=list(source["urls"].values()),
 
643
  typography=fallback.get("typography", {}),
644
  spacing=fallback.get("spacing", {}),
645
  colors=fallback.get("colors", {}),
646
+ radius=fallback.get("radius", {}),
647
+ shadows=fallback.get("shadows", {}),
648
  fetched_at=datetime.now().isoformat(),
649
  confidence="fallback",
650
  best_for=source["best_for"],
 
662
  your_spacing_grid: int,
663
  benchmarks: list[BenchmarkData],
664
  log_callback: Callable = None,
665
+ your_color_count: int = 0,
666
+ your_radius_tiers: int = 0,
667
+ your_shadow_levels: int = 0,
668
  ) -> list[BenchmarkComparison]:
669
  """
670
+ Compare user's tokens to researched benchmarks β€” ALL 6 categories.
671
+
672
  Args:
673
  your_ratio: Detected type scale ratio
674
  your_base_size: Detected base font size
675
  your_spacing_grid: Detected spacing grid base
676
  benchmarks: List of researched BenchmarkData
677
  log_callback: Function to log progress
678
+ your_color_count: Number of unique colors in palette
679
+ your_radius_tiers: Number of radius tier values
680
+ your_shadow_levels: Number of shadow elevation levels
681
+
682
  Returns:
683
  List of BenchmarkComparison sorted by similarity
684
  """
685
  def log(msg: str):
686
  if log_callback:
687
  log_callback(msg)
688
+
689
  log("")
690
+ log(" πŸ“Š BENCHMARK COMPARISON (6 categories)")
691
  log(" " + "─" * 40)
692
+ log(f" Your values: ratio={your_ratio:.2f}, base={your_base_size}px, grid={your_spacing_grid}px, "
693
+ f"colors={your_color_count}, radius={your_radius_tiers} tiers, shadows={your_shadow_levels} levels")
694
  log("")
695
+
696
  comparisons = []
697
+
698
  for b in benchmarks:
699
  b_ratio = b.typography.get("scale_ratio", 1.25)
700
  b_base = b.typography.get("base_size", 16)
701
  b_grid = b.spacing.get("base", 8)
702
+ b_colors = b.colors.get("palette_size", 15)
703
+ b_radius_tiers = b.radius.get("tiers", 4) if b.radius else 4
704
+ b_shadow_levels = b.shadows.get("levels", 5) if b.shadows else 5
705
+
706
+ # 1. Typography match
707
  ratio_diff = abs(your_ratio - b_ratio)
708
  base_diff = abs(your_base_size - b_base)
709
+ type_match = max(0, 100 - (ratio_diff * 100) - (base_diff * 3))
710
+
711
+ # 2. Spacing match
712
  grid_diff = abs(your_spacing_grid - b_grid)
713
+ spacing_match = max(0, 100 - (grid_diff * 10))
714
+
715
+ # 3. Color match (palette size proximity)
716
+ color_diff = abs(your_color_count - b_colors) if your_color_count > 0 else 5
717
+ color_match = max(0, 100 - (color_diff * 5))
718
+ color_gap = ""
719
+ if your_color_count > 0:
720
+ if color_diff <= 2:
721
+ color_gap = "aligned"
722
+ elif your_color_count > b_colors:
723
+ color_gap = f"reduce by {your_color_count - b_colors}"
724
+ else:
725
+ color_gap = f"expand by {b_colors - your_color_count}"
726
+ else:
727
+ color_gap = "no data"
728
+
729
+ # 4. Radius match (tier count proximity + strategy)
730
+ radius_diff = abs(your_radius_tiers - b_radius_tiers) if your_radius_tiers > 0 else 2
731
+ radius_match = max(0, 100 - (radius_diff * 15))
732
+ radius_gap = ""
733
+ if your_radius_tiers > 0:
734
+ if radius_diff <= 1:
735
+ radius_gap = "aligned"
736
+ elif your_radius_tiers > b_radius_tiers:
737
+ radius_gap = f"reduce by {your_radius_tiers - b_radius_tiers} tiers"
738
+ else:
739
+ radius_gap = f"add {b_radius_tiers - your_radius_tiers} tiers"
740
+ else:
741
+ radius_gap = "no data"
742
+
743
+ # 5. Shadow match (level count proximity)
744
+ shadow_diff = abs(your_shadow_levels - b_shadow_levels) if your_shadow_levels > 0 else 3
745
+ shadow_match = max(0, 100 - (shadow_diff * 15))
746
+ shadow_gap = ""
747
+ if your_shadow_levels > 0:
748
+ if shadow_diff <= 1:
749
+ shadow_gap = "aligned"
750
+ elif your_shadow_levels > b_shadow_levels:
751
+ shadow_gap = f"reduce by {your_shadow_levels - b_shadow_levels} levels"
752
+ else:
753
+ shadow_gap = f"add {b_shadow_levels - your_shadow_levels} levels"
754
+ else:
755
+ shadow_gap = "no data"
756
+
757
  # Weighted similarity score (lower = more similar)
758
+ similarity = (ratio_diff * 10) + (base_diff * 0.5) + (grid_diff * 0.3) + \
759
+ (color_diff * 0.2) + (radius_diff * 0.3) + (shadow_diff * 0.3)
760
+
761
+ # Overall match percentage (weighted average of all 6)
762
+ overall_match = (
763
+ type_match * 0.25 +
764
+ spacing_match * 0.20 +
765
+ color_match * 0.20 +
766
+ radius_match * 0.15 +
767
+ shadow_match * 0.10 +
768
+ max(0, 100 - base_diff * 5) * 0.10
769
+ )
770
  overall_match = max(0, min(100, overall_match))
771
+
772
  comparisons.append(BenchmarkComparison(
773
  benchmark=b,
774
  similarity_score=similarity,
 
777
  spacing_grid_diff=grid_diff,
778
  type_match_pct=type_match,
779
  spacing_match_pct=spacing_match,
780
+ color_match_pct=color_match,
781
+ radius_match_pct=radius_match,
782
+ shadow_match_pct=shadow_match,
783
  overall_match_pct=overall_match,
784
+ color_gap=color_gap,
785
+ radius_gap=radius_gap,
786
+ shadow_gap=shadow_gap,
787
  ))
788
+
789
  # Sort by similarity (lower = better)
790
  comparisons.sort(key=lambda x: x.similarity_score)
791
+
792
+ # Log results with per-category breakdown
793
  medals = ["πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰"]
794
  for i, c in enumerate(comparisons[:5]):
795
  medal = medals[i] if i < 3 else " "
796
  b = c.benchmark
797
+ log(f" {medal} {b.icon} {b.short_name}: {c.overall_match_pct:.0f}% overall match")
798
+ log(f" β”œβ”€ Type: {c.type_match_pct:.0f}% | Spacing: {c.spacing_match_pct:.0f}% | Colors: {c.color_match_pct:.0f}%")
799
+ log(f" └─ Radius: {c.radius_match_pct:.0f}% | Shadows: {c.shadow_match_pct:.0f}%")
800
 
801
  return comparisons
802
 
app.py CHANGED
@@ -970,6 +970,8 @@ async def run_stage2_analysis_v2(
970
  typography=data.get("typography", {}),
971
  spacing=data.get("spacing", {}),
972
  colors=data.get("colors", {}),
 
 
973
  fetched_at=datetime.now().isoformat(),
974
  confidence="fallback",
975
  best_for=[],
@@ -977,12 +979,20 @@ async def run_stage2_analysis_v2(
977
 
978
  # Compare to benchmarks
979
  if benchmarks and rule_results:
 
 
 
 
 
980
  benchmark_comparisons = researcher.compare_to_benchmarks(
981
  your_ratio=rule_results.typography.detected_ratio,
982
  your_base_size=int(rule_results.typography.base_size) if rule_results.typography.sizes_px else 16,
983
  your_spacing_grid=rule_results.spacing.detected_base,
984
  benchmarks=benchmarks,
985
  log_callback=state.log,
 
 
 
986
  )
987
  state.benchmark_comparisons = benchmark_comparisons
988
  state.log("")
@@ -1443,10 +1453,20 @@ async def run_stage2_analysis_v2(
1443
  "*Formatting error - color ramps unavailable*", # color_ramps_md
1444
  "*Formatting error - radius tokens unavailable*", # radius_md
1445
  "*Formatting error - shadow tokens unavailable*", # shadows_md
 
1446
  )
1447
 
 
 
 
 
 
 
 
 
 
1448
  progress(0.95, desc="βœ… Complete!")
1449
-
1450
  # Final log summary
1451
  state.log("")
1452
  state.log("═" * 60)
@@ -1494,6 +1514,7 @@ async def run_stage2_analysis_v2(
1494
  color_ramps_md,
1495
  radius_md,
1496
  shadows_md,
 
1497
  )
1498
 
1499
  except Exception as e:
@@ -1604,7 +1625,7 @@ def create_fallback_synthesis(rule_results, benchmark_comparisons, brand_result,
1604
 
1605
 
1606
  def create_stage2_error_response(error_msg: str):
1607
- """Create error response tuple for Stage 2 (must match 16 outputs)."""
1608
  return (
1609
  error_msg,
1610
  state.get_logs(),
@@ -1622,6 +1643,7 @@ def create_stage2_error_response(error_msg: str):
1622
  "*Run analysis to see color ramps*", # color_ramps_md
1623
  "*Run analysis to see radius tokens*", # radius_md
1624
  "*Run analysis to see shadow tokens*", # shadows_md
 
1625
  )
1626
 
1627
 
@@ -1664,46 +1686,87 @@ def format_stage2_status_v2(rule_results, final_synthesis, best_practices) -> st
1664
 
1665
 
1666
  def format_benchmark_comparison_v2(benchmark_comparisons, benchmark_advice) -> str:
1667
- """Format benchmark comparison results."""
1668
-
1669
  if not benchmark_comparisons:
1670
  return "*No benchmark comparison available*"
1671
-
1672
  lines = []
1673
- lines.append("## πŸ“Š Benchmark Comparison")
1674
  lines.append("")
1675
-
1676
  # Recommended benchmark
1677
  if benchmark_advice and benchmark_advice.recommended_benchmark_name:
1678
  lines.append(f"### πŸ† Recommended: {benchmark_advice.recommended_benchmark_name}")
1679
  if benchmark_advice.reasoning:
1680
- lines.append(f"*{benchmark_advice.reasoning[:200]}*")
1681
  lines.append("")
1682
-
1683
- # Comparison table
1684
  lines.append("### πŸ“ˆ Similarity Ranking")
1685
  lines.append("")
1686
- lines.append("| Rank | Design System | Match | Type Ratio | Base | Grid |")
1687
- lines.append("|------|---------------|-------|------------|------|------|")
1688
-
1689
  medals = ["πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰"]
1690
  for i, c in enumerate(benchmark_comparisons[:5]):
1691
  medal = medals[i] if i < 3 else str(i+1)
1692
  b = c.benchmark
 
 
 
 
 
 
1693
  lines.append(
1694
- f"| {medal} | {b.icon} {b.short_name} | {c.overall_match_pct:.0f}% | "
1695
- f"{b.typography.get('scale_ratio', '?')} | {b.typography.get('base_size', '?')}px | "
1696
- f"{b.spacing.get('base', '?')}px |"
 
1697
  )
1698
-
1699
  lines.append("")
1700
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1701
  # Alignment changes needed
1702
  if benchmark_advice and benchmark_advice.alignment_changes:
1703
  lines.append("### πŸ”§ Changes to Align")
1704
- for change in benchmark_advice.alignment_changes[:3]:
1705
- lines.append(f"- **{change.get('change', '?')}**: {change.get('from', '?')} β†’ {change.get('to', '?')} (effort: {change.get('effort', '?')})")
1706
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1707
  return "\n".join(lines)
1708
 
1709
 
@@ -4410,6 +4473,29 @@ def create_ui():
4410
  "(brand, text, background, border, feedback).*",
4411
  elem_classes=["section-desc"])
4412
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4413
  # LLM Recommendations Section (NEW)
4414
  with gr.Accordion("πŸ€– LLM Color Recommendations", open=True):
4415
  gr.Markdown("*Four AI agents analyzed your colors: **Brand Identifier** (detects primary/secondary brand colors), "
@@ -4560,6 +4646,26 @@ def create_ui():
4560
  elem_classes=["section-desc"])
4561
  export_output = gr.Code(label="Tokens JSON", language="json", lines=25)
4562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4563
  preview_colors_btn.click(
4564
  preview_color_classification,
4565
  inputs=[naming_convention],
@@ -4607,10 +4713,10 @@ def create_ui():
4607
  inputs=[desktop_data],
4608
  outputs=[colors_table, typography_table, spacing_table, radius_table],
4609
  ).then(
4610
- fn=lambda: gr.update(open=True),
4611
- outputs=[stage1_accordion],
4612
  )
4613
-
4614
  # Viewport toggle
4615
  viewport_toggle.change(
4616
  fn=switch_viewport,
@@ -4639,9 +4745,13 @@ def create_ui():
4639
  color_ramps_display,
4640
  radius_display,
4641
  shadows_display,
 
4642
  ],
 
 
 
4643
  )
4644
-
4645
  # Stage 2: Apply upgrades
4646
  apply_upgrades_btn.click(
4647
  fn=apply_selected_upgrades,
 
970
  typography=data.get("typography", {}),
971
  spacing=data.get("spacing", {}),
972
  colors=data.get("colors", {}),
973
+ radius=data.get("radius", {}),
974
+ shadows=data.get("shadows", {}),
975
  fetched_at=datetime.now().isoformat(),
976
  confidence="fallback",
977
  best_for=[],
 
979
 
980
  # Compare to benchmarks
981
  if benchmarks and rule_results:
982
+ # Count user's radius tiers and shadow levels for comparison
983
+ _user_radius_tiers = len(desktop_dict.get("radius", {}))
984
+ _user_shadow_levels = len(desktop_dict.get("shadows", {}))
985
+ _user_color_count = len(desktop_dict.get("colors", {}))
986
+
987
  benchmark_comparisons = researcher.compare_to_benchmarks(
988
  your_ratio=rule_results.typography.detected_ratio,
989
  your_base_size=int(rule_results.typography.base_size) if rule_results.typography.sizes_px else 16,
990
  your_spacing_grid=rule_results.spacing.detected_base,
991
  benchmarks=benchmarks,
992
  log_callback=state.log,
993
+ your_color_count=_user_color_count,
994
+ your_radius_tiers=_user_radius_tiers,
995
+ your_shadow_levels=_user_shadow_levels,
996
  )
997
  state.benchmark_comparisons = benchmark_comparisons
998
  state.log("")
 
1453
  "*Formatting error - color ramps unavailable*", # color_ramps_md
1454
  "*Formatting error - radius tokens unavailable*", # radius_md
1455
  "*Formatting error - shadow tokens unavailable*", # shadows_md
1456
+ "⚠️ Color preview unavailable due to formatting errors.", # auto_color_preview
1457
  )
1458
 
1459
+ # Auto-generate color classification preview
1460
+ auto_color_preview = ""
1461
+ try:
1462
+ auto_color_preview = preview_color_classification("semantic")
1463
+ state.log(" βœ… Color classification preview auto-generated (semantic convention)")
1464
+ except Exception as cp_err:
1465
+ state.log(f" ⚠️ Auto color preview failed: {str(cp_err)}")
1466
+ auto_color_preview = "⚠️ Color preview unavailable β€” click 'Preview Color Names' button to generate."
1467
+
1468
  progress(0.95, desc="βœ… Complete!")
1469
+
1470
  # Final log summary
1471
  state.log("")
1472
  state.log("═" * 60)
 
1514
  color_ramps_md,
1515
  radius_md,
1516
  shadows_md,
1517
+ auto_color_preview,
1518
  )
1519
 
1520
  except Exception as e:
 
1625
 
1626
 
1627
  def create_stage2_error_response(error_msg: str):
1628
+ """Create error response tuple for Stage 2 (must match 17 outputs)."""
1629
  return (
1630
  error_msg,
1631
  state.get_logs(),
 
1643
  "*Run analysis to see color ramps*", # color_ramps_md
1644
  "*Run analysis to see radius tokens*", # radius_md
1645
  "*Run analysis to see shadow tokens*", # shadows_md
1646
+ "", # auto_color_preview
1647
  )
1648
 
1649
 
 
1686
 
1687
 
1688
def format_benchmark_comparison_v2(benchmark_comparisons, benchmark_advice) -> str:
    """Format benchmark comparison results as Markdown β€” ALL 6 categories.

    Renders, in order:
      1. The recommended benchmark (when ``benchmark_advice`` names one).
      2. A similarity-ranking table for the top 5 comparisons with per-category
         match percentages (type, spacing, colors, radius, shadows).
      3. A detailed gap table for the top-ranked benchmark.
      4. Alignment changes plus pros/cons pulled from ``benchmark_advice``.

    Args:
        benchmark_comparisons: Ranked list of comparison objects. Each exposes
            ``benchmark`` (with ``icon``, ``short_name``, ``typography``,
            ``spacing``, ``colors`` and optionally ``radius``/``shadows``),
            the ``*_match_pct`` percentages and the ``*_diff``/``*_gap`` fields
            referenced below.
        benchmark_advice: Optional advice object with
            ``recommended_benchmark_name``, ``reasoning``, ``alignment_changes``,
            ``pros_of_alignment`` and ``cons_of_alignment``. May be ``None``.

    Returns:
        A Markdown string, or a placeholder when no comparisons exist.
    """
    if not benchmark_comparisons:
        return "*No benchmark comparison available*"

    def pct_icon(pct):
        # Traffic-light indicator: >=80 good, >=50 partial, otherwise poor.
        # Defined once here (the original re-created this closure on every
        # loop iteration).
        if pct >= 80:
            return f"βœ… {pct:.0f}%"
        if pct >= 50:
            return f"🟑 {pct:.0f}%"
        return f"πŸ”΄ {pct:.0f}%"

    lines = []
    lines.append("## πŸ“Š Benchmark Comparison (6 Categories)")
    lines.append("")

    # Recommended benchmark
    if benchmark_advice and benchmark_advice.recommended_benchmark_name:
        lines.append(f"### πŸ† Recommended: {benchmark_advice.recommended_benchmark_name}")
        if benchmark_advice.reasoning:
            lines.append(f"*{benchmark_advice.reasoning}*")
        lines.append("")

    # Full comparison table with all 6 categories
    lines.append("### πŸ“ˆ Similarity Ranking")
    lines.append("")
    lines.append("| Rank | Design System | Overall | Type | Spacing | Colors | Radius | Shadows |")
    lines.append("|------|---------------|---------|------|---------|--------|--------|---------|")

    medals = ["πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰"]
    for i, c in enumerate(benchmark_comparisons[:5]):
        medal = medals[i] if i < 3 else str(i + 1)
        b = c.benchmark
        lines.append(
            f"| {medal} | {b.icon} {b.short_name} | **{c.overall_match_pct:.0f}%** | "
            f"{pct_icon(c.type_match_pct)} | {pct_icon(c.spacing_match_pct)} | "
            f"{pct_icon(c.color_match_pct)} | {pct_icon(c.radius_match_pct)} | "
            f"{pct_icon(c.shadow_match_pct)} |"
        )

    lines.append("")

    # Detailed per-category comparison for the top-ranked benchmark.
    # (The list is guaranteed non-empty after the early return above, so the
    # original `if benchmark_comparisons:` guard here was dead code.)
    top = benchmark_comparisons[0]
    b = top.benchmark
    lines.append(f"### πŸ” Detailed: Your Site vs {b.icon} {b.short_name}")
    lines.append("")
    lines.append("| Category | Your Value | Benchmark | Gap | Match |")
    lines.append("|----------|-----------|-----------|-----|-------|")
    # NOTE(review): "Your Value" is reconstructed as diff + benchmark value,
    # which assumes the *_diff fields are signed (yours - benchmark). Confirm
    # against compare_to_benchmarks β€” absolute diffs would mis-report here.
    lines.append(f"| **Typography** | ratio {top.type_ratio_diff + b.typography.get('scale_ratio', 1.25):.2f} | ratio {b.typography.get('scale_ratio', '?')} | diff {top.type_ratio_diff:.2f} | {top.type_match_pct:.0f}% |")
    lines.append(f"| **Base Size** | {top.base_size_diff + b.typography.get('base_size', 16)}px | {b.typography.get('base_size', '?')}px | diff {top.base_size_diff}px | β€” |")
    lines.append(f"| **Spacing** | {top.spacing_grid_diff + b.spacing.get('base', 8)}px grid | {b.spacing.get('base', '?')}px grid | diff {top.spacing_grid_diff}px | {top.spacing_match_pct:.0f}% |")
    lines.append(f"| **Colors** | β€” | {b.colors.get('palette_size', '?')} colors | {top.color_gap or 'N/A'} | {top.color_match_pct:.0f}% |")
    # Older BenchmarkData instances may predate the radius/shadows fields, so
    # fall back to an empty dict rather than raising AttributeError.
    b_radius = b.radius if hasattr(b, 'radius') and b.radius else {}
    b_shadows = b.shadows if hasattr(b, 'shadows') and b.shadows else {}
    lines.append(f"| **Radius** | β€” | {b_radius.get('tiers', '?')} tiers ({b_radius.get('strategy', '?')}) | {top.radius_gap or 'N/A'} | {top.radius_match_pct:.0f}% |")
    lines.append(f"| **Shadows** | β€” | {b_shadows.get('levels', '?')} levels | {top.shadow_gap or 'N/A'} | {top.shadow_match_pct:.0f}% |")

    lines.append("")

    # Alignment changes needed
    if benchmark_advice and benchmark_advice.alignment_changes:
        lines.append("### πŸ”§ Changes to Align")
        for change in benchmark_advice.alignment_changes[:5]:
            token_type = change.get('token_type', '')
            icon = {"typography": "πŸ“", "spacing": "πŸ“", "colors": "🎨", "radius": "πŸ”˜", "shadows": "πŸŒ—"}.get(token_type, "πŸ”§")
            lines.append(f"- {icon} **{change.get('change', '?')}**: {change.get('from', '?')} β†’ {change.get('to', '?')} (effort: {change.get('effort', '?')})")
        lines.append("")

    # Pros and cons
    if benchmark_advice:
        if benchmark_advice.pros_of_alignment:
            lines.append("**βœ… Pros of aligning:**")
            for pro in benchmark_advice.pros_of_alignment[:3]:
                lines.append(f"- {pro}")
        if benchmark_advice.cons_of_alignment:
            lines.append("")
            lines.append("**⚠️ Considerations:**")
            for con in benchmark_advice.cons_of_alignment[:3]:
                lines.append(f"- {con}")

    return "\n".join(lines)
1771
 
1772
 
 
4473
  "(brand, text, background, border, feedback).*",
4474
  elem_classes=["section-desc"])
4475
 
4476
+ # ── Color Naming Convention Preview (visible BEFORE export) ──
4477
+ with gr.Accordion("🏷️ Color Naming Convention β€” Preview Before Export", open=True):
4478
+ gr.Markdown("**Choose how colors are named in your export.** Preview the classification to verify names before exporting. "
4479
+ "100% rule-based β€” no LLM involved. Change convention anytime and re-preview.",
4480
+ elem_classes=["section-desc"])
4481
+ with gr.Row():
4482
+ naming_convention_stage2 = gr.Dropdown(
4483
+ choices=["semantic", "tailwind", "material"],
4484
+ value="semantic",
4485
+ label="🎨 Naming Convention",
4486
+ info="semantic = color.brand.primary | tailwind = brand-primary | material = color.brand.primary",
4487
+ scale=2,
4488
+ )
4489
+ preview_colors_btn_stage2 = gr.Button("πŸ‘οΈ Preview Color Names", variant="secondary", scale=1)
4490
+ color_preview_output_stage2 = gr.Textbox(
4491
+ label="Color Classification Preview (Rule-Based β€” No LLM)",
4492
+ lines=18,
4493
+ max_lines=40,
4494
+ interactive=False,
4495
+ placeholder="Click 'Preview Color Names' above to see how colors will be named in the export. "
4496
+ "This runs AFTER extraction (Stage 1). No LLM cost.",
4497
+ )
4498
+
4499
  # LLM Recommendations Section (NEW)
4500
  with gr.Accordion("πŸ€– LLM Color Recommendations", open=True):
4501
  gr.Markdown("*Four AI agents analyzed your colors: **Brand Identifier** (detects primary/secondary brand colors), "
 
4646
  elem_classes=["section-desc"])
4647
  export_output = gr.Code(label="Tokens JSON", language="json", lines=25)
4648
 
4649
+ # Stage 2 color naming preview (primary β€” visible before export)
4650
+ preview_colors_btn_stage2.click(
4651
+ preview_color_classification,
4652
+ inputs=[naming_convention_stage2],
4653
+ outputs=[color_preview_output_stage2],
4654
+ )
4655
+ # Sync naming convention: Stage 2 dropdown β†’ Stage 3 dropdown
4656
+ naming_convention_stage2.change(
4657
+ lambda v: v,
4658
+ inputs=[naming_convention_stage2],
4659
+ outputs=[naming_convention],
4660
+ )
4661
+ # Stage 3 also syncs back
4662
+ naming_convention.change(
4663
+ lambda v: v,
4664
+ inputs=[naming_convention],
4665
+ outputs=[naming_convention_stage2],
4666
+ )
4667
+
4668
+ # Stage 3 preview (kept for convenience)
4669
  preview_colors_btn.click(
4670
  preview_color_classification,
4671
  inputs=[naming_convention],
 
4713
  inputs=[desktop_data],
4714
  outputs=[colors_table, typography_table, spacing_table, radius_table],
4715
  ).then(
4716
+ fn=lambda: (gr.update(open=True), gr.update(open=True)),
4717
+ outputs=[stage1_accordion, stage2_accordion],
4718
  )
4719
+
4720
  # Viewport toggle
4721
  viewport_toggle.change(
4722
  fn=switch_viewport,
 
4745
  color_ramps_display,
4746
  radius_display,
4747
  shadows_display,
4748
+ color_preview_output_stage2,
4749
  ],
4750
+ ).then(
4751
+ fn=lambda: gr.update(open=True),
4752
+ outputs=[stage3_accordion],
4753
  )
4754
+
4755
  # Stage 2: Apply upgrades
4756
  apply_upgrades_btn.click(
4757
  fn=apply_selected_upgrades,