riazmo Claude Opus 4.6 committed on
Commit
8d1b9cb
·
1 Parent(s): 24adae3

v3.3: Benchmarking overhaul + naming preview fix + UX improvements

Browse files

Priority 2 — Make benchmarking useful:
- Add radius + shadow data to all 8 fallback benchmarks (Material,
Apple HIG, Polaris, Atlassian, Carbon, Tailwind, Ant, Chakra)
- Expand comparison from 3 metrics to 6 categories: type, spacing,
colors, radius, shadows, base size — each with match %
- Show per-category match table with ✅/🟡/🔴 indicators
- Add detailed gap analysis for top benchmark (your vs benchmark values)
- Show pros/cons and alignment changes with token-type icons
- BenchmarkData now stores radius and shadows fields

Fix: Color naming convention preview not visible:
- Add naming convention dropdown + preview inside Stage 2 Colors section
(visible BEFORE export, not hidden inside collapsed Stage 3 accordion)
- Auto-generate color classification preview when Stage 2 completes
- Sync naming convention between Stage 2 and Stage 3 dropdowns
- Return auto_color_preview as 17th output from analysis function
- Update all error return paths to match new 17-value tuple

Priority 3 — Better UX:
- Auto-open Stage 2 accordion when extraction completes
- Auto-open Stage 3 accordion when analysis completes
- Users no longer need to manually expand hidden accordions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. agents/benchmark_researcher.py +153 -42
  2. app.py +135 -25
agents/benchmark_researcher.py CHANGED
@@ -139,13 +139,19 @@ class BenchmarkData:
139
  # Extracted specifications
140
  typography: dict = field(default_factory=dict)
141
  # Expected: {scale_ratio, base_size, sizes[], font_family, line_height_body}
142
-
143
  spacing: dict = field(default_factory=dict)
144
  # Expected: {base, scale[], grid}
145
-
146
  colors: dict = field(default_factory=dict)
147
  # Expected: {palette_size, uses_ramps, ramp_steps}
148
-
 
 
 
 
 
 
149
  # Metadata
150
  fetched_at: str = ""
151
  confidence: str = "low" # high, medium, low
@@ -162,6 +168,8 @@ class BenchmarkData:
162
  "typography": self.typography,
163
  "spacing": self.spacing,
164
  "colors": self.colors,
 
 
165
  "fetched_at": self.fetched_at,
166
  "confidence": self.confidence,
167
  "best_for": self.best_for,
@@ -170,20 +178,28 @@ class BenchmarkData:
170
 
171
  @dataclass
172
  class BenchmarkComparison:
173
- """Comparison result between user's tokens and a benchmark."""
174
  benchmark: BenchmarkData
175
  similarity_score: float # Lower = more similar
176
-
177
  # Individual comparisons
178
  type_ratio_diff: float
179
  base_size_diff: int
180
  spacing_grid_diff: int
181
-
182
- # Match percentages
183
  type_match_pct: float
184
  spacing_match_pct: float
185
- overall_match_pct: float
186
-
 
 
 
 
 
 
 
 
187
  def to_dict(self) -> dict:
188
  return {
189
  "name": self.benchmark.name,
@@ -203,11 +219,17 @@ class BenchmarkComparison:
203
  "diff": self.spacing_grid_diff,
204
  "match_pct": round(self.spacing_match_pct, 1),
205
  },
 
 
 
206
  },
207
  "benchmark_values": {
208
  "type_ratio": self.benchmark.typography.get("scale_ratio"),
209
  "base_size": self.benchmark.typography.get("base_size"),
210
  "spacing_grid": self.benchmark.spacing.get("base"),
 
 
 
211
  },
212
  "best_for": self.benchmark.best_for,
213
  "confidence": self.benchmark.confidence,
@@ -274,6 +296,8 @@ class BenchmarkCache:
274
  typography=entry.get("typography", {}),
275
  spacing=entry.get("spacing", {}),
276
  colors=entry.get("colors", {}),
 
 
277
  fetched_at=entry.get("fetched_at", ""),
278
  confidence=entry.get("confidence", "low"),
279
  source_urls=entry.get("source_urls", []),
@@ -315,42 +339,58 @@ FALLBACK_BENCHMARKS = {
315
  "material_design_3": {
316
  "typography": {"scale_ratio": 1.2, "base_size": 16, "font_family": "Roboto", "line_height_body": 1.5},
317
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 24, 32, 48, 64], "grid": "8px"},
318
- "colors": {"palette_size": 13, "uses_ramps": True},
 
 
319
  },
320
  "apple_hig": {
321
  "typography": {"scale_ratio": 1.19, "base_size": 17, "font_family": "SF Pro", "line_height_body": 1.47},
322
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40], "grid": "4px"},
323
- "colors": {"palette_size": 9, "uses_ramps": True},
 
 
324
  },
325
  "shopify_polaris": {
326
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "Inter", "line_height_body": 1.5},
327
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64], "grid": "4px"},
328
- "colors": {"palette_size": 11, "uses_ramps": True},
 
 
329
  },
330
  "atlassian_design": {
331
  "typography": {"scale_ratio": 1.14, "base_size": 14, "font_family": "Inter", "line_height_body": 1.43},
332
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 24, 32, 40, 48], "grid": "8px"},
333
- "colors": {"palette_size": 15, "uses_ramps": True},
 
 
334
  },
335
  "ibm_carbon": {
336
  "typography": {"scale_ratio": 1.25, "base_size": 14, "font_family": "IBM Plex Sans", "line_height_body": 1.5},
337
  "spacing": {"base": 8, "scale": [0, 2, 4, 8, 12, 16, 24, 32, 40, 48], "grid": "8px"},
338
- "colors": {"palette_size": 12, "uses_ramps": True},
 
 
339
  },
340
  "tailwind_css": {
341
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "system-ui", "line_height_body": 1.5},
342
  "spacing": {"base": 4, "scale": [0, 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32], "grid": "4px"},
343
- "colors": {"palette_size": 22, "uses_ramps": True},
 
 
344
  },
345
  "ant_design": {
346
  "typography": {"scale_ratio": 1.14, "base_size": 14, "font_family": "system-ui", "line_height_body": 1.57},
347
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48], "grid": "8px"},
348
- "colors": {"palette_size": 13, "uses_ramps": True},
 
 
349
  },
350
  "chakra_ui": {
351
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "system-ui", "line_height_body": 1.5},
352
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 56, 64], "grid": "4px"},
353
- "colors": {"palette_size": 15, "uses_ramps": True},
 
 
354
  },
355
  }
356
 
@@ -461,6 +501,8 @@ class BenchmarkResearcher:
461
  typography=extracted.get("typography", FALLBACK_BENCHMARKS.get(system_key, {}).get("typography", {})),
462
  spacing=extracted.get("spacing", FALLBACK_BENCHMARKS.get(system_key, {}).get("spacing", {})),
463
  colors=extracted.get("colors", FALLBACK_BENCHMARKS.get(system_key, {}).get("colors", {})),
 
 
464
  fetched_at=datetime.now().isoformat(),
465
  confidence=confidence,
466
  source_urls=list(source["urls"].values()),
@@ -601,6 +643,8 @@ Return ONLY valid JSON, no explanation."""
601
  typography=fallback.get("typography", {}),
602
  spacing=fallback.get("spacing", {}),
603
  colors=fallback.get("colors", {}),
 
 
604
  fetched_at=datetime.now().isoformat(),
605
  confidence="fallback",
606
  best_for=source["best_for"],
@@ -618,53 +662,113 @@ Return ONLY valid JSON, no explanation."""
618
  your_spacing_grid: int,
619
  benchmarks: list[BenchmarkData],
620
  log_callback: Callable = None,
 
 
 
621
  ) -> list[BenchmarkComparison]:
622
  """
623
- Compare user's tokens to researched benchmarks.
624
-
625
  Args:
626
  your_ratio: Detected type scale ratio
627
  your_base_size: Detected base font size
628
  your_spacing_grid: Detected spacing grid base
629
  benchmarks: List of researched BenchmarkData
630
  log_callback: Function to log progress
631
-
 
 
 
632
  Returns:
633
  List of BenchmarkComparison sorted by similarity
634
  """
635
  def log(msg: str):
636
  if log_callback:
637
  log_callback(msg)
638
-
639
  log("")
640
- log(" πŸ“Š BENCHMARK COMPARISON")
641
  log(" " + "─" * 40)
642
- log(f" Your values: ratio={your_ratio:.2f}, base={your_base_size}px, grid={your_spacing_grid}px")
 
643
  log("")
644
-
645
  comparisons = []
646
-
647
  for b in benchmarks:
648
  b_ratio = b.typography.get("scale_ratio", 1.25)
649
  b_base = b.typography.get("base_size", 16)
650
  b_grid = b.spacing.get("base", 8)
651
-
652
- # Calculate differences
 
 
 
653
  ratio_diff = abs(your_ratio - b_ratio)
654
  base_diff = abs(your_base_size - b_base)
 
 
 
655
  grid_diff = abs(your_spacing_grid - b_grid)
656
-
657
- # Calculate match percentages
658
- type_match = max(0, 100 - (ratio_diff * 100)) # 0.1 diff = 90% match
659
- spacing_match = max(0, 100 - (grid_diff * 10)) # 4px diff = 60% match
660
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661
  # Weighted similarity score (lower = more similar)
662
- similarity = (ratio_diff * 10) + (base_diff * 0.5) + (grid_diff * 0.3)
663
-
664
- # Overall match percentage
665
- overall_match = (type_match * 0.5) + (spacing_match * 0.3) + (100 - base_diff * 5) * 0.2
 
 
 
 
 
 
 
 
666
  overall_match = max(0, min(100, overall_match))
667
-
668
  comparisons.append(BenchmarkComparison(
669
  benchmark=b,
670
  similarity_score=similarity,
@@ -673,19 +777,26 @@ Return ONLY valid JSON, no explanation."""
673
  spacing_grid_diff=grid_diff,
674
  type_match_pct=type_match,
675
  spacing_match_pct=spacing_match,
 
 
 
676
  overall_match_pct=overall_match,
 
 
 
677
  ))
678
-
679
  # Sort by similarity (lower = better)
680
  comparisons.sort(key=lambda x: x.similarity_score)
681
-
682
- # Log results
683
  medals = ["πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰"]
684
  for i, c in enumerate(comparisons[:5]):
685
  medal = medals[i] if i < 3 else " "
686
  b = c.benchmark
687
- log(f" {medal} {b.icon} {b.short_name}: {c.overall_match_pct:.0f}% match (score: {c.similarity_score:.2f})")
688
- log(f" └─ ratio={b.typography.get('scale_ratio')}, base={b.typography.get('base_size')}px, grid={b.spacing.get('base')}px")
 
689
 
690
  return comparisons
691
 
 
139
  # Extracted specifications
140
  typography: dict = field(default_factory=dict)
141
  # Expected: {scale_ratio, base_size, sizes[], font_family, line_height_body}
142
+
143
  spacing: dict = field(default_factory=dict)
144
  # Expected: {base, scale[], grid}
145
+
146
  colors: dict = field(default_factory=dict)
147
  # Expected: {palette_size, uses_ramps, ramp_steps}
148
+
149
+ radius: dict = field(default_factory=dict)
150
+ # Expected: {tiers, values[], strategy, grid}
151
+
152
+ shadows: dict = field(default_factory=dict)
153
+ # Expected: {levels, blur_range[], system}
154
+
155
  # Metadata
156
  fetched_at: str = ""
157
  confidence: str = "low" # high, medium, low
 
168
  "typography": self.typography,
169
  "spacing": self.spacing,
170
  "colors": self.colors,
171
+ "radius": self.radius,
172
+ "shadows": self.shadows,
173
  "fetched_at": self.fetched_at,
174
  "confidence": self.confidence,
175
  "best_for": self.best_for,
 
178
 
179
  @dataclass
180
  class BenchmarkComparison:
181
+ """Comparison result between user's tokens and a benchmark β€” ALL 6 categories."""
182
  benchmark: BenchmarkData
183
  similarity_score: float # Lower = more similar
184
+
185
  # Individual comparisons
186
  type_ratio_diff: float
187
  base_size_diff: int
188
  spacing_grid_diff: int
189
+
190
+ # Match percentages β€” all 6 categories
191
  type_match_pct: float
192
  spacing_match_pct: float
193
+ color_match_pct: float = 50.0
194
+ radius_match_pct: float = 50.0
195
+ shadow_match_pct: float = 50.0
196
+ overall_match_pct: float = 0.0
197
+
198
+ # Gap descriptions per category
199
+ color_gap: str = ""
200
+ radius_gap: str = ""
201
+ shadow_gap: str = ""
202
+
203
  def to_dict(self) -> dict:
204
  return {
205
  "name": self.benchmark.name,
 
219
  "diff": self.spacing_grid_diff,
220
  "match_pct": round(self.spacing_match_pct, 1),
221
  },
222
+ "colors": {"match_pct": round(self.color_match_pct, 1), "gap": self.color_gap},
223
+ "radius": {"match_pct": round(self.radius_match_pct, 1), "gap": self.radius_gap},
224
+ "shadows": {"match_pct": round(self.shadow_match_pct, 1), "gap": self.shadow_gap},
225
  },
226
  "benchmark_values": {
227
  "type_ratio": self.benchmark.typography.get("scale_ratio"),
228
  "base_size": self.benchmark.typography.get("base_size"),
229
  "spacing_grid": self.benchmark.spacing.get("base"),
230
+ "color_palette_size": self.benchmark.colors.get("palette_size"),
231
+ "radius_tiers": self.benchmark.radius.get("tiers") if hasattr(self.benchmark, 'radius') and self.benchmark.radius else None,
232
+ "shadow_levels": self.benchmark.shadows.get("levels") if hasattr(self.benchmark, 'shadows') and self.benchmark.shadows else None,
233
  },
234
  "best_for": self.benchmark.best_for,
235
  "confidence": self.benchmark.confidence,
 
296
  typography=entry.get("typography", {}),
297
  spacing=entry.get("spacing", {}),
298
  colors=entry.get("colors", {}),
299
+ radius=entry.get("radius", {}),
300
+ shadows=entry.get("shadows", {}),
301
  fetched_at=entry.get("fetched_at", ""),
302
  confidence=entry.get("confidence", "low"),
303
  source_urls=entry.get("source_urls", []),
 
339
  "material_design_3": {
340
  "typography": {"scale_ratio": 1.2, "base_size": 16, "font_family": "Roboto", "line_height_body": 1.5},
341
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 24, 32, 48, 64], "grid": "8px"},
342
+ "colors": {"palette_size": 13, "uses_ramps": True, "ramp_steps": 10},
343
+ "radius": {"tiers": 5, "values": [0, 4, 8, 12, 28], "strategy": "expressive", "grid": "base-4"},
344
+ "shadows": {"levels": 6, "blur_range": [0, 3, 6, 8, 12, 16], "system": "elevation dp (0-24dp)"},
345
  },
346
  "apple_hig": {
347
  "typography": {"scale_ratio": 1.19, "base_size": 17, "font_family": "SF Pro", "line_height_body": 1.47},
348
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40], "grid": "4px"},
349
+ "colors": {"palette_size": 9, "uses_ramps": True, "ramp_steps": 6},
350
+ "radius": {"tiers": 4, "values": [0, 6, 10, 14], "strategy": "rounded", "grid": "custom"},
351
+ "shadows": {"levels": 4, "blur_range": [2, 8, 20, 40], "system": "semantic (subtle/medium/prominent)"},
352
  },
353
  "shopify_polaris": {
354
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "Inter", "line_height_body": 1.5},
355
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64], "grid": "4px"},
356
+ "colors": {"palette_size": 11, "uses_ramps": True, "ramp_steps": 11},
357
+ "radius": {"tiers": 4, "values": [0, 4, 8, 12], "strategy": "moderate", "grid": "base-4"},
358
+ "shadows": {"levels": 5, "blur_range": [0, 4, 8, 16, 24], "system": "elevation tokens (transparent-500)"},
359
  },
360
  "atlassian_design": {
361
  "typography": {"scale_ratio": 1.14, "base_size": 14, "font_family": "Inter", "line_height_body": 1.43},
362
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 24, 32, 40, 48], "grid": "8px"},
363
+ "colors": {"palette_size": 15, "uses_ramps": True, "ramp_steps": 10},
364
+ "radius": {"tiers": 3, "values": [0, 3, 8], "strategy": "tight", "grid": "custom"},
365
+ "shadows": {"levels": 4, "blur_range": [1, 4, 12, 24], "system": "elevation (raised/overlay/floating)"},
366
  },
367
  "ibm_carbon": {
368
  "typography": {"scale_ratio": 1.25, "base_size": 14, "font_family": "IBM Plex Sans", "line_height_body": 1.5},
369
  "spacing": {"base": 8, "scale": [0, 2, 4, 8, 12, 16, 24, 32, 40, 48], "grid": "8px"},
370
+ "colors": {"palette_size": 12, "uses_ramps": True, "ramp_steps": 10},
371
+ "radius": {"tiers": 3, "values": [0, 2, 4], "strategy": "tight", "grid": "base-2"},
372
+ "shadows": {"levels": 4, "blur_range": [2, 6, 12, 24], "system": "layer tokens (sm/md/lg/xl)"},
373
  },
374
  "tailwind_css": {
375
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "system-ui", "line_height_body": 1.5},
376
  "spacing": {"base": 4, "scale": [0, 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32], "grid": "4px"},
377
+ "colors": {"palette_size": 22, "uses_ramps": True, "ramp_steps": 11},
378
+ "radius": {"tiers": 7, "values": [0, 2, 4, 6, 8, 12, 9999], "strategy": "expressive", "grid": "base-2"},
379
+ "shadows": {"levels": 6, "blur_range": [1, 3, 6, 15, 25, 50], "system": "utility (sm/DEFAULT/md/lg/xl/2xl)"},
380
  },
381
  "ant_design": {
382
  "typography": {"scale_ratio": 1.14, "base_size": 14, "font_family": "system-ui", "line_height_body": 1.57},
383
  "spacing": {"base": 8, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48], "grid": "8px"},
384
+ "colors": {"palette_size": 13, "uses_ramps": True, "ramp_steps": 10},
385
+ "radius": {"tiers": 4, "values": [0, 2, 4, 8], "strategy": "moderate", "grid": "base-2"},
386
+ "shadows": {"levels": 3, "blur_range": [6, 16, 48], "system": "3-tier (low/medium/high)"},
387
  },
388
  "chakra_ui": {
389
  "typography": {"scale_ratio": 1.25, "base_size": 16, "font_family": "system-ui", "line_height_body": 1.5},
390
  "spacing": {"base": 4, "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 56, 64], "grid": "4px"},
391
+ "colors": {"palette_size": 15, "uses_ramps": True, "ramp_steps": 10},
392
+ "radius": {"tiers": 6, "values": [0, 2, 4, 6, 8, 9999], "strategy": "expressive", "grid": "base-2"},
393
+ "shadows": {"levels": 6, "blur_range": [1, 3, 6, 10, 15, 25], "system": "utility (xs/sm/md/lg/xl/2xl)"},
394
  },
395
  }
396
 
 
501
  typography=extracted.get("typography", FALLBACK_BENCHMARKS.get(system_key, {}).get("typography", {})),
502
  spacing=extracted.get("spacing", FALLBACK_BENCHMARKS.get(system_key, {}).get("spacing", {})),
503
  colors=extracted.get("colors", FALLBACK_BENCHMARKS.get(system_key, {}).get("colors", {})),
504
+ radius=extracted.get("radius", FALLBACK_BENCHMARKS.get(system_key, {}).get("radius", {})),
505
+ shadows=extracted.get("shadows", FALLBACK_BENCHMARKS.get(system_key, {}).get("shadows", {})),
506
  fetched_at=datetime.now().isoformat(),
507
  confidence=confidence,
508
  source_urls=list(source["urls"].values()),
 
643
  typography=fallback.get("typography", {}),
644
  spacing=fallback.get("spacing", {}),
645
  colors=fallback.get("colors", {}),
646
+ radius=fallback.get("radius", {}),
647
+ shadows=fallback.get("shadows", {}),
648
  fetched_at=datetime.now().isoformat(),
649
  confidence="fallback",
650
  best_for=source["best_for"],
 
662
  your_spacing_grid: int,
663
  benchmarks: list[BenchmarkData],
664
  log_callback: Callable = None,
665
+ your_color_count: int = 0,
666
+ your_radius_tiers: int = 0,
667
+ your_shadow_levels: int = 0,
668
  ) -> list[BenchmarkComparison]:
669
  """
670
+ Compare user's tokens to researched benchmarks β€” ALL 6 categories.
671
+
672
  Args:
673
  your_ratio: Detected type scale ratio
674
  your_base_size: Detected base font size
675
  your_spacing_grid: Detected spacing grid base
676
  benchmarks: List of researched BenchmarkData
677
  log_callback: Function to log progress
678
+ your_color_count: Number of unique colors in palette
679
+ your_radius_tiers: Number of radius tier values
680
+ your_shadow_levels: Number of shadow elevation levels
681
+
682
  Returns:
683
  List of BenchmarkComparison sorted by similarity
684
  """
685
  def log(msg: str):
686
  if log_callback:
687
  log_callback(msg)
688
+
689
  log("")
690
+ log(" πŸ“Š BENCHMARK COMPARISON (6 categories)")
691
  log(" " + "─" * 40)
692
+ log(f" Your values: ratio={your_ratio:.2f}, base={your_base_size}px, grid={your_spacing_grid}px, "
693
+ f"colors={your_color_count}, radius={your_radius_tiers} tiers, shadows={your_shadow_levels} levels")
694
  log("")
695
+
696
  comparisons = []
697
+
698
  for b in benchmarks:
699
  b_ratio = b.typography.get("scale_ratio", 1.25)
700
  b_base = b.typography.get("base_size", 16)
701
  b_grid = b.spacing.get("base", 8)
702
+ b_colors = b.colors.get("palette_size", 15)
703
+ b_radius_tiers = b.radius.get("tiers", 4) if b.radius else 4
704
+ b_shadow_levels = b.shadows.get("levels", 5) if b.shadows else 5
705
+
706
+ # 1. Typography match
707
  ratio_diff = abs(your_ratio - b_ratio)
708
  base_diff = abs(your_base_size - b_base)
709
+ type_match = max(0, 100 - (ratio_diff * 100) - (base_diff * 3))
710
+
711
+ # 2. Spacing match
712
  grid_diff = abs(your_spacing_grid - b_grid)
713
+ spacing_match = max(0, 100 - (grid_diff * 10))
714
+
715
+ # 3. Color match (palette size proximity)
716
+ color_diff = abs(your_color_count - b_colors) if your_color_count > 0 else 5
717
+ color_match = max(0, 100 - (color_diff * 5))
718
+ color_gap = ""
719
+ if your_color_count > 0:
720
+ if color_diff <= 2:
721
+ color_gap = "aligned"
722
+ elif your_color_count > b_colors:
723
+ color_gap = f"reduce by {your_color_count - b_colors}"
724
+ else:
725
+ color_gap = f"expand by {b_colors - your_color_count}"
726
+ else:
727
+ color_gap = "no data"
728
+
729
+ # 4. Radius match (tier count proximity + strategy)
730
+ radius_diff = abs(your_radius_tiers - b_radius_tiers) if your_radius_tiers > 0 else 2
731
+ radius_match = max(0, 100 - (radius_diff * 15))
732
+ radius_gap = ""
733
+ if your_radius_tiers > 0:
734
+ if radius_diff <= 1:
735
+ radius_gap = "aligned"
736
+ elif your_radius_tiers > b_radius_tiers:
737
+ radius_gap = f"reduce by {your_radius_tiers - b_radius_tiers} tiers"
738
+ else:
739
+ radius_gap = f"add {b_radius_tiers - your_radius_tiers} tiers"
740
+ else:
741
+ radius_gap = "no data"
742
+
743
+ # 5. Shadow match (level count proximity)
744
+ shadow_diff = abs(your_shadow_levels - b_shadow_levels) if your_shadow_levels > 0 else 3
745
+ shadow_match = max(0, 100 - (shadow_diff * 15))
746
+ shadow_gap = ""
747
+ if your_shadow_levels > 0:
748
+ if shadow_diff <= 1:
749
+ shadow_gap = "aligned"
750
+ elif your_shadow_levels > b_shadow_levels:
751
+ shadow_gap = f"reduce by {your_shadow_levels - b_shadow_levels} levels"
752
+ else:
753
+ shadow_gap = f"add {b_shadow_levels - your_shadow_levels} levels"
754
+ else:
755
+ shadow_gap = "no data"
756
+
757
  # Weighted similarity score (lower = more similar)
758
+ similarity = (ratio_diff * 10) + (base_diff * 0.5) + (grid_diff * 0.3) + \
759
+ (color_diff * 0.2) + (radius_diff * 0.3) + (shadow_diff * 0.3)
760
+
761
+ # Overall match percentage (weighted average of all 6)
762
+ overall_match = (
763
+ type_match * 0.25 +
764
+ spacing_match * 0.20 +
765
+ color_match * 0.20 +
766
+ radius_match * 0.15 +
767
+ shadow_match * 0.10 +
768
+ max(0, 100 - base_diff * 5) * 0.10
769
+ )
770
  overall_match = max(0, min(100, overall_match))
771
+
772
  comparisons.append(BenchmarkComparison(
773
  benchmark=b,
774
  similarity_score=similarity,
 
777
  spacing_grid_diff=grid_diff,
778
  type_match_pct=type_match,
779
  spacing_match_pct=spacing_match,
780
+ color_match_pct=color_match,
781
+ radius_match_pct=radius_match,
782
+ shadow_match_pct=shadow_match,
783
  overall_match_pct=overall_match,
784
+ color_gap=color_gap,
785
+ radius_gap=radius_gap,
786
+ shadow_gap=shadow_gap,
787
  ))
788
+
789
  # Sort by similarity (lower = better)
790
  comparisons.sort(key=lambda x: x.similarity_score)
791
+
792
+ # Log results with per-category breakdown
793
  medals = ["πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰"]
794
  for i, c in enumerate(comparisons[:5]):
795
  medal = medals[i] if i < 3 else " "
796
  b = c.benchmark
797
+ log(f" {medal} {b.icon} {b.short_name}: {c.overall_match_pct:.0f}% overall match")
798
+ log(f" β”œβ”€ Type: {c.type_match_pct:.0f}% | Spacing: {c.spacing_match_pct:.0f}% | Colors: {c.color_match_pct:.0f}%")
799
+ log(f" └─ Radius: {c.radius_match_pct:.0f}% | Shadows: {c.shadow_match_pct:.0f}%")
800
 
801
  return comparisons
802
 
app.py CHANGED
@@ -970,6 +970,8 @@ async def run_stage2_analysis_v2(
970
  typography=data.get("typography", {}),
971
  spacing=data.get("spacing", {}),
972
  colors=data.get("colors", {}),
 
 
973
  fetched_at=datetime.now().isoformat(),
974
  confidence="fallback",
975
  best_for=[],
@@ -977,12 +979,20 @@ async def run_stage2_analysis_v2(
977
 
978
  # Compare to benchmarks
979
  if benchmarks and rule_results:
 
 
 
 
 
980
  benchmark_comparisons = researcher.compare_to_benchmarks(
981
  your_ratio=rule_results.typography.detected_ratio,
982
  your_base_size=int(rule_results.typography.base_size) if rule_results.typography.sizes_px else 16,
983
  your_spacing_grid=rule_results.spacing.detected_base,
984
  benchmarks=benchmarks,
985
  log_callback=state.log,
 
 
 
986
  )
987
  state.benchmark_comparisons = benchmark_comparisons
988
  state.log("")
@@ -1443,10 +1453,20 @@ async def run_stage2_analysis_v2(
1443
  "*Formatting error - color ramps unavailable*", # color_ramps_md
1444
  "*Formatting error - radius tokens unavailable*", # radius_md
1445
  "*Formatting error - shadow tokens unavailable*", # shadows_md
 
1446
  )
1447
 
 
 
 
 
 
 
 
 
 
1448
  progress(0.95, desc="βœ… Complete!")
1449
-
1450
  # Final log summary
1451
  state.log("")
1452
  state.log("═" * 60)
@@ -1494,6 +1514,7 @@ async def run_stage2_analysis_v2(
1494
  color_ramps_md,
1495
  radius_md,
1496
  shadows_md,
 
1497
  )
1498
 
1499
  except Exception as e:
@@ -1604,7 +1625,7 @@ def create_fallback_synthesis(rule_results, benchmark_comparisons, brand_result,
1604
 
1605
 
1606
  def create_stage2_error_response(error_msg: str):
1607
- """Create error response tuple for Stage 2 (must match 16 outputs)."""
1608
  return (
1609
  error_msg,
1610
  state.get_logs(),
@@ -1622,6 +1643,7 @@ def create_stage2_error_response(error_msg: str):
1622
  "*Run analysis to see color ramps*", # color_ramps_md
1623
  "*Run analysis to see radius tokens*", # radius_md
1624
  "*Run analysis to see shadow tokens*", # shadows_md
 
1625
  )
1626
 
1627
 
@@ -1664,46 +1686,87 @@ def format_stage2_status_v2(rule_results, final_synthesis, best_practices) -> st
1664
 
1665
 
1666
  def format_benchmark_comparison_v2(benchmark_comparisons, benchmark_advice) -> str:
1667
- """Format benchmark comparison results."""
1668
-
1669
  if not benchmark_comparisons:
1670
  return "*No benchmark comparison available*"
1671
-
1672
  lines = []
1673
- lines.append("## πŸ“Š Benchmark Comparison")
1674
  lines.append("")
1675
-
1676
  # Recommended benchmark
1677
  if benchmark_advice and benchmark_advice.recommended_benchmark_name:
1678
  lines.append(f"### πŸ† Recommended: {benchmark_advice.recommended_benchmark_name}")
1679
  if benchmark_advice.reasoning:
1680
- lines.append(f"*{benchmark_advice.reasoning[:200]}*")
1681
  lines.append("")
1682
-
1683
- # Comparison table
1684
  lines.append("### πŸ“ˆ Similarity Ranking")
1685
  lines.append("")
1686
- lines.append("| Rank | Design System | Match | Type Ratio | Base | Grid |")
1687
- lines.append("|------|---------------|-------|------------|------|------|")
1688
-
1689
  medals = ["πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰"]
1690
  for i, c in enumerate(benchmark_comparisons[:5]):
1691
  medal = medals[i] if i < 3 else str(i+1)
1692
  b = c.benchmark
 
 
 
 
 
 
1693
  lines.append(
1694
- f"| {medal} | {b.icon} {b.short_name} | {c.overall_match_pct:.0f}% | "
1695
- f"{b.typography.get('scale_ratio', '?')} | {b.typography.get('base_size', '?')}px | "
1696
- f"{b.spacing.get('base', '?')}px |"
 
1697
  )
1698
-
1699
  lines.append("")
1700
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1701
  # Alignment changes needed
1702
  if benchmark_advice and benchmark_advice.alignment_changes:
1703
  lines.append("### πŸ”§ Changes to Align")
1704
- for change in benchmark_advice.alignment_changes[:3]:
1705
- lines.append(f"- **{change.get('change', '?')}**: {change.get('from', '?')} β†’ {change.get('to', '?')} (effort: {change.get('effort', '?')})")
1706
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1707
  return "\n".join(lines)
1708
 
1709
 
@@ -4410,6 +4473,29 @@ def create_ui():
4410
  "(brand, text, background, border, feedback).*",
4411
  elem_classes=["section-desc"])
4412
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4413
  # LLM Recommendations Section (NEW)
4414
  with gr.Accordion("πŸ€– LLM Color Recommendations", open=True):
4415
  gr.Markdown("*Four AI agents analyzed your colors: **Brand Identifier** (detects primary/secondary brand colors), "
@@ -4560,6 +4646,26 @@ def create_ui():
4560
  elem_classes=["section-desc"])
4561
  export_output = gr.Code(label="Tokens JSON", language="json", lines=25)
4562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4563
  preview_colors_btn.click(
4564
  preview_color_classification,
4565
  inputs=[naming_convention],
@@ -4607,10 +4713,10 @@ def create_ui():
4607
  inputs=[desktop_data],
4608
  outputs=[colors_table, typography_table, spacing_table, radius_table],
4609
  ).then(
4610
- fn=lambda: gr.update(open=True),
4611
- outputs=[stage1_accordion],
4612
  )
4613
-
4614
  # Viewport toggle
4615
  viewport_toggle.change(
4616
  fn=switch_viewport,
@@ -4639,9 +4745,13 @@ def create_ui():
4639
  color_ramps_display,
4640
  radius_display,
4641
  shadows_display,
 
4642
  ],
 
 
 
4643
  )
4644
-
4645
  # Stage 2: Apply upgrades
4646
  apply_upgrades_btn.click(
4647
  fn=apply_selected_upgrades,
 
970
  typography=data.get("typography", {}),
971
  spacing=data.get("spacing", {}),
972
  colors=data.get("colors", {}),
973
+ radius=data.get("radius", {}),
974
+ shadows=data.get("shadows", {}),
975
  fetched_at=datetime.now().isoformat(),
976
  confidence="fallback",
977
  best_for=[],
 
979
 
980
  # Compare to benchmarks
981
  if benchmarks and rule_results:
982
+ # Count user's radius tiers and shadow levels for comparison
983
+ _user_radius_tiers = len(desktop_dict.get("radius", {}))
984
+ _user_shadow_levels = len(desktop_dict.get("shadows", {}))
985
+ _user_color_count = len(desktop_dict.get("colors", {}))
986
+
987
  benchmark_comparisons = researcher.compare_to_benchmarks(
988
  your_ratio=rule_results.typography.detected_ratio,
989
  your_base_size=int(rule_results.typography.base_size) if rule_results.typography.sizes_px else 16,
990
  your_spacing_grid=rule_results.spacing.detected_base,
991
  benchmarks=benchmarks,
992
  log_callback=state.log,
993
+ your_color_count=_user_color_count,
994
+ your_radius_tiers=_user_radius_tiers,
995
+ your_shadow_levels=_user_shadow_levels,
996
  )
997
  state.benchmark_comparisons = benchmark_comparisons
998
  state.log("")
 
1453
  "*Formatting error - color ramps unavailable*", # color_ramps_md
1454
  "*Formatting error - radius tokens unavailable*", # radius_md
1455
  "*Formatting error - shadow tokens unavailable*", # shadows_md
1456
+ "⚠️ Color preview unavailable due to formatting errors.", # auto_color_preview
1457
  )
1458
 
1459
+ # Auto-generate color classification preview
1460
+ auto_color_preview = ""
1461
+ try:
1462
+ auto_color_preview = preview_color_classification("semantic")
1463
+ state.log(" βœ… Color classification preview auto-generated (semantic convention)")
1464
+ except Exception as cp_err:
1465
+ state.log(f" ⚠️ Auto color preview failed: {str(cp_err)}")
1466
+ auto_color_preview = "⚠️ Color preview unavailable β€” click 'Preview Color Names' button to generate."
1467
+
1468
  progress(0.95, desc="βœ… Complete!")
1469
+
1470
  # Final log summary
1471
  state.log("")
1472
  state.log("═" * 60)
 
1514
  color_ramps_md,
1515
  radius_md,
1516
  shadows_md,
1517
+ auto_color_preview,
1518
  )
1519
 
1520
  except Exception as e:
 
1625
 
1626
 
1627
  def create_stage2_error_response(error_msg: str):
1628
+ """Create error response tuple for Stage 2 (must match 17 outputs)."""
1629
  return (
1630
  error_msg,
1631
  state.get_logs(),
 
1643
  "*Run analysis to see color ramps*", # color_ramps_md
1644
  "*Run analysis to see radius tokens*", # radius_md
1645
  "*Run analysis to see shadow tokens*", # shadows_md
1646
+ "", # auto_color_preview
1647
  )
1648
 
1649
 
 
1686
 
1687
 
1688
def format_benchmark_comparison_v2(benchmark_comparisons, benchmark_advice) -> str:
    """Format benchmark comparison results as Markdown β€” ALL 6 categories.

    Renders, in order:
      1. The recommended benchmark (when ``benchmark_advice`` names one).
      2. A similarity-ranking table for the top 5 comparisons with per-category
         match percentages (type, spacing, colors, radius, shadows).
      3. A detailed gap table for the top-ranked benchmark.
      4. Alignment changes plus pros/cons pulled from ``benchmark_advice``.

    Args:
        benchmark_comparisons: Ranked list of comparison objects. Each exposes
            ``benchmark`` (with ``icon``, ``short_name``, ``typography``,
            ``spacing``, ``colors`` and optionally ``radius``/``shadows``),
            the ``*_match_pct`` percentages and the ``*_diff``/``*_gap`` fields
            referenced below.
        benchmark_advice: Optional advice object with
            ``recommended_benchmark_name``, ``reasoning``, ``alignment_changes``,
            ``pros_of_alignment`` and ``cons_of_alignment``. May be ``None``.

    Returns:
        A Markdown string, or a placeholder when no comparisons exist.
    """
    if not benchmark_comparisons:
        return "*No benchmark comparison available*"

    def pct_icon(pct):
        # Traffic-light indicator: >=80 good, >=50 partial, otherwise poor.
        # Defined once here (the original re-created this closure on every
        # loop iteration).
        if pct >= 80:
            return f"βœ… {pct:.0f}%"
        if pct >= 50:
            return f"🟑 {pct:.0f}%"
        return f"πŸ”΄ {pct:.0f}%"

    lines = []
    lines.append("## πŸ“Š Benchmark Comparison (6 Categories)")
    lines.append("")

    # Recommended benchmark
    if benchmark_advice and benchmark_advice.recommended_benchmark_name:
        lines.append(f"### πŸ† Recommended: {benchmark_advice.recommended_benchmark_name}")
        if benchmark_advice.reasoning:
            lines.append(f"*{benchmark_advice.reasoning}*")
        lines.append("")

    # Full comparison table with all 6 categories
    lines.append("### πŸ“ˆ Similarity Ranking")
    lines.append("")
    lines.append("| Rank | Design System | Overall | Type | Spacing | Colors | Radius | Shadows |")
    lines.append("|------|---------------|---------|------|---------|--------|--------|---------|")

    medals = ["πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰"]
    for i, c in enumerate(benchmark_comparisons[:5]):
        medal = medals[i] if i < 3 else str(i + 1)
        b = c.benchmark
        lines.append(
            f"| {medal} | {b.icon} {b.short_name} | **{c.overall_match_pct:.0f}%** | "
            f"{pct_icon(c.type_match_pct)} | {pct_icon(c.spacing_match_pct)} | "
            f"{pct_icon(c.color_match_pct)} | {pct_icon(c.radius_match_pct)} | "
            f"{pct_icon(c.shadow_match_pct)} |"
        )

    lines.append("")

    # Detailed per-category comparison for the top-ranked benchmark.
    # (The list is guaranteed non-empty after the early return above, so the
    # original `if benchmark_comparisons:` guard here was dead code.)
    top = benchmark_comparisons[0]
    b = top.benchmark
    lines.append(f"### πŸ” Detailed: Your Site vs {b.icon} {b.short_name}")
    lines.append("")
    lines.append("| Category | Your Value | Benchmark | Gap | Match |")
    lines.append("|----------|-----------|-----------|-----|-------|")
    # NOTE(review): "Your Value" is reconstructed as diff + benchmark value,
    # which assumes the *_diff fields are signed (yours - benchmark). Confirm
    # against compare_to_benchmarks β€” absolute diffs would mis-report here.
    lines.append(f"| **Typography** | ratio {top.type_ratio_diff + b.typography.get('scale_ratio', 1.25):.2f} | ratio {b.typography.get('scale_ratio', '?')} | diff {top.type_ratio_diff:.2f} | {top.type_match_pct:.0f}% |")
    lines.append(f"| **Base Size** | {top.base_size_diff + b.typography.get('base_size', 16)}px | {b.typography.get('base_size', '?')}px | diff {top.base_size_diff}px | β€” |")
    lines.append(f"| **Spacing** | {top.spacing_grid_diff + b.spacing.get('base', 8)}px grid | {b.spacing.get('base', '?')}px grid | diff {top.spacing_grid_diff}px | {top.spacing_match_pct:.0f}% |")
    lines.append(f"| **Colors** | β€” | {b.colors.get('palette_size', '?')} colors | {top.color_gap or 'N/A'} | {top.color_match_pct:.0f}% |")
    # Older BenchmarkData instances may predate the radius/shadows fields, so
    # fall back to an empty dict rather than raising AttributeError.
    b_radius = b.radius if hasattr(b, 'radius') and b.radius else {}
    b_shadows = b.shadows if hasattr(b, 'shadows') and b.shadows else {}
    lines.append(f"| **Radius** | β€” | {b_radius.get('tiers', '?')} tiers ({b_radius.get('strategy', '?')}) | {top.radius_gap or 'N/A'} | {top.radius_match_pct:.0f}% |")
    lines.append(f"| **Shadows** | β€” | {b_shadows.get('levels', '?')} levels | {top.shadow_gap or 'N/A'} | {top.shadow_match_pct:.0f}% |")

    lines.append("")

    # Alignment changes needed
    if benchmark_advice and benchmark_advice.alignment_changes:
        lines.append("### πŸ”§ Changes to Align")
        for change in benchmark_advice.alignment_changes[:5]:
            token_type = change.get('token_type', '')
            icon = {"typography": "πŸ“", "spacing": "πŸ“", "colors": "🎨", "radius": "πŸ”˜", "shadows": "πŸŒ—"}.get(token_type, "πŸ”§")
            lines.append(f"- {icon} **{change.get('change', '?')}**: {change.get('from', '?')} β†’ {change.get('to', '?')} (effort: {change.get('effort', '?')})")
        lines.append("")

    # Pros and cons
    if benchmark_advice:
        if benchmark_advice.pros_of_alignment:
            lines.append("**βœ… Pros of aligning:**")
            for pro in benchmark_advice.pros_of_alignment[:3]:
                lines.append(f"- {pro}")
        if benchmark_advice.cons_of_alignment:
            lines.append("")
            lines.append("**⚠️ Considerations:**")
            for con in benchmark_advice.cons_of_alignment[:3]:
                lines.append(f"- {con}")

    return "\n".join(lines)
1771
 
1772
 
 
4473
  "(brand, text, background, border, feedback).*",
4474
  elem_classes=["section-desc"])
4475
 
4476
+ # ── Color Naming Convention Preview (visible BEFORE export) ──
4477
+ with gr.Accordion("🏷️ Color Naming Convention β€” Preview Before Export", open=True):
4478
+ gr.Markdown("**Choose how colors are named in your export.** Preview the classification to verify names before exporting. "
4479
+ "100% rule-based β€” no LLM involved. Change convention anytime and re-preview.",
4480
+ elem_classes=["section-desc"])
4481
+ with gr.Row():
4482
+ naming_convention_stage2 = gr.Dropdown(
4483
+ choices=["semantic", "tailwind", "material"],
4484
+ value="semantic",
4485
+ label="🎨 Naming Convention",
4486
+ info="semantic = color.brand.primary | tailwind = brand-primary | material = color.brand.primary",
4487
+ scale=2,
4488
+ )
4489
+ preview_colors_btn_stage2 = gr.Button("πŸ‘οΈ Preview Color Names", variant="secondary", scale=1)
4490
+ color_preview_output_stage2 = gr.Textbox(
4491
+ label="Color Classification Preview (Rule-Based β€” No LLM)",
4492
+ lines=18,
4493
+ max_lines=40,
4494
+ interactive=False,
4495
+ placeholder="Click 'Preview Color Names' above to see how colors will be named in the export. "
4496
+ "This runs AFTER extraction (Stage 1). No LLM cost.",
4497
+ )
4498
+
4499
  # LLM Recommendations Section (NEW)
4500
  with gr.Accordion("πŸ€– LLM Color Recommendations", open=True):
4501
  gr.Markdown("*Four AI agents analyzed your colors: **Brand Identifier** (detects primary/secondary brand colors), "
 
4646
  elem_classes=["section-desc"])
4647
  export_output = gr.Code(label="Tokens JSON", language="json", lines=25)
4648
 
4649
+ # Stage 2 color naming preview (primary β€” visible before export)
4650
+ preview_colors_btn_stage2.click(
4651
+ preview_color_classification,
4652
+ inputs=[naming_convention_stage2],
4653
+ outputs=[color_preview_output_stage2],
4654
+ )
4655
+ # Sync naming convention: Stage 2 dropdown β†’ Stage 3 dropdown
4656
+ naming_convention_stage2.change(
4657
+ lambda v: v,
4658
+ inputs=[naming_convention_stage2],
4659
+ outputs=[naming_convention],
4660
+ )
4661
+ # Stage 3 also syncs back
4662
+ naming_convention.change(
4663
+ lambda v: v,
4664
+ inputs=[naming_convention],
4665
+ outputs=[naming_convention_stage2],
4666
+ )
4667
+
4668
+ # Stage 3 preview (kept for convenience)
4669
  preview_colors_btn.click(
4670
  preview_color_classification,
4671
  inputs=[naming_convention],
 
4713
  inputs=[desktop_data],
4714
  outputs=[colors_table, typography_table, spacing_table, radius_table],
4715
  ).then(
4716
+ fn=lambda: (gr.update(open=True), gr.update(open=True)),
4717
+ outputs=[stage1_accordion, stage2_accordion],
4718
  )
4719
+
4720
  # Viewport toggle
4721
  viewport_toggle.change(
4722
  fn=switch_viewport,
 
4745
  color_ramps_display,
4746
  radius_display,
4747
  shadows_display,
4748
+ color_preview_output_stage2,
4749
  ],
4750
+ ).then(
4751
+ fn=lambda: gr.update(open=True),
4752
+ outputs=[stage3_accordion],
4753
  )
4754
+
4755
  # Stage 2: Apply upgrades
4756
  apply_upgrades_btn.click(
4757
  fn=apply_selected_upgrades,