riazmo Claude Opus 4.6 committed on
Commit
24adae3
·
1 Parent(s): e866214

v3.2: Transparent logging — full reasoning visibility

Browse files

- Remove all LLM reasoning truncation (90→unlimited, 60→unlimited)
- Add step-by-step reasoning to every rule engine check:
• Type scale: show ratio calculations between consecutive sizes
• Accessibility: explain WCAG criteria and pass/fail methodology
• Spacing: show GCD detection and grid alignment counts
• Color stats: explain dedup process and palette size assessment
• Radius: show base-4/base-8 alignment and strategy explanation
• Shadow: show elevation hierarchy analysis
- Add shadow insufficiency warnings with industry benchmarks
(Material 6 levels, Tailwind 6, Polaris 5, Carbon 4, Chakra 6)
and specific blur value suggestions for missing levels
- Add score breakdown in summary (component scores out of max)
- Increase log capacity from 100→500 entries
- Expand log textbox from 20→30 lines
- Show all failing colors with both white/black contrast ratios
- Expand post-validation display from 5→10 issues

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. agents/llm_agents.py +23 -8
  2. app.py +10 -8
  3. core/rule_engine.py +144 -23
agents/llm_agents.py CHANGED
@@ -272,15 +272,30 @@ def _fmt_shadows(tokens: dict, limit: int = 10) -> str:
272
 
273
 
274
  def _log_reasoning(steps: list, log_fn: Callable):
275
- """Log ReAct reasoning steps with icons."""
276
  icons = {"THINK": "🧠", "ACT": "⚡", "OBSERVE": "👁️", "VERIFY": "✅"}
277
  for step in (steps or []):
278
  if isinstance(step, dict):
279
  st = step.get("step", "?")
280
  area = step.get("area", "")
281
- content = step.get("content", "")[:90]
282
  icon = icons.get(st, "📝")
283
- log_fn(f" {icon} [{area}] {content}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
 
286
  def _extract_hexes(tokens: dict) -> list:
@@ -448,10 +463,10 @@ Use ReAct for each area. Name EVERY color in naming_map."""
448
  log(f" ├─ Brand Primary: {result.brand_primary.get('color', '?')} ({result.brand_primary.get('confidence', '?')})")
449
  log(f" ├─ Palette: {result.palette_strategy} · Cohesion: {result.cohesion_score}/10")
450
  log(f" ├─ Colors Named: {len(result.naming_map)}/{len(input_hexes)}")
451
- log(f" ├─ Typography: {(result.typography_notes or 'N/A')[:60]}")
452
- log(f" ├─ Spacing: {(result.spacing_notes or 'N/A')[:60]}")
453
- log(f" ├─ Radius: {(result.radius_notes or 'N/A')[:60]}")
454
- log(f" ├─ Shadows: {(result.shadow_notes or 'N/A')[:60]}")
455
  log(f" └─ Critic: {'✅ PASSED' if result.validation_passed else '⚠️ FALLBACK'}")
456
  return result
457
 
@@ -1069,7 +1084,7 @@ Evaluate from TWO perspectives (Tree of Thought). Choose one. Recommend for ALL
1069
  log(f" ├─ Perspective A: {pa}/100")
1070
  log(f" ├─ Perspective B: {pb}/100")
1071
  log(f" ├─ Chosen: {result.chosen_perspective}")
1072
- log(f" ├─ Why: {(result.choice_reasoning or 'N/A')[:80]}")
1073
  log(f" ├─ Final Score: {result.scores.get('overall', '?')}/100" if result.scores else " ├─ Scores: N/A")
1074
  log(f" ├─ Actions: {len(result.top_3_actions)} | Color Recs: {len(result.color_recommendations)}")
1075
  log(f" ├─ Typography: {_s(result.type_scale_recommendation)}")
 
272
 
273
 
274
  def _log_reasoning(steps: list, log_fn: Callable):
275
+ """Log ReAct reasoning steps with full content (no truncation)."""
276
  icons = {"THINK": "🧠", "ACT": "⚡", "OBSERVE": "👁️", "VERIFY": "✅"}
277
  for step in (steps or []):
278
  if isinstance(step, dict):
279
  st = step.get("step", "?")
280
  area = step.get("area", "")
281
+ content = step.get("content", "")
282
  icon = icons.get(st, "📝")
283
+ # Show full reasoning — wrap long lines for readability
284
+ if len(content) > 120:
285
+ log_fn(f" {icon} [{st}] {area}:")
286
+ # Word-wrap at ~100 chars per line
287
+ words = content.split()
288
+ line = " "
289
+ for word in words:
290
+ if len(line) + len(word) + 1 > 105:
291
+ log_fn(line)
292
+ line = " " + word
293
+ else:
294
+ line = line + " " + word if line.strip() else " " + word
295
+ if line.strip():
296
+ log_fn(line)
297
+ else:
298
+ log_fn(f" {icon} [{st}] {area}: {content}")
299
 
300
 
301
  def _extract_hexes(tokens: dict) -> list:
 
463
  log(f" ├─ Brand Primary: {result.brand_primary.get('color', '?')} ({result.brand_primary.get('confidence', '?')})")
464
  log(f" ├─ Palette: {result.palette_strategy} · Cohesion: {result.cohesion_score}/10")
465
  log(f" ├─ Colors Named: {len(result.naming_map)}/{len(input_hexes)}")
466
+ log(f" ├─ Typography: {result.typography_notes or 'N/A'}")
467
+ log(f" ├─ Spacing: {result.spacing_notes or 'N/A'}")
468
+ log(f" ├─ Radius: {result.radius_notes or 'N/A'}")
469
+ log(f" ├─ Shadows: {result.shadow_notes or 'N/A'}")
470
  log(f" └─ Critic: {'✅ PASSED' if result.validation_passed else '⚠️ FALLBACK'}")
471
  return result
472
 
 
1084
  log(f" ├─ Perspective A: {pa}/100")
1085
  log(f" ├─ Perspective B: {pb}/100")
1086
  log(f" ├─ Chosen: {result.chosen_perspective}")
1087
+ log(f" ├─ Why: {result.choice_reasoning or 'N/A'}")
1088
  log(f" ├─ Final Score: {result.scores.get('overall', '?')}/100" if result.scores else " ├─ Scores: N/A")
1089
  log(f" ├─ Actions: {len(result.top_3_actions)} | Color Recs: {len(result.color_recommendations)}")
1090
  log(f" ├─ Typography: {_s(result.type_scale_recommendation)}")
app.py CHANGED
@@ -48,7 +48,7 @@ class AppState:
48
  def log(self, message: str):
49
  timestamp = datetime.now().strftime("%H:%M:%S")
50
  self.logs.append(f"[{timestamp}] {message}")
51
- if len(self.logs) > 100:
52
  self.logs.pop(0)
53
 
54
  def get_logs(self) -> str:
@@ -1159,13 +1159,15 @@ async def run_stage2_analysis_v2(
1159
  )
1160
  if pv_issues:
1161
  state.log("")
1162
- state.log(" POST-VALIDATION: Issues found")
1163
- for issue in pv_issues[:5]:
1164
- state.log(f" {issue}")
 
 
1165
  else:
1166
- state.log(" POST-VALIDATION: All checks passed")
1167
  except Exception as pv_err:
1168
- state.log(f" POST-VALIDATION error: {str(pv_err)[:80]}")
1169
 
1170
  # Create fallback synthesis if needed
1171
  if not final_synthesis:
@@ -4303,8 +4305,8 @@ def create_ui():
4303
  "Scroll through to see detailed statistics and individual agent outputs.*",
4304
  elem_classes=["section-desc"])
4305
  stage2_log = gr.Textbox(
4306
- label="Log",
4307
- lines=20,
4308
  interactive=False,
4309
  elem_classes=["log-container"]
4310
  )
 
48
  def log(self, message: str):
49
  timestamp = datetime.now().strftime("%H:%M:%S")
50
  self.logs.append(f"[{timestamp}] {message}")
51
+ if len(self.logs) > 500:
52
  self.logs.pop(0)
53
 
54
  def get_logs(self) -> str:
 
1159
  )
1160
  if pv_issues:
1161
  state.log("")
1162
+ state.log(f" POST-VALIDATION: {len(pv_issues)} issues found")
1163
+ for issue in pv_issues[:10]:
1164
+ state.log(f" ├─ {issue}")
1165
+ if len(pv_issues) > 10:
1166
+ state.log(f" └─ ... and {len(pv_issues) - 10} more")
1167
  else:
1168
+ state.log(" POST-VALIDATION: All checks passed")
1169
  except Exception as pv_err:
1170
+ state.log(f" POST-VALIDATION error: {str(pv_err)}")
1171
 
1172
  # Create fallback synthesis if needed
1173
  if not final_synthesis:
 
4305
  "Scroll through to see detailed statistics and individual agent outputs.*",
4306
  elem_classes=["section-desc"])
4307
  stage2_log = gr.Textbox(
4308
+ label="📋 Analysis Log (full step-by-step reasoning)",
4309
+ lines=30,
4310
  interactive=False,
4311
  elem_classes=["log-container"]
4312
  )
core/rule_engine.py CHANGED
@@ -988,7 +988,19 @@ def run_rule_engine(
988
  log(" 📐 TYPE SCALE ANALYSIS")
989
  log(" " + "─" * 40)
990
  typography = analyze_type_scale(typography_tokens)
991
-
 
 
 
 
 
 
 
 
 
 
 
 
992
  consistency_icon = "✅" if typography.is_consistent else "⚠️"
993
  log(f" ├─ Detected Ratio: {typography.detected_ratio:.3f}")
994
  log(f" ├─ Closest Standard: {typography.scale_name} ({typography.closest_standard_ratio})")
@@ -1003,14 +1015,23 @@ def run_rule_engine(
1003
  log(" ♿ ACCESSIBILITY CHECK (WCAG AA/AAA)")
1004
  log(" " + "─" * 40)
1005
  accessibility = analyze_accessibility(color_tokens, fg_bg_pairs=fg_bg_pairs)
1006
-
1007
  # Separate individual-color failures from real FG/BG pair failures
1008
  pair_failures = [a for a in accessibility if not a.passes_aa_normal and a.name.startswith("fg:")]
1009
  color_only_failures = [a for a in accessibility if not a.passes_aa_normal and not a.name.startswith("fg:")]
1010
  failures = [a for a in accessibility if not a.passes_aa_normal]
1011
  passes = len(accessibility) - len(failures)
1012
 
 
1013
  pair_count = len(fg_bg_pairs) if fg_bg_pairs else 0
 
 
 
 
 
 
 
 
1014
  log(f" ├─ Colors Analyzed: {len(accessibility)}")
1015
  log(f" ├─ FG/BG Pairs Checked: {pair_count}")
1016
  log(f" ├─ AA Pass: {passes} ✅")
@@ -1020,21 +1041,21 @@ def run_rule_engine(
1020
  if color_only_failures:
1021
  log(" │")
1022
  log(" │ ⚠️ FAILING COLORS (vs white/black):")
1023
- for i, f in enumerate(color_only_failures[:5]):
1024
  fix_info = f" → 💡 Fix: {f.suggested_fix} ({f.suggested_fix_contrast:.1f}:1)" if f.suggested_fix else ""
1025
- log(f" │ ├─ {f.name}: {f.hex_color} ({f.contrast_on_white:.1f}:1 on white){fix_info}")
1026
- if len(color_only_failures) > 5:
1027
- log(f" │ └─ ... and {len(color_only_failures) - 5} more")
1028
 
1029
  if pair_failures:
1030
  log(" │")
1031
  log(" │ ❌ FAILING FG/BG PAIRS (actual on-page combinations):")
1032
- for i, f in enumerate(pair_failures[:5]):
1033
  fix_info = f" → 💡 Fix: {f.suggested_fix} ({f.suggested_fix_contrast:.1f}:1)" if f.suggested_fix else ""
1034
  log(f" │ ├─ {f.name}{fix_info}")
1035
- if len(pair_failures) > 5:
1036
- log(f" │ └─ ... and {len(pair_failures) - 5} more")
1037
-
1038
  log("")
1039
 
1040
  # ─────────────────────────────────────────────────────────────
@@ -1043,15 +1064,24 @@ def run_rule_engine(
1043
  log(" 📏 SPACING GRID ANALYSIS")
1044
  log(" " + "─" * 40)
1045
  spacing = analyze_spacing_grid(spacing_tokens)
1046
-
 
 
 
 
 
 
 
 
 
1047
  alignment_icon = "✅" if spacing.is_aligned else "⚠️"
1048
  log(f" ├─ Detected Base: {spacing.detected_base}px")
1049
  log(f" ├─ Grid Aligned: {alignment_icon} {spacing.alignment_percentage:.0f}%")
1050
-
1051
  if spacing.misaligned_values:
1052
- log(f" ├─ Misaligned Values: {spacing.misaligned_values[:8]}{'...' if len(spacing.misaligned_values) > 8 else ''}")
1053
-
1054
- log(f" ├─ Suggested Scale: {spacing.suggested_scale[:10]}...")
1055
  log(f" └─ 💡 Recommendation: {spacing.recommendation}px ({spacing.recommendation_reason})")
1056
  log("")
1057
 
@@ -1061,16 +1091,33 @@ def run_rule_engine(
1061
  log(" 🎨 COLOR PALETTE STATISTICS")
1062
  log(" " + "─" * 40)
1063
  color_stats = analyze_color_statistics(color_tokens)
1064
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1065
  dup_icon = "⚠️" if color_stats.duplicate_count > 10 else "✅"
1066
  unique_icon = "⚠️" if color_stats.unique_count > 30 else "✅"
1067
-
1068
  log(f" ├─ Total Colors: {color_stats.total_count}")
1069
  log(f" ├─ Unique Colors: {color_stats.unique_count} {unique_icon}")
1070
  log(f" ├─ Exact Duplicates: {color_stats.duplicate_count} {dup_icon}")
1071
  log(f" ├─ Near-Duplicates: {len(color_stats.near_duplicates)}")
1072
  log(f" ├─ Grays: {color_stats.gray_count} | Saturated: {color_stats.saturated_count}")
1073
- log(f" └─ Hue Distribution: {dict(list(color_stats.hue_distribution.items())[:5])}...")
1074
  log("")
1075
 
1076
  # ─────────────────────────────────────────────────────────────
@@ -1080,8 +1127,25 @@ def run_rule_engine(
1080
  if radius_result.tier_count > 0:
1081
  log(" 🔘 RADIUS GRID ANALYSIS")
1082
  log(" " + "─" * 40)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1083
  align_icon = "✅" if radius_result.alignment_pct >= 80 else "⚠️"
1084
- log(f" ├─ Tiers: {radius_result.tier_count} | Values: {radius_result.values_px[:8]}")
1085
  log(f" ├─ Grid: base-{radius_result.grid_base} | Aligned: {align_icon} {radius_result.alignment_pct:.0f}%")
1086
  log(f" ├─ Strategy: {radius_result.strategy} | Has full: {radius_result.has_full}")
1087
  log(f" └─ Base-4: {radius_result.base_4_aligned}/{radius_result.tier_count} | Base-8: {radius_result.base_8_aligned}/{radius_result.tier_count}")
@@ -1091,16 +1155,66 @@ def run_rule_engine(
1091
  # v3: Shadow Elevation Analysis
1092
  # ─────────────────────────────────────────────────────────────
1093
  shadow_result = analyze_shadow_elevation(shadow_tokens or {})
 
 
1094
  if shadow_result.level_count > 0:
1095
- log(" 🌗 SHADOW ELEVATION ANALYSIS")
1096
- log(" " + "─" * 40)
 
 
 
 
 
 
 
1097
  mono_icon = "✅" if shadow_result.is_monotonic else "⚠️"
1098
  color_icon = "✅" if shadow_result.color_consistent else "⚠️"
1099
  log(f" ├─ Levels: {shadow_result.level_count} | Blur: {shadow_result.blur_values}")
1100
  log(f" ├─ Monotonic Blur: {mono_icon} {'Yes' if shadow_result.is_monotonic else 'No — progression is non-linear'}")
1101
  log(f" ├─ Color Consistent: {color_icon} {'Yes' if shadow_result.color_consistent else 'No — mixed shadow colors'}")
1102
- log(f" ─ Verdict: {shadow_result.elevation_verdict}")
1103
- log("")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1104
 
1105
  # ─────────────────────────────────────────────────────────────
1106
  # Calculate Summary Scores
@@ -1119,6 +1233,13 @@ def run_rule_engine(
1119
  log(" " + "─" * 40)
1120
  log(f" RULE ENGINE SUMMARY")
1121
  log(f" ├─ Consistency Score: {consistency_score}/100")
 
 
 
 
 
 
 
1122
  log(f" ├─ AA Failures: {len(failures)}")
1123
  log(f" ├─ Radius: {radius_result.tier_count} tiers ({radius_result.strategy})")
1124
  log(f" ├─ Shadows: {shadow_result.level_count} levels ({shadow_result.elevation_verdict})")
 
988
  log(" 📐 TYPE SCALE ANALYSIS")
989
  log(" " + "─" * 40)
990
  typography = analyze_type_scale(typography_tokens)
991
+
992
+ # Step-by-step reasoning
993
+ if typography.sizes_px and len(typography.sizes_px) >= 2:
994
+ sizes = sorted(typography.sizes_px)
995
+ log(f" │ Step 1: Found {len(sizes)} font sizes: {sizes}")
996
+ if len(sizes) >= 2:
997
+ ratios = [round(sizes[i+1]/sizes[i], 3) for i in range(len(sizes)-1) if sizes[i] > 0]
998
+ log(f" │ Step 2: Computed ratios between consecutive sizes: {ratios[:8]}{'...' if len(ratios) > 8 else ''}")
999
+ if ratios:
1000
+ avg_ratio = sum(ratios) / len(ratios)
1001
+ log(f" │ Step 3: Average ratio = {avg_ratio:.3f}, variance = {typography.variance:.3f}")
1002
+ log(f" │ Step 4: {'Variance ≤ 0.15 → consistent ✅' if typography.is_consistent else f'Variance {typography.variance:.3f} > 0.15 → inconsistent ⚠️'}")
1003
+
1004
  consistency_icon = "✅" if typography.is_consistent else "⚠️"
1005
  log(f" ├─ Detected Ratio: {typography.detected_ratio:.3f}")
1006
  log(f" ├─ Closest Standard: {typography.scale_name} ({typography.closest_standard_ratio})")
 
1015
  log(" ♿ ACCESSIBILITY CHECK (WCAG AA/AAA)")
1016
  log(" " + "─" * 40)
1017
  accessibility = analyze_accessibility(color_tokens, fg_bg_pairs=fg_bg_pairs)
1018
+
1019
  # Separate individual-color failures from real FG/BG pair failures
1020
  pair_failures = [a for a in accessibility if not a.passes_aa_normal and a.name.startswith("fg:")]
1021
  color_only_failures = [a for a in accessibility if not a.passes_aa_normal and not a.name.startswith("fg:")]
1022
  failures = [a for a in accessibility if not a.passes_aa_normal]
1023
  passes = len(accessibility) - len(failures)
1024
 
1025
+ # Step-by-step reasoning
1026
  pair_count = len(fg_bg_pairs) if fg_bg_pairs else 0
1027
+ log(f" │ Step 1: Testing each color against white (#fff) and black (#000)")
1028
+ log(f" │ Step 2: WCAG AA requires ≥4.5:1 for normal text, ≥3.0:1 for large text")
1029
+ log(f" │ Step 3: A color passes if it achieves ≥4.5:1 against EITHER white or black")
1030
+ if pair_count > 0:
1031
+ log(f" │ Step 4: Also testing {pair_count} real foreground/background pairs from the page")
1032
+ pass_rate = round(passes / max(len(accessibility), 1) * 100)
1033
+ log(f" │ Result: {passes}/{len(accessibility)} pass ({pass_rate}%)")
1034
+
1035
  log(f" ├─ Colors Analyzed: {len(accessibility)}")
1036
  log(f" ├─ FG/BG Pairs Checked: {pair_count}")
1037
  log(f" ├─ AA Pass: {passes} ✅")
 
1041
  if color_only_failures:
1042
  log(" │")
1043
  log(" │ ⚠️ FAILING COLORS (vs white/black):")
1044
+ for i, f in enumerate(color_only_failures[:8]):
1045
  fix_info = f" → 💡 Fix: {f.suggested_fix} ({f.suggested_fix_contrast:.1f}:1)" if f.suggested_fix else ""
1046
+ log(f" │ ├─ {f.name}: {f.hex_color} (white:{f.contrast_on_white:.1f}:1, black:{f.contrast_on_black:.1f}:1){fix_info}")
1047
+ if len(color_only_failures) > 8:
1048
+ log(f" │ └─ ... and {len(color_only_failures) - 8} more")
1049
 
1050
  if pair_failures:
1051
  log(" │")
1052
  log(" │ ❌ FAILING FG/BG PAIRS (actual on-page combinations):")
1053
+ for i, f in enumerate(pair_failures[:8]):
1054
  fix_info = f" → 💡 Fix: {f.suggested_fix} ({f.suggested_fix_contrast:.1f}:1)" if f.suggested_fix else ""
1055
  log(f" │ ├─ {f.name}{fix_info}")
1056
+ if len(pair_failures) > 8:
1057
+ log(f" │ └─ ... and {len(pair_failures) - 8} more")
1058
+
1059
  log("")
1060
 
1061
  # ─────────────────────────────────────────────────────────────
 
1064
  log(" 📏 SPACING GRID ANALYSIS")
1065
  log(" " + "─" * 40)
1066
  spacing = analyze_spacing_grid(spacing_tokens)
1067
+
1068
+ # Step-by-step reasoning
1069
+ log(f" │ Step 1: Extracted all spacing values (margin, padding, gap)")
1070
+ log(f" │ Step 2: Detected base unit via GCD: {spacing.detected_base}px")
1071
+ aligned_count = round(spacing.alignment_percentage / 100 * max(len(spacing_tokens), 1))
1072
+ total = max(len(spacing_tokens), 1)
1073
+ log(f" │ Step 3: Checking divisibility: {aligned_count}/{total} values are multiples of {spacing.detected_base}px")
1074
+ if spacing.misaligned_values:
1075
+ log(f" │ Step 4: Off-grid values: {spacing.misaligned_values[:10]}{'...' if len(spacing.misaligned_values) > 10 else ''}")
1076
+
1077
  alignment_icon = "✅" if spacing.is_aligned else "⚠️"
1078
  log(f" ├─ Detected Base: {spacing.detected_base}px")
1079
  log(f" ├─ Grid Aligned: {alignment_icon} {spacing.alignment_percentage:.0f}%")
1080
+
1081
  if spacing.misaligned_values:
1082
+ log(f" ├─ Misaligned Values: {spacing.misaligned_values[:10]}{'...' if len(spacing.misaligned_values) > 10 else ''}")
1083
+
1084
+ log(f" ├─ Suggested Scale: {spacing.suggested_scale[:12]}{'...' if len(spacing.suggested_scale) > 12 else ''}")
1085
  log(f" └─ 💡 Recommendation: {spacing.recommendation}px ({spacing.recommendation_reason})")
1086
  log("")
1087
 
 
1091
  log(" 🎨 COLOR PALETTE STATISTICS")
1092
  log(" " + "─" * 40)
1093
  color_stats = analyze_color_statistics(color_tokens)
1094
+
1095
+ # Step-by-step reasoning
1096
+ log(f" │ Step 1: Counted {color_stats.total_count} total color tokens from extraction")
1097
+ log(f" │ Step 2: After exact-hex dedup: {color_stats.unique_count} unique colors")
1098
+ if color_stats.duplicate_count > 0:
1099
+ log(f" │ Step 3: Found {color_stats.duplicate_count} exact duplicates (same hex, different usage)")
1100
+ if len(color_stats.near_duplicates) > 0:
1101
+ log(f" │ Step 4: Found {len(color_stats.near_duplicates)} near-duplicate pairs (RGB distance < 10)")
1102
+ for nd in color_stats.near_duplicates[:3]:
1103
+ if isinstance(nd, (tuple, list)) and len(nd) >= 2:
1104
+ log(f" │ └─ {nd[0]} ≈ {nd[1]}")
1105
+ if color_stats.unique_count > 30:
1106
+ log(f" │ ⚠️ {color_stats.unique_count} unique colors is high — most design systems use 15-25")
1107
+ elif color_stats.unique_count < 8:
1108
+ log(f" │ ⚠️ Only {color_stats.unique_count} unique colors — may need more semantic variety")
1109
+ else:
1110
+ log(f" │ ✅ {color_stats.unique_count} unique colors — reasonable palette size")
1111
+
1112
  dup_icon = "⚠️" if color_stats.duplicate_count > 10 else "✅"
1113
  unique_icon = "⚠️" if color_stats.unique_count > 30 else "✅"
1114
+
1115
  log(f" ├─ Total Colors: {color_stats.total_count}")
1116
  log(f" ├─ Unique Colors: {color_stats.unique_count} {unique_icon}")
1117
  log(f" ├─ Exact Duplicates: {color_stats.duplicate_count} {dup_icon}")
1118
  log(f" ├─ Near-Duplicates: {len(color_stats.near_duplicates)}")
1119
  log(f" ├─ Grays: {color_stats.gray_count} | Saturated: {color_stats.saturated_count}")
1120
+ log(f" └─ Hue Distribution: {dict(list(color_stats.hue_distribution.items())[:7])}{'...' if len(color_stats.hue_distribution) > 7 else ''}")
1121
  log("")
1122
 
1123
  # ─────────────────────────────────────────────────────────────
 
1127
  if radius_result.tier_count > 0:
1128
  log(" 🔘 RADIUS GRID ANALYSIS")
1129
  log(" " + "─" * 40)
1130
+ # Step-by-step reasoning
1131
+ log(f" │ Step 1: Found {radius_result.tier_count} unique radius values: {radius_result.values_px[:10]}{'...' if len(radius_result.values_px) > 10 else ''}")
1132
+ log(f" │ Step 2: Checking base-4 alignment: {radius_result.base_4_aligned}/{radius_result.tier_count} values divisible by 4")
1133
+ log(f" │ Step 3: Checking base-8 alignment: {radius_result.base_8_aligned}/{radius_result.tier_count} values divisible by 8")
1134
+ grid_choice = "base-4" if radius_result.base_4_aligned >= radius_result.base_8_aligned else "base-8"
1135
+ log(f" │ Step 4: Best fit grid: {grid_choice} ({radius_result.alignment_pct:.0f}% aligned)")
1136
+ if radius_result.has_full:
1137
+ log(f" │ Step 5: Full radius (9999px/50%) detected — used for pills/circles ✅")
1138
+ strategy_explanation = {
1139
+ "tight": "small range (1-8px), subtle rounding",
1140
+ "moderate": "medium range, balanced approach",
1141
+ "expressive": "wide range including large radii, expressive design",
1142
+ "mixed": "inconsistent strategy, values don't follow clear pattern",
1143
+ }
1144
+ strat_desc = strategy_explanation.get(radius_result.strategy, radius_result.strategy)
1145
+ log(f" │ Strategy: {radius_result.strategy} — {strat_desc}")
1146
+
1147
  align_icon = "✅" if radius_result.alignment_pct >= 80 else "⚠️"
1148
+ log(f" ├─ Tiers: {radius_result.tier_count} | Values: {radius_result.values_px[:10]}")
1149
  log(f" ├─ Grid: base-{radius_result.grid_base} | Aligned: {align_icon} {radius_result.alignment_pct:.0f}%")
1150
  log(f" ├─ Strategy: {radius_result.strategy} | Has full: {radius_result.has_full}")
1151
  log(f" └─ Base-4: {radius_result.base_4_aligned}/{radius_result.tier_count} | Base-8: {radius_result.base_8_aligned}/{radius_result.tier_count}")
 
1155
  # v3: Shadow Elevation Analysis
1156
  # ─────────────────────────────────────────────────────────────
1157
  shadow_result = analyze_shadow_elevation(shadow_tokens or {})
1158
+ log(" 🌗 SHADOW ELEVATION ANALYSIS")
1159
+ log(" " + "─" * 40)
1160
  if shadow_result.level_count > 0:
1161
+ # Step-by-step reasoning
1162
+ log(f" │ Step 1: Found {shadow_result.level_count} shadow definitions")
1163
+ log(f" │ Step 2: Sorted by blur radius: {shadow_result.blur_values}")
1164
+ if shadow_result.is_monotonic:
1165
+ log(f" │ Step 3: Blur values increase monotonically ✅ (proper elevation hierarchy)")
1166
+ else:
1167
+ log(f" │ Step 3: Blur values are NOT monotonic ⚠️ (shadows don't form proper hierarchy)")
1168
+ log(f" │ Step 4: Shadow colors {'are consistent ✅' if shadow_result.color_consistent else 'vary ⚠️ — should use same base color with different alpha'}")
1169
+
1170
  mono_icon = "✅" if shadow_result.is_monotonic else "⚠️"
1171
  color_icon = "✅" if shadow_result.color_consistent else "⚠️"
1172
  log(f" ├─ Levels: {shadow_result.level_count} | Blur: {shadow_result.blur_values}")
1173
  log(f" ├─ Monotonic Blur: {mono_icon} {'Yes' if shadow_result.is_monotonic else 'No — progression is non-linear'}")
1174
  log(f" ├─ Color Consistent: {color_icon} {'Yes' if shadow_result.color_consistent else 'No — mixed shadow colors'}")
1175
+ log(f" ─ Verdict: {shadow_result.elevation_verdict}")
1176
+
1177
+ # Specific recommendations for insufficient levels
1178
+ if shadow_result.level_count < 4:
1179
+ log(f" │")
1180
+ log(f" │ ⚠️ INSUFFICIENT SHADOW LEVELS ({shadow_result.level_count} found, 4-6 recommended)")
1181
+ log(f" │ Industry standard elevation systems:")
1182
+ log(f" │ ├─ Material Design: 6 levels (0dp–24dp)")
1183
+ log(f" │ ├─ Tailwind CSS: 6 levels (sm, DEFAULT, md, lg, xl, 2xl)")
1184
+ log(f" │ ├─ Shopify Polaris: 5 levels (transparent–500)")
1185
+ log(f" │ ├─ IBM Carbon: 4 levels (sm, md, lg, xl)")
1186
+ log(f" │ └─ Chakra UI: 6 levels (xs, sm, md, lg, xl, 2xl)")
1187
+ log(f" │")
1188
+ log(f" │ 💡 Recommendation: Add {4 - shadow_result.level_count} more shadow levels for a complete elevation system.")
1189
+ log(f" │ Suggested additions (blur values):")
1190
+ # Generate suggested blur values based on what exists
1191
+ existing = shadow_result.blur_values
1192
+ if len(existing) == 1:
1193
+ suggested = [round(existing[0] * 0.5, 1), round(existing[0] * 2, 1), round(existing[0] * 4, 1)]
1194
+ log(f" │ ├─ xs: {suggested[0]}px blur (subtle)")
1195
+ log(f" │ ├─ md: {suggested[1]}px blur (cards/dropdowns)")
1196
+ log(f" │ └─ lg: {suggested[2]}px blur (modals/overlays)")
1197
+ elif len(existing) == 2:
1198
+ mid = round((existing[0] + existing[1]) / 2, 1)
1199
+ large = round(existing[1] * 2, 1)
1200
+ log(f" │ ├─ md: {mid}px blur (between existing levels)")
1201
+ log(f" │ └─ lg: {large}px blur (modals/overlays)")
1202
+ elif len(existing) == 3:
1203
+ large = round(existing[-1] * 1.5, 1)
1204
+ log(f" │ └─ xl: {large}px blur (maximum elevation)")
1205
+ elif not shadow_result.is_monotonic:
1206
+ log(f" │")
1207
+ log(f" │ 💡 Recommendation: Re-order shadows so blur increases with elevation level.")
1208
+ log(f" │ Current blur order: {shadow_result.blur_values}")
1209
+ log(f" │ Expected: monotonically increasing (e.g., 2→4→8→16→24)")
1210
+
1211
+ log(f" └─ Score Impact: {'10/10 (good)' if shadow_result.elevation_verdict == 'good' else '5/10 (partial)' if shadow_result.level_count >= 3 else '2/10 (insufficient)'}")
1212
+ else:
1213
+ log(f" │ No shadow tokens found in extraction.")
1214
+ log(f" │ ⚠️ Most design systems define 4-6 shadow levels for elevation hierarchy.")
1215
+ log(f" │ This site may use flat design or shadows weren't captured.")
1216
+ log(f" └─ Score Impact: 2/10 (no shadows)")
1217
+ log("")
1218
 
1219
  # ─────────────────────────────────────────────────────────────
1220
  # Calculate Summary Scores
 
1233
  log(" " + "─" * 40)
1234
  log(f" RULE ENGINE SUMMARY")
1235
  log(f" ├─ Consistency Score: {consistency_score}/100")
1236
+ log(f" │ Breakdown:")
1237
+ log(f" │ ├─ Type Scale: {type_score:.0f}/20 {'✅' if type_score >= 15 else '⚠️'}")
1238
+ log(f" │ ├─ Accessibility: {aa_score:.0f}/20 {'✅' if aa_score >= 15 else '⚠️' if aa_score >= 10 else '❌'}")
1239
+ log(f" │ ├─ Spacing Grid: {spacing_score:.0f}/20 {'✅' if spacing_score >= 15 else '⚠️'}")
1240
+ log(f" │ ├─ Color Palette: {color_score:.0f}/20 {'✅' if color_score >= 15 else '⚠️'}")
1241
+ log(f" │ ├─ Radius: {radius_score:.0f}/10 {'✅' if radius_score >= 7 else '⚠️'}")
1242
+ log(f" │ └─ Shadows: {shadow_score:.0f}/10 {'✅' if shadow_score >= 7 else '⚠️'}")
1243
  log(f" ├─ AA Failures: {len(failures)}")
1244
  log(f" ├─ Radius: {radius_result.tier_count} tiers ({radius_result.strategy})")
1245
  log(f" ├─ Shadows: {shadow_result.level_count} levels ({shadow_result.elevation_verdict})")