Spaces:

riazmo
/

Design-System-Extractor-2

Running

App Files Files Community

riazmo committed on Feb 2

Commit

5cc6b41

verified ·

1 Parent(s): 13b3320

Delete agents/llm_agents.py

Browse files

Files changed (1) hide show

agents/llm_agents.py +0 -904

agents/llm_agents.py DELETED Viewed

@@ -1,904 +0,0 @@
-"""
-Stage 2 LLM Agents — Specialized Analysis Tasks
-=================================================
-These agents handle tasks that REQUIRE LLM reasoning:
-- Brand Identifier: Identify brand colors from usage context
-- Benchmark Advisor: Recommend best-fit design system
-- Best Practices Validator: Prioritize fixes by business impact
-- HEAD Synthesizer: Combine all outputs into final recommendations
-Each agent has a focused prompt for its specific task.
-"""
-import json
-import re
-from dataclasses import dataclass, field
-from typing import Optional, Callable, Any
-from datetime import datetime
-# =============================================================================
-# DATA CLASSES
-# =============================================================================
@dataclass
class BrandIdentification:
    """Container for what the Brand Identifier agent reports."""

    # {color, confidence, reasoning, usage_count}
    brand_primary: dict = field(default_factory=dict)
    brand_secondary: dict = field(default_factory=dict)
    brand_accent: dict = field(default_factory=dict)
    # complementary, analogous, triadic, monochromatic, random
    palette_strategy: str = ""
    # 1-10
    cohesion_score: int = 5
    cohesion_notes: str = ""
    # {hex_color: suggested_name}
    semantic_names: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return every field in a plain dict keyed by the field name.

        The values are the same objects held by the instance (no copy).
        """
        exported = (
            "brand_primary",
            "brand_secondary",
            "brand_accent",
            "palette_strategy",
            "cohesion_score",
            "cohesion_notes",
            "semantic_names",
        )
        return {key: getattr(self, key) for key in exported}
@dataclass
class BenchmarkAdvice:
    """Container for what the Benchmark Advisor agent reports."""

    recommended_benchmark: str = ""
    recommended_benchmark_name: str = ""
    reasoning: str = ""
    # [{change, from, to, effort}]
    alignment_changes: list = field(default_factory=list)
    pros_of_alignment: list = field(default_factory=list)
    cons_of_alignment: list = field(default_factory=list)
    # [{name, reason}]
    alternative_benchmarks: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the fields as a plain dict.

        Three output keys are shorter than their attribute names:
        ``pros``, ``cons`` and ``alternatives``.
        """
        # (output key, attribute name) in output order
        layout = (
            ("recommended_benchmark", "recommended_benchmark"),
            ("recommended_benchmark_name", "recommended_benchmark_name"),
            ("reasoning", "reasoning"),
            ("alignment_changes", "alignment_changes"),
            ("pros", "pros_of_alignment"),
            ("cons", "cons_of_alignment"),
            ("alternatives", "alternative_benchmarks"),
        )
        return {out_key: getattr(self, attr) for out_key, attr in layout}
@dataclass
class BestPracticesResult:
    """Container for what the Best Practices Validator agent reports."""

    # 0-100
    overall_score: int = 50
    # {check_name: {status: pass/warn/fail, note: str}}
    checks: dict = field(default_factory=dict)
    # [{rank, issue, impact, effort, action}]
    priority_fixes: list = field(default_factory=list)
    passing_practices: list = field(default_factory=list)
    failing_practices: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the fields as a plain dict.

        The practice lists are exposed under the shorter keys ``passing``
        and ``failing``.
        """
        result = {}
        result["overall_score"] = self.overall_score
        result["checks"] = self.checks
        result["priority_fixes"] = self.priority_fixes
        result["passing"] = self.passing_practices
        result["failing"] = self.failing_practices
        return result
@dataclass
class HeadSynthesis:
    """Container for the final, combined output of the HEAD agent."""

    executive_summary: str = ""
    # {overall, accessibility, consistency, organization}
    scores: dict = field(default_factory=dict)
    # {closest, similarity, recommendation}
    benchmark_fit: dict = field(default_factory=dict)
    # {primary, secondary, cohesion}
    brand_analysis: dict = field(default_factory=dict)
    # [{action, impact, effort, details}]
    top_3_actions: list = field(default_factory=list)
    # [{role, current, suggested, reason, accept}]
    color_recommendations: list = field(default_factory=list)
    type_scale_recommendation: dict = field(default_factory=dict)
    spacing_recommendation: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return every field in a plain dict keyed by the field name.

        The values are the same objects held by the instance (no copy).
        """
        exported = (
            "executive_summary",
            "scores",
            "benchmark_fit",
            "brand_analysis",
            "top_3_actions",
            "color_recommendations",
            "type_scale_recommendation",
            "spacing_recommendation",
        )
        return {key: getattr(self, key) for key in exported}
-# =============================================================================
-# BRAND IDENTIFIER AGENT
-# =============================================================================
class BrandIdentifierAgent:
    """
    Identifies brand colors from usage context.
    WHY LLM: Requires understanding context (33 buttons = likely brand primary),
    not just color math.
    """
    PROMPT_TEMPLATE = """You are a senior design system analyst. Identify the brand colors from this color usage data.
## COLOR DATA WITH USAGE CONTEXT
{color_data}
## SEMANTIC ANALYSIS (from CSS properties)
{semantic_analysis}
## YOUR TASK
1. **Identify Brand Colors**:
   - Brand Primary: The main action/CTA color (highest visibility)
   - Brand Secondary: Supporting brand color
   - Brand Accent: Highlight color for emphasis
2. **Assess Palette Strategy**:
   - Is it complementary, analogous, triadic, monochromatic, or random?
3. **Rate Cohesion** (1-10):
   - Do the colors work together?
   - Is there a clear color story?
4. **Suggest Semantic Names** for top 10 most-used colors
## OUTPUT FORMAT (JSON only)
{{
  "brand_primary": {{
    "color": "#hex",
    "confidence": "high|medium|low",
    "reasoning": "Why this is brand primary",
    "usage_count": <number>
  }},
  "brand_secondary": {{
    "color": "#hex",
    "confidence": "high|medium|low",
    "reasoning": "..."
  }},
  "brand_accent": {{
    "color": "#hex or null",
    "confidence": "...",
    "reasoning": "..."
  }},
  "palette_strategy": "complementary|analogous|triadic|monochromatic|random",
  "cohesion_score": <1-10>,
  "cohesion_notes": "Assessment of how well colors work together",
  "semantic_names": {{
    "#hex1": "brand.primary",
    "#hex2": "text.primary",
    "#hex3": "background.primary"
  }}
}}
Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Client object; only its ``complete_async`` coroutine is used here.
        self.hf_client = hf_client

    async def analyze(
        self,
        color_tokens: dict,
        semantic_analysis: dict,
        log_callback: Optional[Callable] = None,
    ) -> "BrandIdentification":
        """
        Identify brand colors from usage context.

        Args:
            color_tokens: Dict of color tokens with usage data
            semantic_analysis: Semantic categorization from Stage 1
            log_callback: Progress logging function (optional)

        Returns:
            BrandIdentification with identified colors. If the client call
            or the response handling raises, the failure is logged and a
            default (empty) BrandIdentification is returned instead.
        """
        def log(msg: str) -> None:
            if log_callback:
                log_callback(msg)

        log("   🎨 Brand Identifier (Llama 70B)")
        log("   └─ Analyzing color context and usage patterns...")

        prompt = self.PROMPT_TEMPLATE.format(
            color_data=self._format_color_data(color_tokens),
            semantic_analysis=self._format_semantic_analysis(semantic_analysis),
        )

        try:
            start_time = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="brand_identifier",
                system_prompt="You are a senior design system analyst specializing in brand color identification.",
                user_message=prompt,
                max_tokens=800,
                json_mode=True,
            )
            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log("   ────────────────────────────────────────────────")
            log(f"   🎨 Brand Identifier: COMPLETE ({duration:.1f}s)")
            log(f"   ├─ Brand Primary: {result.brand_primary.get('color', '?')} ({result.brand_primary.get('confidence', '?')} confidence)")
            log(f"   ├─ Brand Secondary: {result.brand_secondary.get('color', '?')}")
            log(f"   ├─ Palette Strategy: {result.palette_strategy}")
            log(f"   └─ Cohesion Score: {result.cohesion_score}/10")
            return result
        except Exception as e:
            error_msg = str(e)
            # Log the first 120 characters of the error for diagnosis.
            log(f"   ⚠️ Brand Identifier failed: {error_msg[:120]}")
            if "gated" in error_msg.lower() or "access" in error_msg.lower():
                log("   └─ Model may require license acceptance at huggingface.co")
            elif "Rate limit" in error_msg or "429" in error_msg:
                log("   └─ HF free tier rate limit — wait or upgrade to Pro")
            return BrandIdentification()

    def _format_color_data(self, color_tokens: dict) -> str:
        """Format up to the first 30 color tokens as prompt bullet lines.

        Tokens may be dicts (``value``/``hex``, ``usage_count``/``count``,
        ``context``/``css_property``) or objects exposing ``value``,
        ``usage_count`` and ``context`` attributes. Tokens without a color
        value are skipped. Empty or missing input yields a placeholder line.
        """
        if not color_tokens:
            return "No color data available"

        lines = []
        for token in list(color_tokens.values())[:30]:
            if isinstance(token, dict):
                # ``or`` so that an empty/None primary key still falls back
                # to the alternate key instead of dropping the token.
                hex_val = token.get("value") or token.get("hex", "")
                usage = token.get("usage_count", token.get("count", 1))
                context = token.get("context") or token.get("css_property", "")
            else:
                hex_val = getattr(token, "value", "")
                usage = getattr(token, "usage_count", 1)
                context = getattr(token, "context", "")
            if hex_val:
                lines.append(f"- {hex_val}: used {usage}x, context: {context or 'unknown'}")
        return "\n".join(lines) if lines else "No color data available"

    def _format_semantic_analysis(self, semantic: dict) -> str:
        """Format the semantic analysis as prompt bullet lines.

        Each non-empty category becomes one line. Lists show up to five
        colors; dicts are either a flat color dict (has ``hex``) or a dict of
        sub-roles, of which up to five are shown. Any unexpected failure is
        reported in the returned text rather than raised.
        """
        if not semantic:
            return "No semantic analysis available"

        lines = []
        try:
            for category, value in semantic.items():
                if not value:
                    continue
                if isinstance(value, list):
                    # List of colors; str() keeps one odd entry from
                    # breaking the join for the whole analysis.
                    color_list = [
                        str(c.get("hex", c.get("value", str(c)))) if isinstance(c, dict) else str(c)
                        for c in value[:5]
                    ]
                    lines.append(f"- {category}: {', '.join(color_list)}")
                elif isinstance(value, dict):
                    # Could be a nested dict of sub-roles → color dicts
                    # e.g. {"primary": {"hex": "#007bff", ...}, "secondary": {...}}
                    # or a flat color dict {"hex": "#...", "confidence": "..."}
                    # or a summary dict {"total_colors_analyzed": 50, ...}
                    if "hex" in value:
                        lines.append(f"- {category}: {value['hex']}")
                        continue
                    sub_items = []
                    for sub_role, sub_val in list(value.items())[:5]:
                        if isinstance(sub_val, dict) and "hex" in sub_val:
                            sub_items.append(f"{sub_role}={sub_val['hex']}")
                        elif isinstance(sub_val, (str, int, float, bool)):
                            sub_items.append(f"{sub_role}={sub_val}")
                    if sub_items:
                        lines.append(f"- {category}: {', '.join(sub_items)}")
                else:
                    lines.append(f"- {category}: {value}")
        except Exception as e:
            return f"Error formatting semantic analysis: {str(e)[:50]}"
        return "\n".join(lines) if lines else "No semantic analysis available"

    @staticmethod
    def _coerce_payload(data: dict) -> dict:
        """Normalize decoded LLM JSON into BrandIdentification keyword args.

        Model output is untrusted: a field may be ``null``, a bare string or
        out of range. Wrong-typed fields fall back to their defaults so later
        ``.get()`` calls on the result cannot fail.
        """
        def as_dict(key: str) -> dict:
            value = data.get(key)
            return value if isinstance(value, dict) else {}

        def as_text(key: str, default: str) -> str:
            value = data.get(key, default)
            return value if isinstance(value, str) else default

        try:
            cohesion = int(float(data.get("cohesion_score", 5)))
        except (TypeError, ValueError, OverflowError):
            cohesion = 5

        return {
            "brand_primary": as_dict("brand_primary"),
            "brand_secondary": as_dict("brand_secondary"),
            "brand_accent": as_dict("brand_accent"),
            "palette_strategy": as_text("palette_strategy", "unknown"),
            "cohesion_score": max(1, min(10, cohesion)),  # documented range is 1-10
            "cohesion_notes": as_text("cohesion_notes", ""),
            "semantic_names": as_dict("semantic_names"),
        }

    def _parse_response(self, response: str) -> "BrandIdentification":
        """Parse LLM response into BrandIdentification.

        The JSON object is taken as the span from the first ``{`` to the last
        ``}``. A response that is not a string, contains no such span, or is
        not valid JSON yields a default BrandIdentification.
        """
        try:
            json_match = re.search(r'\{[\s\S]*\}', response)
            data = json.loads(json_match.group()) if json_match else None
        except (TypeError, ValueError):
            # TypeError: response is not str/bytes; ValueError: malformed JSON
            data = None
        if not isinstance(data, dict):
            return BrandIdentification()
        return BrandIdentification(**self._coerce_payload(data))
-# =============================================================================
-# BENCHMARK ADVISOR AGENT
-# =============================================================================
class BenchmarkAdvisorAgent:
    """
    Recommends best-fit design system based on comparison data.
    WHY LLM: Requires reasoning about trade-offs and use-case fit,
    not just similarity scores.
    """
    PROMPT_TEMPLATE = """You are a senior design system consultant. Recommend the best design system alignment.
## USER'S CURRENT VALUES
- Type Scale Ratio: {user_ratio}
- Base Font Size: {user_base}px
- Spacing Grid: {user_spacing}px
## BENCHMARK COMPARISON
{benchmark_comparison}
## YOUR TASK
1. **Recommend Best Fit**: Which design system should they align with?
2. **Explain Why**: Consider similarity scores AND use-case fit
3. **List Changes Needed**: What would they need to change to align?
4. **Pros/Cons**: Benefits and drawbacks of alignment
## OUTPUT FORMAT (JSON only)
{{
  "recommended_benchmark": "<system_key>",
  "recommended_benchmark_name": "<full name>",
  "reasoning": "Why this is the best fit for their use case",
  "alignment_changes": [
    {{"change": "Type scale", "from": "1.18", "to": "1.25", "effort": "medium"}},
    {{"change": "Spacing grid", "from": "mixed", "to": "4px", "effort": "high"}}
  ],
  "pros_of_alignment": [
    "Familiar patterns for users",
    "Well-tested accessibility"
  ],
  "cons_of_alignment": [
    "May lose brand uniqueness"
  ],
  "alternative_benchmarks": [
    {{"name": "Material Design 3", "reason": "Good for Android-first products"}}
  ]
}}
Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Client object; only its ``complete_async`` coroutine is used here.
        self.hf_client = hf_client

    async def analyze(
        self,
        user_ratio: float,
        user_base: int,
        user_spacing: int,
        benchmark_comparisons: list,
        log_callback: Optional[Callable] = None,
    ) -> "BenchmarkAdvice":
        """
        Recommend best-fit design system.

        Args:
            user_ratio: User's detected type scale ratio
            user_base: User's base font size
            user_spacing: User's spacing grid base
            benchmark_comparisons: List of BenchmarkComparison objects
            log_callback: Progress logging function (optional)

        Returns:
            BenchmarkAdvice with recommendations. If the client call or the
            response handling raises, the failure is logged and a default
            (empty) BenchmarkAdvice is returned instead.
        """
        def log(msg: str) -> None:
            if log_callback:
                log_callback(msg)

        log("")
        log("   🏢 Benchmark Advisor (Qwen 72B)")
        log("   └─ Evaluating benchmark fit for your use case...")

        prompt = self.PROMPT_TEMPLATE.format(
            user_ratio=user_ratio,
            user_base=user_base,
            user_spacing=user_spacing,
            benchmark_comparison=self._format_comparisons(benchmark_comparisons),
        )

        try:
            start_time = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="benchmark_advisor",
                system_prompt="You are a senior design system consultant specializing in design system architecture.",
                user_message=prompt,
                max_tokens=700,
                json_mode=True,
            )
            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log("   ────────────────────────────────────────────────")
            log(f"   🏢 Benchmark Advisor: COMPLETE ({duration:.1f}s)")
            log(f"   ├─ Recommended: {result.recommended_benchmark_name}")
            log(f"   ├─ Changes Needed: {len(result.alignment_changes)}")
            log(f"   └─ Key Change: {result.alignment_changes[0].get('change', 'N/A') if result.alignment_changes else 'None'}")
            return result
        except Exception as e:
            log(f"   ├─ ⚠️ Benchmark Advisor failed: {str(e)[:120]}")
            return BenchmarkAdvice()

    def _format_comparisons(self, comparisons: list) -> str:
        """Format up to the first five benchmark comparisons for the prompt.

        Each comparison must expose ``benchmark`` (with ``icon``, ``name``,
        ``typography``, ``spacing``, ``best_for``) plus the score/diff
        attributes read below. Empty or missing input yields a placeholder
        line so the prompt section is never blank.
        """
        if not comparisons:
            return "No benchmark comparisons available"

        entries = []
        for position, comparison in enumerate(comparisons[:5], start=1):
            bench = comparison.benchmark
            entry_lines = [
                "",  # every entry starts on a fresh line
                f"{position}. {bench.icon} {bench.name}",
                f"   - Similarity Score: {comparison.similarity_score:.2f} (lower = better)",
                f"   - Match: {comparison.overall_match_pct:.0f}%",
                f"   - Type Ratio: {bench.typography.get('scale_ratio', '?')} (diff: {comparison.type_ratio_diff:.3f})",
                f"   - Base Size: {bench.typography.get('base_size', '?')}px (diff: {comparison.base_size_diff})",
                f"   - Spacing: {bench.spacing.get('base', '?')}px (diff: {comparison.spacing_grid_diff})",
                f"   - Best For: {', '.join(bench.best_for)}",
            ]
            entries.append("\n".join(entry_lines))
        return "\n".join(entries)

    @staticmethod
    def _coerce_payload(data: dict) -> dict:
        """Normalize decoded LLM JSON into BenchmarkAdvice keyword args.

        Model output is untrusted: wrong-typed fields fall back to their
        defaults, and list fields documented as lists of dicts keep only
        their dict entries so later ``.get()`` calls cannot fail.
        """
        def as_text(key: str) -> str:
            value = data.get(key, "")
            return value if isinstance(value, str) else ""

        def as_list(key: str) -> list:
            value = data.get(key)
            return value if isinstance(value, list) else []

        def as_dict_list(key: str) -> list:
            return [item for item in as_list(key) if isinstance(item, dict)]

        return {
            "recommended_benchmark": as_text("recommended_benchmark"),
            "recommended_benchmark_name": as_text("recommended_benchmark_name"),
            "reasoning": as_text("reasoning"),
            "alignment_changes": as_dict_list("alignment_changes"),
            "pros_of_alignment": as_list("pros_of_alignment"),
            "cons_of_alignment": as_list("cons_of_alignment"),
            "alternative_benchmarks": as_dict_list("alternative_benchmarks"),
        }

    def _parse_response(self, response: str) -> "BenchmarkAdvice":
        """Parse LLM response into BenchmarkAdvice.

        The JSON object is taken as the span from the first ``{`` to the last
        ``}``. A response that is not a string, contains no such span, or is
        not valid JSON yields a default BenchmarkAdvice.
        """
        try:
            json_match = re.search(r'\{[\s\S]*\}', response)
            data = json.loads(json_match.group()) if json_match else None
        except (TypeError, ValueError):
            # TypeError: response is not str/bytes; ValueError: malformed JSON
            data = None
        if not isinstance(data, dict):
            return BenchmarkAdvice()
        return BenchmarkAdvice(**self._coerce_payload(data))
-# =============================================================================
-# BEST PRACTICES VALIDATOR AGENT
-# =============================================================================
class BestPracticesValidatorAgent:
    """
    Validates against design system best practices and prioritizes fixes.
    WHY LLM: Prioritization requires judgment about business impact,
    not just checking boxes.
    """
    PROMPT_TEMPLATE = """You are a design system auditor. Validate these tokens against best practices.
## RULE ENGINE ANALYSIS RESULTS
### Typography
- Detected Ratio: {type_ratio} ({type_consistent})
- Base Size: {base_size}px
- Recommendation: {type_recommendation}
### Accessibility
- Total Colors: {total_colors}
- AA Pass: {aa_pass}
- AA Fail: {aa_fail}
- Failing Colors: {failing_colors}
### Spacing
- Detected Base: {spacing_base}px
- Grid Aligned: {spacing_aligned}%
- Recommendation: {spacing_recommendation}px
### Color Statistics
- Unique Colors: {unique_colors}
- Duplicates: {duplicates}
- Near-Duplicates: {near_duplicates}
## BEST PRACTICES CHECKLIST
1. Type scale uses standard ratio (1.2, 1.25, 1.333, 1.5, 1.618)
2. Type scale is consistent (variance < 0.15)
3. Base font size >= 16px (accessibility)
4. Line height >= 1.5 for body text
5. All interactive colors pass AA (4.5:1)
6. Spacing uses consistent grid (4px or 8px)
7. Limited color palette (< 20 unique semantic colors)
8. No near-duplicate colors
## YOUR TASK
1. Score each practice: pass/warn/fail
2. Calculate overall score (0-100)
3. Identify TOP 3 priority fixes with impact assessment
## OUTPUT FORMAT (JSON only)
{{
  "overall_score": <0-100>,
  "checks": {{
    "type_scale_standard": {{"status": "pass|warn|fail", "note": "..."}},
    "type_scale_consistent": {{"status": "...", "note": "..."}},
    "base_size_accessible": {{"status": "...", "note": "..."}},
    "aa_compliance": {{"status": "...", "note": "..."}},
    "spacing_grid": {{"status": "...", "note": "..."}},
    "color_count": {{"status": "...", "note": "..."}}
  }},
  "priority_fixes": [
    {{
      "rank": 1,
      "issue": "Brand primary fails AA",
      "impact": "high|medium|low",
      "effort": "low|medium|high",
      "action": "Change #06b2c4 → #0891a8"
    }}
  ],
  "passing_practices": ["Base font size", "..."],
  "failing_practices": ["AA compliance", "..."]
}}
Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Client object; only its ``complete_async`` coroutine is used here.
        self.hf_client = hf_client

    async def analyze(
        self,
        rule_engine_results: Any,
        log_callback: Optional[Callable] = None,
    ) -> "BestPracticesResult":
        """
        Validate against best practices.

        Args:
            rule_engine_results: Results from rule engine; must expose
                ``typography``, ``spacing``, ``color_stats`` and
                ``accessibility``.
            log_callback: Progress logging function (optional)

        Returns:
            BestPracticesResult with validation. If the client call or the
            response handling raises, the failure is logged and a default
            BestPracticesResult is returned instead.
        """
        def log(msg: str) -> None:
            if log_callback:
                log_callback(msg)

        log("")
        log("   ✅ Best Practices Validator (Qwen 72B)")
        log("   └─ Checking against design system standards...")

        prompt = self._build_prompt(rule_engine_results)

        try:
            start_time = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="best_practices_validator",
                system_prompt="You are a design system auditor specializing in best practices validation.",
                user_message=prompt,
                max_tokens=800,
                json_mode=True,
            )
            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log("   ────────────────────────────────────────────────")
            log(f"   ✅ Best Practices: COMPLETE ({duration:.1f}s)")
            log(f"   ├─ Overall Score: {result.overall_score}/100")
            log(f"   ├─ Passing: {len(result.passing_practices)} | Failing: {len(result.failing_practices)}")
            if result.priority_fixes:
                log(f"   └─ Top Fix: {result.priority_fixes[0].get('issue', 'N/A')}")
            return result
        except Exception as e:
            log(f"   ├─ ⚠️ Best Practices Validator failed: {str(e)[:120]}")
            return BestPracticesResult()

    def _build_prompt(self, rule_engine_results: Any) -> str:
        """Fill PROMPT_TEMPLATE from the rule engine results.

        At most the first five AA-failing colors are listed; the base size
        falls back to 16 when no font sizes were detected.
        """
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility

        failures = [a for a in accessibility if not a.passes_aa_normal]
        failing_colors_str = ", ".join(
            f"{a.hex_color} ({a.contrast_on_white:.1f}:1)" for a in failures[:5]
        )

        return self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_consistent="consistent" if typo.is_consistent else f"inconsistent, variance={typo.variance:.2f}",
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            type_recommendation=f"{typo.recommendation} ({typo.recommendation_name})",
            total_colors=len(accessibility),
            aa_pass=len(accessibility) - len(failures),
            aa_fail=len(failures),
            failing_colors=failing_colors_str or "None",
            spacing_base=spacing.detected_base,
            spacing_aligned=f"{spacing.alignment_percentage:.0f}",
            spacing_recommendation=spacing.recommendation,
            unique_colors=color_stats.unique_count,
            duplicates=color_stats.duplicate_count,
            near_duplicates=len(color_stats.near_duplicates),
        )

    @staticmethod
    def _coerce_payload(data: dict) -> dict:
        """Normalize decoded LLM JSON into BestPracticesResult keyword args.

        Model output is untrusted: wrong-typed fields fall back to their
        defaults, ``priority_fixes`` keeps only dict entries, and the score
        is coerced to an int within 0-100.
        """
        def as_list(key: str) -> list:
            value = data.get(key)
            return value if isinstance(value, list) else []

        try:
            score = int(float(data.get("overall_score", 50)))
        except (TypeError, ValueError, OverflowError):
            score = 50

        checks = data.get("checks")
        return {
            "overall_score": max(0, min(100, score)),
            "checks": checks if isinstance(checks, dict) else {},
            "priority_fixes": [fix for fix in as_list("priority_fixes") if isinstance(fix, dict)],
            "passing_practices": as_list("passing_practices"),
            "failing_practices": as_list("failing_practices"),
        }

    def _parse_response(self, response: str) -> "BestPracticesResult":
        """Parse LLM response into BestPracticesResult.

        The JSON object is taken as the span from the first ``{`` to the last
        ``}``. A response that is not a string, contains no such span, or is
        not valid JSON yields a default BestPracticesResult.
        """
        try:
            json_match = re.search(r'\{[\s\S]*\}', response)
            data = json.loads(json_match.group()) if json_match else None
        except (TypeError, ValueError):
            # TypeError: response is not str/bytes; ValueError: malformed JSON
            data = None
        if not isinstance(data, dict):
            return BestPracticesResult()
        return BestPracticesResult(**self._coerce_payload(data))
-# =============================================================================
-# HEAD SYNTHESIZER AGENT
-# =============================================================================
class HeadSynthesizerAgent:
    """
    Combines all agent outputs into final recommendations.
    This is the final step that produces actionable output for the user.
    """
    PROMPT_TEMPLATE = """You are a senior design system architect. Synthesize these analysis results into final recommendations.
## RULE ENGINE FACTS
- Type Scale: {type_ratio} ({type_status})
- Base Size: {base_size}px
- AA Failures: {aa_failures}
- Spacing Grid: {spacing_status}
- Unique Colors: {unique_colors}
- Consistency Score: {consistency_score}/100
## BENCHMARK COMPARISON
Closest Match: {closest_benchmark}
Match Percentage: {match_pct}%
Recommended Changes: {benchmark_changes}
## BRAND IDENTIFICATION
- Brand Primary: {brand_primary}
- Brand Secondary: {brand_secondary}
- Palette Cohesion: {cohesion_score}/10
## BEST PRACTICES VALIDATION
Overall Score: {best_practices_score}/100
Priority Fixes: {priority_fixes}
## ACCESSIBILITY FIXES NEEDED
{accessibility_fixes}
## YOUR TASK
Synthesize ALL the above into:
1. Executive Summary (2-3 sentences)
2. Overall Scores
3. Top 3 Priority Actions (with effort estimates)
4. Specific Color Recommendations (with accept/reject defaults)
5. Type Scale Recommendation
6. Spacing Recommendation
## OUTPUT FORMAT (JSON only)
{{
  "executive_summary": "Your design system scores X/100. Key issues are Y. Priority action is Z.",
  "scores": {{
    "overall": <0-100>,
    "accessibility": <0-100>,
    "consistency": <0-100>,
    "organization": <0-100>
  }},
  "benchmark_fit": {{
    "closest": "<name>",
    "similarity": "<X%>",
    "recommendation": "Align type scale to 1.25"
  }},
  "brand_analysis": {{
    "primary": "#hex",
    "secondary": "#hex",
    "cohesion": <1-10>
  }},
  "top_3_actions": [
    {{"action": "Fix brand color AA", "impact": "high", "effort": "5 min", "details": "Change #X to #Y"}}
  ],
  "color_recommendations": [
    {{"role": "brand.primary", "current": "#06b2c4", "suggested": "#0891a8", "reason": "AA compliance", "accept": true}}
  ],
  "type_scale_recommendation": {{
    "current_ratio": 1.18,
    "recommended_ratio": 1.25,
    "reason": "Align with industry standard"
  }},
  "spacing_recommendation": {{
    "current": "mixed",
    "recommended": "8px",
    "reason": "Consistent grid improves maintainability"
  }}
}}
Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Client object; only its ``complete_async`` coroutine is used here.
        self.hf_client = hf_client

    async def synthesize(
        self,
        rule_engine_results: Any,
        benchmark_comparisons: list,
        brand_identification: "BrandIdentification",
        benchmark_advice: "BenchmarkAdvice",
        best_practices: "BestPracticesResult",
        log_callback: Optional[Callable] = None,
    ) -> "HeadSynthesis":
        """
        Synthesize all results into final recommendations.

        Args:
            rule_engine_results: Results from rule engine; must expose
                ``typography``, ``spacing``, ``color_stats``,
                ``accessibility`` and ``consistency_score``.
            benchmark_comparisons: Comparisons; only the first one is used,
                as the closest match.
            brand_identification: Output of the Brand Identifier agent
            benchmark_advice: Output of the Benchmark Advisor agent
            best_practices: Output of the Best Practices Validator agent
            log_callback: Progress logging function (optional)

        Returns:
            HeadSynthesis with the combined recommendations. If the client
            call or the response handling raises, the failure is logged and
            a default (empty) HeadSynthesis is returned instead.
        """
        def log(msg: str) -> None:
            if log_callback:
                log_callback(msg)

        log("")
        log("═" * 60)
        log("🧠 LAYER 4: HEAD SYNTHESIZER")
        log("═" * 60)
        log("")
        log("   Combining: Rule Engine + Benchmarks + Brand + Best Practices...")

        prompt = self._build_prompt(
            rule_engine_results,
            benchmark_comparisons,
            brand_identification,
            benchmark_advice,
            best_practices,
        )

        try:
            start_time = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="head_synthesizer",
                system_prompt="You are a senior design system architect specializing in synthesis and recommendations.",
                user_message=prompt,
                max_tokens=1000,
                json_mode=True,
            )
            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log("")
            log(f"   ✅ HEAD Synthesizer: COMPLETE ({duration:.1f}s)")
            log("")
            return result
        except Exception as e:
            log(f"   ├─ ⚠️ Head Synthesizer failed: {str(e)[:120]}")
            return HeadSynthesis()

    def _build_prompt(
        self,
        rule_engine_results: Any,
        benchmark_comparisons: list,
        brand_identification: "BrandIdentification",
        benchmark_advice: "BenchmarkAdvice",
        best_practices: "BestPracticesResult",
    ) -> str:
        """Fill PROMPT_TEMPLATE from the rule engine and upstream agents.

        Upstream agent payloads originate from LLM output, so their entries
        are read defensively: anything that is not a dict, or lacks the
        wanted key, is skipped instead of raising.
        """
        def pick(entry: Any, key: str, default: str = "") -> Any:
            return entry.get(key, default) if isinstance(entry, dict) else default

        def summarize(entries: Any, key: str) -> str:
            # First three entries only; blanks are dropped so the list does
            # not contain empty "; ;" segments.
            labels = [str(pick(entry, key)) for entry in (entries or [])[:3]]
            return "; ".join(label for label in labels if label) or "None"

        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility

        failures = [a for a in accessibility if not a.passes_aa_normal]
        # Only failures that come with a suggested replacement are listed.
        aa_fixes_str = "\n".join(
            f"- {a.name}: {a.hex_color} ({a.contrast_on_white:.1f}:1) → {a.suggested_fix} ({a.suggested_fix_contrast:.1f}:1)"
            for a in failures[:5] if a.suggested_fix
        )

        closest = benchmark_comparisons[0] if benchmark_comparisons else None

        return self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_status="consistent" if typo.is_consistent else "inconsistent",
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            aa_failures=len(failures),
            spacing_status=f"{spacing.detected_base}px, {spacing.alignment_percentage:.0f}% aligned",
            unique_colors=color_stats.unique_count,
            consistency_score=rule_engine_results.consistency_score,
            closest_benchmark=closest.benchmark.name if closest else "Unknown",
            match_pct=f"{closest.overall_match_pct:.0f}" if closest else "0",
            benchmark_changes=summarize(benchmark_advice.alignment_changes, "change"),
            brand_primary=pick(brand_identification.brand_primary, "color", "Unknown"),
            brand_secondary=pick(brand_identification.brand_secondary, "color", "Unknown"),
            cohesion_score=brand_identification.cohesion_score,
            best_practices_score=best_practices.overall_score,
            priority_fixes=summarize(best_practices.priority_fixes, "issue"),
            accessibility_fixes=aa_fixes_str or "None needed",
        )

    @staticmethod
    def _coerce_payload(data: dict) -> dict:
        """Normalize decoded LLM JSON into HeadSynthesis keyword args.

        Model output is untrusted: wrong-typed fields fall back to their
        defaults and the list fields keep only their dict entries.
        """
        def as_dict(key: str) -> dict:
            value = data.get(key)
            return value if isinstance(value, dict) else {}

        def as_dict_list(key: str) -> list:
            value = data.get(key)
            if not isinstance(value, list):
                return []
            return [item for item in value if isinstance(item, dict)]

        summary = data.get("executive_summary", "")
        return {
            "executive_summary": summary if isinstance(summary, str) else "",
            "scores": as_dict("scores"),
            "benchmark_fit": as_dict("benchmark_fit"),
            "brand_analysis": as_dict("brand_analysis"),
            "top_3_actions": as_dict_list("top_3_actions"),
            "color_recommendations": as_dict_list("color_recommendations"),
            "type_scale_recommendation": as_dict("type_scale_recommendation"),
            "spacing_recommendation": as_dict("spacing_recommendation"),
        }

    def _parse_response(self, response: str) -> "HeadSynthesis":
        """Parse LLM response into HeadSynthesis.

        The JSON object is taken as the span from the first ``{`` to the last
        ``}``. A response that is not a string, contains no such span, or is
        not valid JSON yields a default HeadSynthesis.
        """
        try:
            json_match = re.search(r'\{[\s\S]*\}', response)
            data = json.loads(json_match.group()) if json_match else None
        except (TypeError, ValueError):
            # TypeError: response is not str/bytes; ValueError: malformed JSON
            data = None
        if not isinstance(data, dict):
            return HeadSynthesis()
        return HeadSynthesis(**self._coerce_payload(data))