# NOTE: removed Hugging Face web-view header residue that was accidentally
# captured with the file ("raw / history / blame" page chrome, not code).
"""
Stage 2 LLM Agents — Specialized Analysis Tasks
=================================================
These agents handle tasks that REQUIRE LLM reasoning:
- Brand Identifier: Identify brand colors from usage context
- Benchmark Advisor: Recommend best-fit design system
- Best Practices Validator: Prioritize fixes by business impact
- HEAD Synthesizer: Combine all outputs into final recommendations
Each agent has a focused prompt for its specific task.
"""
import json
import re
from dataclasses import dataclass, field
from typing import Optional, Callable, Any
from datetime import datetime
# =============================================================================
# DATA CLASSES
# =============================================================================
@dataclass
class BrandIdentification:
    """Results from the Brand Identifier agent.

    Holds the identified brand color roles plus an overall assessment of
    the palette. Populated by parsing the LLM's JSON reply; defaults are
    used when the call or parse fails.
    """

    # Each brand_* dict holds {color, confidence, reasoning, usage_count}.
    brand_primary: dict = field(default_factory=dict)
    brand_secondary: dict = field(default_factory=dict)
    brand_accent: dict = field(default_factory=dict)
    # One of: complementary, analogous, triadic, monochromatic, random.
    palette_strategy: str = ""
    cohesion_score: int = 5  # 1-10 rating of how well the colors work together
    cohesion_notes: str = ""
    # Maps hex color -> suggested semantic token name.
    semantic_names: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a plain dict; keys mirror the field names."""
        keys = (
            "brand_primary",
            "brand_secondary",
            "brand_accent",
            "palette_strategy",
            "cohesion_score",
            "cohesion_notes",
            "semantic_names",
        )
        return {key: getattr(self, key) for key in keys}
@dataclass
class BenchmarkAdvice:
    """Results from the Benchmark Advisor agent."""

    recommended_benchmark: str = ""       # machine key of the system
    recommended_benchmark_name: str = ""  # human-readable name
    reasoning: str = ""
    # Each entry: {change, from, to, effort}.
    alignment_changes: list = field(default_factory=list)
    pros_of_alignment: list = field(default_factory=list)
    cons_of_alignment: list = field(default_factory=list)
    # Each entry: {name, reason}.
    alternative_benchmarks: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize to a plain dict.

        Note: the pros/cons/alternatives fields are exported under
        shortened keys ("pros", "cons", "alternatives").
        """
        out = {
            "recommended_benchmark": self.recommended_benchmark,
            "recommended_benchmark_name": self.recommended_benchmark_name,
            "reasoning": self.reasoning,
            "alignment_changes": self.alignment_changes,
        }
        out["pros"] = self.pros_of_alignment
        out["cons"] = self.cons_of_alignment
        out["alternatives"] = self.alternative_benchmarks
        return out
@dataclass
class BestPracticesResult:
    """Results from the Best Practices Validator agent."""

    overall_score: int = 50  # 0-100
    # {check_name: {"status": "pass"|"warn"|"fail", "note": str}}
    checks: dict = field(default_factory=dict)
    # [{rank, issue, impact, effort, action}]
    priority_fixes: list = field(default_factory=list)
    passing_practices: list = field(default_factory=list)
    failing_practices: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize to a plain dict.

        Note: passing/failing practice lists are exported under the
        shortened keys "passing" and "failing".
        """
        result = dict(
            overall_score=self.overall_score,
            checks=self.checks,
            priority_fixes=self.priority_fixes,
        )
        result["passing"] = self.passing_practices
        result["failing"] = self.failing_practices
        return result
@dataclass
class HeadSynthesis:
    """Final synthesized output from the HEAD agent."""

    executive_summary: str = ""
    # {overall, accessibility, consistency, organization}
    scores: dict = field(default_factory=dict)
    # {closest, similarity, recommendation}
    benchmark_fit: dict = field(default_factory=dict)
    # {primary, secondary, cohesion}
    brand_analysis: dict = field(default_factory=dict)
    # [{action, impact, effort, details}]
    top_3_actions: list = field(default_factory=list)
    # [{role, current, suggested, reason, accept}]
    color_recommendations: list = field(default_factory=list)
    type_scale_recommendation: dict = field(default_factory=dict)
    spacing_recommendation: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a plain dict; keys mirror the field names."""
        keys = (
            "executive_summary",
            "scores",
            "benchmark_fit",
            "brand_analysis",
            "top_3_actions",
            "color_recommendations",
            "type_scale_recommendation",
            "spacing_recommendation",
        )
        return {key: getattr(self, key) for key in keys}
# =============================================================================
# BRAND IDENTIFIER AGENT
# =============================================================================
class BrandIdentifierAgent:
    """
    Identifies brand colors from usage context.
    WHY LLM: Requires understanding context (33 buttons = likely brand primary),
    not just color math.

    Flow: format color/semantic data into PROMPT_TEMPLATE, send one JSON-mode
    request through the injected HF client, parse the reply into a
    BrandIdentification. Any failure is logged and swallowed — callers always
    get a BrandIdentification (possibly all-defaults).
    """
    # Runtime prompt text — do not reflow. {color_data} / {semantic_analysis}
    # are filled by analyze(); literal JSON braces are escaped as {{ }} so
    # str.format leaves them intact.
    PROMPT_TEMPLATE = """You are a senior design system analyst. Identify the brand colors from this color usage data.
## COLOR DATA WITH USAGE CONTEXT
{color_data}
## SEMANTIC ANALYSIS (from CSS properties)
{semantic_analysis}
## YOUR TASK
1. **Identify Brand Colors**:
- Brand Primary: The main action/CTA color (highest visibility)
- Brand Secondary: Supporting brand color
- Brand Accent: Highlight color for emphasis
2. **Assess Palette Strategy**:
- Is it complementary, analogous, triadic, monochromatic, or random?
3. **Rate Cohesion** (1-10):
- Do the colors work together?
- Is there a clear color story?
4. **Suggest Semantic Names** for top 10 most-used colors
## OUTPUT FORMAT (JSON only)
{{
"brand_primary": {{
"color": "#hex",
"confidence": "high|medium|low",
"reasoning": "Why this is brand primary",
"usage_count": <number>
}},
"brand_secondary": {{
"color": "#hex",
"confidence": "high|medium|low",
"reasoning": "..."
}},
"brand_accent": {{
"color": "#hex or null",
"confidence": "...",
"reasoning": "..."
}},
"palette_strategy": "complementary|analogous|triadic|monochromatic|random",
"cohesion_score": <1-10>,
"cohesion_notes": "Assessment of how well colors work together",
"semantic_names": {{
"#hex1": "brand.primary",
"#hex2": "text.primary",
"#hex3": "background.primary"
}}
}}
Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # hf_client must expose an awaitable complete_async(agent_name=...,
        # system_prompt=..., user_message=..., max_tokens=..., json_mode=...)
        # returning the raw model text — TODO confirm against client impl.
        self.hf_client = hf_client

    async def analyze(
        self,
        color_tokens: dict,
        semantic_analysis: dict,
        log_callback: Optional[Callable[[str], None]] = None,
    ) -> BrandIdentification:
        """
        Identify brand colors from usage context.

        Args:
            color_tokens: Dict of color tokens with usage data (values may be
                dicts or objects; see _format_color_data).
            semantic_analysis: Semantic categorization from Stage 1.
            log_callback: Optional progress logging function (takes one str).

        Returns:
            BrandIdentification with identified colors, or an all-defaults
            instance if the LLM call or response parsing fails.
        """
        def log(msg: str):
            # Messages are silently dropped when no callback was supplied.
            if log_callback:
                log_callback(msg)
        log(" 🎨 Brand Identifier (Llama 70B)")
        log(" └─ Analyzing color context and usage patterns...")
        # Format color data
        color_data = self._format_color_data(color_tokens)
        semantic_str = self._format_semantic_analysis(semantic_analysis)
        prompt = self.PROMPT_TEMPLATE.format(
            color_data=color_data,
            semantic_analysis=semantic_str,
        )
        try:
            start_time = datetime.now()
            # Use the correct method signature
            response = await self.hf_client.complete_async(
                agent_name="brand_identifier",
                system_prompt="You are a senior design system analyst specializing in brand color identification.",
                user_message=prompt,
                max_tokens=800,
                json_mode=True,
            )
            duration = (datetime.now() - start_time).total_seconds()
            # Parse response
            result = self._parse_response(response)
            log(f" ────────────────────────────────────────────────")
            log(f" 🎨 Brand Identifier: COMPLETE ({duration:.1f}s)")
            log(f" ├─ Brand Primary: {result.brand_primary.get('color', '?')} ({result.brand_primary.get('confidence', '?')} confidence)")
            log(f" ├─ Brand Secondary: {result.brand_secondary.get('color', '?')}")
            log(f" ├─ Palette Strategy: {result.palette_strategy}")
            log(f" └─ Cohesion Score: {result.cohesion_score}/10")
            return result
        except Exception as e:
            error_msg = str(e)
            # Always log full error for diagnosis
            log(f" ⚠️ Brand Identifier failed: {error_msg[:120]}")
            # Add a targeted hint for the two most common HF failure modes.
            if "gated" in error_msg.lower() or "access" in error_msg.lower():
                log(f" └─ Model may require license acceptance at huggingface.co")
            elif "Rate limit" in error_msg or "429" in error_msg:
                log(f" └─ HF free tier rate limit — wait or upgrade to Pro")
            # Fail soft: downstream stages tolerate an empty result.
            return BrandIdentification()

    def _format_color_data(self, color_tokens: dict) -> str:
        """Format up to 30 color tokens as bullet lines for the prompt."""
        lines = []
        # Tokens may be plain dicts or attribute objects; probe both shapes.
        for name, token in list(color_tokens.items())[:30]:
            if isinstance(token, dict):
                hex_val = token.get("value", token.get("hex", ""))
                usage = token.get("usage_count", token.get("count", 1))
                context = token.get("context", token.get("css_property", ""))
            else:
                hex_val = getattr(token, "value", "")
                usage = getattr(token, "usage_count", 1)
                context = getattr(token, "context", "")
            # Tokens without a resolvable hex value are skipped.
            if hex_val:
                lines.append(f"- {hex_val}: used {usage}x, context: {context or 'unknown'}")
        return "\n".join(lines) if lines else "No color data available"

    def _format_semantic_analysis(self, semantic: dict) -> str:
        """Format Stage 1 semantic analysis as bullet lines for the prompt.

        Handles three value shapes per category: list of colors, dict
        (flat color / nested sub-roles / summary), or a plain scalar.
        """
        if not semantic:
            return "No semantic analysis available"
        lines = []
        try:
            for category, value in semantic.items():
                if not value:
                    continue
                if isinstance(value, list):
                    # List of colors
                    color_list = []
                    for c in value[:5]:
                        if isinstance(c, dict):
                            color_list.append(c.get("hex", c.get("value", str(c))))
                        else:
                            color_list.append(str(c))
                    lines.append(f"- {category}: {', '.join(color_list)}")
                elif isinstance(value, dict):
                    # Could be a nested dict of sub-roles → color dicts
                    # e.g. {"primary": {"hex": "#007bff", ...}, "secondary": {...}}
                    # or a flat color dict {"hex": "#...", "confidence": "..."}
                    # or a summary dict {"total_colors_analyzed": 50, ...}
                    if "hex" in value:
                        # Flat color dict
                        lines.append(f"- {category}: {value['hex']}")
                    else:
                        # Nested dict — iterate sub-roles
                        sub_items = []
                        for sub_role, sub_val in list(value.items())[:5]:
                            if isinstance(sub_val, dict) and "hex" in sub_val:
                                sub_items.append(f"{sub_role}={sub_val['hex']}")
                            elif isinstance(sub_val, (str, int, float, bool)):
                                sub_items.append(f"{sub_role}={sub_val}")
                        if sub_items:
                            lines.append(f"- {category}: {', '.join(sub_items)}")
                else:
                    # Scalar value — emit as-is.
                    lines.append(f"- {category}: {value}")
        except Exception as e:
            # Formatting is best-effort; never let it break the pipeline.
            return f"Error formatting semantic analysis: {str(e)[:50]}"
        return "\n".join(lines) if lines else "No semantic analysis available"

    def _parse_response(self, response: str) -> BrandIdentification:
        """Parse the LLM response into BrandIdentification (defaults on failure)."""
        try:
            # Greedy outermost {...} match — models often wrap JSON in prose.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                return BrandIdentification(
                    brand_primary=data.get("brand_primary", {}),
                    brand_secondary=data.get("brand_secondary", {}),
                    brand_accent=data.get("brand_accent", {}),
                    palette_strategy=data.get("palette_strategy", "unknown"),
                    cohesion_score=data.get("cohesion_score", 5),
                    cohesion_notes=data.get("cohesion_notes", ""),
                    semantic_names=data.get("semantic_names", {}),
                )
        except Exception:
            # Malformed JSON → fall through to defaults.
            pass
        return BrandIdentification()
# =============================================================================
# BENCHMARK ADVISOR AGENT
# =============================================================================
class BenchmarkAdvisorAgent:
    """
    Recommends best-fit design system based on comparison data.
    WHY LLM: Requires reasoning about trade-offs and use-case fit,
    not just similarity scores.

    Single JSON-mode LLM call; failures are logged and an empty
    BenchmarkAdvice is returned instead of raising.
    """
    # Runtime prompt text — do not reflow. Placeholders are filled by
    # analyze(); literal JSON braces are escaped as {{ }}.
    PROMPT_TEMPLATE = """You are a senior design system consultant. Recommend the best design system alignment.
## USER'S CURRENT VALUES
- Type Scale Ratio: {user_ratio}
- Base Font Size: {user_base}px
- Spacing Grid: {user_spacing}px
## BENCHMARK COMPARISON
{benchmark_comparison}
## YOUR TASK
1. **Recommend Best Fit**: Which design system should they align with?
2. **Explain Why**: Consider similarity scores AND use-case fit
3. **List Changes Needed**: What would they need to change to align?
4. **Pros/Cons**: Benefits and drawbacks of alignment
## OUTPUT FORMAT (JSON only)
{{
"recommended_benchmark": "<system_key>",
"recommended_benchmark_name": "<full name>",
"reasoning": "Why this is the best fit for their use case",
"alignment_changes": [
{{"change": "Type scale", "from": "1.18", "to": "1.25", "effort": "medium"}},
{{"change": "Spacing grid", "from": "mixed", "to": "4px", "effort": "high"}}
],
"pros_of_alignment": [
"Familiar patterns for users",
"Well-tested accessibility"
],
"cons_of_alignment": [
"May lose brand uniqueness"
],
"alternative_benchmarks": [
{{"name": "Material Design 3", "reason": "Good for Android-first products"}}
]
}}
Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Same client contract as the other agents: awaitable complete_async().
        self.hf_client = hf_client

    async def analyze(
        self,
        user_ratio: float,
        user_base: int,
        user_spacing: int,
        benchmark_comparisons: list,
        log_callback: Optional[Callable[[str], None]] = None,
    ) -> BenchmarkAdvice:
        """
        Recommend best-fit design system.

        Args:
            user_ratio: User's detected type scale ratio.
            user_base: User's base font size (px).
            user_spacing: User's spacing grid base (px).
            benchmark_comparisons: List of BenchmarkComparison objects
                (project type; see _format_comparisons for the attributes read).
            log_callback: Optional progress logging function (takes one str).

        Returns:
            BenchmarkAdvice with recommendations, or an empty BenchmarkAdvice
            if the LLM call fails.
        """
        def log(msg: str):
            # Messages are silently dropped when no callback was supplied.
            if log_callback:
                log_callback(msg)
        log("")
        log(" 🏢 Benchmark Advisor (Qwen 72B)")
        log(" └─ Evaluating benchmark fit for your use case...")
        # Format comparison data
        comparison_str = self._format_comparisons(benchmark_comparisons)
        prompt = self.PROMPT_TEMPLATE.format(
            user_ratio=user_ratio,
            user_base=user_base,
            user_spacing=user_spacing,
            benchmark_comparison=comparison_str,
        )
        try:
            start_time = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="benchmark_advisor",
                system_prompt="You are a senior design system consultant specializing in design system architecture.",
                user_message=prompt,
                max_tokens=700,
                json_mode=True,
            )
            duration = (datetime.now() - start_time).total_seconds()
            result = self._parse_response(response)
            log(f" ────────────────────────────────────────────────")
            log(f" 🏢 Benchmark Advisor: COMPLETE ({duration:.1f}s)")
            log(f" ├─ Recommended: {result.recommended_benchmark_name}")
            log(f" ├─ Changes Needed: {len(result.alignment_changes)}")
            log(f" └─ Key Change: {result.alignment_changes[0].get('change', 'N/A') if result.alignment_changes else 'None'}")
            return result
        except Exception as e:
            # Fail soft: downstream stages tolerate an empty result.
            log(f" ├─ ⚠️ Benchmark Advisor failed: {str(e)[:120]}")
            return BenchmarkAdvice()

    def _format_comparisons(self, comparisons: list) -> str:
        """Format up to 5 benchmark comparisons as a prompt section.

        Reads comparison attrs (benchmark, similarity_score, overall_match_pct,
        *_diff) and benchmark attrs (icon, name, typography, spacing, best_for)
        — assumed shape of the Stage 1 comparison objects; TODO confirm.
        """
        lines = []
        for i, c in enumerate(comparisons[:5]):
            b = c.benchmark
            lines.append(f"""
{i+1}. {b.icon} {b.name}
- Similarity Score: {c.similarity_score:.2f} (lower = better)
- Match: {c.overall_match_pct:.0f}%
- Type Ratio: {b.typography.get('scale_ratio', '?')} (diff: {c.type_ratio_diff:.3f})
- Base Size: {b.typography.get('base_size', '?')}px (diff: {c.base_size_diff})
- Spacing: {b.spacing.get('base', '?')}px (diff: {c.spacing_grid_diff})
- Best For: {', '.join(b.best_for)}""")
        return "\n".join(lines)

    def _parse_response(self, response: str) -> BenchmarkAdvice:
        """Parse the LLM response into BenchmarkAdvice (defaults on failure)."""
        try:
            # Greedy outermost {...} match — models often wrap JSON in prose.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                return BenchmarkAdvice(
                    recommended_benchmark=data.get("recommended_benchmark", ""),
                    recommended_benchmark_name=data.get("recommended_benchmark_name", ""),
                    reasoning=data.get("reasoning", ""),
                    alignment_changes=data.get("alignment_changes", []),
                    pros_of_alignment=data.get("pros_of_alignment", []),
                    cons_of_alignment=data.get("cons_of_alignment", []),
                    alternative_benchmarks=data.get("alternative_benchmarks", []),
                )
        except Exception:
            # Malformed JSON → fall through to defaults.
            pass
        return BenchmarkAdvice()
# =============================================================================
# BEST PRACTICES VALIDATOR AGENT
# =============================================================================
class BestPracticesValidatorAgent:
    """
    Validates against design system best practices and prioritizes fixes.
    WHY LLM: Prioritization requires judgment about business impact,
    not just checking boxes.

    Single JSON-mode LLM call over the rule-engine facts; failures are
    logged and an empty BestPracticesResult is returned instead of raising.
    """
    # Runtime prompt text — do not reflow. Placeholders are filled by
    # analyze(); literal JSON braces are escaped as {{ }}.
    PROMPT_TEMPLATE = """You are a design system auditor. Validate these tokens against best practices.
## RULE ENGINE ANALYSIS RESULTS
### Typography
- Detected Ratio: {type_ratio} ({type_consistent})
- Base Size: {base_size}px
- Recommendation: {type_recommendation}
### Accessibility
- Total Colors: {total_colors}
- AA Pass: {aa_pass}
- AA Fail: {aa_fail}
- Failing Colors: {failing_colors}
### Spacing
- Detected Base: {spacing_base}px
- Grid Aligned: {spacing_aligned}%
- Recommendation: {spacing_recommendation}px
### Color Statistics
- Unique Colors: {unique_colors}
- Duplicates: {duplicates}
- Near-Duplicates: {near_duplicates}
## BEST PRACTICES CHECKLIST
1. Type scale uses standard ratio (1.2, 1.25, 1.333, 1.5, 1.618)
2. Type scale is consistent (variance < 0.15)
3. Base font size >= 16px (accessibility)
4. Line height >= 1.5 for body text
5. All interactive colors pass AA (4.5:1)
6. Spacing uses consistent grid (4px or 8px)
7. Limited color palette (< 20 unique semantic colors)
8. No near-duplicate colors
## YOUR TASK
1. Score each practice: pass/warn/fail
2. Calculate overall score (0-100)
3. Identify TOP 3 priority fixes with impact assessment
## OUTPUT FORMAT (JSON only)
{{
"overall_score": <0-100>,
"checks": {{
"type_scale_standard": {{"status": "pass|warn|fail", "note": "..."}},
"type_scale_consistent": {{"status": "...", "note": "..."}},
"base_size_accessible": {{"status": "...", "note": "..."}},
"aa_compliance": {{"status": "...", "note": "..."}},
"spacing_grid": {{"status": "...", "note": "..."}},
"color_count": {{"status": "...", "note": "..."}}
}},
"priority_fixes": [
{{
"rank": 1,
"issue": "Brand primary fails AA",
"impact": "high|medium|low",
"effort": "low|medium|high",
"action": "Change #06b2c4 → #0891a8"
}}
],
"passing_practices": ["Base font size", "..."],
"failing_practices": ["AA compliance", "..."]
}}
Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Same client contract as the other agents: awaitable complete_async().
        self.hf_client = hf_client

    async def analyze(
        self,
        rule_engine_results: Any,
        log_callback: Optional[Callable[[str], None]] = None,
    ) -> BestPracticesResult:
        """
        Validate against best practices.

        Args:
            rule_engine_results: Results object from the rule engine; must
                expose .typography, .spacing, .color_stats and .accessibility
                (attributes read below — assumed Stage 1 shape, TODO confirm).
            log_callback: Optional progress logging function (takes one str).

        Returns:
            BestPracticesResult with validation, or an empty result if the
            LLM call fails.
        """
        def log(msg: str):
            # Messages are silently dropped when no callback was supplied.
            if log_callback:
                log_callback(msg)
        log("")
        log(" ✅ Best Practices Validator (Qwen 72B)")
        log(" └─ Checking against design system standards...")
        # Extract data from rule engine
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility
        # AA failures drive the "Failing Colors" prompt line (max 5 shown).
        failures = [a for a in accessibility if not a.passes_aa_normal]
        failing_colors_str = ", ".join([f"{a.hex_color} ({a.contrast_on_white:.1f}:1)" for a in failures[:5]])
        prompt = self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_consistent="consistent" if typo.is_consistent else f"inconsistent, variance={typo.variance:.2f}",
            # NOTE(review): first entry of sizes_px is treated as the base
            # size — assumes the list is sorted ascending; confirm upstream.
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            type_recommendation=f"{typo.recommendation} ({typo.recommendation_name})",
            total_colors=len(accessibility),
            aa_pass=len(accessibility) - len(failures),
            aa_fail=len(failures),
            failing_colors=failing_colors_str or "None",
            spacing_base=spacing.detected_base,
            spacing_aligned=f"{spacing.alignment_percentage:.0f}",
            spacing_recommendation=spacing.recommendation,
            unique_colors=color_stats.unique_count,
            duplicates=color_stats.duplicate_count,
            near_duplicates=len(color_stats.near_duplicates),
        )
        try:
            start_time = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="best_practices_validator",
                system_prompt="You are a design system auditor specializing in best practices validation.",
                user_message=prompt,
                max_tokens=800,
                json_mode=True,
            )
            duration = (datetime.now() - start_time).total_seconds()
            result = self._parse_response(response)
            log(f" ────────────────────────────────────────────────")
            log(f" ✅ Best Practices: COMPLETE ({duration:.1f}s)")
            log(f" ├─ Overall Score: {result.overall_score}/100")
            log(f" ├─ Passing: {len(result.passing_practices)} | Failing: {len(result.failing_practices)}")
            if result.priority_fixes:
                log(f" └─ Top Fix: {result.priority_fixes[0].get('issue', 'N/A')}")
            return result
        except Exception as e:
            # Fail soft: downstream stages tolerate an empty result.
            log(f" ├─ ⚠️ Best Practices Validator failed: {str(e)[:120]}")
            return BestPracticesResult()

    def _parse_response(self, response: str) -> BestPracticesResult:
        """Parse the LLM response into BestPracticesResult (defaults on failure)."""
        try:
            # Greedy outermost {...} match — models often wrap JSON in prose.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                return BestPracticesResult(
                    overall_score=data.get("overall_score", 50),
                    checks=data.get("checks", {}),
                    priority_fixes=data.get("priority_fixes", []),
                    passing_practices=data.get("passing_practices", []),
                    failing_practices=data.get("failing_practices", []),
                )
        except Exception:
            # Malformed JSON → fall through to defaults.
            pass
        return BestPracticesResult()
# =============================================================================
# HEAD SYNTHESIZER AGENT
# =============================================================================
class HeadSynthesizerAgent:
    """
    Combines all agent outputs into final recommendations.
    This is the final step that produces actionable output for the user.

    Takes the rule-engine facts plus the three upstream agent results,
    renders them into one prompt, and asks the LLM for a single JSON
    synthesis. Failures are logged and an empty HeadSynthesis is returned.
    """
    # Runtime prompt text — do not reflow. Placeholders are filled by
    # synthesize(); literal JSON braces are escaped as {{ }}.
    PROMPT_TEMPLATE = """You are a senior design system architect. Synthesize these analysis results into final recommendations.
## RULE ENGINE FACTS
- Type Scale: {type_ratio} ({type_status})
- Base Size: {base_size}px
- AA Failures: {aa_failures}
- Spacing Grid: {spacing_status}
- Unique Colors: {unique_colors}
- Consistency Score: {consistency_score}/100
## BENCHMARK COMPARISON
Closest Match: {closest_benchmark}
Match Percentage: {match_pct}%
Recommended Changes: {benchmark_changes}
## BRAND IDENTIFICATION
- Brand Primary: {brand_primary}
- Brand Secondary: {brand_secondary}
- Palette Cohesion: {cohesion_score}/10
## BEST PRACTICES VALIDATION
Overall Score: {best_practices_score}/100
Priority Fixes: {priority_fixes}
## ACCESSIBILITY FIXES NEEDED
{accessibility_fixes}
## YOUR TASK
Synthesize ALL the above into:
1. Executive Summary (2-3 sentences)
2. Overall Scores
3. Top 3 Priority Actions (with effort estimates)
4. Specific Color Recommendations (with accept/reject defaults)
5. Type Scale Recommendation
6. Spacing Recommendation
## OUTPUT FORMAT (JSON only)
{{
"executive_summary": "Your design system scores X/100. Key issues are Y. Priority action is Z.",
"scores": {{
"overall": <0-100>,
"accessibility": <0-100>,
"consistency": <0-100>,
"organization": <0-100>
}},
"benchmark_fit": {{
"closest": "<name>",
"similarity": "<X%>",
"recommendation": "Align type scale to 1.25"
}},
"brand_analysis": {{
"primary": "#hex",
"secondary": "#hex",
"cohesion": <1-10>
}},
"top_3_actions": [
{{"action": "Fix brand color AA", "impact": "high", "effort": "5 min", "details": "Change #X to #Y"}}
],
"color_recommendations": [
{{"role": "brand.primary", "current": "#06b2c4", "suggested": "#0891a8", "reason": "AA compliance", "accept": true}}
],
"type_scale_recommendation": {{
"current_ratio": 1.18,
"recommended_ratio": 1.25,
"reason": "Align with industry standard"
}},
"spacing_recommendation": {{
"current": "mixed",
"recommended": "8px",
"reason": "Consistent grid improves maintainability"
}}
}}
Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Same client contract as the other agents: awaitable complete_async().
        self.hf_client = hf_client

    async def synthesize(
        self,
        rule_engine_results: Any,
        benchmark_comparisons: list,
        brand_identification: BrandIdentification,
        benchmark_advice: BenchmarkAdvice,
        best_practices: BestPracticesResult,
        log_callback: Optional[Callable[[str], None]] = None,
    ) -> HeadSynthesis:
        """
        Synthesize all results into final recommendations.

        Args:
            rule_engine_results: Rule-engine results object; must expose
                .typography, .spacing, .color_stats, .accessibility and
                .consistency_score (attributes read below).
            benchmark_comparisons: Sorted comparison list; element 0 is
                treated as the closest match (assumed pre-sorted upstream).
            brand_identification: Output of BrandIdentifierAgent.
            benchmark_advice: Output of BenchmarkAdvisorAgent.
            best_practices: Output of BestPracticesValidatorAgent.
            log_callback: Optional progress logging function (takes one str).

        Returns:
            HeadSynthesis with final recommendations, or an empty instance
            if the LLM call fails.
        """
        def log(msg: str):
            # Messages are silently dropped when no callback was supplied.
            if log_callback:
                log_callback(msg)
        log("")
        log("═" * 60)
        log("🧠 LAYER 4: HEAD SYNTHESIZER")
        log("═" * 60)
        log("")
        log(" Combining: Rule Engine + Benchmarks + Brand + Best Practices...")
        # Extract data
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility
        failures = [a for a in accessibility if not a.passes_aa_normal]
        # Only failures that carry a suggested_fix are rendered (max 5).
        aa_fixes_str = "\n".join([
            f"- {a.name}: {a.hex_color} ({a.contrast_on_white:.1f}:1) → {a.suggested_fix} ({a.suggested_fix_contrast:.1f}:1)"
            for a in failures[:5] if a.suggested_fix
        ])
        closest = benchmark_comparisons[0] if benchmark_comparisons else None
        prompt = self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_status="consistent" if typo.is_consistent else "inconsistent",
            # NOTE(review): sizes_px[0] assumed to be the base size; confirm.
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            aa_failures=len(failures),
            spacing_status=f"{spacing.detected_base}px, {spacing.alignment_percentage:.0f}% aligned",
            unique_colors=color_stats.unique_count,
            consistency_score=rule_engine_results.consistency_score,
            closest_benchmark=closest.benchmark.name if closest else "Unknown",
            match_pct=f"{closest.overall_match_pct:.0f}" if closest else "0",
            benchmark_changes="; ".join([c.get("change", "") for c in benchmark_advice.alignment_changes[:3]]),
            brand_primary=brand_identification.brand_primary.get("color", "Unknown"),
            brand_secondary=brand_identification.brand_secondary.get("color", "Unknown"),
            cohesion_score=brand_identification.cohesion_score,
            best_practices_score=best_practices.overall_score,
            priority_fixes="; ".join([f.get("issue", "") for f in best_practices.priority_fixes[:3]]),
            accessibility_fixes=aa_fixes_str or "None needed",
        )
        try:
            start_time = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="head_synthesizer",
                system_prompt="You are a senior design system architect specializing in synthesis and recommendations.",
                user_message=prompt,
                max_tokens=1000,
                json_mode=True,
            )
            duration = (datetime.now() - start_time).total_seconds()
            result = self._parse_response(response)
            log("")
            log(f" ✅ HEAD Synthesizer: COMPLETE ({duration:.1f}s)")
            log("")
            return result
        except Exception as e:
            # Fail soft: callers receive an empty synthesis on failure.
            log(f" ├─ ⚠️ Head Synthesizer failed: {str(e)[:120]}")
            return HeadSynthesis()

    def _parse_response(self, response: str) -> HeadSynthesis:
        """Parse the LLM response into HeadSynthesis (defaults on failure)."""
        try:
            # Greedy outermost {...} match — models often wrap JSON in prose.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                return HeadSynthesis(
                    executive_summary=data.get("executive_summary", ""),
                    scores=data.get("scores", {}),
                    benchmark_fit=data.get("benchmark_fit", {}),
                    brand_analysis=data.get("brand_analysis", {}),
                    top_3_actions=data.get("top_3_actions", []),
                    color_recommendations=data.get("color_recommendations", []),
                    type_scale_recommendation=data.get("type_scale_recommendation", {}),
                    spacing_recommendation=data.get("spacing_recommendation", {}),
                )
        except Exception:
            # Malformed JSON → fall through to defaults.
            pass
        return HeadSynthesis()