Spaces:
Runtime error
Runtime error
| """ | |
| Stage 2 LLM Agents — v3 Agentic Architecture | |
| ============================================== | |
| Each agent: | |
| - Researches ALL token types (colors, typography, spacing, radius, shadows) | |
| - Uses ReAct framework: THINK → ACT → OBSERVE → VERIFY | |
| - Returns visible reasoning chain for the UI | |
| - Has a Python-based critic for validation | |
| Agents run IN PARALLEL (asyncio.gather), then NEXUS compiles. | |
| Agent Responsibilities: | |
| - AURORA: Brand identity + semantic naming for ALL colors + notes on all token types | |
| - SENTINEL: Best practices audit across ALL token types, grounded in rule-engine data | |
| - ATLAS: Benchmark comparison for ALL token types | |
| - NEXUS (HEAD): Tree-of-Thought synthesis, compiles all agent outputs | |
| """ | |
| import json | |
| import re | |
| from dataclasses import dataclass, field | |
| from typing import Optional, Callable, Any | |
| from datetime import datetime | |
| # ============================================================================= | |
| # DATA CLASSES — v3: includes reasoning_trace + naming_map | |
| # ============================================================================= | |
@dataclass
class BrandIdentification:
    """Results from AURORA — Brand Identifier (ReAct).

    Plain data container. ``to_dict()`` exposes the UI-facing subset;
    validation/retry bookkeeping and the reasoning trace are intentionally
    omitted from the dict.

    Fix: the ``@dataclass`` decorator was missing — without it the
    ``field(default_factory=...)`` attributes stay raw ``Field`` objects and
    the keyword construction used by ``BrandIdentifierAgent._parse`` raises
    ``TypeError``.
    """
    # Top brand colors: each dict carries color/confidence/reasoning evidence.
    brand_primary: dict = field(default_factory=dict)
    brand_secondary: dict = field(default_factory=dict)
    brand_accent: dict = field(default_factory=dict)
    palette_strategy: str = ""   # complementary|analogous|triadic|monochromatic|random
    cohesion_score: int = 5      # 1-10; prompt says most sites land 5-7
    cohesion_notes: str = ""
    # v3: naming_map covers ALL colors, not just top 10
    # {hex: "color.brand.primary"} or {hex: "color.blue.500"}
    naming_map: dict = field(default_factory=dict)
    semantic_names: dict = field(default_factory=dict)  # backward compat (mirrors naming_map)
    self_evaluation: dict = field(default_factory=dict)
    # v3: reasoning trace visible to user
    reasoning_trace: list = field(default_factory=list)
    validation_passed: bool = False  # set by the critic after parsing
    retry_count: int = 0             # 0 or 1 — single critic-driven retry
    # v3: per-token-type observations
    typography_notes: str = ""
    spacing_notes: str = ""
    radius_notes: str = ""
    shadow_notes: str = ""

    def to_dict(self) -> dict:
        """Serialize UI-facing fields (omits validation/retry/trace)."""
        return {
            "brand_primary": self.brand_primary,
            "brand_secondary": self.brand_secondary,
            "brand_accent": self.brand_accent,
            "palette_strategy": self.palette_strategy,
            "cohesion_score": self.cohesion_score,
            "cohesion_notes": self.cohesion_notes,
            "naming_map": self.naming_map,
            "semantic_names": self.semantic_names,
            "self_evaluation": self.self_evaluation,
            "typography_notes": self.typography_notes,
            "spacing_notes": self.spacing_notes,
            "radius_notes": self.radius_notes,
            "shadow_notes": self.shadow_notes,
        }
@dataclass
class BenchmarkAdvice:
    """Results from ATLAS — Benchmark Advisor (ReAct).

    Fix: added the missing ``@dataclass`` decorator so the
    ``field(default_factory=...)`` defaults and the keyword construction in
    ``BenchmarkAdvisorAgent._parse`` work as intended.

    Note: ``to_dict()`` renames pros/cons/alternatives to shorter keys and
    omits ``reasoning_trace`` (logged separately, not serialized).
    """
    recommended_benchmark: str = ""        # machine id, e.g. "material_design_3"
    recommended_benchmark_name: str = ""   # human-readable name
    reasoning: str = ""
    alignment_changes: list = field(default_factory=list)
    pros_of_alignment: list = field(default_factory=list)
    cons_of_alignment: list = field(default_factory=list)
    alternative_benchmarks: list = field(default_factory=list)
    self_evaluation: dict = field(default_factory=dict)
    # v3: per-token-type benchmark comparison ({user, benchmark, gap})
    typography_comparison: dict = field(default_factory=dict)
    spacing_comparison: dict = field(default_factory=dict)
    color_comparison: dict = field(default_factory=dict)
    radius_comparison: dict = field(default_factory=dict)
    shadow_comparison: dict = field(default_factory=dict)
    reasoning_trace: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize for the UI (short keys for pros/cons/alternatives)."""
        return {
            "recommended_benchmark": self.recommended_benchmark,
            "recommended_benchmark_name": self.recommended_benchmark_name,
            "reasoning": self.reasoning,
            "alignment_changes": self.alignment_changes,
            "pros": self.pros_of_alignment,
            "cons": self.cons_of_alignment,
            "alternatives": self.alternative_benchmarks,
            "self_evaluation": self.self_evaluation,
            "typography_comparison": self.typography_comparison,
            "spacing_comparison": self.spacing_comparison,
            "color_comparison": self.color_comparison,
            "radius_comparison": self.radius_comparison,
            "shadow_comparison": self.shadow_comparison,
        }
@dataclass
class BestPracticesResult:
    """Results from SENTINEL — Best Practices Auditor (ReAct).

    Fix: added the missing ``@dataclass`` decorator (required for the
    ``field`` defaults and keyword construction used by the agent's parser).

    ``to_dict()`` shortens passing/failing key names and omits
    ``reasoning_trace``/``validation_passed`` (internal bookkeeping).
    """
    overall_score: int = 50  # 0-100 audit score
    checks: dict = field(default_factory=dict)           # per-check {status, note}
    priority_fixes: list = field(default_factory=list)   # ranked fix suggestions
    passing_practices: list = field(default_factory=list)
    failing_practices: list = field(default_factory=list)
    self_evaluation: dict = field(default_factory=dict)
    # v3: per-token-type assessments
    color_assessment: dict = field(default_factory=dict)
    typography_assessment: dict = field(default_factory=dict)
    spacing_assessment: dict = field(default_factory=dict)
    radius_assessment: dict = field(default_factory=dict)
    shadow_assessment: dict = field(default_factory=dict)
    reasoning_trace: list = field(default_factory=list)
    validation_passed: bool = False  # set by the critic cross-reference

    def to_dict(self) -> dict:
        """Serialize UI-facing fields (omits trace/validation flags)."""
        return {
            "overall_score": self.overall_score,
            "checks": self.checks,
            "priority_fixes": self.priority_fixes,
            "passing": self.passing_practices,
            "failing": self.failing_practices,
            "self_evaluation": self.self_evaluation,
            "color_assessment": self.color_assessment,
            "typography_assessment": self.typography_assessment,
            "spacing_assessment": self.spacing_assessment,
            "radius_assessment": self.radius_assessment,
            "shadow_assessment": self.shadow_assessment,
        }
@dataclass
class HeadSynthesis:
    """Results from NEXUS — HEAD Synthesizer (Tree of Thought).

    Fix: added the missing ``@dataclass`` decorator (required for the
    ``field`` defaults and keyword construction).

    ``to_dict()`` omits the raw ToT branches (``perspective_a``/``_b``) and
    ``reasoning_trace``; only the chosen perspective and its reasoning are
    serialized.
    """
    executive_summary: str = ""
    scores: dict = field(default_factory=dict)
    benchmark_fit: dict = field(default_factory=dict)
    brand_analysis: dict = field(default_factory=dict)
    top_3_actions: list = field(default_factory=list)
    color_recommendations: list = field(default_factory=list)
    type_scale_recommendation: dict = field(default_factory=dict)
    spacing_recommendation: dict = field(default_factory=dict)
    radius_recommendation: dict = field(default_factory=dict)
    shadow_recommendation: dict = field(default_factory=dict)
    self_evaluation: dict = field(default_factory=dict)
    # v3: ToT branches visible to user
    perspective_a: dict = field(default_factory=dict)
    perspective_b: dict = field(default_factory=dict)
    chosen_perspective: str = ""
    choice_reasoning: str = ""
    reasoning_trace: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize the compiled synthesis (omits raw ToT branches)."""
        return {
            "executive_summary": self.executive_summary,
            "scores": self.scores,
            "benchmark_fit": self.benchmark_fit,
            "brand_analysis": self.brand_analysis,
            "top_3_actions": self.top_3_actions,
            "color_recommendations": self.color_recommendations,
            "type_scale_recommendation": self.type_scale_recommendation,
            "spacing_recommendation": self.spacing_recommendation,
            "radius_recommendation": self.radius_recommendation,
            "shadow_recommendation": self.shadow_recommendation,
            "self_evaluation": self.self_evaluation,
            "chosen_perspective": self.chosen_perspective,
            "choice_reasoning": self.choice_reasoning,
        }
| # ============================================================================= | |
| # SHARED HELPERS — format token data for prompts | |
| # ============================================================================= | |
| def _fmt_colors(tokens: dict, limit: int = 40) -> str: | |
| """Format color tokens for any agent prompt.""" | |
| if not tokens: | |
| return "No color data" | |
| lines = [] | |
| for name, t in list(tokens.items())[:limit]: | |
| d = t if isinstance(t, dict) else t.__dict__ if hasattr(t, '__dict__') else {} | |
| hex_val = d.get("value", "") | |
| freq = d.get("frequency", 0) | |
| hint = d.get("role_hint", "") | |
| ctx = ", ".join((d.get("contexts") or [])[:3]) | |
| els = ", ".join((d.get("elements") or [])[:3]) | |
| hint_s = f" [hint:{hint}]" if hint else "" | |
| lines.append(f"- {hex_val}: {freq}x, ctx=[{ctx}], el=[{els}]{hint_s}") | |
| return "\n".join(lines) | |
| def _fmt_typography(tokens: dict, limit: int = 15) -> str: | |
| if not tokens: | |
| return "No typography data" | |
| lines = [] | |
| for name, t in list(tokens.items())[:limit]: | |
| d = t if isinstance(t, dict) else t.__dict__ if hasattr(t, '__dict__') else {} | |
| fam = d.get("font_family", "?") | |
| sz = d.get("font_size", "?") | |
| w = d.get("font_weight", 400) | |
| lh = d.get("line_height", "?") | |
| freq = d.get("frequency", 0) | |
| els = ", ".join((d.get("elements") or [])[:3]) | |
| lines.append(f"- {fam} {sz} w{w} lh={lh} ({freq}x) [{els}]") | |
| return "\n".join(lines) | |
| def _fmt_spacing(tokens: dict, limit: int = 15) -> str: | |
| if not tokens: | |
| return "No spacing data" | |
| lines = [] | |
| for name, t in list(tokens.items())[:limit]: | |
| d = t if isinstance(t, dict) else t.__dict__ if hasattr(t, '__dict__') else {} | |
| val = d.get("value", "?") | |
| px = d.get("value_px", "?") | |
| freq = d.get("frequency", 0) | |
| ctx = ", ".join((d.get("contexts") or [])[:3]) | |
| lines.append(f"- {val} ({px}px) {freq}x [{ctx}]") | |
| return "\n".join(lines) | |
| def _fmt_radius(tokens: dict, limit: int = 10) -> str: | |
| if not tokens: | |
| return "No radius data" | |
| lines = [] | |
| for name, t in list(tokens.items())[:limit]: | |
| d = t if isinstance(t, dict) else t.__dict__ if hasattr(t, '__dict__') else {} | |
| val = d.get("value", "?") | |
| px = d.get("value_px", "?") | |
| freq = d.get("frequency", 0) | |
| b4 = d.get("fits_base_4", False) | |
| b8 = d.get("fits_base_8", False) | |
| els = ", ".join((d.get("elements") or [])[:3]) | |
| lines.append(f"- {name}: {val} (base4={b4}, base8={b8}, {freq}x) [{els}]") | |
| return "\n".join(lines) | |
| def _fmt_shadows(tokens: dict, limit: int = 10) -> str: | |
| if not tokens: | |
| return "No shadow data" | |
| lines = [] | |
| for name, t in list(tokens.items())[:limit]: | |
| d = t if isinstance(t, dict) else t.__dict__ if hasattr(t, '__dict__') else {} | |
| blur = d.get("blur_px", "?") | |
| y = d.get("y_offset_px", "?") | |
| freq = d.get("frequency", 0) | |
| els = ", ".join((d.get("elements") or [])[:3]) | |
| lines.append(f"- {name}: blur={blur}px y={y}px ({freq}x) [{els}]") | |
| return "\n".join(lines) | |
def _log_reasoning(steps: list, log_fn: Callable):
    """Log ReAct reasoning steps with full content (no truncation).

    Args:
        steps: list of {"step": "THINK|ACT|OBSERVE|VERIFY", "area": str,
            "content": str} dicts; non-dict entries and None/empty lists
            are skipped silently.
        log_fn: sink for each rendered line (e.g. a UI log callback).

    Content longer than 120 chars is word-wrapped across multiple
    log_fn calls; shorter content goes out as a single line.
    """
    icons = {"THINK": "🧠", "ACT": "⚡", "OBSERVE": "👁️", "VERIFY": "✅"}
    for step in (steps or []):
        if isinstance(step, dict):
            st = step.get("step", "?")
            area = step.get("area", "")
            content = step.get("content", "")
            icon = icons.get(st, "📝")  # fallback icon for unknown step names
            # Show full reasoning — wrap long lines for readability
            if len(content) > 120:
                log_fn(f" {icon} [{st}] {area}:")
                # Word-wrap at ~100 chars per line
                words = content.split()
                line = " "
                for word in words:
                    if len(line) + len(word) + 1 > 105:
                        # Current line is full: flush it, start a new one.
                        log_fn(line)
                        line = " " + word
                    else:
                        # Append to the line; first word skips the separator.
                        line = line + " " + word if line.strip() else " " + word
                if line.strip():
                    # Flush the trailing partial line.
                    log_fn(line)
            else:
                log_fn(f" {icon} [{st}] {area}: {content}")
| def _extract_hexes(tokens: dict) -> list: | |
| """Get list of hex values from color token dict.""" | |
| hexes = [] | |
| for name, t in tokens.items(): | |
| if isinstance(t, dict): | |
| h = t.get("value", "") | |
| else: | |
| h = getattr(t, "value", "") | |
| if h: | |
| hexes.append(h.lower()) | |
| return hexes | |
| # ============================================================================= | |
| # AURORA — Brand Identifier (ReAct Framework) | |
| # ============================================================================= | |
class BrandIdentifierAgent:
    """
    AURORA — Senior Brand & Visual Identity Analyst.
    v3.1: ADVISORY ONLY — does NOT name colors (rule-based classifier does that).
    Provides brand insights, palette strategy, cohesion assessment.
    Model: Qwen 72B · Temperature: 0.4
    """

    # Verbatim system prompt sent to the LLM. The JSON keys it demands must
    # stay in sync with the key lookups in _parse() below.
    SYSTEM_PROMPT = """You are AURORA, a Senior Brand & Visual Identity Analyst.
## YOUR ROLE (v3.1: Advisory Only)
Color NAMING is handled by a rule-based classifier. Do NOT output naming_map.
Your job is to provide INSIGHTS about the brand identity and design cohesion.
## REASONING FRAMEWORK (ReAct)
Structure your response with explicit reasoning steps.
For each area: THINK → ACT → OBSERVE → VERIFY.
## ANALYZE ALL TOKEN TYPES:
### 1. COLORS — Identify brand strategy (complementary? analogous? monochromatic?)
### 2. TYPOGRAPHY — Identify heading vs body hierarchy, font pairing quality
### 3. SPACING — Identify grid system, note consistency
### 4. RADIUS — Identify radius strategy (sharp/rounded/pill)
### 5. SHADOWS — Identify elevation strategy, blur progression
## QUALITY RULES
- Brand Primary MUST cite usage evidence (e.g. "47x on buttons")
- Cohesion 1-10: most sites score 5-7. Use the full range.
- Do NOT invent names. Focus on analysis and insights.
## OUTPUT (JSON)
{
"reasoning_steps": [
{"step": "THINK", "area": "colors", "content": "..."},
{"step": "ACT", "area": "colors", "content": "..."},
{"step": "OBSERVE", "area": "typography", "content": "..."},
{"step": "ACT", "area": "spacing", "content": "..."},
{"step": "ACT", "area": "radius", "content": "..."},
{"step": "ACT", "area": "shadows", "content": "..."},
{"step": "VERIFY", "area": "all", "content": "Cross-checking consistency..."}
],
"brand_primary": {"color": "#hex", "confidence": "high|medium|low", "reasoning": "cite evidence", "usage_count": N},
"brand_secondary": {"color": "#hex", "confidence": "...", "reasoning": "..."},
"brand_accent": {"color": "#hex or null", "confidence": "...", "reasoning": "..."},
"palette_strategy": "complementary|analogous|triadic|monochromatic|random",
"cohesion_score": N,
"cohesion_notes": "...",
"naming_map": {}, // Optional: ONLY semantic role suggestions (brand.primary, text.secondary, etc.)
"typography_notes": "Heading: Inter 700, Body: Inter 400. Clean hierarchy.",
"spacing_notes": "8px grid, 92% aligned.",
"radius_notes": "Rounded style: 4px inputs, 8px cards.",
"shadow_notes": "3-level elevation: blur 4/8/24px.",
"self_evaluation": {"confidence": N, "reasoning": "...", "data_quality": "good|fair|poor", "flags": []}
}
Return ONLY valid JSON."""

    # User-message template; placeholders are filled by the _fmt_* helpers.
    PROMPT_TEMPLATE = """Analyze the complete design system.
## COLORS (with role_hints)
{color_data}
## TYPOGRAPHY
{typography_data}
## SPACING
{spacing_data}
## RADIUS
{radius_data}
## SHADOWS
{shadow_data}
Use ReAct for each area. If you see clear semantic roles (brand primary, text color, etc.), suggest them in naming_map. Otherwise leave naming_map empty — the rule-based classifier handles naming."""

    def __init__(self, hf_client):
        # hf_client must expose complete_async(agent_name=, system_prompt=,
        # user_message=, max_tokens=, json_mode=) returning the raw model
        # text — presumably an async HF inference wrapper; confirm its API.
        self.hf_client = hf_client

    async def analyze(
        self,
        color_tokens: dict,
        typography_tokens: dict = None,
        spacing_tokens: dict = None,
        radius_tokens: dict = None,
        shadow_tokens: dict = None,
        log_callback: Callable = None,
    ) -> BrandIdentification:
        """Run one AURORA pass over all token types.

        Flow: format tokens into the prompt → one LLM call → _parse →
        critic check via validate_aurora_output (defined elsewhere in this
        module) → at most ONE retry with critic feedback appended →
        log the reasoning chain and a summary. Never raises: any exception
        is logged and an empty BrandIdentification is returned.
        """
        def log(msg):
            # Logging is optional — messages are dropped without a callback.
            if log_callback:
                log_callback(msg)
        log(" 🎨 AURORA — Brand & Visual Identity (Qwen 72B)")
        log(" └─ ReAct: Analyzing colors + typography + spacing + radius + shadows...")
        prompt = self.PROMPT_TEMPLATE.format(
            color_data=_fmt_colors(color_tokens),
            typography_data=_fmt_typography(typography_tokens),
            spacing_data=_fmt_spacing(spacing_tokens),
            radius_data=_fmt_radius(radius_tokens),
            shadow_data=_fmt_shadows(shadow_tokens),
        )
        try:
            start = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="brand_identifier",
                system_prompt=self.SYSTEM_PROMPT,
                user_message=prompt,
                max_tokens=2000,
                json_mode=True,
            )
            # NOTE(review): dur covers only the first call — a critic retry
            # below is not included in the reported duration.
            dur = (datetime.now() - start).total_seconds()
            result = self._parse(response)
            # Critic validation
            input_hexes = _extract_hexes(color_tokens)
            passed, errors = validate_aurora_output(result, input_hexes)
            result.validation_passed = passed
            if not passed and result.retry_count == 0:
                log(f" ⚠️ Critic: {len(errors)} issues — retrying with feedback...")
                for e in errors[:3]:
                    log(f" └─ {e}")
                # Single retry: same prompt plus up to 10 critic errors.
                retry_prompt = prompt + "\n\n## CRITIC FEEDBACK — Fix:\n" + "\n".join(errors[:10])
                resp2 = await self.hf_client.complete_async(
                    agent_name="brand_identifier",
                    system_prompt=self.SYSTEM_PROMPT,
                    user_message=retry_prompt,
                    max_tokens=2000,
                    json_mode=True,
                )
                result = self._parse(resp2)
                result.retry_count = 1
                p2, e2 = validate_aurora_output(result, input_hexes)
                result.validation_passed = p2
                if not p2:
                    # Downstream naming falls back to the rule-based normalizer.
                    log(f" ⚠️ Retry: still {len(e2)} issues — using normalizer fallback names")
            # Log reasoning chain
            log(f" ─────────────────────────────────────────")
            log(f" 🎨 AURORA — COMPLETE ({dur:.1f}s)")
            _log_reasoning(result.reasoning_trace, log)
            log(f" ├─ Brand Primary: {result.brand_primary.get('color', '?')} ({result.brand_primary.get('confidence', '?')})")
            log(f" ├─ Palette: {result.palette_strategy} · Cohesion: {result.cohesion_score}/10")
            log(f" ├─ Colors Named: {len(result.naming_map)}/{len(input_hexes)}")
            log(f" ├─ Typography: {result.typography_notes or 'N/A'}")
            log(f" ├─ Spacing: {result.spacing_notes or 'N/A'}")
            log(f" ├─ Radius: {result.radius_notes or 'N/A'}")
            log(f" ├─ Shadows: {result.shadow_notes or 'N/A'}")
            log(f" └─ Critic: {'✅ PASSED' if result.validation_passed else '⚠️ FALLBACK'}")
            return result
        except Exception as e:
            # Best-effort agent: swallow the error so the parallel
            # asyncio.gather pipeline keeps running with an empty result.
            log(f" ⚠️ AURORA failed: {str(e)[:120]}")
            return BrandIdentification()

    def _parse(self, response: str) -> BrandIdentification:
        """Extract the first {...} span from the model text and map its keys.

        The greedy regex grabs from the first '{' to the last '}', which
        tolerates prose around the JSON. Any parse failure (no match, bad
        JSON) falls through to an empty BrandIdentification.
        """
        try:
            m = re.search(r'\{[\s\S]*\}', response)
            if m:
                d = json.loads(m.group())
                return BrandIdentification(
                    brand_primary=d.get("brand_primary", {}),
                    brand_secondary=d.get("brand_secondary", {}),
                    brand_accent=d.get("brand_accent", {}),
                    palette_strategy=d.get("palette_strategy", "unknown"),
                    cohesion_score=d.get("cohesion_score", 5),
                    cohesion_notes=d.get("cohesion_notes", ""),
                    naming_map=d.get("naming_map", {}),
                    # semantic_names mirrors naming_map for backward compat.
                    semantic_names=d.get("naming_map", {}),
                    self_evaluation=d.get("self_evaluation", {}),
                    reasoning_trace=d.get("reasoning_steps", []),
                    typography_notes=d.get("typography_notes", ""),
                    spacing_notes=d.get("spacing_notes", ""),
                    radius_notes=d.get("radius_notes", ""),
                    shadow_notes=d.get("shadow_notes", ""),
                )
        except Exception:
            # Malformed output → empty result; caller's critic handles it.
            pass
        return BrandIdentification()
| # ============================================================================= | |
| # ATLAS — Benchmark Advisor (ReAct Framework) | |
| # ============================================================================= | |
class BenchmarkAdvisorAgent:
    """
    ATLAS — Senior Design System Benchmark Analyst.
    ReAct comparison of ALL token types against industry benchmarks.
    Model: Llama 3.3 70B · Temperature: 0.25
    """

    # Verbatim system prompt — the JSON keys it demands must stay in sync
    # with the key lookups in _parse() below.
    SYSTEM_PROMPT = """You are ATLAS, a Senior Design System Benchmark Analyst.
## REASONING FRAMEWORK (ReAct)
For EACH token type: THINK → ACT → OBSERVE → VERIFY.
Compare the user's values against benchmarks for:
1. TYPOGRAPHY — ratio, base size, scale pattern
2. SPACING — grid base, alignment, scale
3. COLORS — palette size, brand color usage
4. RADIUS — strategy (sharp/rounded/pill), tier count
5. SHADOWS — elevation levels, blur range
Then pick the BEST OVERALL FIT benchmark.
Max 4 alignment changes. If >85% match, say "already well-aligned".
## OUTPUT (JSON)
{
"reasoning_steps": [
{"step": "THINK", "area": "typography", "content": "User ratio 1.18 vs Material 1.25..."},
{"step": "ACT", "area": "typography", "content": "Material closest for type"},
{"step": "THINK", "area": "spacing", "content": "8px matches Material and Polaris"},
{"step": "ACT", "area": "spacing", "content": "Both aligned"},
{"step": "THINK", "area": "colors", "content": "25 colors vs Polaris 18..."},
{"step": "THINK", "area": "radius", "content": "4/8px tiers..."},
{"step": "THINK", "area": "shadows", "content": "3 levels vs Material 5..."},
{"step": "VERIFY", "area": "overall", "content": "Material best: 4/5 areas align"}
],
"recommended_benchmark": "material_design_3",
"recommended_benchmark_name": "Material Design 3",
"reasoning": "Best fit across all token types — cite data",
"alignment_changes": [
{"change": "Type scale", "from": "1.18", "to": "1.25", "effort": "medium", "token_type": "typography"}
],
"typography_comparison": {"user": "1.18", "benchmark": "1.25", "gap": "minor"},
"spacing_comparison": {"user": "8px", "benchmark": "8px", "gap": "aligned"},
"color_comparison": {"user": "25", "benchmark": "18", "gap": "reduce"},
"radius_comparison": {"user": "2 tiers", "benchmark": "3 tiers", "gap": "add xl"},
"shadow_comparison": {"user": "3 levels", "benchmark": "5 levels", "gap": "add 2"},
"pros_of_alignment": ["..."],
"cons_of_alignment": ["..."],
"alternative_benchmarks": [{"name": "Polaris", "reason": "..."}],
"self_evaluation": {"confidence": N, "reasoning": "...", "data_quality": "...", "flags": []}
}
Return ONLY valid JSON."""

    # User-message template; caller supplies pre-formatted summary strings.
    PROMPT_TEMPLATE = """Compare this design system against benchmarks — ALL token types.
## CURRENT VALUES
- Type Scale Ratio: {user_ratio} | Base: {user_base}px | Sizes: {user_sizes}
- Spacing Grid: {user_spacing}px | Values: {spacing_values}
- Colors: {color_count} unique | Brand: {brand_info}
- Radius: {radius_data}
- Shadows: {shadow_data}
## BENCHMARKS
{benchmark_comparison}
Use ReAct per token type. Pick the best overall fit."""

    def __init__(self, hf_client):
        # Same async client contract as the other agents (complete_async).
        self.hf_client = hf_client

    async def analyze(
        self,
        user_ratio: float, user_base: int, user_spacing: int,
        benchmark_comparisons: list,
        color_count: int = 0, brand_info: str = "",
        user_sizes: str = "", spacing_values: str = "",
        radius_data: str = "", shadow_data: str = "",
        log_callback: Callable = None,
    ) -> BenchmarkAdvice:
        """Run one ATLAS pass: single LLM call, parse, log the summary.

        Unlike AURORA/SENTINEL there is no critic/retry step here. Never
        raises: any exception is logged and an empty BenchmarkAdvice
        is returned.
        """
        def log(msg):
            # Logging is optional — messages are dropped without a callback.
            if log_callback:
                log_callback(msg)
        log("")
        log(" 🏢 ATLAS — Benchmark Advisor (Llama 3.3 70B)")
        log(" └─ ReAct: Comparing typography + spacing + colors + radius + shadows...")
        prompt = self.PROMPT_TEMPLATE.format(
            user_ratio=user_ratio, user_base=user_base, user_spacing=user_spacing,
            user_sizes=user_sizes or "N/A",
            spacing_values=spacing_values or "N/A",
            color_count=color_count, brand_info=brand_info or "N/A",
            radius_data=radius_data or "No radius data",
            shadow_data=shadow_data or "No shadow data",
            benchmark_comparison=self._fmt_benchmarks(benchmark_comparisons),
        )
        try:
            start = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="benchmark_advisor",
                system_prompt=self.SYSTEM_PROMPT,
                user_message=prompt,
                max_tokens=1500,
                json_mode=True,
            )
            dur = (datetime.now() - start).total_seconds()
            result = self._parse(response)
            log(f" ─────────────────────────────────────────")
            log(f" 🏢 ATLAS — COMPLETE ({dur:.1f}s)")
            _log_reasoning(result.reasoning_trace, log)
            log(f" ├─ Recommended: {result.recommended_benchmark_name}")
            log(f" ├─ Changes: {len(result.alignment_changes)}")
            log(f" ├─ Typography: {result.typography_comparison}")
            log(f" ├─ Spacing: {result.spacing_comparison}")
            log(f" ├─ Colors: {result.color_comparison}")
            log(f" ├─ Radius: {result.radius_comparison}")
            log(f" └─ Shadows: {result.shadow_comparison}")
            return result
        except Exception as e:
            # Best-effort agent: fall back to an empty result so the
            # parallel pipeline keeps running.
            log(f" ⚠️ ATLAS failed: {str(e)[:120]}")
            return BenchmarkAdvice()

    def _fmt_benchmarks(self, comparisons: list) -> str:
        """Format up to 5 benchmark comparison objects for the prompt.

        NOTE(review): each item must expose .benchmark (with .icon, .name,
        .typography, .spacing, .best_for) and .overall_match_pct — these
        are project types defined elsewhere; confirm against that module.
        """
        lines = []
        for i, c in enumerate(comparisons[:5]):
            b = c.benchmark
            lines.append(f"{i+1}. {b.icon} {b.name} — Match: {c.overall_match_pct:.0f}%"
                         f" | Type: {b.typography.get('scale_ratio', '?')}"
                         f" | Spacing: {b.spacing.get('base', '?')}px"
                         f" | Best for: {', '.join(b.best_for)}")
        return "\n".join(lines) if lines else "No benchmark data"

    def _parse(self, response: str) -> BenchmarkAdvice:
        """Extract the first {...} span and map JSON keys onto BenchmarkAdvice.

        Greedy first-'{' to last-'}' match tolerates prose around the JSON;
        any failure yields an empty BenchmarkAdvice.
        """
        try:
            m = re.search(r'\{[\s\S]*\}', response)
            if m:
                d = json.loads(m.group())
                return BenchmarkAdvice(
                    recommended_benchmark=d.get("recommended_benchmark", ""),
                    recommended_benchmark_name=d.get("recommended_benchmark_name", ""),
                    reasoning=d.get("reasoning", ""),
                    alignment_changes=d.get("alignment_changes", []),
                    pros_of_alignment=d.get("pros_of_alignment", []),
                    cons_of_alignment=d.get("cons_of_alignment", []),
                    alternative_benchmarks=d.get("alternative_benchmarks", []),
                    self_evaluation=d.get("self_evaluation", {}),
                    typography_comparison=d.get("typography_comparison", {}),
                    spacing_comparison=d.get("spacing_comparison", {}),
                    color_comparison=d.get("color_comparison", {}),
                    radius_comparison=d.get("radius_comparison", {}),
                    shadow_comparison=d.get("shadow_comparison", {}),
                    reasoning_trace=d.get("reasoning_steps", []),
                )
        except Exception:
            # Malformed output → empty result.
            pass
        return BenchmarkAdvice()
| # ============================================================================= | |
| # SENTINEL — Best Practices Auditor (ReAct + Grounded Scoring) | |
| # ============================================================================= | |
| class BestPracticesValidatorAgent: | |
| """ | |
| SENTINEL — Design System Best Practices Auditor. | |
| ReAct: Grounds EVERY score in actual rule-engine data. Audits ALL token types. | |
| Model: Qwen 72B · Temperature: 0.2 | |
| """ | |
| SYSTEM_PROMPT = """You are SENTINEL, a Design System Best Practices Auditor. | |
| ## REASONING FRAMEWORK (ReAct + Grounded) | |
| For EACH check: THINK → ACT (cite data) → OBSERVE → VERIFY. | |
| You MUST CITE the exact input data for every score. | |
| ## AUDIT ALL TOKEN TYPES: | |
| ### COLORS (25 pts) | |
| - aa_compliance: CITE AA pass/fail count | |
| - color_count: < 20 semantic colors ideal | |
| - near_duplicates: should be 0 | |
| ### TYPOGRAPHY (25 pts) | |
| - type_scale_standard: nearest standard ratio | |
| - type_scale_consistent: variance check | |
| - base_size_accessible: >= 16px | |
| ### SPACING (20 pts) | |
| - spacing_grid: 4px or 8px consistency | |
| - spacing_alignment: > 80% target | |
| ### RADIUS (15 pts) | |
| - radius_consistency: base-4/8 grid, clear tiers | |
| ### SHADOWS (15 pts) | |
| - shadow_system: elevation hierarchy, blur progression | |
| ## CRITICAL: If data says 7 AA failures, you CANNOT say "pass". | |
| ## OUTPUT (JSON) | |
| { | |
| "reasoning_steps": [ | |
| {"step": "THINK", "area": "colors", "content": "7/25 fail AA = 28%"}, | |
| {"step": "ACT", "area": "colors", "content": "aa_compliance = FAIL"}, | |
| {"step": "THINK", "area": "typography", "content": "ratio 1.18, variance 0.22"}, | |
| {"step": "ACT", "area": "typography", "content": "type_scale_consistent = WARN"}, | |
| {"step": "THINK", "area": "spacing", "content": "8px base, 85% aligned"}, | |
| {"step": "ACT", "area": "spacing", "content": "spacing_grid = PASS"}, | |
| {"step": "THINK", "area": "radius", "content": "4px,8px,16px all base-4"}, | |
| {"step": "ACT", "area": "radius", "content": "radius_consistency = PASS"}, | |
| {"step": "THINK", "area": "shadows", "content": "3 levels, blur 4→8→24"}, | |
| {"step": "ACT", "area": "shadows", "content": "shadow_system = WARN"}, | |
| {"step": "VERIFY", "area": "scoring", "content": "3 pass, 2 warn, 1 fail → 62/100"} | |
| ], | |
| "overall_score": N, | |
| "checks": { | |
| "aa_compliance": {"status": "pass|warn|fail", "note": "CITE: 7/25 fail AA"}, | |
| "type_scale_standard": {"status": "...", "note": "CITE: ratio 1.18 nearest 1.2"}, | |
| "type_scale_consistent": {"status": "...", "note": "CITE: variance 0.22 > 0.15"}, | |
| "base_size_accessible": {"status": "...", "note": "CITE: base = Npx"}, | |
| "spacing_grid": {"status": "...", "note": "CITE: N% aligned to Npx"}, | |
| "color_count": {"status": "...", "note": "CITE: N unique colors"}, | |
| "near_duplicates": {"status": "...", "note": "CITE: N pairs"}, | |
| "radius_consistency": {"status": "...", "note": "CITE: tiers and grid"}, | |
| "shadow_system": {"status": "...", "note": "CITE: N levels, progression"} | |
| }, | |
| "color_assessment": {"aa_pass_rate": "72%", "palette_size": 25, "verdict": "needs work"}, | |
| "typography_assessment": {"ratio": 1.18, "consistent": false, "base_ok": true, "verdict": "fair"}, | |
| "spacing_assessment": {"grid": "8px", "alignment": "85%", "verdict": "good"}, | |
| "radius_assessment": {"tiers": 3, "base_aligned": true, "verdict": "good"}, | |
| "shadow_assessment": {"levels": 3, "progression": "non-linear", "verdict": "fair"}, | |
| "priority_fixes": [ | |
| {"rank": 1, "issue": "...", "impact": "high", "effort": "low", "action": "Specific fix", "token_type": "color"} | |
| ], | |
| "passing_practices": ["spacing_grid"], | |
| "failing_practices": ["aa_compliance"], | |
| "self_evaluation": {"confidence": N, "reasoning": "...", "data_quality": "...", "flags": []} | |
| } | |
| Return ONLY valid JSON.""" | |
| PROMPT_TEMPLATE = """Audit this design system. CITE the data for every score. | |
| ## RULE ENGINE FACTS (verified) | |
| ### Typography | |
| - Ratio: {type_ratio} ({type_consistent}) | Base: {base_size}px | Sizes: {sizes} | |
| ### Accessibility | |
| - Total: {total_colors} | AA Pass: {aa_pass} | AA Fail: {aa_fail} | |
| - Failing: {failing_colors} | |
| ### Spacing | |
| - Base: {spacing_base}px | Aligned: {spacing_aligned}% | Values: {spacing_values} | |
| ### Color Stats | |
| - Unique: {unique_colors} | Near-Duplicates: {near_duplicates} | |
| ### Radius | |
| {radius_data} | |
| ### Shadows | |
| {shadow_data} | |
| CITE the EXACT numbers above for every check.""" | |
| def __init__(self, hf_client): | |
| self.hf_client = hf_client | |
    async def analyze(
        self,
        rule_engine_results: Any,
        radius_tokens: Optional[dict] = None,
        shadow_tokens: Optional[dict] = None,
        log_callback: Optional[Callable] = None,
    ) -> BestPracticesResult:
        """Run the SENTINEL best-practices audit across all token types.

        Formats verified rule-engine facts into the prompt, queries the LLM
        in JSON mode, then cross-references the answer with a deterministic
        critic (validate_sentinel_output) and patches any contradictions
        via _apply_sentinel_fixes.

        Args:
            rule_engine_results: Object exposing .typography, .spacing,
                .color_stats and .accessibility attributes.
            radius_tokens: Optional radius tokens summarized into the prompt.
            shadow_tokens: Optional shadow tokens summarized into the prompt.
            log_callback: Optional callable receiving progress log lines.

        Returns:
            Parsed BestPracticesResult, or an empty default on any error.
        """
        def log(msg):
            # Logging is a no-op when no callback was supplied.
            if log_callback:
                log_callback(msg)
        log("")
        log(" ✅ SENTINEL — Best Practices Auditor (Qwen 72B)")
        log(" └─ ReAct: Auditing colors + typography + spacing + radius + shadows...")
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility
        # Colors failing WCAG AA for normal text — cited verbatim in the prompt.
        failures = [a for a in accessibility if not a.passes_aa_normal]
        failing_str = ", ".join([f"{a.hex_color} ({a.contrast_on_white:.1f}:1)" for a in failures[:8]])
        sizes_str = ", ".join([f"{s}px" for s in typo.sizes_px[:8]]) if typo.sizes_px else "N/A"
        # current_values may be absent on older spacing results — hence the hasattr guard.
        sp_vals = ", ".join([f"{v}px" for v in spacing.current_values[:10]]) if hasattr(spacing, 'current_values') and spacing.current_values else "N/A"
        prompt = self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_consistent="consistent" if typo.is_consistent else f"inconsistent (var={typo.variance:.2f})",
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            sizes=sizes_str,
            total_colors=len(accessibility),
            aa_pass=len(accessibility) - len(failures),
            aa_fail=len(failures),
            failing_colors=failing_str or "None",
            spacing_base=spacing.detected_base,
            spacing_aligned=f"{spacing.alignment_percentage:.0f}",
            spacing_values=sp_vals,
            unique_colors=color_stats.unique_count,
            near_duplicates=len(color_stats.near_duplicates),
            radius_data=_fmt_radius(radius_tokens) if radius_tokens else "No radius data",
            shadow_data=_fmt_shadows(shadow_tokens) if shadow_tokens else "No shadow data",
        )
        try:
            start = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="best_practices_validator",
                system_prompt=self.SYSTEM_PROMPT,
                user_message=prompt,
                max_tokens=2000,
                json_mode=True,
            )
            dur = (datetime.now() - start).total_seconds()
            result = self._parse(response)
            # Critic cross-reference: deterministic checks against rule-engine data.
            passed, errors = validate_sentinel_output(result, rule_engine_results)
            result.validation_passed = passed
            if not passed:
                log(f" ⚠️ Critic: {len(errors)} issues — applying fixes...")
                for e in errors[:3]:
                    log(f" └─ {e}")
                # Patch the LLM output so it cannot contradict verified facts.
                result = _apply_sentinel_fixes(result, rule_engine_results, errors)
            log(f" ─────────────────────────────────────────")
            log(f" ✅ SENTINEL — COMPLETE ({dur:.1f}s)")
            _log_reasoning(result.reasoning_trace, log)
            log(f" ├─ Overall Score: {result.overall_score}/100")
            for cn, cv in (result.checks or {}).items():
                if isinstance(cv, dict):
                    s = cv.get("status", "?")
                    si = {"pass": "✅", "warn": "⚠️", "fail": "❌"}.get(s, "?")
                    log(f" │ {si} {cn}: {s}")
            log(f" ├─ Priority Fixes: {len(result.priority_fixes)}")
            log(f" └─ Critic: {'✅ PASSED' if result.validation_passed else '⚠️ FIXED'}")
            return result
        except Exception as e:
            # Best-effort: any failure degrades to an empty result object.
            log(f" ⚠️ SENTINEL failed: {str(e)[:120]}")
            return BestPracticesResult()
| def _parse(self, response: str) -> BestPracticesResult: | |
| try: | |
| m = re.search(r'\{[\s\S]*\}', response) | |
| if m: | |
| d = json.loads(m.group()) | |
| return BestPracticesResult( | |
| overall_score=d.get("overall_score", 50), | |
| checks=d.get("checks", {}), | |
| priority_fixes=d.get("priority_fixes", []), | |
| passing_practices=d.get("passing_practices", []), | |
| failing_practices=d.get("failing_practices", []), | |
| self_evaluation=d.get("self_evaluation", {}), | |
| color_assessment=d.get("color_assessment", {}), | |
| typography_assessment=d.get("typography_assessment", {}), | |
| spacing_assessment=d.get("spacing_assessment", {}), | |
| radius_assessment=d.get("radius_assessment", {}), | |
| shadow_assessment=d.get("shadow_assessment", {}), | |
| reasoning_trace=d.get("reasoning_steps", []), | |
| ) | |
| except Exception: | |
| pass | |
| return BestPracticesResult() | |
| # ============================================================================= | |
| # NEXUS — HEAD Synthesizer (Tree of Thought) | |
| # ============================================================================= | |
class HeadSynthesizerAgent:
    """
    NEXUS — Senior Design System Architect.
    Tree of Thought: 2 perspectives, picks best, compiles all agent outputs.
    Recommendations for ALL token types.
    Model: Llama 3.3 70B · Temperature: 0.3
    """
    # System prompt: instructs the model to score from two weighted
    # perspectives (A: accessibility-first, B: balanced), choose one, and
    # emit a single JSON object whose keys map onto HeadSynthesis fields.
    SYSTEM_PROMPT = """You are NEXUS, a Senior Design System Architect — the final synthesizer.
## REASONING FRAMEWORK (Tree of Thought)
Evaluate TWO perspectives:
### PERSPECTIVE A — Accessibility-First
Weights: accessibility=40%, consistency=30%, organization=30%
Penalize heavily for AA failures.
### PERSPECTIVE B — Balanced
Weights: accessibility=30%, consistency=35%, organization=35%
Equal emphasis across areas.
For each: calculate scores, determine top 3 actions.
Then CHOOSE the perspective that better reflects reality.
## SYNTHESIZE ALL TOKEN TYPES:
- Colors: AURORA brand + SENTINEL AA findings → color recommendations
- Typography: ATLAS benchmark match + SENTINEL scale audit → type scale rec
- Spacing: ATLAS grid comparison + SENTINEL alignment → spacing rec
- Radius: SENTINEL consistency + ATLAS benchmark → radius rec
- Shadows: SENTINEL elevation + ATLAS benchmark → shadow rec
## OUTPUT (JSON)
{
"reasoning_steps": [
{"step": "THINK", "area": "perspective_a", "content": "Accessibility-first weighting..."},
{"step": "ACT", "area": "perspective_a", "content": "Score: overall=52..."},
{"step": "THINK", "area": "perspective_b", "content": "Balanced weighting..."},
{"step": "ACT", "area": "perspective_b", "content": "Score: overall=63..."},
{"step": "OBSERVE", "area": "comparison", "content": "A shows severity of AA failures..."},
{"step": "VERIFY", "area": "decision", "content": "Choosing A — honest about AA issues"}
],
"perspective_a": {"scores": {"overall": 52, "accessibility": 38, "consistency": 72, "organization": 68}, "reasoning": "..."},
"perspective_b": {"scores": {"overall": 63, "accessibility": 45, "consistency": 72, "organization": 68}, "reasoning": "..."},
"chosen_perspective": "A",
"choice_reasoning": "AA failures affect real users — lower score is more honest",
"executive_summary": "Your design system scores X/100...",
"scores": {"overall": 52, "accessibility": 38, "consistency": 72, "organization": 68},
"top_3_actions": [
{"action": "Fix AA compliance", "impact": "high", "effort": "medium", "details": "#X→#Y", "token_type": "color"}
],
"color_recommendations": [
{"role": "brand.primary", "current": "#hex", "suggested": "#hex", "reason": "AA", "accept": true}
],
"type_scale_recommendation": {"current_ratio": 1.18, "recommended_ratio": 1.25, "reason": "..."},
"spacing_recommendation": {"current": "8px", "recommended": "8px", "reason": "Already aligned"},
"radius_recommendation": {"current": "3 tiers", "recommended": "Add xl tier", "reason": "..."},
"shadow_recommendation": {"current": "3 levels", "recommended": "Add 2 more", "reason": "..."},
"benchmark_fit": {"closest": "Material", "similarity": "78%", "recommendation": "..."},
"brand_analysis": {"primary": "#hex", "secondary": "#hex", "cohesion": 7},
"self_evaluation": {"confidence": N, "reasoning": "...", "data_quality": "...", "flags": []}
}
Return ONLY valid JSON."""
    # User-message template; placeholders are filled in synthesize() from
    # rule-engine facts plus the AURORA / ATLAS / SENTINEL agent outputs.
    PROMPT_TEMPLATE = """Synthesize all analysis into a final report.
## RULE ENGINE FACTS
- Type: {type_ratio} ({type_status}) | Base: {base_size}px
- AA Failures: {aa_failures}/{total_colors}
- Spacing: {spacing_status}
- Colors: {unique_colors} unique | Consistency: {consistency_score}/100
- Radius: {radius_facts}
- Shadows: {shadow_facts}
## AURORA — Brand Analysis
- Primary: {brand_primary} ({brand_confidence}) | Secondary: {brand_secondary}
- Palette: {palette_strategy} | Cohesion: {cohesion_score}/10
- Typography: {aurora_typo}
- Spacing: {aurora_spacing}
- Radius: {aurora_radius}
- Shadows: {aurora_shadows}
## ATLAS — Benchmark
- Closest: {closest_benchmark} ({match_pct}%)
- Typo: {atlas_typo} | Spacing: {atlas_spacing} | Colors: {atlas_colors}
- Radius: {atlas_radius} | Shadows: {atlas_shadows}
- Changes: {benchmark_changes}
## SENTINEL — Audit
- Score: {best_practices_score}/100
- Color: {sentinel_color} | Typo: {sentinel_typo} | Spacing: {sentinel_spacing}
- Radius: {sentinel_radius} | Shadows: {sentinel_shadows}
- Fixes: {priority_fixes}
## AA FIXES NEEDED
{accessibility_fixes}
Evaluate from TWO perspectives (Tree of Thought). Choose one. Recommend for ALL token types."""
    def __init__(self, hf_client):
        """Store the shared HF inference client used for async completions."""
        self.hf_client = hf_client
    async def synthesize(
        self,
        rule_engine_results: Any,
        benchmark_comparisons: list,
        brand_identification: BrandIdentification,
        benchmark_advice: BenchmarkAdvice,
        best_practices: BestPracticesResult,
        log_callback: Optional[Callable] = None,
    ) -> HeadSynthesis:
        """Compile every upstream analysis into the final HeadSynthesis.

        Builds one prompt from rule-engine facts plus the AURORA, ATLAS and
        SENTINEL outputs, runs the Tree-of-Thought synthesis in JSON mode,
        and parses the result.

        Args:
            rule_engine_results: Object exposing .typography, .spacing,
                .color_stats, .accessibility and .consistency_score.
            benchmark_comparisons: List ordered best-match-first; only the
                first entry (closest benchmark) is used here.
            brand_identification: AURORA output.
            benchmark_advice: ATLAS output (per-token-type comparisons).
            best_practices: SENTINEL output (audit scores/assessments).
            log_callback: Optional callable receiving progress log lines.

        Returns:
            Parsed HeadSynthesis, or an empty default on any error.
        """
        def log(msg):
            # Logging is a no-op when no callback was supplied.
            if log_callback:
                log_callback(msg)
        log("")
        log("═" * 60)
        log("🧠 NEXUS — HEAD SYNTHESIZER (Tree of Thought)")
        log("═" * 60)
        log(" Evaluating Perspective A (Accessibility-First) vs B (Balanced)...")
        log(" Compiling: Rule Engine + AURORA + ATLAS + SENTINEL...")
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility
        # Colors failing WCAG AA for normal text, with their suggested fixes.
        failures = [a for a in accessibility if not a.passes_aa_normal]
        aa_fixes_str = "\n".join([
            f"- {a.name}: {a.hex_color} ({a.contrast_on_white:.1f}:1) → {a.suggested_fix} ({a.suggested_fix_contrast:.1f}:1)"
            for a in failures[:8] if a.suggested_fix
        ])
        # Comparisons are assumed ordered best-first — TODO confirm at call site.
        closest = benchmark_comparisons[0] if benchmark_comparisons else None
        def _s(obj):
            """Safely stringify a dict/value for prompt."""
            if isinstance(obj, dict):
                parts = [f"{k}={v}" for k, v in list(obj.items())[:4]]
                return ", ".join(parts) if parts else "N/A"
            return str(obj) if obj else "N/A"
        prompt = self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_status="consistent" if typo.is_consistent else "inconsistent",
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            aa_failures=len(failures), total_colors=len(accessibility),
            spacing_status=f"{spacing.detected_base}px, {spacing.alignment_percentage:.0f}% aligned",
            unique_colors=color_stats.unique_count,
            consistency_score=rule_engine_results.consistency_score,
            radius_facts=_s(best_practices.radius_assessment) or "N/A",
            shadow_facts=_s(best_practices.shadow_assessment) or "N/A",
            brand_primary=brand_identification.brand_primary.get("color", "?"),
            brand_confidence=brand_identification.brand_primary.get("confidence", "?"),
            brand_secondary=brand_identification.brand_secondary.get("color", "?"),
            palette_strategy=brand_identification.palette_strategy,
            cohesion_score=brand_identification.cohesion_score,
            aurora_typo=brand_identification.typography_notes or "N/A",
            aurora_spacing=brand_identification.spacing_notes or "N/A",
            aurora_radius=brand_identification.radius_notes or "N/A",
            aurora_shadows=brand_identification.shadow_notes or "N/A",
            closest_benchmark=closest.benchmark.name if closest else "?",
            match_pct=f"{closest.overall_match_pct:.0f}" if closest else "0",
            atlas_typo=_s(benchmark_advice.typography_comparison),
            atlas_spacing=_s(benchmark_advice.spacing_comparison),
            atlas_colors=_s(benchmark_advice.color_comparison),
            atlas_radius=_s(benchmark_advice.radius_comparison),
            atlas_shadows=_s(benchmark_advice.shadow_comparison),
            benchmark_changes="; ".join([c.get("change", "") for c in benchmark_advice.alignment_changes[:4]]),
            best_practices_score=best_practices.overall_score,
            sentinel_color=_s(best_practices.color_assessment),
            sentinel_typo=_s(best_practices.typography_assessment),
            sentinel_spacing=_s(best_practices.spacing_assessment),
            sentinel_radius=_s(best_practices.radius_assessment),
            sentinel_shadows=_s(best_practices.shadow_assessment),
            priority_fixes="; ".join([f.get("issue", "") for f in best_practices.priority_fixes[:5]]),
            accessibility_fixes=aa_fixes_str or "None needed",
        )
        try:
            start = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="head_synthesizer",
                system_prompt=self.SYSTEM_PROMPT,
                user_message=prompt,
                max_tokens=2500,
                json_mode=True,
            )
            dur = (datetime.now() - start).total_seconds()
            result = self._parse(response)
            log("")
            log(f" 🧠 NEXUS — COMPLETE ({dur:.1f}s)")
            _log_reasoning(result.reasoning_trace, log)
            # Surface both perspective scores plus the chosen one for the UI log.
            pa = result.perspective_a.get("scores", {}).get("overall", "?") if result.perspective_a else "?"
            pb = result.perspective_b.get("scores", {}).get("overall", "?") if result.perspective_b else "?"
            log(f" ├─ Perspective A: {pa}/100")
            log(f" ├─ Perspective B: {pb}/100")
            log(f" ├─ Chosen: {result.chosen_perspective}")
            log(f" ├─ Why: {result.choice_reasoning or 'N/A'}")
            log(f" ├─ Final Score: {result.scores.get('overall', '?')}/100" if result.scores else " ├─ Scores: N/A")
            log(f" ├─ Actions: {len(result.top_3_actions)} | Color Recs: {len(result.color_recommendations)}")
            log(f" ├─ Typography: {_s(result.type_scale_recommendation)}")
            log(f" ├─ Spacing: {_s(result.spacing_recommendation)}")
            log(f" ├─ Radius: {_s(result.radius_recommendation)}")
            log(f" └─ Shadows: {_s(result.shadow_recommendation)}")
            log("")
            return result
        except Exception as e:
            # Best-effort: any failure degrades to an empty synthesis object.
            log(f" ⚠️ NEXUS failed: {str(e)[:120]}")
            return HeadSynthesis()
    def _parse(self, response: str) -> HeadSynthesis:
        """Extract the JSON object from the LLM response into a HeadSynthesis.

        Falls back to an empty HeadSynthesis when no JSON object is found or
        parsing fails.
        """
        try:
            # Grab everything from the first "{" to the last "}".
            m = re.search(r'\{[\s\S]*\}', response)
            if m:
                d = json.loads(m.group())
                return HeadSynthesis(
                    executive_summary=d.get("executive_summary", ""),
                    scores=d.get("scores", {}),
                    benchmark_fit=d.get("benchmark_fit", {}),
                    brand_analysis=d.get("brand_analysis", {}),
                    top_3_actions=d.get("top_3_actions", []),
                    color_recommendations=d.get("color_recommendations", []),
                    type_scale_recommendation=d.get("type_scale_recommendation", {}),
                    spacing_recommendation=d.get("spacing_recommendation", {}),
                    radius_recommendation=d.get("radius_recommendation", {}),
                    shadow_recommendation=d.get("shadow_recommendation", {}),
                    self_evaluation=d.get("self_evaluation", {}),
                    perspective_a=d.get("perspective_a", {}),
                    perspective_b=d.get("perspective_b", {}),
                    chosen_perspective=d.get("chosen_perspective", ""),
                    choice_reasoning=d.get("choice_reasoning", ""),
                    reasoning_trace=d.get("reasoning_steps", []),
                )
        except Exception:
            # Malformed output degrades to defaults rather than crashing.
            pass
        return HeadSynthesis()
| # ============================================================================= | |
| # CRITIC / VALIDATOR FUNCTIONS (Rule-based, no LLM) | |
| # ============================================================================= | |
def validate_aurora_output(output: BrandIdentification, input_hexes: list) -> tuple:
    """Validate AURORA's naming_map against the naming contract.

    Checks, in order: every input hex has a name (case-insensitive key
    lookup), no word-based shade modifiers, no duplicate names, and the
    "color.<group>.<shade>" convention (prefix + at least 3 parts).

    Returns:
        (passed, errors) — passed is True only when errors is empty.
    """
    problems = []
    naming = output.naming_map or {}
    # 1) Every input color must be named.
    problems.extend(
        f"Missing name for {hx}"
        for hx in input_hexes
        if hx not in naming and hx.lower() not in naming
    )
    # 2) Reject word-based shade modifiers in any dot-separated segment.
    forbidden = {"light", "dark", "base", "muted", "deep", "lighter", "darker"}
    for token_name in naming.values():
        problems.extend(
            f"Word shade '{segment}' in {token_name}"
            for segment in token_name.split(".")
            if segment.lower() in forbidden
        )
    # 3) Names must be unique across the whole map.
    observed = set()
    for token_name in naming.values():
        if token_name in observed:
            problems.append(f"Duplicate: {token_name}")
        observed.add(token_name)
    # 4) Enforce the color.X.Y convention.
    for token_name in naming.values():
        if not token_name.startswith("color."):
            problems.append(f"'{token_name}' must start with 'color.'")
        if len(token_name.split(".")) < 3:
            problems.append(f"'{token_name}' needs 3+ parts")
    return not problems, problems
def validate_sentinel_output(output: BestPracticesResult, rule_engine) -> tuple:
    """Cross-reference SENTINEL scores against rule engine data.

    Deterministic critic: flags claims in the LLM output that contradict
    verified rule-engine facts. Returns (passed, errors).
    """
    problems = []
    check_map = output.checks or {}

    def _status_of(check_name):
        # Status string for a check entry, or None when absent/malformed.
        entry = check_map.get(check_name, {})
        return entry.get("status") if isinstance(entry, dict) else None

    # AA compliance cannot be "pass" while colors actually fail AA.
    aa_failures = sum(1 for a in rule_engine.accessibility if not a.passes_aa_normal)
    if aa_failures > 0 and _status_of("aa_compliance") == "pass":
        problems.append(f"aa_compliance='pass' but {aa_failures} fail AA")
    # Overall score must be a sane percentage.
    score = output.overall_score
    if score < 0 or score > 100:
        problems.append(f"Score {score} out of 0-100 range")
    # A high score is inconsistent with three or more failing checks.
    fail_count = sum(
        1 for entry in check_map.values()
        if isinstance(entry, dict) and entry.get("status") == "fail"
    )
    if fail_count >= 3 and score > 70:
        problems.append(f"Score {score} too high with {fail_count} failures")
    # A base font size below 16px cannot be marked accessible.
    sizes = rule_engine.typography.sizes_px
    base_size = sizes[0] if sizes else 16
    if base_size < 16 and _status_of("base_size_accessible") == "pass":
        problems.append(f"base_size 'pass' but {base_size}px < 16")
    return not problems, problems
def _apply_sentinel_fixes(result: BestPracticesResult, rule_engine, errors: list) -> BestPracticesResult:
    """Deterministic fixes when the critic finds issues.

    Mutates `result` in place (and returns it) so the final output never
    contradicts the rule-engine facts.
    """
    aa_failing = [a for a in rule_engine.accessibility if not a.passes_aa_normal]
    for message in errors:
        # Force the AA check to "fail" when the critic caught a false pass.
        if "aa_compliance" in message and "pass" in message:
            if "aa_compliance" in result.checks:
                entry = result.checks["aa_compliance"]
                entry["status"] = "fail"
                entry["note"] = f"CORRECTED: {len(aa_failing)} fail AA"
        # Cap the overall score relative to how many checks failed.
        if "too high" in message.lower():
            n_failed = sum(
                1 for c in result.checks.values()
                if isinstance(c, dict) and c.get("status") == "fail"
            )
            ceiling = max(30, 100 - n_failed * 15)
            result.overall_score = min(result.overall_score, ceiling)
    # Final clamp into the valid 0-100 range.
    result.overall_score = max(0, min(100, result.overall_score))
    result.validation_passed = True
    return result
def filter_aurora_naming_map(aurora: BrandIdentification) -> dict:
    """Keep only AURORA's semantic *role* assignments from its naming map.

    AURORA is a secondary naming authority — it may assign roles
    (brand.primary, text.secondary, bg.primary, feedback.error, ...) but
    must not override palette names such as blue.500 or neutral.700; the
    color_classifier is the primary naming authority for those.

    Returns:
        Dict of normalized hex -> "color.<role>" (role-based names only).
    """
    role_prefixes = ('brand.', 'text.', 'bg.', 'border.', 'feedback.')
    roles_only = {}
    source_map = aurora.naming_map or {}
    for raw_hex, raw_name in source_map.items():
        key = str(raw_hex).strip().lower()
        # Drop malformed hex keys and empty names outright.
        if not key.startswith('#') or not raw_name:
            continue
        # Normalize into the canonical "color." namespace.
        full_name = raw_name if raw_name.startswith('color.') else f'color.{raw_name}'
        remainder = full_name[6:]  # text after the "color." prefix
        if remainder.startswith(role_prefixes):
            roles_only[key] = full_name
    return roles_only
def post_validate_stage2(
    aurora: BrandIdentification,
    sentinel: BestPracticesResult,
    nexus: HeadSynthesis,
    rule_engine: Any,
) -> list:
    """Final deterministic checks after ALL agents. Returns issues list.

    Args:
        aurora: AURORA output; each naming_map value is format-checked.
        sentinel: SENTINEL output (currently unused here; kept for
            interface stability and future cross-checks).
        nexus: NEXUS synthesis; scores and color recommendations checked.
        rule_engine: Rule-engine results; supplies the AA failure count.

    Returns:
        List of human-readable issue strings (empty when everything passes).
    """
    issues = []
    # AURORA names must follow the exact color.<group>.<shade> pattern.
    for name in (aurora.naming_map or {}).values():
        if not re.match(r'^color\.\w+\.[\w]+$', name):
            issues.append(f"Bad name: {name}")
    # NEXUS numeric scores must be valid percentages.
    for key, val in (nexus.scores or {}).items():
        if isinstance(val, (int, float)) and not (0 <= val <= 100):
            issues.append(f"Score {key}={val} OOB")
    # A high accessibility score contradicts many AA failures.
    aa_failures = len([a for a in rule_engine.accessibility if not a.passes_aa_normal])
    n_acc = nexus.scores.get("accessibility", 50) if nexus.scores else 50
    if aa_failures > 3 and n_acc > 85:
        issues.append(f"Nexus accessibility={n_acc} but {aa_failures} AA failures")
    # Color recommendations must carry "#"-prefixed hex values.
    # Loop variable renamed from `field` — it shadowed dataclasses.field
    # imported at module level.
    for rec in (nexus.color_recommendations or []):
        for rec_key in ("current", "suggested"):
            v = rec.get(rec_key, "")
            if v and not v.startswith("#"):
                issues.append(f"Color rec {rec_key} missing #: {v}")
    return issues