# Hosting-page scrape residue (author / commit banner), preserved as comments
# so the module remains valid Python:
# riazmo's picture
# docs: update all docs for v3.2 + add Part 2 component generation research
# f0ceb42
"""
Stage 2 LLM Agents — v3 Agentic Architecture
==============================================
Each agent:
- Researches ALL token types (colors, typography, spacing, radius, shadows)
- Uses ReAct framework: THINK → ACT → OBSERVE → VERIFY
- Returns visible reasoning chain for the UI
- Has a Python-based critic for validation
Agents run IN PARALLEL (asyncio.gather), then NEXUS compiles.
Agent Responsibilities:
- AURORA: Brand identity + semantic naming for ALL colors + notes on all token types
- SENTINEL: Best practices audit across ALL token types, grounded in rule-engine data
- ATLAS: Benchmark comparison for ALL token types
- NEXUS (HEAD): Tree-of-Thought synthesis, compiles all agent outputs
"""
import json
import re
import textwrap
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional, Callable, Any
# =============================================================================
# DATA CLASSES — v3: includes reasoning_trace + naming_map
# =============================================================================
@dataclass
class BrandIdentification:
    """Output of AURORA, the brand-identity agent (ReAct).

    Holds the brand palette verdicts, the color naming map, per-token-type
    observation notes, the visible reasoning chain, and critic state.
    """
    brand_primary: dict = field(default_factory=dict)
    brand_secondary: dict = field(default_factory=dict)
    brand_accent: dict = field(default_factory=dict)
    palette_strategy: str = ""
    cohesion_score: int = 5
    cohesion_notes: str = ""
    # v3: naming_map covers ALL colors, not just the top 10 —
    # shaped like {hex: "color.brand.primary"} or {hex: "color.blue.500"}
    naming_map: dict = field(default_factory=dict)
    semantic_names: dict = field(default_factory=dict)  # backward compat
    self_evaluation: dict = field(default_factory=dict)
    # v3: reasoning trace shown to the user
    reasoning_trace: list = field(default_factory=list)
    validation_passed: bool = False
    retry_count: int = 0
    # v3: one free-text note per non-color token type
    typography_notes: str = ""
    spacing_notes: str = ""
    radius_notes: str = ""
    shadow_notes: str = ""

    def to_dict(self) -> dict:
        """Serialize the externally-consumed fields to a plain dict.

        reasoning_trace, validation_passed and retry_count are not exported.
        """
        exported = (
            "brand_primary", "brand_secondary", "brand_accent",
            "palette_strategy", "cohesion_score", "cohesion_notes",
            "naming_map", "semantic_names", "self_evaluation",
            "typography_notes", "spacing_notes", "radius_notes",
            "shadow_notes",
        )
        return {attr: getattr(self, attr) for attr in exported}
@dataclass
class BenchmarkAdvice:
    """Output of ATLAS, the benchmark-comparison agent (ReAct)."""
    recommended_benchmark: str = ""
    recommended_benchmark_name: str = ""
    reasoning: str = ""
    alignment_changes: list = field(default_factory=list)
    pros_of_alignment: list = field(default_factory=list)
    cons_of_alignment: list = field(default_factory=list)
    alternative_benchmarks: list = field(default_factory=list)
    self_evaluation: dict = field(default_factory=dict)
    # v3: one comparison dict per token type
    typography_comparison: dict = field(default_factory=dict)
    spacing_comparison: dict = field(default_factory=dict)
    color_comparison: dict = field(default_factory=dict)
    radius_comparison: dict = field(default_factory=dict)
    shadow_comparison: dict = field(default_factory=dict)
    reasoning_trace: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize for the UI.

        Three fields are exported under shortened keys:
        pros_of_alignment -> "pros", cons_of_alignment -> "cons",
        alternative_benchmarks -> "alternatives".
        """
        pairs = [
            ("recommended_benchmark", self.recommended_benchmark),
            ("recommended_benchmark_name", self.recommended_benchmark_name),
            ("reasoning", self.reasoning),
            ("alignment_changes", self.alignment_changes),
            ("pros", self.pros_of_alignment),
            ("cons", self.cons_of_alignment),
            ("alternatives", self.alternative_benchmarks),
            ("self_evaluation", self.self_evaluation),
            ("typography_comparison", self.typography_comparison),
            ("spacing_comparison", self.spacing_comparison),
            ("color_comparison", self.color_comparison),
            ("radius_comparison", self.radius_comparison),
            ("shadow_comparison", self.shadow_comparison),
        ]
        return dict(pairs)
@dataclass
class BestPracticesResult:
    """Output of SENTINEL, the best-practices auditor (ReAct)."""
    overall_score: int = 50
    checks: dict = field(default_factory=dict)
    priority_fixes: list = field(default_factory=list)
    passing_practices: list = field(default_factory=list)
    failing_practices: list = field(default_factory=list)
    self_evaluation: dict = field(default_factory=dict)
    # v3: one assessment dict per token type
    color_assessment: dict = field(default_factory=dict)
    typography_assessment: dict = field(default_factory=dict)
    spacing_assessment: dict = field(default_factory=dict)
    radius_assessment: dict = field(default_factory=dict)
    shadow_assessment: dict = field(default_factory=dict)
    reasoning_trace: list = field(default_factory=list)
    validation_passed: bool = False

    def to_dict(self) -> dict:
        """Serialize for the UI.

        passing_practices / failing_practices are exported under the short
        keys "passing" / "failing"; trace and validation state stay internal.
        """
        out = {attr: getattr(self, attr)
               for attr in ("overall_score", "checks", "priority_fixes")}
        out["passing"] = self.passing_practices
        out["failing"] = self.failing_practices
        for attr in (
            "self_evaluation", "color_assessment", "typography_assessment",
            "spacing_assessment", "radius_assessment", "shadow_assessment",
        ):
            out[attr] = getattr(self, attr)
        return out
@dataclass
class HeadSynthesis:
    """Output of NEXUS, the HEAD synthesizer (Tree of Thought)."""
    executive_summary: str = ""
    scores: dict = field(default_factory=dict)
    benchmark_fit: dict = field(default_factory=dict)
    brand_analysis: dict = field(default_factory=dict)
    top_3_actions: list = field(default_factory=list)
    color_recommendations: list = field(default_factory=list)
    type_scale_recommendation: dict = field(default_factory=dict)
    spacing_recommendation: dict = field(default_factory=dict)
    radius_recommendation: dict = field(default_factory=dict)
    shadow_recommendation: dict = field(default_factory=dict)
    self_evaluation: dict = field(default_factory=dict)
    # v3: the two ToT branches shown to the user
    perspective_a: dict = field(default_factory=dict)
    perspective_b: dict = field(default_factory=dict)
    chosen_perspective: str = ""
    choice_reasoning: str = ""
    reasoning_trace: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize the synthesis for the UI.

        perspective_a / perspective_b and reasoning_trace are not exported;
        only the chosen perspective and its reasoning are.
        """
        exported = (
            "executive_summary", "scores", "benchmark_fit", "brand_analysis",
            "top_3_actions", "color_recommendations",
            "type_scale_recommendation", "spacing_recommendation",
            "radius_recommendation", "shadow_recommendation",
            "self_evaluation", "chosen_perspective", "choice_reasoning",
        )
        return {attr: getattr(self, attr) for attr in exported}
# =============================================================================
# SHARED HELPERS — format token data for prompts
# =============================================================================
def _fmt_colors(tokens: dict, limit: int = 40) -> str:
"""Format color tokens for any agent prompt."""
if not tokens:
return "No color data"
lines = []
for name, t in list(tokens.items())[:limit]:
d = t if isinstance(t, dict) else t.__dict__ if hasattr(t, '__dict__') else {}
hex_val = d.get("value", "")
freq = d.get("frequency", 0)
hint = d.get("role_hint", "")
ctx = ", ".join((d.get("contexts") or [])[:3])
els = ", ".join((d.get("elements") or [])[:3])
hint_s = f" [hint:{hint}]" if hint else ""
lines.append(f"- {hex_val}: {freq}x, ctx=[{ctx}], el=[{els}]{hint_s}")
return "\n".join(lines)
def _fmt_typography(tokens: dict, limit: int = 15) -> str:
if not tokens:
return "No typography data"
lines = []
for name, t in list(tokens.items())[:limit]:
d = t if isinstance(t, dict) else t.__dict__ if hasattr(t, '__dict__') else {}
fam = d.get("font_family", "?")
sz = d.get("font_size", "?")
w = d.get("font_weight", 400)
lh = d.get("line_height", "?")
freq = d.get("frequency", 0)
els = ", ".join((d.get("elements") or [])[:3])
lines.append(f"- {fam} {sz} w{w} lh={lh} ({freq}x) [{els}]")
return "\n".join(lines)
def _fmt_spacing(tokens: dict, limit: int = 15) -> str:
if not tokens:
return "No spacing data"
lines = []
for name, t in list(tokens.items())[:limit]:
d = t if isinstance(t, dict) else t.__dict__ if hasattr(t, '__dict__') else {}
val = d.get("value", "?")
px = d.get("value_px", "?")
freq = d.get("frequency", 0)
ctx = ", ".join((d.get("contexts") or [])[:3])
lines.append(f"- {val} ({px}px) {freq}x [{ctx}]")
return "\n".join(lines)
def _fmt_radius(tokens: dict, limit: int = 10) -> str:
if not tokens:
return "No radius data"
lines = []
for name, t in list(tokens.items())[:limit]:
d = t if isinstance(t, dict) else t.__dict__ if hasattr(t, '__dict__') else {}
val = d.get("value", "?")
px = d.get("value_px", "?")
freq = d.get("frequency", 0)
b4 = d.get("fits_base_4", False)
b8 = d.get("fits_base_8", False)
els = ", ".join((d.get("elements") or [])[:3])
lines.append(f"- {name}: {val} (base4={b4}, base8={b8}, {freq}x) [{els}]")
return "\n".join(lines)
def _fmt_shadows(tokens: dict, limit: int = 10) -> str:
if not tokens:
return "No shadow data"
lines = []
for name, t in list(tokens.items())[:limit]:
d = t if isinstance(t, dict) else t.__dict__ if hasattr(t, '__dict__') else {}
blur = d.get("blur_px", "?")
y = d.get("y_offset_px", "?")
freq = d.get("frequency", 0)
els = ", ".join((d.get("elements") or [])[:3])
lines.append(f"- {name}: blur={blur}px y={y}px ({freq}x) [{els}]")
return "\n".join(lines)
def _log_reasoning(steps: list, log_fn: Callable):
"""Log ReAct reasoning steps with full content (no truncation)."""
icons = {"THINK": "🧠", "ACT": "⚡", "OBSERVE": "👁️", "VERIFY": "✅"}
for step in (steps or []):
if isinstance(step, dict):
st = step.get("step", "?")
area = step.get("area", "")
content = step.get("content", "")
icon = icons.get(st, "📝")
# Show full reasoning — wrap long lines for readability
if len(content) > 120:
log_fn(f" {icon} [{st}] {area}:")
# Word-wrap at ~100 chars per line
words = content.split()
line = " "
for word in words:
if len(line) + len(word) + 1 > 105:
log_fn(line)
line = " " + word
else:
line = line + " " + word if line.strip() else " " + word
if line.strip():
log_fn(line)
else:
log_fn(f" {icon} [{st}] {area}: {content}")
def _extract_hexes(tokens: dict) -> list:
"""Get list of hex values from color token dict."""
hexes = []
for name, t in tokens.items():
if isinstance(t, dict):
h = t.get("value", "")
else:
h = getattr(t, "value", "")
if h:
hexes.append(h.lower())
return hexes
# =============================================================================
# AURORA — Brand Identifier (ReAct Framework)
# =============================================================================
class BrandIdentifierAgent:
    """
    AURORA — Senior Brand & Visual Identity Analyst.
    v3.1: ADVISORY ONLY — does NOT name colors (rule-based classifier does that).
    Provides brand insights, palette strategy, cohesion assessment.
    Model: Qwen 72B · Temperature: 0.4
    """
    # Sent verbatim as the system message. Defines the ReAct structure and the
    # exact JSON contract that _parse() reads back (reasoning_steps,
    # brand_primary/secondary/accent, palette_strategy, cohesion_*, naming_map,
    # *_notes, self_evaluation). NOTE(review): the prompt forbids naming_map up
    # top but the schema comment at the bottom allows an optional role-only one.
    SYSTEM_PROMPT = """You are AURORA, a Senior Brand & Visual Identity Analyst.
## YOUR ROLE (v3.1: Advisory Only)
Color NAMING is handled by a rule-based classifier. Do NOT output naming_map.
Your job is to provide INSIGHTS about the brand identity and design cohesion.
## REASONING FRAMEWORK (ReAct)
Structure your response with explicit reasoning steps.
For each area: THINK → ACT → OBSERVE → VERIFY.
## ANALYZE ALL TOKEN TYPES:
### 1. COLORS — Identify brand strategy (complementary? analogous? monochromatic?)
### 2. TYPOGRAPHY — Identify heading vs body hierarchy, font pairing quality
### 3. SPACING — Identify grid system, note consistency
### 4. RADIUS — Identify radius strategy (sharp/rounded/pill)
### 5. SHADOWS — Identify elevation strategy, blur progression
## QUALITY RULES
- Brand Primary MUST cite usage evidence (e.g. "47x on buttons")
- Cohesion 1-10: most sites score 5-7. Use the full range.
- Do NOT invent names. Focus on analysis and insights.
## OUTPUT (JSON)
{
"reasoning_steps": [
{"step": "THINK", "area": "colors", "content": "..."},
{"step": "ACT", "area": "colors", "content": "..."},
{"step": "OBSERVE", "area": "typography", "content": "..."},
{"step": "ACT", "area": "spacing", "content": "..."},
{"step": "ACT", "area": "radius", "content": "..."},
{"step": "ACT", "area": "shadows", "content": "..."},
{"step": "VERIFY", "area": "all", "content": "Cross-checking consistency..."}
],
"brand_primary": {"color": "#hex", "confidence": "high|medium|low", "reasoning": "cite evidence", "usage_count": N},
"brand_secondary": {"color": "#hex", "confidence": "...", "reasoning": "..."},
"brand_accent": {"color": "#hex or null", "confidence": "...", "reasoning": "..."},
"palette_strategy": "complementary|analogous|triadic|monochromatic|random",
"cohesion_score": N,
"cohesion_notes": "...",
"naming_map": {}, // Optional: ONLY semantic role suggestions (brand.primary, text.secondary, etc.)
"typography_notes": "Heading: Inter 700, Body: Inter 400. Clean hierarchy.",
"spacing_notes": "8px grid, 92% aligned.",
"radius_notes": "Rounded style: 4px inputs, 8px cards.",
"shadow_notes": "3-level elevation: blur 4/8/24px.",
"self_evaluation": {"confidence": N, "reasoning": "...", "data_quality": "good|fair|poor", "flags": []}
}
Return ONLY valid JSON."""
    # User-message template; each {*_data} slot is filled by the module-level
    # _fmt_* helpers in analyze().
    PROMPT_TEMPLATE = """Analyze the complete design system.
## COLORS (with role_hints)
{color_data}
## TYPOGRAPHY
{typography_data}
## SPACING
{spacing_data}
## RADIUS
{radius_data}
## SHADOWS
{shadow_data}
Use ReAct for each area. If you see clear semantic roles (brand primary, text color, etc.), suggest them in naming_map. Otherwise leave naming_map empty — the rule-based classifier handles naming."""

    def __init__(self, hf_client):
        # hf_client: LLM client exposing complete_async(agent_name,
        # system_prompt, user_message, max_tokens, json_mode); injected.
        self.hf_client = hf_client

    async def analyze(
        self,
        color_tokens: dict,
        typography_tokens: Optional[dict] = None,
        spacing_tokens: Optional[dict] = None,
        radius_tokens: Optional[dict] = None,
        shadow_tokens: Optional[dict] = None,
        log_callback: Optional[Callable] = None,
    ) -> BrandIdentification:
        """Run AURORA over all token types and return its findings.

        Makes one LLM call, validates the parsed output with the Python critic
        (validate_aurora_output — defined elsewhere in this module), and
        retries once with the critic's feedback appended to the prompt when
        validation fails. On any exception a default BrandIdentification is
        returned so the parallel agent pipeline keeps going.

        Args:
            color_tokens: color token mapping (required).
            typography_tokens/spacing_tokens/radius_tokens/shadow_tokens:
                optional token mappings for the remaining token types.
            log_callback: optional sink for human-readable progress lines.
        """
        def log(msg):
            # Logging is a no-op when no callback was supplied.
            if log_callback:
                log_callback(msg)
        log(" 🎨 AURORA — Brand & Visual Identity (Qwen 72B)")
        log(" └─ ReAct: Analyzing colors + typography + spacing + radius + shadows...")
        prompt = self.PROMPT_TEMPLATE.format(
            color_data=_fmt_colors(color_tokens),
            typography_data=_fmt_typography(typography_tokens),
            spacing_data=_fmt_spacing(spacing_tokens),
            radius_data=_fmt_radius(radius_tokens),
            shadow_data=_fmt_shadows(shadow_tokens),
        )
        try:
            start = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="brand_identifier",
                system_prompt=self.SYSTEM_PROMPT,
                user_message=prompt,
                max_tokens=2000,
                json_mode=True,
            )
            dur = (datetime.now() - start).total_seconds()
            result = self._parse(response)
            # Critic validation: cross-check the LLM output against the actual
            # input hex values.
            input_hexes = _extract_hexes(color_tokens)
            passed, errors = validate_aurora_output(result, input_hexes)
            result.validation_passed = passed
            if not passed and result.retry_count == 0:
                # Single retry: re-ask with the critic's findings appended.
                log(f" ⚠️ Critic: {len(errors)} issues — retrying with feedback...")
                for e in errors[:3]:
                    log(f" └─ {e}")
                retry_prompt = prompt + "\n\n## CRITIC FEEDBACK — Fix:\n" + "\n".join(errors[:10])
                resp2 = await self.hf_client.complete_async(
                    agent_name="brand_identifier",
                    system_prompt=self.SYSTEM_PROMPT,
                    user_message=retry_prompt,
                    max_tokens=2000,
                    json_mode=True,
                )
                result = self._parse(resp2)
                result.retry_count = 1
                p2, e2 = validate_aurora_output(result, input_hexes)
                result.validation_passed = p2
                if not p2:
                    # Per the log text, downstream uses normalizer fallback
                    # names when the retry still fails.
                    log(f" ⚠️ Retry: still {len(e2)} issues — using normalizer fallback names")
            # Log reasoning chain
            log(f" ─────────────────────────────────────────")
            log(f" 🎨 AURORA — COMPLETE ({dur:.1f}s)")
            _log_reasoning(result.reasoning_trace, log)
            log(f" ├─ Brand Primary: {result.brand_primary.get('color', '?')} ({result.brand_primary.get('confidence', '?')})")
            log(f" ├─ Palette: {result.palette_strategy} · Cohesion: {result.cohesion_score}/10")
            log(f" ├─ Colors Named: {len(result.naming_map)}/{len(input_hexes)}")
            log(f" ├─ Typography: {result.typography_notes or 'N/A'}")
            log(f" ├─ Spacing: {result.spacing_notes or 'N/A'}")
            log(f" ├─ Radius: {result.radius_notes or 'N/A'}")
            log(f" ├─ Shadows: {result.shadow_notes or 'N/A'}")
            log(f" └─ Critic: {'✅ PASSED' if result.validation_passed else '⚠️ FALLBACK'}")
            return result
        except Exception as e:
            # Agents are best-effort: never propagate — return empty defaults.
            log(f" ⚠️ AURORA failed: {str(e)[:120]}")
            return BrandIdentification()

    def _parse(self, response: str) -> BrandIdentification:
        """Extract the first-to-last-brace JSON object from the raw response.

        Any regex/JSON failure is swallowed and a default (empty)
        BrandIdentification is returned.
        """
        try:
            # Greedy match: from the first '{' to the last '}' in the reply.
            m = re.search(r'\{[\s\S]*\}', response)
            if m:
                d = json.loads(m.group())
                return BrandIdentification(
                    brand_primary=d.get("brand_primary", {}),
                    brand_secondary=d.get("brand_secondary", {}),
                    brand_accent=d.get("brand_accent", {}),
                    palette_strategy=d.get("palette_strategy", "unknown"),
                    cohesion_score=d.get("cohesion_score", 5),
                    cohesion_notes=d.get("cohesion_notes", ""),
                    naming_map=d.get("naming_map", {}),
                    # semantic_names mirrors naming_map for backward compat.
                    semantic_names=d.get("naming_map", {}),
                    self_evaluation=d.get("self_evaluation", {}),
                    reasoning_trace=d.get("reasoning_steps", []),
                    typography_notes=d.get("typography_notes", ""),
                    spacing_notes=d.get("spacing_notes", ""),
                    radius_notes=d.get("radius_notes", ""),
                    shadow_notes=d.get("shadow_notes", ""),
                )
        except Exception:
            pass
        return BrandIdentification()
# =============================================================================
# ATLAS — Benchmark Advisor (ReAct Framework)
# =============================================================================
class BenchmarkAdvisorAgent:
    """
    ATLAS — Senior Design System Benchmark Analyst.
    ReAct comparison of ALL token types against industry benchmarks.
    Model: Llama 3.3 70B · Temperature: 0.25
    """
    # Sent verbatim as the system message; defines the ReAct structure and the
    # JSON contract that _parse() reads back (reasoning_steps, recommended_*,
    # alignment_changes, *_comparison, pros/cons, alternatives, self_evaluation).
    SYSTEM_PROMPT = """You are ATLAS, a Senior Design System Benchmark Analyst.
## REASONING FRAMEWORK (ReAct)
For EACH token type: THINK → ACT → OBSERVE → VERIFY.
Compare the user's values against benchmarks for:
1. TYPOGRAPHY — ratio, base size, scale pattern
2. SPACING — grid base, alignment, scale
3. COLORS — palette size, brand color usage
4. RADIUS — strategy (sharp/rounded/pill), tier count
5. SHADOWS — elevation levels, blur range
Then pick the BEST OVERALL FIT benchmark.
Max 4 alignment changes. If >85% match, say "already well-aligned".
## OUTPUT (JSON)
{
"reasoning_steps": [
{"step": "THINK", "area": "typography", "content": "User ratio 1.18 vs Material 1.25..."},
{"step": "ACT", "area": "typography", "content": "Material closest for type"},
{"step": "THINK", "area": "spacing", "content": "8px matches Material and Polaris"},
{"step": "ACT", "area": "spacing", "content": "Both aligned"},
{"step": "THINK", "area": "colors", "content": "25 colors vs Polaris 18..."},
{"step": "THINK", "area": "radius", "content": "4/8px tiers..."},
{"step": "THINK", "area": "shadows", "content": "3 levels vs Material 5..."},
{"step": "VERIFY", "area": "overall", "content": "Material best: 4/5 areas align"}
],
"recommended_benchmark": "material_design_3",
"recommended_benchmark_name": "Material Design 3",
"reasoning": "Best fit across all token types — cite data",
"alignment_changes": [
{"change": "Type scale", "from": "1.18", "to": "1.25", "effort": "medium", "token_type": "typography"}
],
"typography_comparison": {"user": "1.18", "benchmark": "1.25", "gap": "minor"},
"spacing_comparison": {"user": "8px", "benchmark": "8px", "gap": "aligned"},
"color_comparison": {"user": "25", "benchmark": "18", "gap": "reduce"},
"radius_comparison": {"user": "2 tiers", "benchmark": "3 tiers", "gap": "add xl"},
"shadow_comparison": {"user": "3 levels", "benchmark": "5 levels", "gap": "add 2"},
"pros_of_alignment": ["..."],
"cons_of_alignment": ["..."],
"alternative_benchmarks": [{"name": "Polaris", "reason": "..."}],
"self_evaluation": {"confidence": N, "reasoning": "...", "data_quality": "...", "flags": []}
}
Return ONLY valid JSON."""
    # User-message template; slots are filled from analyze() arguments plus
    # _fmt_benchmarks() for the benchmark list.
    PROMPT_TEMPLATE = """Compare this design system against benchmarks — ALL token types.
## CURRENT VALUES
- Type Scale Ratio: {user_ratio} | Base: {user_base}px | Sizes: {user_sizes}
- Spacing Grid: {user_spacing}px | Values: {spacing_values}
- Colors: {color_count} unique | Brand: {brand_info}
- Radius: {radius_data}
- Shadows: {shadow_data}
## BENCHMARKS
{benchmark_comparison}
Use ReAct per token type. Pick the best overall fit."""

    def __init__(self, hf_client):
        # hf_client: LLM client exposing complete_async(...); injected.
        self.hf_client = hf_client

    async def analyze(
        self,
        user_ratio: float, user_base: int, user_spacing: int,
        benchmark_comparisons: list,
        color_count: int = 0, brand_info: str = "",
        user_sizes: str = "", spacing_values: str = "",
        radius_data: str = "", shadow_data: str = "",
        log_callback: Optional[Callable] = None,
    ) -> BenchmarkAdvice:
        """Run ATLAS: one LLM call comparing all token types to benchmarks.

        Args:
            user_ratio/user_base/user_spacing: detected type ratio, base font
                size (px) and spacing grid base (px).
            benchmark_comparisons: pre-computed comparison objects; see
                _fmt_benchmarks() for the fields each must expose.
            color_count/brand_info/user_sizes/spacing_values: optional
                pre-formatted context strings/counts for the prompt.
            radius_data/shadow_data: pre-formatted token summaries.
            log_callback: optional sink for progress lines.

        Returns:
            Parsed BenchmarkAdvice, or an empty one on any exception
            (agents are best-effort and must not break the pipeline).
        """
        def log(msg):
            # No-op when no callback was supplied.
            if log_callback:
                log_callback(msg)
        log("")
        log(" 🏢 ATLAS — Benchmark Advisor (Llama 3.3 70B)")
        log(" └─ ReAct: Comparing typography + spacing + colors + radius + shadows...")
        prompt = self.PROMPT_TEMPLATE.format(
            user_ratio=user_ratio, user_base=user_base, user_spacing=user_spacing,
            user_sizes=user_sizes or "N/A",
            spacing_values=spacing_values or "N/A",
            color_count=color_count, brand_info=brand_info or "N/A",
            radius_data=radius_data or "No radius data",
            shadow_data=shadow_data or "No shadow data",
            benchmark_comparison=self._fmt_benchmarks(benchmark_comparisons),
        )
        try:
            start = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="benchmark_advisor",
                system_prompt=self.SYSTEM_PROMPT,
                user_message=prompt,
                max_tokens=1500,
                json_mode=True,
            )
            dur = (datetime.now() - start).total_seconds()
            result = self._parse(response)
            # Emit the visible reasoning chain followed by a summary tree.
            log(f" ─────────────────────────────────────────")
            log(f" 🏢 ATLAS — COMPLETE ({dur:.1f}s)")
            _log_reasoning(result.reasoning_trace, log)
            log(f" ├─ Recommended: {result.recommended_benchmark_name}")
            log(f" ├─ Changes: {len(result.alignment_changes)}")
            log(f" ├─ Typography: {result.typography_comparison}")
            log(f" ├─ Spacing: {result.spacing_comparison}")
            log(f" ├─ Colors: {result.color_comparison}")
            log(f" ├─ Radius: {result.radius_comparison}")
            log(f" └─ Shadows: {result.shadow_comparison}")
            return result
        except Exception as e:
            log(f" ⚠️ ATLAS failed: {str(e)[:120]}")
            return BenchmarkAdvice()

    def _fmt_benchmarks(self, comparisons: list) -> str:
        """Format up to 5 benchmark comparisons as numbered prompt lines.

        Each comparison must expose .overall_match_pct and a .benchmark with
        .icon, .name, .typography (dict), .spacing (dict) and .best_for (list).
        """
        lines = []
        for i, c in enumerate(comparisons[:5]):
            b = c.benchmark
            lines.append(f"{i+1}. {b.icon} {b.name} — Match: {c.overall_match_pct:.0f}%"
                         f" | Type: {b.typography.get('scale_ratio', '?')}"
                         f" | Spacing: {b.spacing.get('base', '?')}px"
                         f" | Best for: {', '.join(b.best_for)}")
        return "\n".join(lines) if lines else "No benchmark data"

    def _parse(self, response: str) -> BenchmarkAdvice:
        """Extract the first-to-last-brace JSON object from the raw response.

        Any failure yields an empty BenchmarkAdvice.
        """
        try:
            m = re.search(r'\{[\s\S]*\}', response)
            if m:
                d = json.loads(m.group())
                return BenchmarkAdvice(
                    recommended_benchmark=d.get("recommended_benchmark", ""),
                    recommended_benchmark_name=d.get("recommended_benchmark_name", ""),
                    reasoning=d.get("reasoning", ""),
                    alignment_changes=d.get("alignment_changes", []),
                    pros_of_alignment=d.get("pros_of_alignment", []),
                    cons_of_alignment=d.get("cons_of_alignment", []),
                    alternative_benchmarks=d.get("alternative_benchmarks", []),
                    self_evaluation=d.get("self_evaluation", {}),
                    typography_comparison=d.get("typography_comparison", {}),
                    spacing_comparison=d.get("spacing_comparison", {}),
                    color_comparison=d.get("color_comparison", {}),
                    radius_comparison=d.get("radius_comparison", {}),
                    shadow_comparison=d.get("shadow_comparison", {}),
                    reasoning_trace=d.get("reasoning_steps", []),
                )
        except Exception:
            pass
        return BenchmarkAdvice()
# =============================================================================
# SENTINEL — Best Practices Auditor (ReAct + Grounded Scoring)
# =============================================================================
class BestPracticesValidatorAgent:
    """
    SENTINEL — Design System Best Practices Auditor.
    ReAct: Grounds EVERY score in actual rule-engine data. Audits ALL token types.
    Model: Qwen 72B · Temperature: 0.2
    """
    # Sent verbatim as the system message; defines the scoring rubric
    # (colors 25 / typography 25 / spacing 20 / radius 15 / shadows 15 pts)
    # and the JSON contract that _parse() reads back.
    SYSTEM_PROMPT = """You are SENTINEL, a Design System Best Practices Auditor.
## REASONING FRAMEWORK (ReAct + Grounded)
For EACH check: THINK → ACT (cite data) → OBSERVE → VERIFY.
You MUST CITE the exact input data for every score.
## AUDIT ALL TOKEN TYPES:
### COLORS (25 pts)
- aa_compliance: CITE AA pass/fail count
- color_count: < 20 semantic colors ideal
- near_duplicates: should be 0
### TYPOGRAPHY (25 pts)
- type_scale_standard: nearest standard ratio
- type_scale_consistent: variance check
- base_size_accessible: >= 16px
### SPACING (20 pts)
- spacing_grid: 4px or 8px consistency
- spacing_alignment: > 80% target
### RADIUS (15 pts)
- radius_consistency: base-4/8 grid, clear tiers
### SHADOWS (15 pts)
- shadow_system: elevation hierarchy, blur progression
## CRITICAL: If data says 7 AA failures, you CANNOT say "pass".
## OUTPUT (JSON)
{
"reasoning_steps": [
{"step": "THINK", "area": "colors", "content": "7/25 fail AA = 28%"},
{"step": "ACT", "area": "colors", "content": "aa_compliance = FAIL"},
{"step": "THINK", "area": "typography", "content": "ratio 1.18, variance 0.22"},
{"step": "ACT", "area": "typography", "content": "type_scale_consistent = WARN"},
{"step": "THINK", "area": "spacing", "content": "8px base, 85% aligned"},
{"step": "ACT", "area": "spacing", "content": "spacing_grid = PASS"},
{"step": "THINK", "area": "radius", "content": "4px,8px,16px all base-4"},
{"step": "ACT", "area": "radius", "content": "radius_consistency = PASS"},
{"step": "THINK", "area": "shadows", "content": "3 levels, blur 4→8→24"},
{"step": "ACT", "area": "shadows", "content": "shadow_system = WARN"},
{"step": "VERIFY", "area": "scoring", "content": "3 pass, 2 warn, 1 fail → 62/100"}
],
"overall_score": N,
"checks": {
"aa_compliance": {"status": "pass|warn|fail", "note": "CITE: 7/25 fail AA"},
"type_scale_standard": {"status": "...", "note": "CITE: ratio 1.18 nearest 1.2"},
"type_scale_consistent": {"status": "...", "note": "CITE: variance 0.22 > 0.15"},
"base_size_accessible": {"status": "...", "note": "CITE: base = Npx"},
"spacing_grid": {"status": "...", "note": "CITE: N% aligned to Npx"},
"color_count": {"status": "...", "note": "CITE: N unique colors"},
"near_duplicates": {"status": "...", "note": "CITE: N pairs"},
"radius_consistency": {"status": "...", "note": "CITE: tiers and grid"},
"shadow_system": {"status": "...", "note": "CITE: N levels, progression"}
},
"color_assessment": {"aa_pass_rate": "72%", "palette_size": 25, "verdict": "needs work"},
"typography_assessment": {"ratio": 1.18, "consistent": false, "base_ok": true, "verdict": "fair"},
"spacing_assessment": {"grid": "8px", "alignment": "85%", "verdict": "good"},
"radius_assessment": {"tiers": 3, "base_aligned": true, "verdict": "good"},
"shadow_assessment": {"levels": 3, "progression": "non-linear", "verdict": "fair"},
"priority_fixes": [
{"rank": 1, "issue": "...", "impact": "high", "effort": "low", "action": "Specific fix", "token_type": "color"}
],
"passing_practices": ["spacing_grid"],
"failing_practices": ["aa_compliance"],
"self_evaluation": {"confidence": N, "reasoning": "...", "data_quality": "...", "flags": []}
}
Return ONLY valid JSON."""
    # User-message template; slots are filled from the rule-engine results in
    # analyze() so the LLM can only cite verified numbers.
    PROMPT_TEMPLATE = """Audit this design system. CITE the data for every score.
## RULE ENGINE FACTS (verified)
### Typography
- Ratio: {type_ratio} ({type_consistent}) | Base: {base_size}px | Sizes: {sizes}
### Accessibility
- Total: {total_colors} | AA Pass: {aa_pass} | AA Fail: {aa_fail}
- Failing: {failing_colors}
### Spacing
- Base: {spacing_base}px | Aligned: {spacing_aligned}% | Values: {spacing_values}
### Color Stats
- Unique: {unique_colors} | Near-Duplicates: {near_duplicates}
### Radius
{radius_data}
### Shadows
{shadow_data}
CITE the EXACT numbers above for every check."""

    def __init__(self, hf_client):
        # hf_client: LLM client exposing complete_async(...); injected.
        self.hf_client = hf_client

    async def analyze(
        self,
        rule_engine_results: Any,
        radius_tokens: Optional[dict] = None,
        shadow_tokens: Optional[dict] = None,
        log_callback: Optional[Callable] = None,
    ) -> BestPracticesResult:
        """Run SENTINEL: audit the system against best practices.

        rule_engine_results must expose .typography (detected_ratio,
        is_consistent, variance, sizes_px), .spacing (detected_base,
        alignment_percentage, optionally current_values), .color_stats
        (unique_count, near_duplicates) and .accessibility (items with
        hex_color, contrast_on_white, passes_aa_normal). The parsed output is
        cross-checked by validate_sentinel_output and, on failure, corrected
        by _apply_sentinel_fixes (both defined elsewhere in this module).
        Returns an empty BestPracticesResult on any exception.
        """
        def log(msg):
            # No-op when no callback was supplied.
            if log_callback:
                log_callback(msg)
        log("")
        log(" ✅ SENTINEL — Best Practices Auditor (Qwen 72B)")
        log(" └─ ReAct: Auditing colors + typography + spacing + radius + shadows...")
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility
        # AA failures feed both the prompt facts and the pass/fail counts.
        failures = [a for a in accessibility if not a.passes_aa_normal]
        failing_str = ", ".join([f"{a.hex_color} ({a.contrast_on_white:.1f}:1)" for a in failures[:8]])
        sizes_str = ", ".join([f"{s}px" for s in typo.sizes_px[:8]]) if typo.sizes_px else "N/A"
        # current_values is optional on the spacing result — guard with hasattr.
        sp_vals = ", ".join([f"{v}px" for v in spacing.current_values[:10]]) if hasattr(spacing, 'current_values') and spacing.current_values else "N/A"
        prompt = self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_consistent="consistent" if typo.is_consistent else f"inconsistent (var={typo.variance:.2f})",
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            sizes=sizes_str,
            total_colors=len(accessibility),
            aa_pass=len(accessibility) - len(failures),
            aa_fail=len(failures),
            failing_colors=failing_str or "None",
            spacing_base=spacing.detected_base,
            spacing_aligned=f"{spacing.alignment_percentage:.0f}",
            spacing_values=sp_vals,
            unique_colors=color_stats.unique_count,
            near_duplicates=len(color_stats.near_duplicates),
            radius_data=_fmt_radius(radius_tokens) if radius_tokens else "No radius data",
            shadow_data=_fmt_shadows(shadow_tokens) if shadow_tokens else "No shadow data",
        )
        try:
            start = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="best_practices_validator",
                system_prompt=self.SYSTEM_PROMPT,
                user_message=prompt,
                max_tokens=2000,
                json_mode=True,
            )
            dur = (datetime.now() - start).total_seconds()
            result = self._parse(response)
            # Critic cross-reference: the LLM's verdicts must match the
            # rule-engine facts; mismatches are corrected in place.
            passed, errors = validate_sentinel_output(result, rule_engine_results)
            result.validation_passed = passed
            if not passed:
                log(f" ⚠️ Critic: {len(errors)} issues — applying fixes...")
                for e in errors[:3]:
                    log(f" └─ {e}")
                result = _apply_sentinel_fixes(result, rule_engine_results, errors)
            # Emit the reasoning chain and a per-check summary tree.
            log(f" ─────────────────────────────────────────")
            log(f" ✅ SENTINEL — COMPLETE ({dur:.1f}s)")
            _log_reasoning(result.reasoning_trace, log)
            log(f" ├─ Overall Score: {result.overall_score}/100")
            for cn, cv in (result.checks or {}).items():
                if isinstance(cv, dict):
                    s = cv.get("status", "?")
                    si = {"pass": "✅", "warn": "⚠️", "fail": "❌"}.get(s, "?")
                    log(f" │ {si} {cn}: {s}")
            log(f" ├─ Priority Fixes: {len(result.priority_fixes)}")
            log(f" └─ Critic: {'✅ PASSED' if result.validation_passed else '⚠️ FIXED'}")
            return result
        except Exception as e:
            # Agents are best-effort: never propagate — return empty defaults.
            log(f" ⚠️ SENTINEL failed: {str(e)[:120]}")
            return BestPracticesResult()

    def _parse(self, response: str) -> BestPracticesResult:
        """Extract the first-to-last-brace JSON object from the raw response.

        Any failure yields a default BestPracticesResult (score 50).
        """
        try:
            m = re.search(r'\{[\s\S]*\}', response)
            if m:
                d = json.loads(m.group())
                return BestPracticesResult(
                    overall_score=d.get("overall_score", 50),
                    checks=d.get("checks", {}),
                    priority_fixes=d.get("priority_fixes", []),
                    passing_practices=d.get("passing_practices", []),
                    failing_practices=d.get("failing_practices", []),
                    self_evaluation=d.get("self_evaluation", {}),
                    color_assessment=d.get("color_assessment", {}),
                    typography_assessment=d.get("typography_assessment", {}),
                    spacing_assessment=d.get("spacing_assessment", {}),
                    radius_assessment=d.get("radius_assessment", {}),
                    shadow_assessment=d.get("shadow_assessment", {}),
                    reasoning_trace=d.get("reasoning_steps", []),
                )
        except Exception:
            pass
        return BestPracticesResult()
# =============================================================================
# NEXUS — HEAD Synthesizer (Tree of Thought)
# =============================================================================
class HeadSynthesizerAgent:
    """
    NEXUS — Senior Design System Architect.
    Tree of Thought: 2 perspectives, picks best, compiles all agent outputs.
    Recommendations for ALL token types.
    Model: Llama 3.3 70B · Temperature: 0.3
    """
    # System prompt: instructs the model to score from two weighted
    # perspectives (Tree of Thought), choose one, and emit a single JSON
    # object whose keys mirror the HeadSynthesis fields read by _parse().
    SYSTEM_PROMPT = """You are NEXUS, a Senior Design System Architect — the final synthesizer.
## REASONING FRAMEWORK (Tree of Thought)
Evaluate TWO perspectives:
### PERSPECTIVE A — Accessibility-First
Weights: accessibility=40%, consistency=30%, organization=30%
Penalize heavily for AA failures.
### PERSPECTIVE B — Balanced
Weights: accessibility=30%, consistency=35%, organization=35%
Equal emphasis across areas.
For each: calculate scores, determine top 3 actions.
Then CHOOSE the perspective that better reflects reality.
## SYNTHESIZE ALL TOKEN TYPES:
- Colors: AURORA brand + SENTINEL AA findings → color recommendations
- Typography: ATLAS benchmark match + SENTINEL scale audit → type scale rec
- Spacing: ATLAS grid comparison + SENTINEL alignment → spacing rec
- Radius: SENTINEL consistency + ATLAS benchmark → radius rec
- Shadows: SENTINEL elevation + ATLAS benchmark → shadow rec
## OUTPUT (JSON)
{
"reasoning_steps": [
{"step": "THINK", "area": "perspective_a", "content": "Accessibility-first weighting..."},
{"step": "ACT", "area": "perspective_a", "content": "Score: overall=52..."},
{"step": "THINK", "area": "perspective_b", "content": "Balanced weighting..."},
{"step": "ACT", "area": "perspective_b", "content": "Score: overall=63..."},
{"step": "OBSERVE", "area": "comparison", "content": "A shows severity of AA failures..."},
{"step": "VERIFY", "area": "decision", "content": "Choosing A — honest about AA issues"}
],
"perspective_a": {"scores": {"overall": 52, "accessibility": 38, "consistency": 72, "organization": 68}, "reasoning": "..."},
"perspective_b": {"scores": {"overall": 63, "accessibility": 45, "consistency": 72, "organization": 68}, "reasoning": "..."},
"chosen_perspective": "A",
"choice_reasoning": "AA failures affect real users — lower score is more honest",
"executive_summary": "Your design system scores X/100...",
"scores": {"overall": 52, "accessibility": 38, "consistency": 72, "organization": 68},
"top_3_actions": [
{"action": "Fix AA compliance", "impact": "high", "effort": "medium", "details": "#X→#Y", "token_type": "color"}
],
"color_recommendations": [
{"role": "brand.primary", "current": "#hex", "suggested": "#hex", "reason": "AA", "accept": true}
],
"type_scale_recommendation": {"current_ratio": 1.18, "recommended_ratio": 1.25, "reason": "..."},
"spacing_recommendation": {"current": "8px", "recommended": "8px", "reason": "Already aligned"},
"radius_recommendation": {"current": "3 tiers", "recommended": "Add xl tier", "reason": "..."},
"shadow_recommendation": {"current": "3 levels", "recommended": "Add 2 more", "reason": "..."},
"benchmark_fit": {"closest": "Material", "similarity": "78%", "recommendation": "..."},
"brand_analysis": {"primary": "#hex", "secondary": "#hex", "cohesion": 7},
"self_evaluation": {"confidence": N, "reasoning": "...", "data_quality": "...", "flags": []}
}
Return ONLY valid JSON."""
    # User-message template; every placeholder is filled in synthesize()
    # from rule-engine facts plus AURORA / ATLAS / SENTINEL outputs.
    PROMPT_TEMPLATE = """Synthesize all analysis into a final report.
## RULE ENGINE FACTS
- Type: {type_ratio} ({type_status}) | Base: {base_size}px
- AA Failures: {aa_failures}/{total_colors}
- Spacing: {spacing_status}
- Colors: {unique_colors} unique | Consistency: {consistency_score}/100
- Radius: {radius_facts}
- Shadows: {shadow_facts}
## AURORA — Brand Analysis
- Primary: {brand_primary} ({brand_confidence}) | Secondary: {brand_secondary}
- Palette: {palette_strategy} | Cohesion: {cohesion_score}/10
- Typography: {aurora_typo}
- Spacing: {aurora_spacing}
- Radius: {aurora_radius}
- Shadows: {aurora_shadows}
## ATLAS — Benchmark
- Closest: {closest_benchmark} ({match_pct}%)
- Typo: {atlas_typo} | Spacing: {atlas_spacing} | Colors: {atlas_colors}
- Radius: {atlas_radius} | Shadows: {atlas_shadows}
- Changes: {benchmark_changes}
## SENTINEL — Audit
- Score: {best_practices_score}/100
- Color: {sentinel_color} | Typo: {sentinel_typo} | Spacing: {sentinel_spacing}
- Radius: {sentinel_radius} | Shadows: {sentinel_shadows}
- Fixes: {priority_fixes}
## AA FIXES NEEDED
{accessibility_fixes}
Evaluate from TWO perspectives (Tree of Thought). Choose one. Recommend for ALL token types."""

    def __init__(self, hf_client):
        # hf_client must expose complete_async(agent_name=..., system_prompt=...,
        # user_message=..., max_tokens=..., json_mode=...) — see synthesize().
        self.hf_client = hf_client

    async def synthesize(
        self,
        rule_engine_results: Any,
        benchmark_comparisons: list,
        brand_identification: BrandIdentification,
        benchmark_advice: BenchmarkAdvice,
        best_practices: BestPracticesResult,
        log_callback: Optional[Callable] = None,
    ) -> HeadSynthesis:
        """Compile all Stage 2 agent outputs into the final HeadSynthesis.

        Builds one prompt from rule-engine facts + AURORA/ATLAS/SENTINEL
        results, makes a single LLM call, parses the JSON reply, and streams
        a human-readable progress summary via log_callback.

        Args:
            rule_engine_results: Deterministic Stage 1 facts (must expose
                typography, spacing, color_stats, accessibility,
                consistency_score).
            benchmark_comparisons: Benchmark match objects; element [0] is
                treated as the closest match — assumes the list is pre-sorted
                by the caller (TODO confirm at call site).
            brand_identification: AURORA output.
            benchmark_advice: ATLAS output.
            best_practices: SENTINEL output.
            log_callback: Optional sink for UI log lines; no-op when None.

        Returns:
            Parsed HeadSynthesis, or an empty HeadSynthesis on any failure.
        """
        def log(msg):
            # Forward to the UI only when a sink was provided.
            if log_callback:
                log_callback(msg)
        log("")
        log("═" * 60)
        log("🧠 NEXUS — HEAD SYNTHESIZER (Tree of Thought)")
        log("═" * 60)
        log(" Evaluating Perspective A (Accessibility-First) vs B (Balanced)...")
        log(" Compiling: Rule Engine + AURORA + ATLAS + SENTINEL...")
        # Deterministic facts from the Stage 1 rule engine.
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility
        failures = [a for a in accessibility if not a.passes_aa_normal]
        # Human-readable AA fix list: first 8 failures that carry a suggested fix.
        aa_fixes_str = "\n".join([
            f"- {a.name}: {a.hex_color} ({a.contrast_on_white:.1f}:1) → {a.suggested_fix} ({a.suggested_fix_contrast:.1f}:1)"
            for a in failures[:8] if a.suggested_fix
        ])
        closest = benchmark_comparisons[0] if benchmark_comparisons else None
        def _s(obj):
            """Safely stringify a dict/value for prompt."""
            # Dicts are truncated to 4 key=value pairs; any falsy value → "N/A".
            if isinstance(obj, dict):
                parts = [f"{k}={v}" for k, v in list(obj.items())[:4]]
                return ", ".join(parts) if parts else "N/A"
            return str(obj) if obj else "N/A"
        # Fill every template placeholder; a missing kwarg would raise KeyError.
        prompt = self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_status="consistent" if typo.is_consistent else "inconsistent",
            # First detected size is treated as the base size — assumes
            # sizes_px is sorted ascending (TODO confirm in rule engine).
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            aa_failures=len(failures), total_colors=len(accessibility),
            spacing_status=f"{spacing.detected_base}px, {spacing.alignment_percentage:.0f}% aligned",
            unique_colors=color_stats.unique_count,
            consistency_score=rule_engine_results.consistency_score,
            radius_facts=_s(best_practices.radius_assessment) or "N/A",
            shadow_facts=_s(best_practices.shadow_assessment) or "N/A",
            brand_primary=brand_identification.brand_primary.get("color", "?"),
            brand_confidence=brand_identification.brand_primary.get("confidence", "?"),
            brand_secondary=brand_identification.brand_secondary.get("color", "?"),
            palette_strategy=brand_identification.palette_strategy,
            cohesion_score=brand_identification.cohesion_score,
            aurora_typo=brand_identification.typography_notes or "N/A",
            aurora_spacing=brand_identification.spacing_notes or "N/A",
            aurora_radius=brand_identification.radius_notes or "N/A",
            aurora_shadows=brand_identification.shadow_notes or "N/A",
            closest_benchmark=closest.benchmark.name if closest else "?",
            match_pct=f"{closest.overall_match_pct:.0f}" if closest else "0",
            atlas_typo=_s(benchmark_advice.typography_comparison),
            atlas_spacing=_s(benchmark_advice.spacing_comparison),
            atlas_colors=_s(benchmark_advice.color_comparison),
            atlas_radius=_s(benchmark_advice.radius_comparison),
            atlas_shadows=_s(benchmark_advice.shadow_comparison),
            benchmark_changes="; ".join([c.get("change", "") for c in benchmark_advice.alignment_changes[:4]]),
            best_practices_score=best_practices.overall_score,
            sentinel_color=_s(best_practices.color_assessment),
            sentinel_typo=_s(best_practices.typography_assessment),
            sentinel_spacing=_s(best_practices.spacing_assessment),
            sentinel_radius=_s(best_practices.radius_assessment),
            sentinel_shadows=_s(best_practices.shadow_assessment),
            priority_fixes="; ".join([f.get("issue", "") for f in best_practices.priority_fixes[:5]]),
            accessibility_fixes=aa_fixes_str or "None needed",
        )
        try:
            start = datetime.now()
            response = await self.hf_client.complete_async(
                agent_name="head_synthesizer",
                system_prompt=self.SYSTEM_PROMPT,
                user_message=prompt,
                max_tokens=2500,
                json_mode=True,
            )
            dur = (datetime.now() - start).total_seconds()
            result = self._parse(response)
            # Emit the reasoning trace and a per-section summary for the UI.
            log("")
            log(f" 🧠 NEXUS — COMPLETE ({dur:.1f}s)")
            _log_reasoning(result.reasoning_trace, log)
            pa = result.perspective_a.get("scores", {}).get("overall", "?") if result.perspective_a else "?"
            pb = result.perspective_b.get("scores", {}).get("overall", "?") if result.perspective_b else "?"
            log(f" ├─ Perspective A: {pa}/100")
            log(f" ├─ Perspective B: {pb}/100")
            log(f" ├─ Chosen: {result.chosen_perspective}")
            log(f" ├─ Why: {result.choice_reasoning or 'N/A'}")
            log(f" ├─ Final Score: {result.scores.get('overall', '?')}/100" if result.scores else " ├─ Scores: N/A")
            log(f" ├─ Actions: {len(result.top_3_actions)} | Color Recs: {len(result.color_recommendations)}")
            log(f" ├─ Typography: {_s(result.type_scale_recommendation)}")
            log(f" ├─ Spacing: {_s(result.spacing_recommendation)}")
            log(f" ├─ Radius: {_s(result.radius_recommendation)}")
            log(f" └─ Shadows: {_s(result.shadow_recommendation)}")
            log("")
            return result
        except Exception as e:
            # Fail soft: a NEXUS failure must not break the whole pipeline.
            log(f" ⚠️ NEXUS failed: {str(e)[:120]}")
            return HeadSynthesis()

    def _parse(self, response: str) -> HeadSynthesis:
        """Extract the first {...} JSON span from the LLM reply into a
        HeadSynthesis; returns an empty HeadSynthesis on any parse failure."""
        try:
            m = re.search(r'\{[\s\S]*\}', response)
            if m:
                d = json.loads(m.group())
                return HeadSynthesis(
                    executive_summary=d.get("executive_summary", ""),
                    scores=d.get("scores", {}),
                    benchmark_fit=d.get("benchmark_fit", {}),
                    brand_analysis=d.get("brand_analysis", {}),
                    top_3_actions=d.get("top_3_actions", []),
                    color_recommendations=d.get("color_recommendations", []),
                    type_scale_recommendation=d.get("type_scale_recommendation", {}),
                    spacing_recommendation=d.get("spacing_recommendation", {}),
                    radius_recommendation=d.get("radius_recommendation", {}),
                    shadow_recommendation=d.get("shadow_recommendation", {}),
                    self_evaluation=d.get("self_evaluation", {}),
                    perspective_a=d.get("perspective_a", {}),
                    perspective_b=d.get("perspective_b", {}),
                    chosen_perspective=d.get("chosen_perspective", ""),
                    choice_reasoning=d.get("choice_reasoning", ""),
                    reasoning_trace=d.get("reasoning_steps", []),
                )
        except Exception:
            pass
        return HeadSynthesis()
# =============================================================================
# CRITIC / VALIDATOR FUNCTIONS (Rule-based, no LLM)
# =============================================================================
def validate_aurora_output(output: "BrandIdentification", input_hexes: list) -> tuple:
    """Validate AURORA's naming_map. Returns (passed, errors).

    Rule-based critic (no LLM). Checks:
      1. Every input hex has a name — key match is case-insensitive, since
         the LLM may echo hexes back in a different case than the input.
      2. No word-based shades (light/dark/...) — numeric shades only.
      3. No duplicate token names.
      4. Names follow the ``color.X.Y`` convention with 3+ dot parts.

    Args:
        output: AURORA result whose ``naming_map`` maps hex -> token name.
        input_hexes: All hex colors that were sent to AURORA.

    Returns:
        (passed, errors) — ``passed`` is True when ``errors`` is empty.
    """
    errors = []
    nm = output.naming_map or {}
    # All input colors must have names.
    # FIX: the old check lowercased only the input hex, so upper/mixed-case
    # keys from the LLM produced false "Missing name" errors. Compare
    # case-insensitively on both sides.
    lowered_keys = {str(k).lower() for k in nm}
    for h in input_hexes:
        if h not in nm and h.lower() not in lowered_keys:
            errors.append(f"Missing name for {h}")
    # No word-based shades
    bad_words = {"light", "dark", "base", "muted", "deep", "lighter", "darker"}
    for name in nm.values():
        for part in name.split("."):
            if part.lower() in bad_words:
                errors.append(f"Word shade '{part}' in {name}")
    # No duplicates
    seen = set()
    for n in nm.values():
        if n in seen:
            errors.append(f"Duplicate: {n}")
        seen.add(n)
    # Convention: color.X.Y
    for name in nm.values():
        if not name.startswith("color."):
            errors.append(f"'{name}' must start with 'color.'")
        if len(name.split(".")) < 3:
            errors.append(f"'{name}' needs 3+ parts")
    return len(errors) == 0, errors
def validate_sentinel_output(output: "BestPracticesResult", rule_engine) -> tuple:
    """Cross-reference SENTINEL scores against rule engine data.

    Deterministic critic (no LLM). Flags:
      - aa_compliance marked 'pass' while the rule engine found AA failures
      - overall_score non-numeric or outside 0-100
      - overall_score implausibly high given the number of failing checks
      - base font size marked accessible while below 16px

    Returns:
        (passed, errors) — ``passed`` is True when ``errors`` is empty.
    """
    errors = []
    checks = output.checks or {}
    # AA status must agree with the rule engine's contrast results.
    accessibility = rule_engine.accessibility
    aa_failures = len([a for a in accessibility if not a.passes_aa_normal])
    aa_check = checks.get("aa_compliance", {})
    if aa_failures > 0 and isinstance(aa_check, dict) and aa_check.get("status") == "pass":
        errors.append(f"aa_compliance='pass' but {aa_failures} fail AA")
    # FIX: the LLM may return overall_score as a string (e.g. "85"); the old
    # comparison raised TypeError, which wiped the whole SENTINEL result
    # upstream. Treat any non-numeric score as out of range instead.
    score = output.overall_score
    is_numeric = isinstance(score, (int, float))
    if not is_numeric or not (0 <= score <= 100):
        errors.append(f"Score {score} out of 0-100 range")
    fail_count = sum(1 for c in checks.values() if isinstance(c, dict) and c.get("status") == "fail")
    if fail_count >= 3 and is_numeric and score > 70:
        errors.append(f"Score {score} too high with {fail_count} failures")
    # Base size: first detected px size is treated as the base — assumes
    # sizes_px is sorted ascending (TODO confirm in rule engine).
    typo = rule_engine.typography
    base_size = typo.sizes_px[0] if typo.sizes_px else 16
    base_check = checks.get("base_size_accessible", {})
    if base_size < 16 and isinstance(base_check, dict) and base_check.get("status") == "pass":
        errors.append(f"base_size 'pass' but {base_size}px < 16")
    return len(errors) == 0, errors
def _apply_sentinel_fixes(result: "BestPracticesResult", rule_engine, errors: list) -> "BestPracticesResult":
    """Deterministic fixes when critic finds issues.

    Mutates ``result`` in place and returns it:
      - Downgrades an incorrect aa_compliance 'pass' to 'fail'.
      - Caps overall_score when flagged as implausibly high.
      - Clamps overall_score to 0-100 and marks validation as passed.
    """
    accessibility = rule_engine.accessibility
    failures = [a for a in accessibility if not a.passes_aa_normal]
    for err in errors:
        if "aa_compliance" in err and "pass" in err:
            if "aa_compliance" in result.checks:
                # FIX: the validator tolerates non-dict check values, but the
                # old code indexed into them and could raise TypeError here.
                if not isinstance(result.checks["aa_compliance"], dict):
                    result.checks["aa_compliance"] = {}
                result.checks["aa_compliance"]["status"] = "fail"
                result.checks["aa_compliance"]["note"] = f"CORRECTED: {len(failures)} fail AA"
        if "too high" in err.lower():
            fail_count = sum(1 for c in result.checks.values() if isinstance(c, dict) and c.get("status") == "fail")
            # Cap: lose 15 points per failing check, floored at 30.
            max_s = max(30, 100 - fail_count * 15)
            if isinstance(result.overall_score, (int, float)) and result.overall_score > max_s:
                result.overall_score = max_s
    # FIX: clamp used to raise TypeError on a non-numeric LLM score, which
    # wiped the whole SENTINEL result upstream; reset such scores to 0.
    score = result.overall_score
    if not isinstance(score, (int, float)):
        score = 0
    result.overall_score = max(0, min(100, score))
    result.validation_passed = True
    return result
def filter_aurora_naming_map(aurora: BrandIdentification) -> dict:
    """Keep only AURORA's role-based (semantic) color assignments.

    AURORA is a secondary naming authority: it may assign semantic roles
    (brand.primary, text.secondary, bg.primary, feedback.error, ...) but
    must not override palette names (blue.500, neutral.700, ...) — the
    color_classifier remains the primary naming authority.

    Returns:
        Dict of hex -> semantic_name (only role-based names).
    """
    role_prefixes = ('brand.', 'text.', 'bg.', 'border.', 'feedback.')
    semantic_only = {}
    for raw_hex, raw_name in (aurora.naming_map or {}).items():
        key = str(raw_hex).strip().lower()
        # Skip entries without a name or without a #-prefixed hex key.
        if not raw_name or not key.startswith('#'):
            continue
        # Normalize to the "color." namespace before inspecting the role.
        full_name = raw_name if raw_name.startswith('color.') else f'color.{raw_name}'
        tail = full_name[6:]  # strip the "color." prefix
        if any(tail.startswith(prefix) for prefix in role_prefixes):
            semantic_only[key] = full_name
    return semantic_only
def post_validate_stage2(
    aurora: "BrandIdentification",
    sentinel: "BestPracticesResult",
    nexus: "HeadSynthesis",
    rule_engine: Any,
) -> list:
    """Final deterministic checks after ALL agents. Returns issues list.

    Flags: malformed AURORA names, NEXUS scores outside 0-100, a NEXUS
    accessibility score inconsistent with rule-engine AA failures, and
    color recommendations whose hex values lack the '#' prefix.
    """
    issues = []
    # FIX: the old regex required exactly 3 dot parts, rejecting 4+-part
    # names (e.g. color.text.on.primary) that validate_aurora_output's
    # "3+ parts" rule explicitly allows. Allow 3 or more parts.
    for name in (aurora.naming_map or {}).values():
        if not re.match(r'^color\.\w+(\.\w+)+$', name):
            issues.append(f"Bad name: {name}")
    # All numeric NEXUS scores must be within 0-100.
    for key, val in (nexus.scores or {}).items():
        if isinstance(val, (int, float)) and not (0 <= val <= 100):
            issues.append(f"Score {key}={val} OOB")
    # NEXUS accessibility score must reflect rule-engine AA failures.
    aa_failures = len([a for a in rule_engine.accessibility if not a.passes_aa_normal])
    n_acc = nexus.scores.get("accessibility", 50) if nexus.scores else 50
    if aa_failures > 3 and n_acc > 85:
        issues.append(f"Nexus accessibility={n_acc} but {aa_failures} AA failures")
    # Color recommendations must use #-prefixed hex values.
    # (Loop var renamed from `field` — it shadowed dataclasses.field.)
    for rec in (nexus.color_recommendations or []):
        for side in ("current", "suggested"):
            v = rec.get(side, "")
            if v and not v.startswith("#"):
                issues.append(f"Color rec {side} missing #: {v}")
    return issues