riazmo committed on
Commit
5cc6b41
·
verified ·
1 Parent(s): 13b3320

Delete agents/llm_agents.py

Browse files
Files changed (1) hide show
  1. agents/llm_agents.py +0 -904
agents/llm_agents.py DELETED
@@ -1,904 +0,0 @@
1
- """
2
- Stage 2 LLM Agents — Specialized Analysis Tasks
3
- =================================================
4
-
5
- These agents handle tasks that REQUIRE LLM reasoning:
6
- - Brand Identifier: Identify brand colors from usage context
7
- - Benchmark Advisor: Recommend best-fit design system
8
- - Best Practices Validator: Prioritize fixes by business impact
9
- - HEAD Synthesizer: Combine all outputs into final recommendations
10
-
11
- Each agent has a focused prompt for its specific task.
12
- """
13
-
14
- import json
15
- import re
16
- from dataclasses import dataclass, field
17
- from typing import Optional, Callable, Any
18
- from datetime import datetime
19
-
20
-
21
- # =============================================================================
22
- # DATA CLASSES
23
- # =============================================================================
24
-
25
@dataclass
class BrandIdentification:
    """Container for what the Brand Identifier agent reports."""

    # Each brand_* dict is documented as {color, confidence, reasoning, usage_count}.
    brand_primary: dict = field(default_factory=dict)
    brand_secondary: dict = field(default_factory=dict)
    brand_accent: dict = field(default_factory=dict)

    # One of: complementary, analogous, triadic, monochromatic, random
    palette_strategy: str = ""
    # Scale of 1-10
    cohesion_score: int = 5
    cohesion_notes: str = ""

    # Mapping of {hex_color: suggested_name}
    semantic_names: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the fields as a plain dict keyed by field name.

        Values are the stored objects themselves (no copies are made).
        """
        exported = (
            "brand_primary",
            "brand_secondary",
            "brand_accent",
            "palette_strategy",
            "cohesion_score",
            "cohesion_notes",
            "semantic_names",
        )
        return {key: getattr(self, key) for key in exported}
51
-
52
-
53
@dataclass
class BenchmarkAdvice:
    """Container for what the Benchmark Advisor agent reports."""

    recommended_benchmark: str = ""
    recommended_benchmark_name: str = ""
    reasoning: str = ""

    # Entries are documented as {change, from, to, effort}
    alignment_changes: list = field(default_factory=list)

    pros_of_alignment: list = field(default_factory=list)
    cons_of_alignment: list = field(default_factory=list)

    # Entries are documented as {name, reason}
    alternative_benchmarks: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the fields as a plain dict.

        Three output keys are shorter than their attribute names:
        ``pros``, ``cons`` and ``alternatives``. Values are not copied.
        """
        # (output key, attribute name) pairs, in output order
        layout = (
            ("recommended_benchmark", "recommended_benchmark"),
            ("recommended_benchmark_name", "recommended_benchmark_name"),
            ("reasoning", "reasoning"),
            ("alignment_changes", "alignment_changes"),
            ("pros", "pros_of_alignment"),
            ("cons", "cons_of_alignment"),
            ("alternatives", "alternative_benchmarks"),
        )
        return {out_key: getattr(self, attr) for out_key, attr in layout}
79
-
80
-
81
@dataclass
class BestPracticesResult:
    """Container for what the Best Practices Validator agent reports."""

    # Scale of 0-100
    overall_score: int = 50

    # Documented as {check_name: {status: pass/warn/fail, note: str}}
    checks: dict = field(default_factory=dict)

    # Entries are documented as {rank, issue, impact, effort, action}
    priority_fixes: list = field(default_factory=list)

    passing_practices: list = field(default_factory=list)
    failing_practices: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the fields as a plain dict.

        ``passing_practices`` / ``failing_practices`` are exported under the
        shorter keys ``passing`` / ``failing``. Values are not copied.
        """
        payload = dict(
            overall_score=self.overall_score,
            checks=self.checks,
            priority_fixes=self.priority_fixes,
        )
        payload["passing"] = self.passing_practices
        payload["failing"] = self.failing_practices
        return payload
103
-
104
-
105
@dataclass
class HeadSynthesis:
    """Container for the final, synthesized output of the HEAD agent."""

    executive_summary: str = ""

    # Documented as {overall, accessibility, consistency, organization}
    scores: dict = field(default_factory=dict)

    # Documented as {closest, similarity, recommendation}
    benchmark_fit: dict = field(default_factory=dict)

    # Documented as {primary, secondary, cohesion}
    brand_analysis: dict = field(default_factory=dict)

    # Entries are documented as {action, impact, effort, details}
    top_3_actions: list = field(default_factory=list)

    # Entries are documented as {role, current, suggested, reason, accept}
    color_recommendations: list = field(default_factory=list)

    type_scale_recommendation: dict = field(default_factory=dict)
    spacing_recommendation: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return every field as a plain dict keyed by field name (no copies)."""
        exported = (
            "executive_summary",
            "scores",
            "benchmark_fit",
            "brand_analysis",
            "top_3_actions",
            "color_recommendations",
            "type_scale_recommendation",
            "spacing_recommendation",
        )
        return {key: getattr(self, key) for key in exported}
139
-
140
-
141
- # =============================================================================
142
- # BRAND IDENTIFIER AGENT
143
- # =============================================================================
144
-
145
class BrandIdentifierAgent:
    """
    Identifies brand colors from usage context.

    WHY LLM: Requires understanding context (33 buttons = likely brand primary),
    not just color math.

    The agent formats color tokens and a semantic analysis into a prompt,
    sends it through ``hf_client.complete_async`` and converts the JSON reply
    into a ``BrandIdentification``.
    """

    # NOTE(review): template text is reproduced as it appears in the flattened
    # source; any nesting indentation inside the JSON example may have been
    # lost upstream — confirm against the original file.
    PROMPT_TEMPLATE = """You are a senior design system analyst. Identify the brand colors from this color usage data.

## COLOR DATA WITH USAGE CONTEXT

{color_data}

## SEMANTIC ANALYSIS (from CSS properties)

{semantic_analysis}

## YOUR TASK

1. **Identify Brand Colors**:
- Brand Primary: The main action/CTA color (highest visibility)
- Brand Secondary: Supporting brand color
- Brand Accent: Highlight color for emphasis

2. **Assess Palette Strategy**:
- Is it complementary, analogous, triadic, monochromatic, or random?

3. **Rate Cohesion** (1-10):
- Do the colors work together?
- Is there a clear color story?

4. **Suggest Semantic Names** for top 10 most-used colors

## OUTPUT FORMAT (JSON only)

{{
"brand_primary": {{
"color": "#hex",
"confidence": "high|medium|low",
"reasoning": "Why this is brand primary",
"usage_count": <number>
}},
"brand_secondary": {{
"color": "#hex",
"confidence": "high|medium|low",
"reasoning": "..."
}},
"brand_accent": {{
"color": "#hex or null",
"confidence": "...",
"reasoning": "..."
}},
"palette_strategy": "complementary|analogous|triadic|monochromatic|random",
"cohesion_score": <1-10>,
"cohesion_notes": "Assessment of how well colors work together",
"semantic_names": {{
"#hex1": "brand.primary",
"#hex2": "text.primary",
"#hex3": "background.primary"
}}
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client: Any):
        # Client object; only its awaitable ``complete_async`` method is used here.
        self.hf_client = hf_client

    async def analyze(
        self,
        color_tokens: dict,
        semantic_analysis: dict,
        log_callback: Optional[Callable[[str], None]] = None,
    ) -> "BrandIdentification":
        """
        Identify brand colors from usage context.

        Args:
            color_tokens: Dict of color tokens with usage data
            semantic_analysis: Semantic categorization from Stage 1
            log_callback: Progress logging function (optional)

        Returns:
            BrandIdentification with identified colors. If building the
            prompt, calling the client or parsing the reply raises, the
            error is logged and a default BrandIdentification is returned.
        """
        def log(msg: str) -> None:
            if log_callback:
                log_callback(msg)

        log(" 🎨 Brand Identifier (Llama 70B)")
        log(" └─ Analyzing color context and usage patterns...")

        try:
            # Prompt building is inside the try so malformed inputs follow
            # the same "log and return defaults" path as client failures.
            prompt = self.PROMPT_TEMPLATE.format(
                color_data=self._format_color_data(color_tokens),
                semantic_analysis=self._format_semantic_analysis(semantic_analysis),
            )

            start_time = datetime.now()

            response = await self.hf_client.complete_async(
                agent_name="brand_identifier",
                system_prompt="You are a senior design system analyst specializing in brand color identification.",
                user_message=prompt,
                max_tokens=800,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log(" ────────────────────────────────────────────────")
            log(f" 🎨 Brand Identifier: COMPLETE ({duration:.1f}s)")
            log(f" ├─ Brand Primary: {result.brand_primary.get('color', '?')} ({result.brand_primary.get('confidence', '?')} confidence)")
            log(f" ├─ Brand Secondary: {result.brand_secondary.get('color', '?')}")
            log(f" ├─ Palette Strategy: {result.palette_strategy}")
            log(f" └─ Cohesion Score: {result.cohesion_score}/10")

            return result

        except Exception as e:
            # Deliberate best-effort boundary: never let this agent abort the
            # pipeline. Only the first 120 characters of the error are logged.
            error_msg = str(e)
            log(f" ⚠️ Brand Identifier failed: {error_msg[:120]}")
            if "gated" in error_msg.lower() or "access" in error_msg.lower():
                log(" └─ Model may require license acceptance at huggingface.co")
            elif "Rate limit" in error_msg or "429" in error_msg:
                log(" └─ HF free tier rate limit — wait or upgrade to Pro")
            return BrandIdentification()

    def _format_color_data(self, color_tokens: dict) -> str:
        """Format up to the first 30 color tokens as prompt bullet lines.

        Tokens may be dicts (``value``/``hex``, ``usage_count``/``count``,
        ``context``/``css_property``) or objects exposing ``value``,
        ``usage_count`` and ``context`` attributes. Tokens without a color
        value are skipped. A missing or empty mapping yields a placeholder.
        """
        lines = []
        for token in list((color_tokens or {}).values())[:30]:
            if isinstance(token, dict):
                hex_val = token.get("value", token.get("hex", ""))
                usage = token.get("usage_count", token.get("count", 1))
                context = token.get("context", token.get("css_property", ""))
            else:
                hex_val = getattr(token, "value", "")
                usage = getattr(token, "usage_count", 1)
                context = getattr(token, "context", "")

            if hex_val:
                lines.append(f"- {hex_val}: used {usage}x, context: {context or 'unknown'}")

        return "\n".join(lines) if lines else "No color data available"

    def _format_semantic_analysis(self, semantic: dict) -> str:
        """Format the semantic analysis as prompt bullet lines.

        Supported value shapes per category: a list of colors (first five
        used), a flat color dict containing ``hex``, a nested dict of
        sub-roles (first five used), or any other scalar. Empty categories
        are skipped.
        """
        if not semantic:
            return "No semantic analysis available"

        lines = []
        try:
            for category, value in semantic.items():
                if not value:
                    continue

                if isinstance(value, list):
                    # Coerce every entry to str so a single non-string color
                    # value cannot make join() fail and wipe the whole section.
                    color_list = [
                        str(c.get("hex", c.get("value", c))) if isinstance(c, dict) else str(c)
                        for c in value[:5]
                    ]
                    lines.append(f"- {category}: {', '.join(color_list)}")

                elif isinstance(value, dict):
                    # Could be a nested dict of sub-roles → color dicts
                    # e.g. {"primary": {"hex": "#007bff", ...}, "secondary": {...}}
                    # or a flat color dict {"hex": "#...", "confidence": "..."}
                    # or a summary dict {"total_colors_analyzed": 50, ...}
                    if "hex" in value:
                        lines.append(f"- {category}: {value['hex']}")
                    else:
                        sub_items = []
                        for sub_role, sub_val in list(value.items())[:5]:
                            if isinstance(sub_val, dict) and "hex" in sub_val:
                                sub_items.append(f"{sub_role}={sub_val['hex']}")
                            elif isinstance(sub_val, (str, int, float, bool)):
                                sub_items.append(f"{sub_role}={sub_val}")
                        if sub_items:
                            lines.append(f"- {category}: {', '.join(sub_items)}")
                else:
                    lines.append(f"- {category}: {value}")
        except Exception as e:
            # Best effort: the prompt still gets sent, with a short error note
            # in place of the semantic section.
            return f"Error formatting semantic analysis: {str(e)[:50]}"

        return "\n".join(lines) if lines else "No semantic analysis available"

    @staticmethod
    def _extract_json_object(response: Any) -> Optional[dict]:
        """Return the first JSON object found in *response*, or None.

        Tries ``raw_decode`` at each ``{`` in turn, so prose (even prose
        containing braces) before or after the JSON does not break parsing.
        An already-decoded dict is returned unchanged.
        """
        if isinstance(response, dict):
            return response
        if not isinstance(response, str):
            return None

        decoder = json.JSONDecoder()
        start = response.find("{")
        while start != -1:
            try:
                candidate, _end = decoder.raw_decode(response, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            start = response.find("{", start + 1)
        return None

    def _parse_response(self, response: str) -> "BrandIdentification":
        """Parse LLM response into BrandIdentification.

        Returns a default instance when no JSON object can be extracted.
        Dict-typed fields fall back to ``{}`` when the model returns ``null``
        or another type, so callers can safely call ``.get`` on them.
        """
        data = self._extract_json_object(response)
        if data is None:
            return BrandIdentification()

        def as_dict(key: str) -> dict:
            value = data.get(key)
            return value if isinstance(value, dict) else {}

        return BrandIdentification(
            brand_primary=as_dict("brand_primary"),
            brand_secondary=as_dict("brand_secondary"),
            brand_accent=as_dict("brand_accent"),
            palette_strategy=data.get("palette_strategy", "unknown"),
            cohesion_score=data.get("cohesion_score", 5),
            cohesion_notes=data.get("cohesion_notes", ""),
            semantic_names=as_dict("semantic_names"),
        )
365
-
366
-
367
- # =============================================================================
368
- # BENCHMARK ADVISOR AGENT
369
- # =============================================================================
370
-
371
class BenchmarkAdvisorAgent:
    """
    Recommends best-fit design system based on comparison data.

    WHY LLM: Requires reasoning about trade-offs and use-case fit,
    not just similarity scores.

    The agent formats the user's values and up to five benchmark comparisons
    into a prompt, sends it through ``hf_client.complete_async`` and converts
    the JSON reply into a ``BenchmarkAdvice``.
    """

    # NOTE(review): template text is reproduced as it appears in the flattened
    # source; any nesting indentation inside the JSON example may have been
    # lost upstream — confirm against the original file.
    PROMPT_TEMPLATE = """You are a senior design system consultant. Recommend the best design system alignment.

## USER'S CURRENT VALUES

- Type Scale Ratio: {user_ratio}
- Base Font Size: {user_base}px
- Spacing Grid: {user_spacing}px

## BENCHMARK COMPARISON

{benchmark_comparison}

## YOUR TASK

1. **Recommend Best Fit**: Which design system should they align with?
2. **Explain Why**: Consider similarity scores AND use-case fit
3. **List Changes Needed**: What would they need to change to align?
4. **Pros/Cons**: Benefits and drawbacks of alignment

## OUTPUT FORMAT (JSON only)

{{
"recommended_benchmark": "<system_key>",
"recommended_benchmark_name": "<full name>",
"reasoning": "Why this is the best fit for their use case",
"alignment_changes": [
{{"change": "Type scale", "from": "1.18", "to": "1.25", "effort": "medium"}},
{{"change": "Spacing grid", "from": "mixed", "to": "4px", "effort": "high"}}
],
"pros_of_alignment": [
"Familiar patterns for users",
"Well-tested accessibility"
],
"cons_of_alignment": [
"May lose brand uniqueness"
],
"alternative_benchmarks": [
{{"name": "Material Design 3", "reason": "Good for Android-first products"}}
]
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client: Any):
        # Client object; only its awaitable ``complete_async`` method is used here.
        self.hf_client = hf_client

    async def analyze(
        self,
        user_ratio: float,
        user_base: int,
        user_spacing: int,
        benchmark_comparisons: list,
        log_callback: Optional[Callable[[str], None]] = None,
    ) -> "BenchmarkAdvice":
        """
        Recommend best-fit design system.

        Args:
            user_ratio: User's detected type scale ratio
            user_base: User's base font size
            user_spacing: User's spacing grid base
            benchmark_comparisons: List of BenchmarkComparison objects
            log_callback: Progress logging function (optional)

        Returns:
            BenchmarkAdvice with recommendations. If building the prompt,
            calling the client or parsing the reply raises, the error is
            logged and a default BenchmarkAdvice is returned.
        """
        def log(msg: str) -> None:
            if log_callback:
                log_callback(msg)

        log("")
        log(" 🏢 Benchmark Advisor (Qwen 72B)")
        log(" └─ Evaluating benchmark fit for your use case...")

        try:
            # Prompt building is inside the try so a malformed comparison
            # object follows the same "log and return defaults" path.
            prompt = self.PROMPT_TEMPLATE.format(
                user_ratio=user_ratio,
                user_base=user_base,
                user_spacing=user_spacing,
                benchmark_comparison=self._format_comparisons(benchmark_comparisons),
            )

            start_time = datetime.now()

            response = await self.hf_client.complete_async(
                agent_name="benchmark_advisor",
                system_prompt="You are a senior design system consultant specializing in design system architecture.",
                user_message=prompt,
                max_tokens=700,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log(" ────────────────────────────────────────────────")
            log(f" 🏢 Benchmark Advisor: COMPLETE ({duration:.1f}s)")
            log(f" ├─ Recommended: {result.recommended_benchmark_name}")
            log(f" ├─ Changes Needed: {len(result.alignment_changes)}")
            log(f" └─ Key Change: {result.alignment_changes[0].get('change', 'N/A') if result.alignment_changes else 'None'}")

            return result

        except Exception as e:
            # Deliberate best-effort boundary: log a truncated error and
            # fall back to an empty recommendation.
            log(f" ├─ ⚠️ Benchmark Advisor failed: {str(e)[:120]}")
            return BenchmarkAdvice()

    def _format_comparisons(self, comparisons: list) -> str:
        """Format up to five benchmark comparisons for the prompt.

        Each comparison is read through ``.benchmark`` (``icon``, ``name``,
        ``typography``, ``spacing``, ``best_for``) and its own score/diff
        attributes. An empty or missing list yields a placeholder line, in
        line with the other formatters in this module.
        """
        if not comparisons:
            return "No benchmark comparisons available"

        entries = []
        for rank, comparison in enumerate(comparisons[:5], start=1):
            bench = comparison.benchmark
            # Leading "" keeps the blank line that precedes every entry.
            entries.append("\n".join([
                "",
                f"{rank}. {bench.icon} {bench.name}",
                f"- Similarity Score: {comparison.similarity_score:.2f} (lower = better)",
                f"- Match: {comparison.overall_match_pct:.0f}%",
                f"- Type Ratio: {bench.typography.get('scale_ratio', '?')} (diff: {comparison.type_ratio_diff:.3f})",
                f"- Base Size: {bench.typography.get('base_size', '?')}px (diff: {comparison.base_size_diff})",
                f"- Spacing: {bench.spacing.get('base', '?')}px (diff: {comparison.spacing_grid_diff})",
                f"- Best For: {', '.join(bench.best_for)}",
            ]))

        return "\n".join(entries)

    @staticmethod
    def _extract_json_object(response: Any) -> Optional[dict]:
        """Return the first JSON object found in *response*, or None.

        Tries ``raw_decode`` at each ``{`` in turn, so prose (even prose
        containing braces) before or after the JSON does not break parsing.
        An already-decoded dict is returned unchanged.
        """
        if isinstance(response, dict):
            return response
        if not isinstance(response, str):
            return None

        decoder = json.JSONDecoder()
        start = response.find("{")
        while start != -1:
            try:
                candidate, _end = decoder.raw_decode(response, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            start = response.find("{", start + 1)
        return None

    @staticmethod
    def _as_list(value: Any) -> list:
        """Return *value* if it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    @staticmethod
    def _as_dict_list(value: Any, key: str) -> list:
        """Return *value* as a list of dicts.

        Non-dict entries are wrapped as ``{key: str(entry)}`` so their text is
        kept while callers can still use ``.get`` on every element.
        """
        if not isinstance(value, list):
            return []
        return [item if isinstance(item, dict) else {key: str(item)} for item in value]

    def _parse_response(self, response: str) -> "BenchmarkAdvice":
        """Parse LLM response into BenchmarkAdvice.

        Returns a default instance when no JSON object can be extracted.
        List fields are normalised so a ``null`` or a list of bare strings
        from the model cannot break later ``.get`` / ``len`` calls.
        """
        data = self._extract_json_object(response)
        if data is None:
            return BenchmarkAdvice()

        return BenchmarkAdvice(
            recommended_benchmark=data.get("recommended_benchmark", ""),
            recommended_benchmark_name=data.get("recommended_benchmark_name", ""),
            reasoning=data.get("reasoning", ""),
            alignment_changes=self._as_dict_list(data.get("alignment_changes"), "change"),
            pros_of_alignment=self._as_list(data.get("pros_of_alignment")),
            cons_of_alignment=self._as_list(data.get("cons_of_alignment")),
            alternative_benchmarks=self._as_dict_list(data.get("alternative_benchmarks"), "name"),
        )
526
-
527
-
528
- # =============================================================================
529
- # BEST PRACTICES VALIDATOR AGENT
530
- # =============================================================================
531
-
532
class BestPracticesValidatorAgent:
    """
    Validates against design system best practices and prioritizes fixes.

    WHY LLM: Prioritization requires judgment about business impact,
    not just checking boxes.

    The agent turns rule-engine results into a prompt, sends it through
    ``hf_client.complete_async`` and converts the JSON reply into a
    ``BestPracticesResult``.
    """

    # NOTE(review): template text is reproduced as it appears in the flattened
    # source; any nesting indentation inside the JSON example may have been
    # lost upstream — confirm against the original file.
    PROMPT_TEMPLATE = """You are a design system auditor. Validate these tokens against best practices.

## RULE ENGINE ANALYSIS RESULTS

### Typography
- Detected Ratio: {type_ratio} ({type_consistent})
- Base Size: {base_size}px
- Recommendation: {type_recommendation}

### Accessibility
- Total Colors: {total_colors}
- AA Pass: {aa_pass}
- AA Fail: {aa_fail}
- Failing Colors: {failing_colors}

### Spacing
- Detected Base: {spacing_base}px
- Grid Aligned: {spacing_aligned}%
- Recommendation: {spacing_recommendation}px

### Color Statistics
- Unique Colors: {unique_colors}
- Duplicates: {duplicates}
- Near-Duplicates: {near_duplicates}

## BEST PRACTICES CHECKLIST

1. Type scale uses standard ratio (1.2, 1.25, 1.333, 1.5, 1.618)
2. Type scale is consistent (variance < 0.15)
3. Base font size >= 16px (accessibility)
4. Line height >= 1.5 for body text
5. All interactive colors pass AA (4.5:1)
6. Spacing uses consistent grid (4px or 8px)
7. Limited color palette (< 20 unique semantic colors)
8. No near-duplicate colors

## YOUR TASK

1. Score each practice: pass/warn/fail
2. Calculate overall score (0-100)
3. Identify TOP 3 priority fixes with impact assessment

## OUTPUT FORMAT (JSON only)

{{
"overall_score": <0-100>,
"checks": {{
"type_scale_standard": {{"status": "pass|warn|fail", "note": "..."}},
"type_scale_consistent": {{"status": "...", "note": "..."}},
"base_size_accessible": {{"status": "...", "note": "..."}},
"aa_compliance": {{"status": "...", "note": "..."}},
"spacing_grid": {{"status": "...", "note": "..."}},
"color_count": {{"status": "...", "note": "..."}}
}},
"priority_fixes": [
{{
"rank": 1,
"issue": "Brand primary fails AA",
"impact": "high|medium|low",
"effort": "low|medium|high",
"action": "Change #06b2c4 → #0891a8"
}}
],
"passing_practices": ["Base font size", "..."],
"failing_practices": ["AA compliance", "..."]
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client: Any):
        # Client object; only its awaitable ``complete_async`` method is used here.
        self.hf_client = hf_client

    async def analyze(
        self,
        rule_engine_results: Any,
        log_callback: Optional[Callable[[str], None]] = None,
    ) -> "BestPracticesResult":
        """
        Validate against best practices.

        Args:
            rule_engine_results: Results from rule engine
            log_callback: Progress logging function (optional)

        Returns:
            BestPracticesResult with validation. If building the prompt,
            calling the client or parsing the reply raises, the error is
            logged and a default BestPracticesResult is returned.
        """
        def log(msg: str) -> None:
            if log_callback:
                log_callback(msg)

        log("")
        log(" ✅ Best Practices Validator (Qwen 72B)")
        log(" └─ Checking against design system standards...")

        try:
            # Prompt building is inside the try so incomplete rule-engine
            # results follow the same "log and return defaults" path.
            prompt = self._build_prompt(rule_engine_results)

            start_time = datetime.now()

            response = await self.hf_client.complete_async(
                agent_name="best_practices_validator",
                system_prompt="You are a design system auditor specializing in best practices validation.",
                user_message=prompt,
                max_tokens=800,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log(" ────────────────────────────────────────────────")
            log(f" ✅ Best Practices: COMPLETE ({duration:.1f}s)")
            log(f" ├─ Overall Score: {result.overall_score}/100")
            log(f" ├─ Passing: {len(result.passing_practices)} | Failing: {len(result.failing_practices)}")
            if result.priority_fixes:
                log(f" └─ Top Fix: {result.priority_fixes[0].get('issue', 'N/A')}")

            return result

        except Exception as e:
            # Deliberate best-effort boundary: log a truncated error and
            # fall back to the default result.
            log(f" ├─ ⚠️ Best Practices Validator failed: {str(e)[:120]}")
            return BestPracticesResult()

    def _build_prompt(self, rule_engine_results: Any) -> str:
        """Fill PROMPT_TEMPLATE from the rule-engine results.

        Reads ``typography``, ``spacing``, ``color_stats`` and
        ``accessibility`` from *rule_engine_results*. Colors whose
        ``passes_aa_normal`` is falsy count as AA failures; at most five of
        them are listed. The base size falls back to 16 when no sizes exist.
        """
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility

        failures = [a for a in accessibility if not a.passes_aa_normal]
        failing_colors_str = ", ".join(
            f"{a.hex_color} ({a.contrast_on_white:.1f}:1)" for a in failures[:5]
        )

        if typo.is_consistent:
            consistency = "consistent"
        else:
            consistency = f"inconsistent, variance={typo.variance:.2f}"

        return self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_consistent=consistency,
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            type_recommendation=f"{typo.recommendation} ({typo.recommendation_name})",
            total_colors=len(accessibility),
            aa_pass=len(accessibility) - len(failures),
            aa_fail=len(failures),
            failing_colors=failing_colors_str or "None",
            spacing_base=spacing.detected_base,
            spacing_aligned=f"{spacing.alignment_percentage:.0f}",
            spacing_recommendation=spacing.recommendation,
            unique_colors=color_stats.unique_count,
            duplicates=color_stats.duplicate_count,
            near_duplicates=len(color_stats.near_duplicates),
        )

    @staticmethod
    def _extract_json_object(response: Any) -> Optional[dict]:
        """Return the first JSON object found in *response*, or None.

        Tries ``raw_decode`` at each ``{`` in turn, so prose (even prose
        containing braces) before or after the JSON does not break parsing.
        An already-decoded dict is returned unchanged.
        """
        if isinstance(response, dict):
            return response
        if not isinstance(response, str):
            return None

        decoder = json.JSONDecoder()
        start = response.find("{")
        while start != -1:
            try:
                candidate, _end = decoder.raw_decode(response, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            start = response.find("{", start + 1)
        return None

    @staticmethod
    def _as_list(value: Any) -> list:
        """Return *value* if it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    @staticmethod
    def _as_dict_list(value: Any, key: str) -> list:
        """Return *value* as a list of dicts.

        Non-dict entries are wrapped as ``{key: str(entry)}`` so their text is
        kept while callers can still use ``.get`` on every element.
        """
        if not isinstance(value, list):
            return []
        return [item if isinstance(item, dict) else {key: str(item)} for item in value]

    def _parse_response(self, response: str) -> "BestPracticesResult":
        """Parse LLM response into BestPracticesResult.

        Returns a default instance when no JSON object can be extracted.
        Container fields are normalised so a ``null`` or a list of bare
        strings from the model cannot break later ``.get`` / ``len`` calls.
        """
        data = self._extract_json_object(response)
        if data is None:
            return BestPracticesResult()

        checks = data.get("checks")
        return BestPracticesResult(
            overall_score=data.get("overall_score", 50),
            checks=checks if isinstance(checks, dict) else {},
            priority_fixes=self._as_dict_list(data.get("priority_fixes"), "issue"),
            passing_practices=self._as_list(data.get("passing_practices")),
            failing_practices=self._as_list(data.get("failing_practices")),
        )
706
-
707
-
708
- # =============================================================================
709
- # HEAD SYNTHESIZER AGENT
710
- # =============================================================================
711
-
712
class HeadSynthesizerAgent:
    """
    Combines all agent outputs into final recommendations.

    This is the final step that produces actionable output for the user.
    It merges rule-engine facts, the closest benchmark comparison and the
    three upstream agent results into one prompt, sends it through
    ``hf_client.complete_async`` and converts the JSON reply into a
    ``HeadSynthesis``.
    """

    # NOTE(review): template text is reproduced as it appears in the flattened
    # source; any nesting indentation inside the JSON example may have been
    # lost upstream — confirm against the original file.
    PROMPT_TEMPLATE = """You are a senior design system architect. Synthesize these analysis results into final recommendations.

## RULE ENGINE FACTS

- Type Scale: {type_ratio} ({type_status})
- Base Size: {base_size}px
- AA Failures: {aa_failures}
- Spacing Grid: {spacing_status}
- Unique Colors: {unique_colors}
- Consistency Score: {consistency_score}/100

## BENCHMARK COMPARISON

Closest Match: {closest_benchmark}
Match Percentage: {match_pct}%
Recommended Changes: {benchmark_changes}

## BRAND IDENTIFICATION

- Brand Primary: {brand_primary}
- Brand Secondary: {brand_secondary}
- Palette Cohesion: {cohesion_score}/10

## BEST PRACTICES VALIDATION

Overall Score: {best_practices_score}/100
Priority Fixes: {priority_fixes}

## ACCESSIBILITY FIXES NEEDED

{accessibility_fixes}

## YOUR TASK

Synthesize ALL the above into:
1. Executive Summary (2-3 sentences)
2. Overall Scores
3. Top 3 Priority Actions (with effort estimates)
4. Specific Color Recommendations (with accept/reject defaults)
5. Type Scale Recommendation
6. Spacing Recommendation

## OUTPUT FORMAT (JSON only)

{{
"executive_summary": "Your design system scores X/100. Key issues are Y. Priority action is Z.",
"scores": {{
"overall": <0-100>,
"accessibility": <0-100>,
"consistency": <0-100>,
"organization": <0-100>
}},
"benchmark_fit": {{
"closest": "<name>",
"similarity": "<X%>",
"recommendation": "Align type scale to 1.25"
}},
"brand_analysis": {{
"primary": "#hex",
"secondary": "#hex",
"cohesion": <1-10>
}},
"top_3_actions": [
{{"action": "Fix brand color AA", "impact": "high", "effort": "5 min", "details": "Change #X to #Y"}}
],
"color_recommendations": [
{{"role": "brand.primary", "current": "#06b2c4", "suggested": "#0891a8", "reason": "AA compliance", "accept": true}}
],
"type_scale_recommendation": {{
"current_ratio": 1.18,
"recommended_ratio": 1.25,
"reason": "Align with industry standard"
}},
"spacing_recommendation": {{
"current": "mixed",
"recommended": "8px",
"reason": "Consistent grid improves maintainability"
}}
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client: Any):
        # Client object; only its awaitable ``complete_async`` method is used here.
        self.hf_client = hf_client

    async def synthesize(
        self,
        rule_engine_results: Any,
        benchmark_comparisons: list,
        brand_identification: "BrandIdentification",
        benchmark_advice: "BenchmarkAdvice",
        best_practices: "BestPracticesResult",
        log_callback: Optional[Callable[[str], None]] = None,
    ) -> "HeadSynthesis":
        """
        Synthesize all results into final recommendations.

        Args:
            rule_engine_results: Results from rule engine
            benchmark_comparisons: Comparison objects; only the first one is
                used, as the closest match
            brand_identification: Output of the Brand Identifier agent
            benchmark_advice: Output of the Benchmark Advisor agent
            best_practices: Output of the Best Practices Validator agent
            log_callback: Progress logging function (optional)

        Returns:
            HeadSynthesis with the combined recommendations. If building the
            prompt, calling the client or parsing the reply raises, the error
            is logged and a default HeadSynthesis is returned.
        """
        def log(msg: str) -> None:
            if log_callback:
                log_callback(msg)

        log("")
        log("═" * 60)
        log("🧠 LAYER 4: HEAD SYNTHESIZER")
        log("═" * 60)
        log("")
        log(" Combining: Rule Engine + Benchmarks + Brand + Best Practices...")

        try:
            # Prompt building is inside the try: the inputs partly come from
            # earlier LLM replies, so a malformed value must not abort the
            # pipeline.
            prompt = self._build_prompt(
                rule_engine_results,
                benchmark_comparisons,
                brand_identification,
                benchmark_advice,
                best_practices,
            )

            start_time = datetime.now()

            response = await self.hf_client.complete_async(
                agent_name="head_synthesizer",
                system_prompt="You are a senior design system architect specializing in synthesis and recommendations.",
                user_message=prompt,
                max_tokens=1000,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log("")
            log(f" ✅ HEAD Synthesizer: COMPLETE ({duration:.1f}s)")
            log("")

            return result

        except Exception as e:
            # Deliberate best-effort boundary: log a truncated error and
            # fall back to the default synthesis.
            log(f" ├─ ⚠️ Head Synthesizer failed: {str(e)[:120]}")
            return HeadSynthesis()

    @staticmethod
    def _join_field(items: Any, key: str, limit: int = 3) -> str:
        """Join ``item[key]`` of the first *limit* entries with ``"; "``.

        Bare (non-dict) entries are used as-is, empty or missing values are
        skipped, and everything is coerced to ``str`` so that ``join`` cannot
        fail on ``None`` or numbers coming back from a model.
        """
        if not isinstance(items, list):
            return ""
        parts = []
        for item in items[:limit]:
            value = item.get(key) if isinstance(item, dict) else item
            if value:
                parts.append(str(value))
        return "; ".join(parts)

    @staticmethod
    def _brand_color(brand_entry: Any) -> Any:
        """Return the ``color`` of a brand dict, or "Unknown" if unavailable."""
        if isinstance(brand_entry, dict):
            return brand_entry.get("color", "Unknown")
        return "Unknown"

    @staticmethod
    def _format_accessibility_fix(entry: Any) -> str:
        """Format one failing color and its suggested replacement as a bullet."""
        fix_contrast = entry.suggested_fix_contrast
        # Guard the numeric format: the fix may exist without a measured contrast.
        if isinstance(fix_contrast, (int, float)) and not isinstance(fix_contrast, bool):
            fix_contrast_text = f"{fix_contrast:.1f}:1"
        else:
            fix_contrast_text = "n/a"
        return (
            f"- {entry.name}: {entry.hex_color} ({entry.contrast_on_white:.1f}:1)"
            f" → {entry.suggested_fix} ({fix_contrast_text})"
        )

    def _build_prompt(
        self,
        rule_engine_results: Any,
        benchmark_comparisons: list,
        brand_identification: Any,
        benchmark_advice: Any,
        best_practices: Any,
    ) -> str:
        """Fill PROMPT_TEMPLATE from all upstream results.

        At most five AA failures that carry a ``suggested_fix`` are listed,
        and at most three benchmark changes and three priority fixes are
        summarised. Empty summaries are rendered as "None".
        """
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility

        failures = [a for a in accessibility if not a.passes_aa_normal]
        aa_fixes_str = "\n".join(
            self._format_accessibility_fix(a) for a in failures[:5] if a.suggested_fix
        )

        closest = benchmark_comparisons[0] if benchmark_comparisons else None

        return self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_status="consistent" if typo.is_consistent else "inconsistent",
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            aa_failures=len(failures),
            spacing_status=f"{spacing.detected_base}px, {spacing.alignment_percentage:.0f}% aligned",
            unique_colors=color_stats.unique_count,
            consistency_score=rule_engine_results.consistency_score,
            closest_benchmark=closest.benchmark.name if closest else "Unknown",
            match_pct=f"{closest.overall_match_pct:.0f}" if closest else "0",
            benchmark_changes=self._join_field(benchmark_advice.alignment_changes, "change") or "None",
            brand_primary=self._brand_color(brand_identification.brand_primary),
            brand_secondary=self._brand_color(brand_identification.brand_secondary),
            cohesion_score=brand_identification.cohesion_score,
            best_practices_score=best_practices.overall_score,
            priority_fixes=self._join_field(best_practices.priority_fixes, "issue") or "None",
            accessibility_fixes=aa_fixes_str or "None needed",
        )

    @staticmethod
    def _extract_json_object(response: Any) -> Optional[dict]:
        """Return the first JSON object found in *response*, or None.

        Tries ``raw_decode`` at each ``{`` in turn, so prose (even prose
        containing braces) before or after the JSON does not break parsing.
        An already-decoded dict is returned unchanged.
        """
        if isinstance(response, dict):
            return response
        if not isinstance(response, str):
            return None

        decoder = json.JSONDecoder()
        start = response.find("{")
        while start != -1:
            try:
                candidate, _end = decoder.raw_decode(response, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            start = response.find("{", start + 1)
        return None

    def _parse_response(self, response: str) -> "HeadSynthesis":
        """Parse LLM response into HeadSynthesis.

        Returns a default instance when no JSON object can be extracted.
        Dict and list fields fall back to empty containers when the model
        returns ``null`` or another type.
        """
        data = self._extract_json_object(response)
        if data is None:
            return HeadSynthesis()

        def as_dict(key: str) -> dict:
            value = data.get(key)
            return value if isinstance(value, dict) else {}

        def as_list(key: str) -> list:
            value = data.get(key)
            return value if isinstance(value, list) else []

        return HeadSynthesis(
            executive_summary=data.get("executive_summary", ""),
            scores=as_dict("scores"),
            benchmark_fit=as_dict("benchmark_fit"),
            brand_analysis=as_dict("brand_analysis"),
            top_3_actions=as_list("top_3_actions"),
            color_recommendations=as_list("color_recommendations"),
            type_scale_recommendation=as_dict("type_scale_recommendation"),
            spacing_recommendation=as_dict("spacing_recommendation"),
        )