riazmo commited on
Commit
d2da502
·
verified ·
1 Parent(s): d9880a8

Delete agents/llm_agents.py

Browse files
Files changed (1) hide show
  1. agents/llm_agents.py +0 -865
agents/llm_agents.py DELETED
@@ -1,865 +0,0 @@
1
- """
2
- Stage 2 LLM Agents — Specialized Analysis Tasks
3
- =================================================
4
-
5
- These agents handle tasks that REQUIRE LLM reasoning:
6
- - Brand Identifier: Identify brand colors from usage context
7
- - Benchmark Advisor: Recommend best-fit design system
8
- - Best Practices Validator: Prioritize fixes by business impact
9
- - HEAD Synthesizer: Combine all outputs into final recommendations
10
-
11
- Each agent has a focused prompt for its specific task.
12
- """
13
-
14
- import json
15
- import re
16
- from dataclasses import dataclass, field
17
- from typing import Optional, Callable, Any
18
- from datetime import datetime
19
-
20
-
21
- # =============================================================================
22
- # DATA CLASSES
23
- # =============================================================================
24
-
25
@dataclass
class BrandIdentification:
    """Brand-color identification produced by the Brand Identifier agent."""

    # {color, confidence, reasoning, usage_count}
    brand_primary: dict = field(default_factory=dict)
    brand_secondary: dict = field(default_factory=dict)
    brand_accent: dict = field(default_factory=dict)

    # One of: complementary, analogous, triadic, monochromatic, random
    palette_strategy: str = ""
    cohesion_score: int = 5  # 1-10
    cohesion_notes: str = ""

    # {hex_color: suggested_name}
    semantic_names: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a plain dict; keys mirror the field names."""
        keys = (
            "brand_primary",
            "brand_secondary",
            "brand_accent",
            "palette_strategy",
            "cohesion_score",
            "cohesion_notes",
            "semantic_names",
        )
        return {key: getattr(self, key) for key in keys}
51
-
52
-
53
@dataclass
class BenchmarkAdvice:
    """Design-system alignment advice produced by the Benchmark Advisor agent."""

    recommended_benchmark: str = ""
    recommended_benchmark_name: str = ""
    reasoning: str = ""

    # [{change, from, to, effort}]
    alignment_changes: list = field(default_factory=list)

    pros_of_alignment: list = field(default_factory=list)
    cons_of_alignment: list = field(default_factory=list)

    # [{name, reason}]
    alternative_benchmarks: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize to a plain dict (pros/cons/alternatives keys are shortened)."""
        out = {
            "recommended_benchmark": self.recommended_benchmark,
            "recommended_benchmark_name": self.recommended_benchmark_name,
            "reasoning": self.reasoning,
            "alignment_changes": self.alignment_changes,
        }
        out["pros"] = self.pros_of_alignment
        out["cons"] = self.cons_of_alignment
        out["alternatives"] = self.alternative_benchmarks
        return out
79
-
80
-
81
@dataclass
class BestPracticesResult:
    """Validation results produced by the Best Practices Validator agent."""

    overall_score: int = 50  # 0-100

    # {check_name: {status: pass/warn/fail, note: str}}
    checks: dict = field(default_factory=dict)

    # [{rank, issue, impact, effort, action}]
    priority_fixes: list = field(default_factory=list)

    passing_practices: list = field(default_factory=list)
    failing_practices: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize to a plain dict (passing/failing keys are shortened)."""
        result = {
            "overall_score": self.overall_score,
            "checks": self.checks,
            "priority_fixes": self.priority_fixes,
        }
        result["passing"] = self.passing_practices
        result["failing"] = self.failing_practices
        return result
103
-
104
-
105
@dataclass
class HeadSynthesis:
    """Final synthesized output from HEAD agent."""

    executive_summary: str = ""

    # {overall, accessibility, consistency, organization}
    scores: dict = field(default_factory=dict)

    # {closest, similarity, recommendation}
    benchmark_fit: dict = field(default_factory=dict)

    # {primary, secondary, cohesion}
    brand_analysis: dict = field(default_factory=dict)

    # [{action, impact, effort, details}]
    top_3_actions: list = field(default_factory=list)

    # [{role, current, suggested, reason, accept}]
    color_recommendations: list = field(default_factory=list)

    type_scale_recommendation: dict = field(default_factory=dict)
    spacing_recommendation: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a plain dict; keys mirror the field names."""
        return dict(
            executive_summary=self.executive_summary,
            scores=self.scores,
            benchmark_fit=self.benchmark_fit,
            brand_analysis=self.brand_analysis,
            top_3_actions=self.top_3_actions,
            color_recommendations=self.color_recommendations,
            type_scale_recommendation=self.type_scale_recommendation,
            spacing_recommendation=self.spacing_recommendation,
        )
139
-
140
-
141
- # =============================================================================
142
- # BRAND IDENTIFIER AGENT
143
- # =============================================================================
144
-
145
class BrandIdentifierAgent:
    """
    Identifies brand colors from usage context.

    WHY LLM: Requires understanding context (33 buttons = likely brand primary),
    not just color math.
    """

    PROMPT_TEMPLATE = """You are a senior design system analyst. Identify the brand colors from this color usage data.

## COLOR DATA WITH USAGE CONTEXT

{color_data}

## SEMANTIC ANALYSIS (from CSS properties)

{semantic_analysis}

## YOUR TASK

1. **Identify Brand Colors**:
   - Brand Primary: The main action/CTA color (highest visibility)
   - Brand Secondary: Supporting brand color
   - Brand Accent: Highlight color for emphasis

2. **Assess Palette Strategy**:
   - Is it complementary, analogous, triadic, monochromatic, or random?

3. **Rate Cohesion** (1-10):
   - Do the colors work together?
   - Is there a clear color story?

4. **Suggest Semantic Names** for top 10 most-used colors

## OUTPUT FORMAT (JSON only)

{{
  "brand_primary": {{
    "color": "#hex",
    "confidence": "high|medium|low",
    "reasoning": "Why this is brand primary",
    "usage_count": <number>
  }},
  "brand_secondary": {{
    "color": "#hex",
    "confidence": "high|medium|low",
    "reasoning": "..."
  }},
  "brand_accent": {{
    "color": "#hex or null",
    "confidence": "...",
    "reasoning": "..."
  }},
  "palette_strategy": "complementary|analogous|triadic|monochromatic|random",
  "cohesion_score": <1-10>,
  "cohesion_notes": "Assessment of how well colors work together",
  "semantic_names": {{
    "#hex1": "brand.primary",
    "#hex2": "text.primary",
    "#hex3": "background.primary"
  }}
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Async LLM client wrapper; must expose ``complete_async(...)``.
        self.hf_client = hf_client

    async def analyze(
        self,
        color_tokens: dict,
        semantic_analysis: dict,
        log_callback: Optional[Callable] = None,
    ) -> BrandIdentification:
        """
        Identify brand colors from usage context.

        Args:
            color_tokens: Dict of color tokens with usage data
            semantic_analysis: Semantic categorization from Stage 1
            log_callback: Progress logging function (optional)

        Returns:
            BrandIdentification with identified colors; empty defaults on any
            LLM/network/parsing failure (this agent never raises).
        """
        def log(msg: str):
            if log_callback:
                log_callback(msg)

        log("   🎨 Brand Identifier (Llama 70B)")
        log("      └─ Analyzing color context and usage patterns...")

        # Format inputs for the prompt.
        color_data = self._format_color_data(color_tokens)
        semantic_str = self._format_semantic_analysis(semantic_analysis)

        prompt = self.PROMPT_TEMPLATE.format(
            color_data=color_data,
            semantic_analysis=semantic_str,
        )

        try:
            start_time = datetime.now()

            # Use the correct method signature
            response = await self.hf_client.complete_async(
                agent_name="brand_identifier",
                system_prompt="You are a senior design system analyst specializing in brand color identification.",
                user_message=prompt,
                max_tokens=800,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            # Parse response
            result = self._parse_response(response)

            log("   ────────────────────────────────────────────────")
            log(f"   🎨 Brand Identifier: COMPLETE ({duration:.1f}s)")
            log(f"      ├─ Brand Primary: {result.brand_primary.get('color', '?')} ({result.brand_primary.get('confidence', '?')} confidence)")
            log(f"      ├─ Brand Secondary: {result.brand_secondary.get('color', '?')}")
            log(f"      ├─ Palette Strategy: {result.palette_strategy}")
            log(f"      └─ Cohesion Score: {result.cohesion_score}/10")

            return result

        except Exception as e:
            # Boundary catch: degrade gracefully to empty defaults and log.
            log(f"      ├─ ⚠️ Error: {str(e)[:50]}")
            return BrandIdentification()

    def _format_color_data(self, color_tokens: dict) -> str:
        """Format color tokens (dicts or attribute objects) as prompt bullet lines.

        Caps at 30 tokens to bound prompt size.
        """
        lines = []
        for name, token in list(color_tokens.items())[:30]:
            if isinstance(token, dict):
                hex_val = token.get("value", token.get("hex", ""))
                usage = token.get("usage_count", token.get("count", 1))
                context = token.get("context", token.get("css_property", ""))
            else:
                # Token objects expose the same data as attributes.
                hex_val = getattr(token, "value", "")
                usage = getattr(token, "usage_count", 1)
                context = getattr(token, "context", "")

            if hex_val:
                lines.append(f"- {hex_val}: used {usage}x, context: {context or 'unknown'}")

        return "\n".join(lines) if lines else "No color data available"

    def _format_semantic_analysis(self, semantic: dict) -> str:
        """Format {category: [colors]} from Stage 1 as prompt bullet lines (5 colors/category)."""
        if not semantic:
            return "No semantic analysis available"

        lines = []
        for category, colors in semantic.items():
            if colors:
                color_list = [c.get("hex", c) if isinstance(c, dict) else c for c in colors[:5]]
                lines.append(f"- {category}: {', '.join(str(c) for c in color_list)}")

        return "\n".join(lines) if lines else "No semantic analysis available"

    def _parse_response(self, response: str) -> BrandIdentification:
        """Parse the LLM response into a BrandIdentification.

        Best-effort: tolerates chatter around the JSON and returns empty
        defaults when no parseable JSON object is found.
        """
        try:
            # Grab the outermost {...} span in case the model added prose.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                # Guard: JSON that parses but is not an object yields defaults.
                if isinstance(data, dict):
                    return BrandIdentification(
                        brand_primary=data.get("brand_primary", {}),
                        brand_secondary=data.get("brand_secondary", {}),
                        brand_accent=data.get("brand_accent", {}),
                        palette_strategy=data.get("palette_strategy", "unknown"),
                        cohesion_score=data.get("cohesion_score", 5),
                        cohesion_notes=data.get("cohesion_notes", ""),
                        semantic_names=data.get("semantic_names", {}),
                    )
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # non-string response. Fall through to empty defaults.
            pass

        return BrandIdentification()
326
-
327
-
328
- # =============================================================================
329
- # BENCHMARK ADVISOR AGENT
330
- # =============================================================================
331
-
332
class BenchmarkAdvisorAgent:
    """
    Recommends best-fit design system based on comparison data.

    WHY LLM: Requires reasoning about trade-offs and use-case fit,
    not just similarity scores.
    """

    PROMPT_TEMPLATE = """You are a senior design system consultant. Recommend the best design system alignment.

## USER'S CURRENT VALUES

- Type Scale Ratio: {user_ratio}
- Base Font Size: {user_base}px
- Spacing Grid: {user_spacing}px

## BENCHMARK COMPARISON

{benchmark_comparison}

## YOUR TASK

1. **Recommend Best Fit**: Which design system should they align with?
2. **Explain Why**: Consider similarity scores AND use-case fit
3. **List Changes Needed**: What would they need to change to align?
4. **Pros/Cons**: Benefits and drawbacks of alignment

## OUTPUT FORMAT (JSON only)

{{
  "recommended_benchmark": "<system_key>",
  "recommended_benchmark_name": "<full name>",
  "reasoning": "Why this is the best fit for their use case",
  "alignment_changes": [
    {{"change": "Type scale", "from": "1.18", "to": "1.25", "effort": "medium"}},
    {{"change": "Spacing grid", "from": "mixed", "to": "4px", "effort": "high"}}
  ],
  "pros_of_alignment": [
    "Familiar patterns for users",
    "Well-tested accessibility"
  ],
  "cons_of_alignment": [
    "May lose brand uniqueness"
  ],
  "alternative_benchmarks": [
    {{"name": "Material Design 3", "reason": "Good for Android-first products"}}
  ]
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Async LLM client wrapper; must expose ``complete_async(...)``.
        self.hf_client = hf_client

    async def analyze(
        self,
        user_ratio: float,
        user_base: int,
        user_spacing: int,
        benchmark_comparisons: list,
        log_callback: Optional[Callable] = None,
    ) -> BenchmarkAdvice:
        """
        Recommend best-fit design system.

        Args:
            user_ratio: User's detected type scale ratio
            user_base: User's base font size
            user_spacing: User's spacing grid base
            benchmark_comparisons: List of BenchmarkComparison objects
            log_callback: Progress logging function (optional)

        Returns:
            BenchmarkAdvice with recommendations; empty defaults on any
            LLM/network/parsing failure (this agent never raises).
        """
        def log(msg: str):
            if log_callback:
                log_callback(msg)

        log("")
        log("   🏢 Benchmark Advisor (Qwen 72B)")
        log("      └─ Evaluating benchmark fit for your use case...")

        # Format comparison data for the prompt.
        comparison_str = self._format_comparisons(benchmark_comparisons)

        prompt = self.PROMPT_TEMPLATE.format(
            user_ratio=user_ratio,
            user_base=user_base,
            user_spacing=user_spacing,
            benchmark_comparison=comparison_str,
        )

        try:
            start_time = datetime.now()

            response = await self.hf_client.complete_async(
                agent_name="benchmark_advisor",
                system_prompt="You are a senior design system consultant specializing in design system architecture.",
                user_message=prompt,
                max_tokens=700,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log("   ────────────────────────────────────────────────")
            log(f"   🏢 Benchmark Advisor: COMPLETE ({duration:.1f}s)")
            log(f"      ├─ Recommended: {result.recommended_benchmark_name}")
            log(f"      ├─ Changes Needed: {len(result.alignment_changes)}")
            log(f"      └─ Key Change: {result.alignment_changes[0].get('change', 'N/A') if result.alignment_changes else 'None'}")

            return result

        except Exception as e:
            # Boundary catch: degrade gracefully to empty defaults and log.
            log(f"      ├─ ⚠️ Error: {str(e)[:50]}")
            return BenchmarkAdvice()

    def _format_comparisons(self, comparisons: list) -> str:
        """Format the top 5 BenchmarkComparison objects as prompt text."""
        lines = []
        for i, c in enumerate(comparisons[:5]):
            b = c.benchmark
            lines.append(f"""
{i + 1}. {b.icon} {b.name}
   - Similarity Score: {c.similarity_score:.2f} (lower = better)
   - Match: {c.overall_match_pct:.0f}%
   - Type Ratio: {b.typography.get('scale_ratio', '?')} (diff: {c.type_ratio_diff:.3f})
   - Base Size: {b.typography.get('base_size', '?')}px (diff: {c.base_size_diff})
   - Spacing: {b.spacing.get('base', '?')}px (diff: {c.spacing_grid_diff})
   - Best For: {', '.join(b.best_for)}""")

        return "\n".join(lines)

    def _parse_response(self, response: str) -> BenchmarkAdvice:
        """Parse the LLM response into a BenchmarkAdvice (empty defaults on failure)."""
        try:
            # Grab the outermost {...} span in case the model added prose.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                # Guard: JSON that parses but is not an object yields defaults.
                if isinstance(data, dict):
                    return BenchmarkAdvice(
                        recommended_benchmark=data.get("recommended_benchmark", ""),
                        recommended_benchmark_name=data.get("recommended_benchmark_name", ""),
                        reasoning=data.get("reasoning", ""),
                        alignment_changes=data.get("alignment_changes", []),
                        pros_of_alignment=data.get("pros_of_alignment", []),
                        cons_of_alignment=data.get("cons_of_alignment", []),
                        alternative_benchmarks=data.get("alternative_benchmarks", []),
                    )
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # non-string response. Fall through to empty defaults.
            pass

        return BenchmarkAdvice()
487
-
488
-
489
- # =============================================================================
490
- # BEST PRACTICES VALIDATOR AGENT
491
- # =============================================================================
492
-
493
class BestPracticesValidatorAgent:
    """
    Validates against design system best practices and prioritizes fixes.

    WHY LLM: Prioritization requires judgment about business impact,
    not just checking boxes.
    """

    PROMPT_TEMPLATE = """You are a design system auditor. Validate these tokens against best practices.

## RULE ENGINE ANALYSIS RESULTS

### Typography
- Detected Ratio: {type_ratio} ({type_consistent})
- Base Size: {base_size}px
- Recommendation: {type_recommendation}

### Accessibility
- Total Colors: {total_colors}
- AA Pass: {aa_pass}
- AA Fail: {aa_fail}
- Failing Colors: {failing_colors}

### Spacing
- Detected Base: {spacing_base}px
- Grid Aligned: {spacing_aligned}%
- Recommendation: {spacing_recommendation}px

### Color Statistics
- Unique Colors: {unique_colors}
- Duplicates: {duplicates}
- Near-Duplicates: {near_duplicates}

## BEST PRACTICES CHECKLIST

1. Type scale uses standard ratio (1.2, 1.25, 1.333, 1.5, 1.618)
2. Type scale is consistent (variance < 0.15)
3. Base font size >= 16px (accessibility)
4. Line height >= 1.5 for body text
5. All interactive colors pass AA (4.5:1)
6. Spacing uses consistent grid (4px or 8px)
7. Limited color palette (< 20 unique semantic colors)
8. No near-duplicate colors

## YOUR TASK

1. Score each practice: pass/warn/fail
2. Calculate overall score (0-100)
3. Identify TOP 3 priority fixes with impact assessment

## OUTPUT FORMAT (JSON only)

{{
  "overall_score": <0-100>,
  "checks": {{
    "type_scale_standard": {{"status": "pass|warn|fail", "note": "..."}},
    "type_scale_consistent": {{"status": "...", "note": "..."}},
    "base_size_accessible": {{"status": "...", "note": "..."}},
    "aa_compliance": {{"status": "...", "note": "..."}},
    "spacing_grid": {{"status": "...", "note": "..."}},
    "color_count": {{"status": "...", "note": "..."}}
  }},
  "priority_fixes": [
    {{
      "rank": 1,
      "issue": "Brand primary fails AA",
      "impact": "high|medium|low",
      "effort": "low|medium|high",
      "action": "Change #06b2c4 → #0891a8"
    }}
  ],
  "passing_practices": ["Base font size", "..."],
  "failing_practices": ["AA compliance", "..."]
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Async LLM client wrapper; must expose ``complete_async(...)``.
        self.hf_client = hf_client

    async def analyze(
        self,
        rule_engine_results: Any,
        log_callback: Optional[Callable] = None,
    ) -> BestPracticesResult:
        """
        Validate against best practices.

        Args:
            rule_engine_results: Results from rule engine (must expose
                .typography, .spacing, .color_stats, .accessibility)
            log_callback: Progress logging function (optional)

        Returns:
            BestPracticesResult with validation; empty defaults on any
            LLM/network/parsing failure (this agent never raises).
        """
        def log(msg: str):
            if log_callback:
                log_callback(msg)

        log("")
        log("   ✅ Best Practices Validator (Qwen 72B)")
        log("      └─ Checking against design system standards...")

        # Extract data from rule engine results.
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility

        failures = [a for a in accessibility if not a.passes_aa_normal]
        failing_colors_str = ", ".join([f"{a.hex_color} ({a.contrast_on_white:.1f}:1)" for a in failures[:5]])

        prompt = self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_consistent="consistent" if typo.is_consistent else f"inconsistent, variance={typo.variance:.2f}",
            # NOTE(review): takes the first entry of sizes_px as the base size —
            # presumably sizes_px is sorted ascending; confirm upstream.
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            type_recommendation=f"{typo.recommendation} ({typo.recommendation_name})",
            total_colors=len(accessibility),
            aa_pass=len(accessibility) - len(failures),
            aa_fail=len(failures),
            failing_colors=failing_colors_str or "None",
            spacing_base=spacing.detected_base,
            spacing_aligned=f"{spacing.alignment_percentage:.0f}",
            spacing_recommendation=spacing.recommendation,
            unique_colors=color_stats.unique_count,
            duplicates=color_stats.duplicate_count,
            near_duplicates=len(color_stats.near_duplicates),
        )

        try:
            start_time = datetime.now()

            response = await self.hf_client.complete_async(
                agent_name="best_practices_validator",
                system_prompt="You are a design system auditor specializing in best practices validation.",
                user_message=prompt,
                max_tokens=800,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log("   ────────────────────────────────────────────────")
            log(f"   ✅ Best Practices: COMPLETE ({duration:.1f}s)")
            log(f"      ├─ Overall Score: {result.overall_score}/100")
            log(f"      ├─ Passing: {len(result.passing_practices)} | Failing: {len(result.failing_practices)}")
            if result.priority_fixes:
                log(f"      └─ Top Fix: {result.priority_fixes[0].get('issue', 'N/A')}")

            return result

        except Exception as e:
            # Boundary catch: degrade gracefully to empty defaults and log.
            log(f"      ├─ ⚠️ Error: {str(e)[:50]}")
            return BestPracticesResult()

    def _parse_response(self, response: str) -> BestPracticesResult:
        """Parse the LLM response into a BestPracticesResult (empty defaults on failure)."""
        try:
            # Grab the outermost {...} span in case the model added prose.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                # Guard: JSON that parses but is not an object yields defaults.
                if isinstance(data, dict):
                    return BestPracticesResult(
                        overall_score=data.get("overall_score", 50),
                        checks=data.get("checks", {}),
                        priority_fixes=data.get("priority_fixes", []),
                        passing_practices=data.get("passing_practices", []),
                        failing_practices=data.get("failing_practices", []),
                    )
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # non-string response. Fall through to empty defaults.
            pass

        return BestPracticesResult()
667
-
668
-
669
- # =============================================================================
670
- # HEAD SYNTHESIZER AGENT
671
- # =============================================================================
672
-
673
class HeadSynthesizerAgent:
    """
    Combines all agent outputs into final recommendations.

    This is the final step that produces actionable output for the user.
    """

    PROMPT_TEMPLATE = """You are a senior design system architect. Synthesize these analysis results into final recommendations.

## RULE ENGINE FACTS

- Type Scale: {type_ratio} ({type_status})
- Base Size: {base_size}px
- AA Failures: {aa_failures}
- Spacing Grid: {spacing_status}
- Unique Colors: {unique_colors}
- Consistency Score: {consistency_score}/100

## BENCHMARK COMPARISON

Closest Match: {closest_benchmark}
Match Percentage: {match_pct}%
Recommended Changes: {benchmark_changes}

## BRAND IDENTIFICATION

- Brand Primary: {brand_primary}
- Brand Secondary: {brand_secondary}
- Palette Cohesion: {cohesion_score}/10

## BEST PRACTICES VALIDATION

Overall Score: {best_practices_score}/100
Priority Fixes: {priority_fixes}

## ACCESSIBILITY FIXES NEEDED

{accessibility_fixes}

## YOUR TASK

Synthesize ALL the above into:
1. Executive Summary (2-3 sentences)
2. Overall Scores
3. Top 3 Priority Actions (with effort estimates)
4. Specific Color Recommendations (with accept/reject defaults)
5. Type Scale Recommendation
6. Spacing Recommendation

## OUTPUT FORMAT (JSON only)

{{
  "executive_summary": "Your design system scores X/100. Key issues are Y. Priority action is Z.",
  "scores": {{
    "overall": <0-100>,
    "accessibility": <0-100>,
    "consistency": <0-100>,
    "organization": <0-100>
  }},
  "benchmark_fit": {{
    "closest": "<name>",
    "similarity": "<X%>",
    "recommendation": "Align type scale to 1.25"
  }},
  "brand_analysis": {{
    "primary": "#hex",
    "secondary": "#hex",
    "cohesion": <1-10>
  }},
  "top_3_actions": [
    {{"action": "Fix brand color AA", "impact": "high", "effort": "5 min", "details": "Change #X to #Y"}}
  ],
  "color_recommendations": [
    {{"role": "brand.primary", "current": "#06b2c4", "suggested": "#0891a8", "reason": "AA compliance", "accept": true}}
  ],
  "type_scale_recommendation": {{
    "current_ratio": 1.18,
    "recommended_ratio": 1.25,
    "reason": "Align with industry standard"
  }},
  "spacing_recommendation": {{
    "current": "mixed",
    "recommended": "8px",
    "reason": "Consistent grid improves maintainability"
  }}
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Async LLM client wrapper; must expose ``complete_async(...)``.
        self.hf_client = hf_client

    async def synthesize(
        self,
        rule_engine_results: Any,
        benchmark_comparisons: list,
        brand_identification: BrandIdentification,
        benchmark_advice: BenchmarkAdvice,
        best_practices: BestPracticesResult,
        log_callback: Optional[Callable] = None,
    ) -> HeadSynthesis:
        """
        Synthesize all results into final recommendations.

        Args:
            rule_engine_results: Rule-engine output (.typography, .spacing,
                .color_stats, .accessibility, .consistency_score)
            benchmark_comparisons: BenchmarkComparison objects, best match first
            brand_identification: Output of BrandIdentifierAgent
            benchmark_advice: Output of BenchmarkAdvisorAgent
            best_practices: Output of BestPracticesValidatorAgent
            log_callback: Progress logging function (optional)

        Returns:
            HeadSynthesis; empty defaults on any LLM/network/parsing failure
            (this agent never raises).
        """
        def log(msg: str):
            if log_callback:
                log_callback(msg)

        log("")
        log("═" * 60)
        log("🧠 LAYER 4: HEAD SYNTHESIZER")
        log("═" * 60)
        log("")
        log("   Combining: Rule Engine + Benchmarks + Brand + Best Practices...")

        # Extract rule-engine facts.
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility

        failures = [a for a in accessibility if not a.passes_aa_normal]
        # Only failures that already carry a suggested fix are surfaced (max 5).
        aa_fixes_str = "\n".join([
            f"- {a.name}: {a.hex_color} ({a.contrast_on_white:.1f}:1) → {a.suggested_fix} ({a.suggested_fix_contrast:.1f}:1)"
            for a in failures[:5] if a.suggested_fix
        ])

        # Comparisons arrive best-match-first; handle the empty case.
        closest = benchmark_comparisons[0] if benchmark_comparisons else None

        prompt = self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_status="consistent" if typo.is_consistent else "inconsistent",
            # NOTE(review): assumes sizes_px[0] is the base size — confirm sorted.
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            aa_failures=len(failures),
            spacing_status=f"{spacing.detected_base}px, {spacing.alignment_percentage:.0f}% aligned",
            unique_colors=color_stats.unique_count,
            consistency_score=rule_engine_results.consistency_score,
            closest_benchmark=closest.benchmark.name if closest else "Unknown",
            match_pct=f"{closest.overall_match_pct:.0f}" if closest else "0",
            benchmark_changes="; ".join([c.get("change", "") for c in benchmark_advice.alignment_changes[:3]]),
            brand_primary=brand_identification.brand_primary.get("color", "Unknown"),
            brand_secondary=brand_identification.brand_secondary.get("color", "Unknown"),
            cohesion_score=brand_identification.cohesion_score,
            best_practices_score=best_practices.overall_score,
            priority_fixes="; ".join([f.get("issue", "") for f in best_practices.priority_fixes[:3]]),
            accessibility_fixes=aa_fixes_str or "None needed",
        )

        try:
            start_time = datetime.now()

            response = await self.hf_client.complete_async(
                agent_name="head_synthesizer",
                system_prompt="You are a senior design system architect specializing in synthesis and recommendations.",
                user_message=prompt,
                max_tokens=1000,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log("")
            log(f"   ✅ HEAD Synthesizer: COMPLETE ({duration:.1f}s)")
            log("")

            return result

        except Exception as e:
            # Boundary catch: degrade gracefully to empty defaults and log.
            log(f"      ├─ ⚠️ Error: {str(e)[:50]}")
            return HeadSynthesis()

    def _parse_response(self, response: str) -> HeadSynthesis:
        """Parse the LLM response into a HeadSynthesis (empty defaults on failure)."""
        try:
            # Grab the outermost {...} span in case the model added prose.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                # Guard: JSON that parses but is not an object yields defaults.
                if isinstance(data, dict):
                    return HeadSynthesis(
                        executive_summary=data.get("executive_summary", ""),
                        scores=data.get("scores", {}),
                        benchmark_fit=data.get("benchmark_fit", {}),
                        brand_analysis=data.get("brand_analysis", {}),
                        top_3_actions=data.get("top_3_actions", []),
                        color_recommendations=data.get("color_recommendations", []),
                        type_scale_recommendation=data.get("type_scale_recommendation", {}),
                        spacing_recommendation=data.get("spacing_recommendation", {}),
                    )
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # non-string response. Fall through to empty defaults.
            pass

        return HeadSynthesis()