riazmo committed on
Commit
88ef75f
·
verified ·
1 Parent(s): 8b2bf32

Delete core/llm_agents.py

Browse files
Files changed (1) hide show
  1. core/llm_agents.py +0 -905
core/llm_agents.py DELETED
@@ -1,905 +0,0 @@
1
- """
2
- Stage 2 LLM Agents — Specialized Analysis Tasks
3
- =================================================
4
-
5
- These agents handle tasks that REQUIRE LLM reasoning:
6
- - Brand Identifier: Identify brand colors from usage context
7
- - Benchmark Advisor: Recommend best-fit design system
8
- - Best Practices Validator: Prioritize fixes by business impact
9
- - HEAD Synthesizer: Combine all outputs into final recommendations
10
-
11
- Each agent has a focused prompt for its specific task.
12
- """
13
-
14
- import json
15
- import re
16
- from dataclasses import dataclass, field
17
- from typing import Optional, Callable, Any
18
- from datetime import datetime
19
-
20
-
21
- # =============================================================================
22
- # DATA CLASSES
23
- # =============================================================================
24
-
25
@dataclass
class BrandIdentification:
    """Output of the Brand Identifier agent.

    Carries the brand colors inferred from usage context, an assessment of
    the palette strategy and cohesion, plus suggested semantic token names.
    """
    # Each brand slot is a dict shaped like {color, confidence, reasoning, usage_count}.
    brand_primary: dict = field(default_factory=dict)
    brand_secondary: dict = field(default_factory=dict)
    brand_accent: dict = field(default_factory=dict)

    # One of: complementary, analogous, triadic, monochromatic, random.
    palette_strategy: str = ""
    cohesion_score: int = 5  # 1 (clashing) .. 10 (highly cohesive)
    cohesion_notes: str = ""

    # Maps hex color -> suggested semantic name (e.g. "#06b2c4" -> "brand.primary").
    semantic_names: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize every field to a plain dict; keys match the field names."""
        exported = (
            "brand_primary",
            "brand_secondary",
            "brand_accent",
            "palette_strategy",
            "cohesion_score",
            "cohesion_notes",
            "semantic_names",
        )
        return {key: getattr(self, key) for key in exported}
51
-
52
-
53
@dataclass
class BenchmarkAdvice:
    """Output of the Benchmark Advisor agent."""
    recommended_benchmark: str = ""
    recommended_benchmark_name: str = ""
    reasoning: str = ""

    # Each entry: {change, from, to, effort}.
    alignment_changes: list = field(default_factory=list)

    pros_of_alignment: list = field(default_factory=list)
    cons_of_alignment: list = field(default_factory=list)

    # Each entry: {name, reason}.
    alternative_benchmarks: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize to a plain dict.

        Note: the pros/cons/alternatives fields are exported under the
        shortened keys "pros", "cons" and "alternatives".
        """
        return dict(
            recommended_benchmark=self.recommended_benchmark,
            recommended_benchmark_name=self.recommended_benchmark_name,
            reasoning=self.reasoning,
            alignment_changes=self.alignment_changes,
            pros=self.pros_of_alignment,
            cons=self.cons_of_alignment,
            alternatives=self.alternative_benchmarks,
        )
79
-
80
-
81
@dataclass
class BestPracticesResult:
    """Output of the Best Practices Validator agent."""
    overall_score: int = 50  # 0-100

    # {check_name: {"status": "pass"|"warn"|"fail", "note": str}}
    checks: dict = field(default_factory=dict)

    # Each entry: {rank, issue, impact, effort, action}.
    priority_fixes: list = field(default_factory=list)

    passing_practices: list = field(default_factory=list)
    failing_practices: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize to a plain dict.

        Note: passing_practices/failing_practices are exported under the
        shortened keys "passing" and "failing".
        """
        return dict(
            overall_score=self.overall_score,
            checks=self.checks,
            priority_fixes=self.priority_fixes,
            passing=self.passing_practices,
            failing=self.failing_practices,
        )
103
-
104
-
105
@dataclass
class HeadSynthesis:
    """Final synthesized output from the HEAD agent."""
    executive_summary: str = ""

    # {overall, accessibility, consistency, organization}
    scores: dict = field(default_factory=dict)

    # {closest, similarity, recommendation}
    benchmark_fit: dict = field(default_factory=dict)

    # {primary, secondary, cohesion}
    brand_analysis: dict = field(default_factory=dict)

    # Each entry: {action, impact, effort, details}.
    top_3_actions: list = field(default_factory=list)

    # Each entry: {role, current, suggested, reason, accept}.
    color_recommendations: list = field(default_factory=list)

    type_scale_recommendation: dict = field(default_factory=dict)
    spacing_recommendation: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize every field to a plain dict; keys match the field names."""
        exported = (
            "executive_summary",
            "scores",
            "benchmark_fit",
            "brand_analysis",
            "top_3_actions",
            "color_recommendations",
            "type_scale_recommendation",
            "spacing_recommendation",
        )
        return {key: getattr(self, key) for key in exported}
139
-
140
-
141
- # =============================================================================
142
- # BRAND IDENTIFIER AGENT
143
- # =============================================================================
144
-
145
class BrandIdentifierAgent:
    """
    Identifies brand colors from usage context.

    WHY LLM: Requires understanding context (33 buttons = likely brand primary),
    not just color math.
    """

    # Rendered via str.format(); literal JSON braces are escaped as {{ }}.
    PROMPT_TEMPLATE = """You are a senior design system analyst. Identify the brand colors from this color usage data.

## COLOR DATA WITH USAGE CONTEXT

{color_data}

## SEMANTIC ANALYSIS (from CSS properties)

{semantic_analysis}

## YOUR TASK

1. **Identify Brand Colors**:
   - Brand Primary: The main action/CTA color (highest visibility)
   - Brand Secondary: Supporting brand color
   - Brand Accent: Highlight color for emphasis

2. **Assess Palette Strategy**:
   - Is it complementary, analogous, triadic, monochromatic, or random?

3. **Rate Cohesion** (1-10):
   - Do the colors work together?
   - Is there a clear color story?

4. **Suggest Semantic Names** for top 10 most-used colors

## OUTPUT FORMAT (JSON only)

{{
  "brand_primary": {{
    "color": "#hex",
    "confidence": "high|medium|low",
    "reasoning": "Why this is brand primary",
    "usage_count": <number>
  }},
  "brand_secondary": {{
    "color": "#hex",
    "confidence": "high|medium|low",
    "reasoning": "..."
  }},
  "brand_accent": {{
    "color": "#hex or null",
    "confidence": "...",
    "reasoning": "..."
  }},
  "palette_strategy": "complementary|analogous|triadic|monochromatic|random",
  "cohesion_score": <1-10>,
  "cohesion_notes": "Assessment of how well colors work together",
  "semantic_names": {{
    "#hex1": "brand.primary",
    "#hex2": "text.primary",
    "#hex3": "background.primary"
  }}
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Client exposing an awaitable complete_async(agent_name, system_prompt,
        # user_message, max_tokens, json_mode) — defined elsewhere in the project.
        self.hf_client = hf_client

    async def analyze(
        self,
        color_tokens: dict,
        semantic_analysis: dict,
        log_callback: Optional[Callable] = None,
    ) -> BrandIdentification:
        """
        Identify brand colors from usage context.

        Args:
            color_tokens: Dict of color tokens with usage data
            semantic_analysis: Semantic categorization from Stage 1
            log_callback: Progress logging function (called with one str per line)

        Returns:
            BrandIdentification with identified colors; an all-defaults
            BrandIdentification on any API/parse error (never raises).
        """
        def log(msg: str):
            # No-op when no callback was supplied.
            if log_callback:
                log_callback(msg)

        log(" 🎨 Brand Identifier (Llama 70B)")
        log(" └─ Analyzing color context and usage patterns...")

        # Format color data
        color_data = self._format_color_data(color_tokens)
        semantic_str = self._format_semantic_analysis(semantic_analysis)

        prompt = self.PROMPT_TEMPLATE.format(
            color_data=color_data,
            semantic_analysis=semantic_str,
        )

        try:
            start_time = datetime.now()

            # Use the correct method signature
            response = await self.hf_client.complete_async(
                agent_name="brand_identifier",
                system_prompt="You are a senior design system analyst specializing in brand color identification.",
                user_message=prompt,
                max_tokens=800,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            # Parse response
            result = self._parse_response(response)

            log(f" ────────────────────────────────────────────────")
            log(f" 🎨 Brand Identifier: COMPLETE ({duration:.1f}s)")
            log(f" ├─ Brand Primary: {result.brand_primary.get('color', '?')} ({result.brand_primary.get('confidence', '?')} confidence)")
            log(f" ├─ Brand Secondary: {result.brand_secondary.get('color', '?')}")
            log(f" ├─ Palette Strategy: {result.palette_strategy}")
            log(f" └─ Cohesion Score: {result.cohesion_score}/10")

            return result

        except Exception as e:
            error_msg = str(e)
            # Parse common HF errors so the log line is actionable.
            if "Rate limit" in error_msg or "429" in error_msg:
                log(f" ⚠️ Rate limited - HF free tier exhausted")
            elif "Request ID:" in error_msg:
                log(f" ⚠️ HF API error (check token/model)")
            else:
                log(f" ⚠️ Error: {error_msg[:60]}")
            return BrandIdentification()

    def _format_color_data(self, color_tokens: dict) -> str:
        """Format color tokens into one prompt bullet per color.

        Accepts either dict-shaped tokens or attribute-style token objects;
        tokens with no resolvable hex value are skipped.
        """
        lines = []
        # Cap at 30 tokens to keep the prompt within the token budget.
        for name, token in list(color_tokens.items())[:30]:
            if isinstance(token, dict):
                hex_val = token.get("value", token.get("hex", ""))
                usage = token.get("usage_count", token.get("count", 1))
                context = token.get("context", token.get("css_property", ""))
            else:
                # Object-style token (e.g. a dataclass) — read attributes instead.
                hex_val = getattr(token, "value", "")
                usage = getattr(token, "usage_count", 1)
                context = getattr(token, "context", "")

            if hex_val:
                lines.append(f"- {hex_val}: used {usage}x, context: {context or 'unknown'}")

        return "\n".join(lines) if lines else "No color data available"

    def _format_semantic_analysis(self, semantic: dict) -> str:
        """Format semantic analysis into prompt bullets, one per category.

        Tolerates several value shapes (lists of colors, flat color dicts,
        nested role dicts, scalars) and never raises — formatting errors are
        reported inline in the returned string.
        """
        if not semantic:
            return "No semantic analysis available"

        lines = []
        try:
            for category, value in semantic.items():
                if not value:
                    continue

                if isinstance(value, list):
                    # List of colors — show at most the first 5.
                    color_list = []
                    for c in value[:5]:
                        if isinstance(c, dict):
                            color_list.append(c.get("hex", c.get("value", str(c))))
                        else:
                            color_list.append(str(c))
                    lines.append(f"- {category}: {', '.join(color_list)}")

                elif isinstance(value, dict):
                    # Could be a nested dict of sub-roles → color dicts
                    # e.g. {"primary": {"hex": "#007bff", ...}, "secondary": {...}}
                    # or a flat color dict {"hex": "#...", "confidence": "..."}
                    # or a summary dict {"total_colors_analyzed": 50, ...}
                    if "hex" in value:
                        # Flat color dict
                        lines.append(f"- {category}: {value['hex']}")
                    else:
                        # Nested dict — iterate sub-roles (first 5 only).
                        sub_items = []
                        for sub_role, sub_val in list(value.items())[:5]:
                            if isinstance(sub_val, dict) and "hex" in sub_val:
                                sub_items.append(f"{sub_role}={sub_val['hex']}")
                            elif isinstance(sub_val, (str, int, float, bool)):
                                sub_items.append(f"{sub_role}={sub_val}")
                        if sub_items:
                            lines.append(f"- {category}: {', '.join(sub_items)}")
                else:
                    # Scalar value — emit as-is.
                    lines.append(f"- {category}: {value}")
        except Exception as e:
            return f"Error formatting semantic analysis: {str(e)[:50]}"

        return "\n".join(lines) if lines else "No semantic analysis available"

    def _parse_response(self, response: str) -> BrandIdentification:
        """Parse the LLM response into a BrandIdentification.

        Extracts the outermost {...} span (the model may wrap the JSON in
        prose); returns an all-defaults instance if no valid JSON is found.
        """
        try:
            # Greedy match: first "{" to last "}" in the response.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                return BrandIdentification(
                    brand_primary=data.get("brand_primary", {}),
                    brand_secondary=data.get("brand_secondary", {}),
                    brand_accent=data.get("brand_accent", {}),
                    palette_strategy=data.get("palette_strategy", "unknown"),
                    cohesion_score=data.get("cohesion_score", 5),
                    cohesion_notes=data.get("cohesion_notes", ""),
                    semantic_names=data.get("semantic_names", {}),
                )
        except Exception:
            # Malformed JSON — fall through to the empty default.
            pass

        return BrandIdentification()
366
-
367
-
368
- # =============================================================================
369
- # BENCHMARK ADVISOR AGENT
370
- # =============================================================================
371
-
372
class BenchmarkAdvisorAgent:
    """
    Recommends best-fit design system based on comparison data.

    WHY LLM: Requires reasoning about trade-offs and use-case fit,
    not just similarity scores.
    """

    # Rendered via str.format(); literal JSON braces are escaped as {{ }}.
    PROMPT_TEMPLATE = """You are a senior design system consultant. Recommend the best design system alignment.

## USER'S CURRENT VALUES

- Type Scale Ratio: {user_ratio}
- Base Font Size: {user_base}px
- Spacing Grid: {user_spacing}px

## BENCHMARK COMPARISON

{benchmark_comparison}

## YOUR TASK

1. **Recommend Best Fit**: Which design system should they align with?
2. **Explain Why**: Consider similarity scores AND use-case fit
3. **List Changes Needed**: What would they need to change to align?
4. **Pros/Cons**: Benefits and drawbacks of alignment

## OUTPUT FORMAT (JSON only)

{{
  "recommended_benchmark": "<system_key>",
  "recommended_benchmark_name": "<full name>",
  "reasoning": "Why this is the best fit for their use case",
  "alignment_changes": [
    {{"change": "Type scale", "from": "1.18", "to": "1.25", "effort": "medium"}},
    {{"change": "Spacing grid", "from": "mixed", "to": "4px", "effort": "high"}}
  ],
  "pros_of_alignment": [
    "Familiar patterns for users",
    "Well-tested accessibility"
  ],
  "cons_of_alignment": [
    "May lose brand uniqueness"
  ],
  "alternative_benchmarks": [
    {{"name": "Material Design 3", "reason": "Good for Android-first products"}}
  ]
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Client exposing an awaitable complete_async(...) — defined elsewhere.
        self.hf_client = hf_client

    async def analyze(
        self,
        user_ratio: float,
        user_base: int,
        user_spacing: int,
        benchmark_comparisons: list,
        log_callback: Optional[Callable] = None,
    ) -> BenchmarkAdvice:
        """
        Recommend best-fit design system.

        Args:
            user_ratio: User's detected type scale ratio
            user_base: User's base font size
            user_spacing: User's spacing grid base
            benchmark_comparisons: List of BenchmarkComparison objects
                (each exposes .benchmark, .similarity_score, .overall_match_pct,
                .type_ratio_diff, .base_size_diff, .spacing_grid_diff)
            log_callback: Progress logging function (one str per line)

        Returns:
            BenchmarkAdvice with recommendations; an all-defaults
            BenchmarkAdvice on any API/parse error (never raises).
        """
        def log(msg: str):
            # No-op when no callback was supplied.
            if log_callback:
                log_callback(msg)

        log("")
        log(" 🏢 Benchmark Advisor (Qwen 72B)")
        log(" └─ Evaluating benchmark fit for your use case...")

        # Format comparison data
        comparison_str = self._format_comparisons(benchmark_comparisons)

        prompt = self.PROMPT_TEMPLATE.format(
            user_ratio=user_ratio,
            user_base=user_base,
            user_spacing=user_spacing,
            benchmark_comparison=comparison_str,
        )

        try:
            start_time = datetime.now()

            response = await self.hf_client.complete_async(
                agent_name="benchmark_advisor",
                system_prompt="You are a senior design system consultant specializing in design system architecture.",
                user_message=prompt,
                max_tokens=700,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log(f" ────────────────────────────────────────────────")
            log(f" 🏢 Benchmark Advisor: COMPLETE ({duration:.1f}s)")
            log(f" ├─ Recommended: {result.recommended_benchmark_name}")
            log(f" ├─ Changes Needed: {len(result.alignment_changes)}")
            log(f" └─ Key Change: {result.alignment_changes[0].get('change', 'N/A') if result.alignment_changes else 'None'}")

            return result

        except Exception as e:
            log(f" ├─ ⚠️ Error: {str(e)[:50]}")
            return BenchmarkAdvice()

    def _format_comparisons(self, comparisons: list) -> str:
        """Format benchmark comparisons for the prompt (first 5 only)."""
        lines = []
        for i, c in enumerate(comparisons[:5]):
            # c.benchmark is a benchmark descriptor exposing .icon, .name,
            # .typography, .spacing and .best_for — defined elsewhere.
            b = c.benchmark
            lines.append(f"""
{i+1}. {b.icon} {b.name}
   - Similarity Score: {c.similarity_score:.2f} (lower = better)
   - Match: {c.overall_match_pct:.0f}%
   - Type Ratio: {b.typography.get('scale_ratio', '?')} (diff: {c.type_ratio_diff:.3f})
   - Base Size: {b.typography.get('base_size', '?')}px (diff: {c.base_size_diff})
   - Spacing: {b.spacing.get('base', '?')}px (diff: {c.spacing_grid_diff})
   - Best For: {', '.join(b.best_for)}""")

        return "\n".join(lines)

    def _parse_response(self, response: str) -> BenchmarkAdvice:
        """Parse the LLM response into a BenchmarkAdvice.

        Extracts the outermost {...} span; returns an all-defaults instance
        if no valid JSON is found.
        """
        try:
            # Greedy match: first "{" to last "}" in the response.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                return BenchmarkAdvice(
                    recommended_benchmark=data.get("recommended_benchmark", ""),
                    recommended_benchmark_name=data.get("recommended_benchmark_name", ""),
                    reasoning=data.get("reasoning", ""),
                    alignment_changes=data.get("alignment_changes", []),
                    pros_of_alignment=data.get("pros_of_alignment", []),
                    cons_of_alignment=data.get("cons_of_alignment", []),
                    alternative_benchmarks=data.get("alternative_benchmarks", []),
                )
        except Exception:
            # Malformed JSON — fall through to the empty default.
            pass

        return BenchmarkAdvice()
527
-
528
-
529
- # =============================================================================
530
- # BEST PRACTICES VALIDATOR AGENT
531
- # =============================================================================
532
-
533
class BestPracticesValidatorAgent:
    """
    Validates against design system best practices and prioritizes fixes.

    WHY LLM: Prioritization requires judgment about business impact,
    not just checking boxes.
    """

    # Rendered via str.format(); literal JSON braces are escaped as {{ }}.
    PROMPT_TEMPLATE = """You are a design system auditor. Validate these tokens against best practices.

## RULE ENGINE ANALYSIS RESULTS

### Typography
- Detected Ratio: {type_ratio} ({type_consistent})
- Base Size: {base_size}px
- Recommendation: {type_recommendation}

### Accessibility
- Total Colors: {total_colors}
- AA Pass: {aa_pass}
- AA Fail: {aa_fail}
- Failing Colors: {failing_colors}

### Spacing
- Detected Base: {spacing_base}px
- Grid Aligned: {spacing_aligned}%
- Recommendation: {spacing_recommendation}px

### Color Statistics
- Unique Colors: {unique_colors}
- Duplicates: {duplicates}
- Near-Duplicates: {near_duplicates}

## BEST PRACTICES CHECKLIST

1. Type scale uses standard ratio (1.2, 1.25, 1.333, 1.5, 1.618)
2. Type scale is consistent (variance < 0.15)
3. Base font size >= 16px (accessibility)
4. Line height >= 1.5 for body text
5. All interactive colors pass AA (4.5:1)
6. Spacing uses consistent grid (4px or 8px)
7. Limited color palette (< 20 unique semantic colors)
8. No near-duplicate colors

## YOUR TASK

1. Score each practice: pass/warn/fail
2. Calculate overall score (0-100)
3. Identify TOP 3 priority fixes with impact assessment

## OUTPUT FORMAT (JSON only)

{{
  "overall_score": <0-100>,
  "checks": {{
    "type_scale_standard": {{"status": "pass|warn|fail", "note": "..."}},
    "type_scale_consistent": {{"status": "...", "note": "..."}},
    "base_size_accessible": {{"status": "...", "note": "..."}},
    "aa_compliance": {{"status": "...", "note": "..."}},
    "spacing_grid": {{"status": "...", "note": "..."}},
    "color_count": {{"status": "...", "note": "..."}}
  }},
  "priority_fixes": [
    {{
      "rank": 1,
      "issue": "Brand primary fails AA",
      "impact": "high|medium|low",
      "effort": "low|medium|high",
      "action": "Change #06b2c4 → #0891a8"
    }}
  ],
  "passing_practices": ["Base font size", "..."],
  "failing_practices": ["AA compliance", "..."]
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Client exposing an awaitable complete_async(...) — defined elsewhere.
        self.hf_client = hf_client

    async def analyze(
        self,
        rule_engine_results: Any,
        log_callback: Optional[Callable] = None,
    ) -> BestPracticesResult:
        """
        Validate against best practices.

        Args:
            rule_engine_results: Results from the rule engine; must expose
                .typography, .spacing, .color_stats and .accessibility
                (attribute shapes per the rule engine module)
            log_callback: Progress logging function (one str per line)

        Returns:
            BestPracticesResult with validation; an all-defaults
            BestPracticesResult on any API/parse error (never raises).
        """
        def log(msg: str):
            # No-op when no callback was supplied.
            if log_callback:
                log_callback(msg)

        log("")
        log(" ✅ Best Practices Validator (Qwen 72B)")
        log(" └─ Checking against design system standards...")

        # Extract data from rule engine
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility

        # Colors failing WCAG AA for normal text; only the first 5 are listed.
        failures = [a for a in accessibility if not a.passes_aa_normal]
        failing_colors_str = ", ".join([f"{a.hex_color} ({a.contrast_on_white:.1f}:1)" for a in failures[:5]])

        prompt = self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_consistent="consistent" if typo.is_consistent else f"inconsistent, variance={typo.variance:.2f}",
            # First detected size is used as the base; falls back to 16px.
            # NOTE(review): assumes sizes_px[0] is the base size — confirm ordering upstream.
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            type_recommendation=f"{typo.recommendation} ({typo.recommendation_name})",
            total_colors=len(accessibility),
            aa_pass=len(accessibility) - len(failures),
            aa_fail=len(failures),
            failing_colors=failing_colors_str or "None",
            spacing_base=spacing.detected_base,
            spacing_aligned=f"{spacing.alignment_percentage:.0f}",
            spacing_recommendation=spacing.recommendation,
            unique_colors=color_stats.unique_count,
            duplicates=color_stats.duplicate_count,
            near_duplicates=len(color_stats.near_duplicates),
        )

        try:
            start_time = datetime.now()

            response = await self.hf_client.complete_async(
                agent_name="best_practices_validator",
                system_prompt="You are a design system auditor specializing in best practices validation.",
                user_message=prompt,
                max_tokens=800,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log(f" ────────────────────────────────────────────────")
            log(f" ✅ Best Practices: COMPLETE ({duration:.1f}s)")
            log(f" ├─ Overall Score: {result.overall_score}/100")
            log(f" ├─ Passing: {len(result.passing_practices)} | Failing: {len(result.failing_practices)}")
            if result.priority_fixes:
                log(f" └─ Top Fix: {result.priority_fixes[0].get('issue', 'N/A')}")

            return result

        except Exception as e:
            log(f" ├─ ⚠️ Error: {str(e)[:50]}")
            return BestPracticesResult()

    def _parse_response(self, response: str) -> BestPracticesResult:
        """Parse the LLM response into a BestPracticesResult.

        Extracts the outermost {...} span; returns an all-defaults instance
        if no valid JSON is found.
        """
        try:
            # Greedy match: first "{" to last "}" in the response.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                return BestPracticesResult(
                    overall_score=data.get("overall_score", 50),
                    checks=data.get("checks", {}),
                    priority_fixes=data.get("priority_fixes", []),
                    passing_practices=data.get("passing_practices", []),
                    failing_practices=data.get("failing_practices", []),
                )
        except Exception:
            # Malformed JSON — fall through to the empty default.
            pass

        return BestPracticesResult()
707
-
708
-
709
- # =============================================================================
710
- # HEAD SYNTHESIZER AGENT
711
- # =============================================================================
712
-
713
class HeadSynthesizerAgent:
    """
    Combines all agent outputs into final recommendations.

    This is the final step that produces actionable output for the user.
    """

    # Rendered via str.format(); literal JSON braces are escaped as {{ }}.
    PROMPT_TEMPLATE = """You are a senior design system architect. Synthesize these analysis results into final recommendations.

## RULE ENGINE FACTS

- Type Scale: {type_ratio} ({type_status})
- Base Size: {base_size}px
- AA Failures: {aa_failures}
- Spacing Grid: {spacing_status}
- Unique Colors: {unique_colors}
- Consistency Score: {consistency_score}/100

## BENCHMARK COMPARISON

Closest Match: {closest_benchmark}
Match Percentage: {match_pct}%
Recommended Changes: {benchmark_changes}

## BRAND IDENTIFICATION

- Brand Primary: {brand_primary}
- Brand Secondary: {brand_secondary}
- Palette Cohesion: {cohesion_score}/10

## BEST PRACTICES VALIDATION

Overall Score: {best_practices_score}/100
Priority Fixes: {priority_fixes}

## ACCESSIBILITY FIXES NEEDED

{accessibility_fixes}

## YOUR TASK

Synthesize ALL the above into:
1. Executive Summary (2-3 sentences)
2. Overall Scores
3. Top 3 Priority Actions (with effort estimates)
4. Specific Color Recommendations (with accept/reject defaults)
5. Type Scale Recommendation
6. Spacing Recommendation

## OUTPUT FORMAT (JSON only)

{{
  "executive_summary": "Your design system scores X/100. Key issues are Y. Priority action is Z.",
  "scores": {{
    "overall": <0-100>,
    "accessibility": <0-100>,
    "consistency": <0-100>,
    "organization": <0-100>
  }},
  "benchmark_fit": {{
    "closest": "<name>",
    "similarity": "<X%>",
    "recommendation": "Align type scale to 1.25"
  }},
  "brand_analysis": {{
    "primary": "#hex",
    "secondary": "#hex",
    "cohesion": <1-10>
  }},
  "top_3_actions": [
    {{"action": "Fix brand color AA", "impact": "high", "effort": "5 min", "details": "Change #X to #Y"}}
  ],
  "color_recommendations": [
    {{"role": "brand.primary", "current": "#06b2c4", "suggested": "#0891a8", "reason": "AA compliance", "accept": true}}
  ],
  "type_scale_recommendation": {{
    "current_ratio": 1.18,
    "recommended_ratio": 1.25,
    "reason": "Align with industry standard"
  }},
  "spacing_recommendation": {{
    "current": "mixed",
    "recommended": "8px",
    "reason": "Consistent grid improves maintainability"
  }}
}}

Return ONLY valid JSON."""

    def __init__(self, hf_client):
        # Client exposing an awaitable complete_async(...) — defined elsewhere.
        self.hf_client = hf_client

    async def synthesize(
        self,
        rule_engine_results: Any,
        benchmark_comparisons: list,
        brand_identification: BrandIdentification,
        benchmark_advice: BenchmarkAdvice,
        best_practices: BestPracticesResult,
        log_callback: Optional[Callable] = None,
    ) -> HeadSynthesis:
        """
        Synthesize all results into final recommendations.

        Args:
            rule_engine_results: Rule engine output exposing .typography,
                .spacing, .color_stats, .accessibility, .consistency_score
            benchmark_comparisons: BenchmarkComparison list; the first entry
                is treated as the closest match
            brand_identification: Output of BrandIdentifierAgent
            benchmark_advice: Output of BenchmarkAdvisorAgent
            best_practices: Output of BestPracticesValidatorAgent
            log_callback: Progress logging function (one str per line)

        Returns:
            HeadSynthesis with final recommendations; an all-defaults
            HeadSynthesis on any API/parse error (never raises).
        """
        def log(msg: str):
            # No-op when no callback was supplied.
            if log_callback:
                log_callback(msg)

        log("")
        log("═" * 60)
        log("🧠 LAYER 4: HEAD SYNTHESIZER")
        log("═" * 60)
        log("")
        log(" Combining: Rule Engine + Benchmarks + Brand + Best Practices...")

        # Extract data
        typo = rule_engine_results.typography
        spacing = rule_engine_results.spacing
        color_stats = rule_engine_results.color_stats
        accessibility = rule_engine_results.accessibility

        # AA failures that already carry a suggested replacement color (max 5).
        failures = [a for a in accessibility if not a.passes_aa_normal]
        aa_fixes_str = "\n".join([
            f"- {a.name}: {a.hex_color} ({a.contrast_on_white:.1f}:1) → {a.suggested_fix} ({a.suggested_fix_contrast:.1f}:1)"
            for a in failures[:5] if a.suggested_fix
        ])

        closest = benchmark_comparisons[0] if benchmark_comparisons else None

        prompt = self.PROMPT_TEMPLATE.format(
            type_ratio=f"{typo.detected_ratio:.3f}",
            type_status="consistent" if typo.is_consistent else "inconsistent",
            # NOTE(review): assumes sizes_px[0] is the base size — confirm ordering upstream.
            base_size=typo.sizes_px[0] if typo.sizes_px else 16,
            aa_failures=len(failures),
            spacing_status=f"{spacing.detected_base}px, {spacing.alignment_percentage:.0f}% aligned",
            unique_colors=color_stats.unique_count,
            consistency_score=rule_engine_results.consistency_score,
            closest_benchmark=closest.benchmark.name if closest else "Unknown",
            match_pct=f"{closest.overall_match_pct:.0f}" if closest else "0",
            benchmark_changes="; ".join([c.get("change", "") for c in benchmark_advice.alignment_changes[:3]]),
            brand_primary=brand_identification.brand_primary.get("color", "Unknown"),
            brand_secondary=brand_identification.brand_secondary.get("color", "Unknown"),
            cohesion_score=brand_identification.cohesion_score,
            best_practices_score=best_practices.overall_score,
            priority_fixes="; ".join([f.get("issue", "") for f in best_practices.priority_fixes[:3]]),
            accessibility_fixes=aa_fixes_str or "None needed",
        )

        try:
            start_time = datetime.now()

            response = await self.hf_client.complete_async(
                agent_name="head_synthesizer",
                system_prompt="You are a senior design system architect specializing in synthesis and recommendations.",
                user_message=prompt,
                max_tokens=1000,
                json_mode=True,
            )

            duration = (datetime.now() - start_time).total_seconds()

            result = self._parse_response(response)

            log("")
            log(f" ✅ HEAD Synthesizer: COMPLETE ({duration:.1f}s)")
            log("")

            return result

        except Exception as e:
            log(f" ├─ ⚠️ Error: {str(e)[:50]}")
            return HeadSynthesis()

    def _parse_response(self, response: str) -> HeadSynthesis:
        """Parse the LLM response into a HeadSynthesis.

        Extracts the outermost {...} span; returns an all-defaults instance
        if no valid JSON is found.
        """
        try:
            # Greedy match: first "{" to last "}" in the response.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                return HeadSynthesis(
                    executive_summary=data.get("executive_summary", ""),
                    scores=data.get("scores", {}),
                    benchmark_fit=data.get("benchmark_fit", {}),
                    brand_analysis=data.get("brand_analysis", {}),
                    top_3_actions=data.get("top_3_actions", []),
                    color_recommendations=data.get("color_recommendations", []),
                    type_scale_recommendation=data.get("type_scale_recommendation", {}),
                    spacing_recommendation=data.get("spacing_recommendation", {}),
                )
        except Exception:
            # Malformed JSON — fall through to the empty default.
            pass

        return HeadSynthesis()