riazmo committed on
Commit
45326f9
·
verified ·
1 Parent(s): a19099e

Delete stage2_graph.py

Browse files
Files changed (1) hide show
  1. stage2_graph.py +0 -990
stage2_graph.py DELETED
@@ -1,990 +0,0 @@
1
- """
2
- Stage 2 Multi-Agent Analysis Workflow (LangGraph)
3
-
4
- Architecture:
5
- ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
6
- │ LLM 1 │ │ LLM 2 │ │ Rule Engine │
7
- │ (Qwen) │ │ (Llama) │ │ (No LLM) │
8
- └──────┬──────┘ └──────┬──────┘ └──────┬──────┘
9
- │ │ │
10
- │ PARALLEL │ │
11
- └───────────────────┼───────────────────┘
12
-
13
-
14
- ┌─────────────────┐
15
- │ HEAD │
16
- │ (Compiler) │
17
- └─────────────────┘
18
- """
19
-
20
- import asyncio
21
- import json
22
- import os
23
- import time
24
- import yaml
25
- from dataclasses import dataclass, field
26
- from datetime import datetime
27
- from typing import Any, Callable, Optional
28
-
29
- from langgraph.graph import END, START, StateGraph
30
- from typing_extensions import TypedDict
31
-
32
- # =============================================================================
33
- # CONFIGURATION LOADING
34
- # =============================================================================
35
-
36
def load_agent_config() -> dict:
    """Load the multi-agent configuration from ``config/agents.yaml``.

    Returns:
        The parsed YAML mapping, or an empty dict when the file is
        missing or empty.  ``yaml.safe_load`` yields ``None`` for an
        empty document, which would break the ``.get(...)`` calls every
        caller performs on the result — hence the ``or {}`` guard.
    """
    config_path = os.path.join(os.path.dirname(__file__), "..", "config", "agents.yaml")
    if os.path.exists(config_path):
        # Explicit encoding: the YAML may contain non-ASCII text.
        with open(config_path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f) or {}
    return {}
43
-
44
-
45
- # =============================================================================
46
- # STATE DEFINITION
47
- # =============================================================================
48
-
49
class Stage2State(TypedDict):
    """State for Stage 2 multi-agent analysis.

    Shared state threaded through the LangGraph workflow: the three
    parallel analyst nodes write their outputs here, and the HEAD
    compiler reads them to produce ``final_recommendations``.
    """

    # Inputs
    desktop_tokens: dict      # design tokens extracted from the desktop viewport
    mobile_tokens: dict       # design tokens extracted from the mobile viewport
    competitors: list[str]    # competitor names injected into the analyst prompts

    # Parallel analysis outputs (None until the corresponding node runs)
    llm1_analysis: Optional[dict]
    llm2_analysis: Optional[dict]
    rule_calculations: Optional[dict]

    # HEAD output
    final_recommendations: Optional[dict]

    # Metadata
    analysis_log: list[str]
    cost_tracking: dict       # snapshot of CostTracker.to_dict()
    errors: list[str]         # accumulated error strings from failed nodes

    # Timing (epoch seconds; node completion timestamps)
    start_time: float
    llm1_time: float
    llm2_time: float
    head_time: float
75
-
76
-
77
- # =============================================================================
78
- # COST TRACKING
79
- # =============================================================================
80
-
81
@dataclass
class CostTracker:
    """Accumulate LLM token usage and estimated spend across agent calls."""

    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost: float = 0.0
    calls: list = field(default_factory=list)

    def add_call(self, agent_name: str, model: str, input_tokens: int, output_tokens: int,
                 cost_per_m_input: float, cost_per_m_output: float, duration: float):
        """Record one LLM call and fold it into the running totals."""
        # Per-call cost: token counts are per-million-priced.
        input_cost = (input_tokens / 1_000_000) * cost_per_m_input
        output_cost = (output_tokens / 1_000_000) * cost_per_m_output
        call_cost = input_cost + output_cost

        record = {
            "agent": agent_name,
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost": call_cost,
            "duration": duration,
        }
        self.calls.append(record)

        self.total_input_tokens += input_tokens
        self.total_output_tokens += output_tokens
        self.total_cost += call_cost

    def to_dict(self) -> dict:
        """Return a JSON-serializable snapshot of the accumulated usage."""
        return dict(
            total_input_tokens=self.total_input_tokens,
            total_output_tokens=self.total_output_tokens,
            total_cost=round(self.total_cost, 6),
            calls=self.calls,
        )
117
-
118
-
119
# Global cost tracker; reset at the start of each run by
# run_stage2_multi_agent() so totals cover a single analysis.
cost_tracker = CostTracker()
121
-
122
-
123
- # =============================================================================
124
- # LLM CLIENT
125
- # =============================================================================
126
-
127
async def call_llm(
    agent_name: str,
    model: str,
    provider: str,
    prompt: str,
    max_tokens: int = 1500,
    temperature: float = 0.4,
    cost_per_m_input: float = 0.5,
    cost_per_m_output: float = 0.5,
    log_callback: Optional[Callable] = None,
) -> tuple[str, int, int]:
    """Call an LLM via HuggingFace Inference Providers.

    Attempts, in order (to cope with different huggingface_hub versions):
      1. ``InferenceClient(provider=...)`` — provider set at client level;
      2. the ``model:provider`` model-name convention;
      3. a plain call with no provider at all.

    Returns:
        ``(content, input_tokens, output_tokens)``; token counts are
        rough word-count estimates (~1.3 tokens/word), not tokenizer counts.

    Raises:
        ValueError: when the ``HF_TOKEN`` environment variable is unset.
        Exception: whatever the client raises once all attempts fail
            (logged via ``log_callback`` and re-raised).
    """

    start_time = time.time()

    def _finish(content: str) -> tuple[str, int, int]:
        # Shared bookkeeping for every successful attempt — previously
        # copy-pasted three times: estimate tokens, record the cost,
        # emit the completion log lines.
        input_tokens = len(prompt.split()) * 1.3  # Rough estimate
        output_tokens = len(content.split()) * 1.3
        duration = time.time() - start_time

        cost_tracker.add_call(
            agent_name=agent_name,
            model=model,
            input_tokens=int(input_tokens),
            output_tokens=int(output_tokens),
            cost_per_m_input=cost_per_m_input,
            cost_per_m_output=cost_per_m_output,
            duration=duration,
        )

        if log_callback:
            est_cost = ((input_tokens / 1_000_000) * cost_per_m_input +
                        (output_tokens / 1_000_000) * cost_per_m_output)
            log_callback(f" ✅ {agent_name}: Complete ({duration:.1f}s, ~{int(input_tokens)} in, ~{int(output_tokens)} out)")
            log_callback(f" 💵 Est. cost: ${est_cost:.4f}")

        return content, int(input_tokens), int(output_tokens)

    def _chat(client, model_id: str) -> str:
        # NOTE(review): chat_completion is synchronous, so it blocks the
        # event loop for the duration of the request — confirm callers
        # are happy with that (the workflow gathers these coroutines).
        response = client.chat_completion(
            model=model_id,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return response.choices[0].message.content

    if log_callback:
        log_callback(f" 🚀 {agent_name}: Calling {model} via {provider}...")

    try:
        from huggingface_hub import InferenceClient

        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            raise ValueError("HF_TOKEN not set")

        try:
            # Attempt 1: provider is set at client level, not per-call.
            client = InferenceClient(
                token=hf_token,
                provider=provider,
            )
            return _finish(_chat(client, model))
        except TypeError as e:
            # Older huggingface_hub versions have no ``provider`` kwarg;
            # any other TypeError is a real bug and must propagate.
            if "provider" not in str(e):
                raise

            if log_callback:
                log_callback(f" ⚠️ {agent_name}: Trying model:provider format...")

            client = InferenceClient(token=hf_token)
            try:
                # Attempt 2: append provider to the model name.
                return _finish(_chat(client, f"{model}:{provider}"))
            except Exception:
                # Attempt 3 (final fallback): plain model, no provider.
                if log_callback:
                    log_callback(f" ⚠️ {agent_name}: Trying without provider...")
                return _finish(_chat(client, model))

    except Exception as e:
        # All failures funnel here (fallback failures included, which
        # previously escaped without this log line), then re-raise.
        duration = time.time() - start_time
        if log_callback:
            log_callback(f" ❌ {agent_name}: Error after {duration:.1f}s - {str(e)}")
        raise
282
-
283
-
284
- # =============================================================================
285
- # ANALYSIS NODES
286
- # =============================================================================
287
-
288
async def analyze_with_llm1(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """LLM 1 (Qwen) analysis node with detailed reasoning logs.

    Summarizes the extracted tokens, calls the configured analyst model,
    and returns a partial state update (``llm1_analysis`` + timestamp).
    On failure the error is recorded into ``errors`` instead of raising,
    so the parallel workflow keeps the other analysts' results.
    """

    config = load_agent_config()
    llm1_config = config.get("stage2_llm1", {})

    # Defaults apply when config/agents.yaml is missing or incomplete.
    model = llm1_config.get("model", "Qwen/Qwen2.5-72B-Instruct")
    provider = llm1_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 55)
        log_callback(f"🤖 LLM 1: {model}")
        log_callback("=" * 55)
        log_callback(f" Provider: {provider}")
        log_callback(f" 💰 Cost: ${llm1_config.get('cost_per_million_input', 0.29)}/M in, ${llm1_config.get('cost_per_million_output', 0.59)}/M out")
        log_callback(f" 📝 Task: Typography, Colors, AA, Spacing analysis")
        log_callback("")

    # Build prompt
    prompt = build_analyst_prompt(
        tokens_summary=summarize_tokens(state["desktop_tokens"], state["mobile_tokens"]),
        competitors=state["competitors"],
        persona=llm1_config.get("persona", "Senior Design Systems Architect"),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="LLM 1 (Qwen)",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=llm1_config.get("max_tokens", 1500),
            temperature=llm1_config.get("temperature", 0.4),
            cost_per_m_input=llm1_config.get("cost_per_million_input", 0.29),
            cost_per_m_output=llm1_config.get("cost_per_million_output", 0.59),
            log_callback=log_callback,
        )

        # Parse JSON response; _meta records provenance for downstream use.
        analysis = parse_llm_response(response)
        analysis["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Log detailed findings (skipped when the response failed to parse)
        if log_callback and not analysis.get("parse_error"):
            log_callback("")
            log_callback(" 📊 LLM 1 FINDINGS:")
            log_callback("")

            # Typography (isinstance guards tolerate malformed LLM JSON)
            typo = analysis.get("typography", {})
            if isinstance(typo, dict):
                log_callback(" TYPOGRAPHY:")
                log_callback(f" ├─ Detected ratio: {typo.get('detected_ratio', '?')}")
                log_callback(f" ├─ Score: {typo.get('score', '?')}/10")
                if typo.get("recommendations"):
                    for rec in typo.get("recommendations", [])[:2]:
                        log_callback(f" └─ 💡 {rec[:60]}...")

            # Colors
            colors = analysis.get("colors", {})
            if isinstance(colors, dict):
                log_callback("")
                log_callback(" COLORS:")
                log_callback(f" ├─ Score: {colors.get('score', '?')}/10")
                if colors.get("recommendations"):
                    for rec in colors.get("recommendations", [])[:2]:
                        log_callback(f" └─ 💡 {rec[:60]}...")

            # Accessibility
            aa = analysis.get("accessibility", {})
            if isinstance(aa, dict):
                log_callback("")
                log_callback(" ACCESSIBILITY:")
                log_callback(f" ├─ Score: {aa.get('score', '?')}/10")
                issues = aa.get("issues", [])
                if issues:
                    for issue in issues[:2]:
                        log_callback(f" └─ ⚠️ {issue[:60]}...")

            # Top priorities
            priorities = analysis.get("top_3_priorities", [])
            if priorities:
                log_callback("")
                log_callback(" TOP PRIORITIES:")
                for i, p in enumerate(priorities[:3], 1):
                    log_callback(f" {i}. {p[:70]}")

            log_callback("")
            log_callback(f" 🎯 CONFIDENCE: {analysis.get('confidence', '?')}%")

        return {"llm1_analysis": analysis, "llm1_time": time.time()}

    except Exception as e:
        # Swallow and record: one failed analyst must not abort the run.
        return {
            "llm1_analysis": {"error": str(e)},
            "errors": state.get("errors", []) + [f"LLM1: {str(e)}"],
            "llm1_time": time.time(),
        }
392
-
393
-
394
async def analyze_with_llm2(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """LLM 2 (Llama) analysis node with detailed reasoning logs.

    Mirror image of analyze_with_llm1 using the ``stage2_llm2`` config:
    summarizes tokens, calls the second analyst model, and returns a
    partial state update (``llm2_analysis`` + timestamp). Failures are
    recorded into ``errors`` rather than raised.
    """

    config = load_agent_config()
    llm2_config = config.get("stage2_llm2", {})

    # Defaults apply when config/agents.yaml is missing or incomplete.
    model = llm2_config.get("model", "meta-llama/Llama-3.3-70B-Instruct")
    provider = llm2_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 55)
        log_callback(f"🤖 LLM 2: {model}")
        log_callback("=" * 55)
        log_callback(f" Provider: {provider}")
        log_callback(f" 💰 Cost: ${llm2_config.get('cost_per_million_input', 0.59)}/M in, ${llm2_config.get('cost_per_million_output', 0.79)}/M out")
        log_callback(f" 📝 Task: Typography, Colors, AA, Spacing analysis")
        log_callback("")

    # Build prompt (same analyst prompt as LLM 1, different persona config)
    prompt = build_analyst_prompt(
        tokens_summary=summarize_tokens(state["desktop_tokens"], state["mobile_tokens"]),
        competitors=state["competitors"],
        persona=llm2_config.get("persona", "Senior Design Systems Architect"),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="LLM 2 (Llama)",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=llm2_config.get("max_tokens", 1500),
            temperature=llm2_config.get("temperature", 0.4),
            cost_per_m_input=llm2_config.get("cost_per_million_input", 0.59),
            cost_per_m_output=llm2_config.get("cost_per_million_output", 0.79),
            log_callback=log_callback,
        )

        # Parse JSON response; _meta records provenance for downstream use.
        analysis = parse_llm_response(response)
        analysis["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Log detailed findings (skipped when the response failed to parse)
        if log_callback and not analysis.get("parse_error"):
            log_callback("")
            log_callback(" 📊 LLM 2 FINDINGS:")
            log_callback("")

            # Typography (isinstance guards tolerate malformed LLM JSON)
            typo = analysis.get("typography", {})
            if isinstance(typo, dict):
                log_callback(" TYPOGRAPHY:")
                log_callback(f" ├─ Detected ratio: {typo.get('detected_ratio', '?')}")
                log_callback(f" ├─ Score: {typo.get('score', '?')}/10")
                if typo.get("recommendations"):
                    for rec in typo.get("recommendations", [])[:2]:
                        log_callback(f" └─ 💡 {rec[:60]}...")

            # Colors
            colors = analysis.get("colors", {})
            if isinstance(colors, dict):
                log_callback("")
                log_callback(" COLORS:")
                log_callback(f" ├─ Score: {colors.get('score', '?')}/10")
                if colors.get("recommendations"):
                    for rec in colors.get("recommendations", [])[:2]:
                        log_callback(f" └─ 💡 {rec[:60]}...")

            # Accessibility
            aa = analysis.get("accessibility", {})
            if isinstance(aa, dict):
                log_callback("")
                log_callback(" ACCESSIBILITY:")
                log_callback(f" ├─ Score: {aa.get('score', '?')}/10")
                issues = aa.get("issues", [])
                if issues:
                    for issue in issues[:2]:
                        log_callback(f" └─ ⚠️ {issue[:60]}...")

            # Top priorities
            priorities = analysis.get("top_3_priorities", [])
            if priorities:
                log_callback("")
                log_callback(" TOP PRIORITIES:")
                for i, p in enumerate(priorities[:3], 1):
                    log_callback(f" {i}. {p[:70]}")

            log_callback("")
            log_callback(f" 🎯 CONFIDENCE: {analysis.get('confidence', '?')}%")

        return {"llm2_analysis": analysis, "llm2_time": time.time()}

    except Exception as e:
        # Swallow and record: one failed analyst must not abort the run.
        return {
            "llm2_analysis": {"error": str(e)},
            "errors": state.get("errors", []) + [f"LLM2: {str(e)}"],
            "llm2_time": time.time(),
        }
498
-
499
-
500
def run_rule_engine(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """Rule engine node (no LLM, always runs).

    Deterministically derives candidate type scales (ratios 1.2 / 1.25 /
    1.333), spacing grids (4px / 8px), and per-color ramps from the
    extracted tokens. Returns a partial state update containing
    ``rule_calculations``.
    """

    if log_callback:
        log_callback("")
        log_callback("⚙️ Rule Engine: Running calculations...")
        log_callback(" 💰 Cost: FREE (no LLM)")

    start = time.time()

    # Calculate type scale options from the detected base font size
    base_size = detect_base_font_size(state["desktop_tokens"])
    type_scales = {
        "1.2": generate_type_scale(base_size, 1.2),
        "1.25": generate_type_scale(base_size, 1.25),
        "1.333": generate_type_scale(base_size, 1.333),
    }

    # Calculate spacing options
    spacing_options = {
        "4px": generate_spacing_scale(4),
        "8px": generate_spacing_scale(8),
    }

    # Generate color ramps for (at most) the first 8 base colors
    from core.color_utils import generate_color_ramp

    color_ramps = {}
    colors = state["desktop_tokens"].get("colors", {})
    for name, color in list(colors.items())[:8]:
        hex_val = color.get("value") if isinstance(color, dict) else str(color)
        try:
            color_ramps[name] = generate_color_ramp(hex_val)
        except Exception:
            # Best-effort: skip colors that can't be parsed. Was a bare
            # ``except:``, which also swallowed SystemExit/KeyboardInterrupt.
            pass

    duration = time.time() - start

    if log_callback:
        log_callback(f" ✅ Rule Engine: Complete ({duration:.2f}s)")
        log_callback(f" Generated: {len(type_scales)} type scales, {len(spacing_options)} spacing grids, {len(color_ramps)} color ramps")

    return {
        "rule_calculations": {
            "base_font_size": base_size,
            "type_scales": type_scales,
            "spacing_options": spacing_options,
            "color_ramps": color_ramps,
        }
    }
550
-
551
-
552
async def compile_with_head(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """HEAD compiler node with detailed synthesis logging.

    Feeds both analysts' findings plus the rule-engine calculations to
    the HEAD model, which reconciles agreements/disagreements into
    ``final_recommendations``. On any failure it degrades to
    build_fallback_recommendations(), so the workflow always yields a
    result. Also attaches the accumulated cost summary.
    """

    config = load_agent_config()
    head_config = config.get("stage2_head", {})

    # Defaults apply when config/agents.yaml is missing or incomplete.
    model = head_config.get("model", "meta-llama/Llama-3.3-70B-Instruct")
    provider = head_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 60)
        log_callback("🧠 HEAD COMPILER: Synthesizing results...")
        log_callback("=" * 60)
        log_callback(f" Model: {model}")
        log_callback(f" Provider: {provider}")
        log_callback(f" 💰 Cost: ${head_config.get('cost_per_million_input', 0.59)}/M in, ${head_config.get('cost_per_million_output', 0.79)}/M out")
        log_callback("")
        log_callback(" 📥 INPUT: Analyzing outputs from LLM 1 + LLM 2 + Rules...")

    # Build HEAD prompt from whatever the parallel nodes produced
    # (missing pieces default to {} so a failed analyst doesn't block HEAD).
    prompt = build_head_prompt(
        llm1_analysis=state.get("llm1_analysis", {}),
        llm2_analysis=state.get("llm2_analysis", {}),
        rule_calculations=state.get("rule_calculations", {}),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="HEAD",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=head_config.get("max_tokens", 2000),
            temperature=head_config.get("temperature", 0.3),
            cost_per_m_input=head_config.get("cost_per_million_input", 0.59),
            cost_per_m_output=head_config.get("cost_per_million_output", 0.79),
            log_callback=log_callback,
        )

        # Parse response; _meta records provenance for downstream use.
        recommendations = parse_llm_response(response)
        recommendations["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Add cost summary across all agent calls this run
        recommendations["cost_summary"] = cost_tracker.to_dict()

        # Log detailed HEAD findings (skipped when parsing failed)
        if log_callback and not recommendations.get("parse_error"):
            log_callback("")
            log_callback(" 📊 HEAD SYNTHESIS:")
            log_callback("")

            # Agreements (isinstance guards tolerate malformed LLM JSON)
            agreements = recommendations.get("agreements", [])
            if agreements:
                log_callback(" ✅ AGREEMENTS (High Confidence):")
                for a in agreements[:3]:
                    topic = a.get("topic", "?") if isinstance(a, dict) else str(a)[:30]
                    finding = a.get("finding", "")[:50] if isinstance(a, dict) else ""
                    log_callback(f" ├─ {topic}: {finding}...")

            # Disagreements
            disagreements = recommendations.get("disagreements", [])
            if disagreements:
                log_callback("")
                log_callback(" 🔄 DISAGREEMENTS (Resolved):")
                for d in disagreements[:3]:
                    if isinstance(d, dict):
                        topic = d.get("topic", "?")
                        resolution = d.get("resolution", "")[:60]
                        log_callback(f" ├─ {topic}: {resolution}...")

            # Final recommendations
            final_recs = recommendations.get("final_recommendations", {})
            if final_recs:
                log_callback("")
                log_callback(" 📋 FINAL RECOMMENDATIONS:")
                log_callback(f" ├─ Type Scale: {final_recs.get('type_scale', '?')}")
                log_callback(f" ├─ Spacing: {final_recs.get('spacing_base', '?')}")
                if final_recs.get("color_improvements"):
                    log_callback(f" ├─ Colors: {final_recs['color_improvements'][0][:50]}...")
                if final_recs.get("accessibility_fixes"):
                    log_callback(f" └─ AA Fixes: {final_recs['accessibility_fixes'][0][:50]}...")

            # Summary
            if recommendations.get("summary"):
                log_callback("")
                log_callback(" 📝 SUMMARY:")
                summary = recommendations["summary"][:150]
                log_callback(f" {summary}...")

            log_callback("")
            log_callback(f" 🎯 OVERALL CONFIDENCE: {recommendations.get('overall_confidence', '?')}%")

        if log_callback:
            log_callback("")
            log_callback("=" * 60)
            log_callback(f"💰 TOTAL ESTIMATED COST: ${cost_tracker.total_cost:.4f}")
            log_callback(f" (Free tier: $0.10/mo | Pro: $2/mo)")
            log_callback("=" * 60)

        return {
            "final_recommendations": recommendations,
            "cost_tracking": cost_tracker.to_dict(),
            "head_time": time.time(),
        }

    except Exception as e:
        if log_callback:
            log_callback(f" ❌ HEAD Error: {str(e)}")

        # Fallback to rule-based recommendations so the run still
        # produces actionable output.
        return {
            "final_recommendations": build_fallback_recommendations(state),
            "errors": state.get("errors", []) + [f"HEAD: {str(e)}"],
            "head_time": time.time(),
        }
675
-
676
-
677
- # =============================================================================
678
- # HELPER FUNCTIONS
679
- # =============================================================================
680
-
681
def summarize_tokens(desktop: dict, mobile: dict) -> str:
    """Build a compact, markdown-flavored token summary for LLM prompts.

    Shows up to 5 colors and 5 desktop typography styles, plus counts
    for mobile typography and spacing values.
    """
    desktop_colors = desktop.get("colors", {})
    desktop_typo = desktop.get("typography", {})
    mobile_typo = mobile.get("typography", {})
    spacing = desktop.get("spacing", {})

    parts = [f"### Colors ({len(desktop_colors)} detected)"]
    for color_name, color in list(desktop_colors.items())[:5]:
        value = color.get("value") if isinstance(color, dict) else str(color)
        parts.append(f"- {color_name}: {value}")

    parts.append(f"\n### Typography Desktop ({len(desktop_typo)} styles)")
    parts.extend(
        f"- {style_name}: {style.get('font_size', '?')} / {style.get('font_weight', '?')}"
        for style_name, style in list(desktop_typo.items())[:5]
        if isinstance(style, dict)
    )

    parts.append(f"\n### Typography Mobile ({len(mobile_typo)} styles)")
    parts.append(f"\n### Spacing ({len(spacing)} values)")

    return "\n".join(parts)
708
-
709
-
710
def build_analyst_prompt(tokens_summary: str, competitors: list[str], persona: str) -> str:
    """Build the shared analysis prompt given to each analyst LLM.

    The prompt demands a strict-JSON reply (fenced in ```json) so that
    parse_llm_response() can extract the structured findings. Literal
    braces in the JSON template are escaped as ``{{``/``}}`` inside the
    f-string.
    """
    return f"""You are a {persona}.

## YOUR TASK
Analyze these design tokens extracted from a website and compare against industry best practices.

## EXTRACTED TOKENS
{tokens_summary}

## COMPETITOR DESIGN SYSTEMS TO RESEARCH
{', '.join(competitors)}

## ANALYZE THE FOLLOWING:

### 1. Typography
- Is the type scale consistent? Does it follow a mathematical ratio?
- What is the detected base size?
- Compare to competitors: what ratios do they use?
- Score (1-10) and specific recommendations

### 2. Colors
- Is the color palette cohesive?
- Are semantic colors properly defined (primary, secondary, etc.)?
- Score (1-10) and specific recommendations

### 3. Accessibility (AA Compliance)
- What contrast issues might exist?
- Score (1-10)

### 4. Spacing
- Is spacing consistent? Does it follow a grid (4px, 8px)?
- Score (1-10) and specific recommendations

### 5. Overall Assessment
- Top 3 priorities for improvement

## RESPOND IN JSON FORMAT ONLY:
```json
{{
"typography": {{"analysis": "...", "detected_ratio": 1.2, "score": 7, "recommendations": ["..."]}},
"colors": {{"analysis": "...", "score": 6, "recommendations": ["..."]}},
"accessibility": {{"issues": ["..."], "score": 5}},
"spacing": {{"analysis": "...", "detected_base": 8, "score": 7, "recommendations": ["..."]}},
"top_3_priorities": ["...", "...", "..."],
"confidence": 85
}}
```"""
758
-
759
-
760
def build_head_prompt(llm1_analysis: dict, llm2_analysis: dict, rule_calculations: dict) -> str:
    """Build the prompt for the HEAD compiler.

    Each analyst's JSON is truncated to 2000 characters to bound prompt
    size; ``default=str`` keeps non-JSON-serializable values (if any)
    from raising during dumps. Demands a strict-JSON reply for
    parse_llm_response().
    """
    return f"""You are a Principal Design Systems Architect compiling analyses from two expert analysts.

## ANALYST 1 FINDINGS:
{json.dumps(llm1_analysis, indent=2, default=str)[:2000]}

## ANALYST 2 FINDINGS:
{json.dumps(llm2_analysis, indent=2, default=str)[:2000]}

## RULE-BASED CALCULATIONS:
- Base font size: {rule_calculations.get('base_font_size', 16)}px
- Type scale options: 1.2, 1.25, 1.333
- Spacing options: 4px grid, 8px grid

## YOUR TASK:
1. Compare both analyst perspectives
2. Identify agreements and disagreements
3. Synthesize final recommendations

## RESPOND IN JSON FORMAT ONLY:
```json
{{
"agreements": [{{"topic": "...", "finding": "..."}}],
"disagreements": [{{"topic": "...", "resolution": "..."}}],
"final_recommendations": {{
"type_scale": "1.25",
"type_scale_rationale": "...",
"spacing_base": "8px",
"spacing_rationale": "...",
"color_improvements": ["..."],
"accessibility_fixes": ["..."]
}},
"overall_confidence": 85,
"summary": "..."
}}
```"""
797
-
798
-
799
def parse_llm_response(response: str) -> dict:
    """Parse a JSON object out of an LLM response.

    Handles responses wrapped in ```json ... ``` or plain ``` ... ```
    fences as well as bare JSON. A missing closing fence no longer
    breaks parsing (previously ``find`` returned -1, silently dropping
    the last character and usually failing the parse). On any parse
    failure, returns ``{"raw_response": ..., "parse_error": True}`` so
    callers can degrade gracefully instead of crashing.
    """
    try:
        # Try to extract JSON from a markdown code block
        if "```json" in response:
            start = response.find("```json") + 7
            end = response.find("```", start)
            # end == -1 means no closing fence: take the rest of the text
            json_str = response[start:end if end != -1 else len(response)].strip()
        elif "```" in response:
            start = response.find("```") + 3
            end = response.find("```", start)
            json_str = response[start:end if end != -1 else len(response)].strip()
        else:
            json_str = response.strip()

        return json.loads(json_str)
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError; the original bare
        # ``except:`` also swallowed SystemExit/KeyboardInterrupt.
        return {"raw_response": response[:500], "parse_error": True}
817
-
818
-
819
def detect_base_font_size(tokens: dict) -> int:
    """Detect the body base font size from typography tokens.

    Collects numeric sizes in the plausible body-text range (14-18,
    after stripping px/rem/em suffixes) and returns the most common
    one; falls back to 16 when nothing qualifies.

    NOTE(review): rem/em values are treated as raw numbers, so e.g.
    "1rem" yields 1.0 and is filtered out by the 14-18 range — confirm
    that is intended.
    """
    typography = tokens.get("typography", {})

    sizes = []
    for t in typography.values():
        if isinstance(t, dict):
            size_str = str(t.get("font_size", "16px"))
            try:
                size = float(size_str.replace("px", "").replace("rem", "").replace("em", ""))
            except ValueError:
                # Non-numeric size (e.g. "large"); was a bare ``except:``,
                # which also swallowed SystemExit/KeyboardInterrupt.
                continue
            if 14 <= size <= 18:
                sizes.append(size)

    if sizes:
        # Mode of the collected sizes (arbitrary tie-break via set order)
        return int(max(set(sizes), key=sizes.count))
    return 16
837
-
838
-
839
def generate_type_scale(base: int, ratio: float) -> list[int]:
    """Generate a 13-step type scale (largest first) from base and ratio.

    Steps run from ratio**8 (display.2xl) down to ratio**-4 (overline);
    every size is rounded to the nearest even pixel value.
    """
    exponents = range(8, -5, -1)
    return [int(round(base * ratio ** power / 2) * 2) for power in exponents]
848
-
849
-
850
def generate_spacing_scale(base: int) -> list[int]:
    """Return the 17-step spacing scale 0, base, 2*base, ..., 16*base."""
    return [step * base for step in range(17)]
853
-
854
-
855
def build_fallback_recommendations(state: "Stage2State") -> dict:
    """Build rule-of-thumb recommendations when the HEAD LLM call fails.

    Returns safe, industry-standard defaults flagged with
    ``fallback: True`` and reduced confidence. The ``state`` argument
    is kept for interface compatibility with the other node helpers
    (the original read ``rule_calculations`` into a local it never
    used; that dead assignment is removed). The annotation is a string
    forward reference so the function does not depend on Stage2State
    being defined first.
    """
    return {
        "final_recommendations": {
            "type_scale": "1.25",
            "type_scale_rationale": "Major Third (1.25) is industry standard",
            "spacing_base": "8px",
            "spacing_rationale": "8px grid provides good visual rhythm",
            "color_improvements": ["Generate full ramps (50-950)"],
            "accessibility_fixes": ["Review contrast ratios"],
        },
        "overall_confidence": 60,
        "summary": "Recommendations based on rule-based analysis (LLM unavailable)",
        "fallback": True,
    }
872
-
873
-
874
- # =============================================================================
875
- # WORKFLOW BUILDER
876
- # =============================================================================
877
-
878
def build_stage2_workflow():
    """Build the LangGraph workflow for Stage 2.

    Topology: START fans out to llm1_analyst, llm2_analyst, and
    rule_engine in parallel; all three converge on head_compiler,
    which then reaches END. Returns the compiled graph.

    NOTE(review): run_stage2_multi_agent() orchestrates the same nodes
    directly with asyncio.gather instead of invoking this graph —
    confirm which path is actually used in production.
    """

    workflow = StateGraph(Stage2State)

    # Add nodes
    workflow.add_node("llm1_analyst", analyze_with_llm1)
    workflow.add_node("llm2_analyst", analyze_with_llm2)
    workflow.add_node("rule_engine", run_rule_engine)
    workflow.add_node("head_compiler", compile_with_head)

    # Parallel execution from START
    workflow.add_edge(START, "llm1_analyst")
    workflow.add_edge(START, "llm2_analyst")
    workflow.add_edge(START, "rule_engine")

    # All converge to HEAD
    workflow.add_edge("llm1_analyst", "head_compiler")
    workflow.add_edge("llm2_analyst", "head_compiler")
    workflow.add_edge("rule_engine", "head_compiler")

    # HEAD to END
    workflow.add_edge("head_compiler", END)

    return workflow.compile()
903
-
904
-
905
- # =============================================================================
906
- # MAIN RUNNER
907
- # =============================================================================
908
-
909
async def run_stage2_multi_agent(
    desktop_tokens: dict,
    mobile_tokens: dict,
    competitors: list[str],
    log_callback: Optional[Callable] = None,
) -> dict:
    """Run the Stage 2 multi-agent analysis.

    Resets the global cost tracker, runs the two LLM analysts and the
    rule engine concurrently (the synchronous rule engine via a worker
    thread), merges their partial state updates, then runs the HEAD
    compiler. Individual node failures are collected into ``errors``;
    a whole-workflow failure still returns a state with rule-based
    fallback recommendations. Returns the final state dict.
    """

    global cost_tracker
    cost_tracker = CostTracker()  # Reset so totals cover this run only

    if log_callback:
        log_callback("")
        log_callback("=" * 60)
        log_callback("🧠 STAGE 2: MULTI-AGENT ANALYSIS")
        log_callback("=" * 60)
        log_callback("")
        log_callback("📦 LLM CONFIGURATION:")

        config = load_agent_config()

        # One banner box per configured agent (blank config → defaults shown)
        for agent_key in ["stage2_llm1", "stage2_llm2", "stage2_head"]:
            agent = config.get(agent_key, {})
            log_callback(f"┌─────────────────────────────────────────────────────┐")
            log_callback(f"│ {agent.get('name', agent_key)}")
            log_callback(f"│ Model: {agent.get('model', 'Unknown')}")
            log_callback(f"│ Provider: {agent.get('provider', 'novita')}")
            log_callback(f"│ 💰 Cost: ${agent.get('cost_per_million_input', 0.5)}/M in, ${agent.get('cost_per_million_output', 0.5)}/M out")
            log_callback(f"│ Task: {', '.join(agent.get('tasks', [])[:2])}")
            log_callback(f"└─────────────────────────────────────────────────────┘")

        log_callback("")
        log_callback("🔄 RUNNING PARALLEL ANALYSIS...")

    # Initial state (also serves as the mutable result accumulator)
    initial_state = {
        "desktop_tokens": desktop_tokens,
        "mobile_tokens": mobile_tokens,
        "competitors": competitors,
        "llm1_analysis": None,
        "llm2_analysis": None,
        "rule_calculations": None,
        "final_recommendations": None,
        "analysis_log": [],
        "cost_tracking": {},
        "errors": [],
        "start_time": time.time(),
        "llm1_time": 0,
        "llm2_time": 0,
        "head_time": 0,
    }

    # Run parallel analysis
    try:
        # Run LLM1, LLM2, and Rules in parallel; return_exceptions=True
        # so one failing node does not cancel the others.
        results = await asyncio.gather(
            analyze_with_llm1(initial_state, log_callback),
            analyze_with_llm2(initial_state, log_callback),
            asyncio.to_thread(run_rule_engine, initial_state, log_callback),
            return_exceptions=True,
        )

        # Merge partial state updates; stringify any raised exceptions
        for result in results:
            if isinstance(result, dict):
                initial_state.update(result)
            elif isinstance(result, Exception):
                initial_state["errors"].append(str(result))

        # Run HEAD compiler on the merged state
        head_result = await compile_with_head(initial_state, log_callback)
        initial_state.update(head_result)

        return initial_state

    except Exception as e:
        if log_callback:
            log_callback(f"❌ Workflow error: {str(e)}")

        # Degrade gracefully: record the error and fall back to the
        # rule-based recommendations instead of raising.
        initial_state["errors"].append(str(e))
        initial_state["final_recommendations"] = build_fallback_recommendations(initial_state)
        return initial_state