riazmo committed on
Commit a19099e · verified · 1 Parent(s): 43ee65d

Upload stage2_graph.py

Files changed (1)
  1. agents/stage2_graph.py +990 -0
agents/stage2_graph.py ADDED
@@ -0,0 +1,990 @@
"""
Stage 2 Multi-Agent Analysis Workflow (LangGraph)

Architecture:
┌─────────────┐   ┌─────────────┐   ┌─────────────┐
│   LLM 1     │   │   LLM 2     │   │ Rule Engine │
│   (Qwen)    │   │   (Llama)   │   │  (No LLM)   │
└──────┬──────┘   └──────┬──────┘   └──────┬──────┘
       │                 │                 │
       │     PARALLEL    │                 │
       └─────────────────┼─────────────────┘
                         │
                         ▼
                ┌─────────────────┐
                │      HEAD       │
                │   (Compiler)    │
                └─────────────────┘
"""

import asyncio
import json
import os
import time
import yaml
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Optional

from langgraph.graph import END, START, StateGraph
from typing_extensions import TypedDict

# =============================================================================
# CONFIGURATION LOADING
# =============================================================================

def load_agent_config() -> dict:
    """Load agent configuration from YAML."""
    config_path = os.path.join(os.path.dirname(__file__), "..", "config", "agents.yaml")
    if os.path.exists(config_path):
        with open(config_path, 'r') as f:
            return yaml.safe_load(f)
    return {}
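
# For orientation: a config/agents.yaml consistent with the keys read in this
# module might look like the sketch below. This is an illustrative assumption
# (values mirror the in-code defaults), not a confirmed copy of the real file.
#
#   stage2_llm1:
#     name: "LLM 1 (Qwen)"
#     model: "Qwen/Qwen2.5-72B-Instruct"
#     provider: "novita"
#     persona: "Senior Design Systems Architect"
#     max_tokens: 1500
#     temperature: 0.4
#     cost_per_million_input: 0.29
#     cost_per_million_output: 0.59
#     tasks: ["typography", "colors"]
#   stage2_llm2:
#     # ... same shape; Llama model, costs 0.59/0.79
#   stage2_head:
#     # ... same shape; temperature 0.3, max_tokens 2000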

# =============================================================================
# STATE DEFINITION
# =============================================================================

class Stage2State(TypedDict):
    """State for Stage 2 multi-agent analysis."""

    # Inputs
    desktop_tokens: dict
    mobile_tokens: dict
    competitors: list[str]

    # Parallel analysis outputs
    llm1_analysis: Optional[dict]
    llm2_analysis: Optional[dict]
    rule_calculations: Optional[dict]

    # HEAD output
    final_recommendations: Optional[dict]

    # Metadata
    analysis_log: list[str]
    cost_tracking: dict
    errors: list[str]

    # Timing
    start_time: float
    llm1_time: float
    llm2_time: float
    head_time: float

# =============================================================================
# COST TRACKING
# =============================================================================

@dataclass
class CostTracker:
    """Track LLM costs during analysis."""

    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost: float = 0.0
    calls: list = field(default_factory=list)

    def add_call(self, agent_name: str, model: str, input_tokens: int, output_tokens: int,
                 cost_per_m_input: float, cost_per_m_output: float, duration: float):
        """Record an LLM call."""
        input_cost = (input_tokens / 1_000_000) * cost_per_m_input
        output_cost = (output_tokens / 1_000_000) * cost_per_m_output
        total_cost = input_cost + output_cost

        self.total_input_tokens += input_tokens
        self.total_output_tokens += output_tokens
        self.total_cost += total_cost

        self.calls.append({
            "agent": agent_name,
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost": total_cost,
            "duration": duration,
        })

    def to_dict(self) -> dict:
        return {
            "total_input_tokens": self.total_input_tokens,
            "total_output_tokens": self.total_output_tokens,
            "total_cost": round(self.total_cost, 6),
            "calls": self.calls,
        }


# Global cost tracker
cost_tracker = CostTracker()

# =============================================================================
# LLM CLIENT
# =============================================================================

async def call_llm(
    agent_name: str,
    model: str,
    provider: str,
    prompt: str,
    max_tokens: int = 1500,
    temperature: float = 0.4,
    cost_per_m_input: float = 0.5,
    cost_per_m_output: float = 0.5,
    log_callback: Optional[Callable] = None,
) -> tuple[str, int, int]:
    """Call an LLM via HuggingFace Inference Providers."""

    start_time = time.time()

    if log_callback:
        log_callback(f"   🚀 {agent_name}: Calling {model} via {provider}...")

    try:
        from huggingface_hub import InferenceClient

        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            raise ValueError("HF_TOKEN not set")

        # Initialize client with provider
        # (the provider is set at client level, not per-call)
        client = InferenceClient(
            token=hf_token,
            provider=provider,
        )

        # Call without a provider argument (it's set at client level)
        response = client.chat_completion(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )

        # Extract response
        content = response.choices[0].message.content

        # Estimate tokens (rough word-count heuristic)
        input_tokens = len(prompt.split()) * 1.3
        output_tokens = len(content.split()) * 1.3

        duration = time.time() - start_time

        # Track cost
        cost_tracker.add_call(
            agent_name=agent_name,
            model=model,
            input_tokens=int(input_tokens),
            output_tokens=int(output_tokens),
            cost_per_m_input=cost_per_m_input,
            cost_per_m_output=cost_per_m_output,
            duration=duration,
        )

        if log_callback:
            est_cost = ((input_tokens / 1_000_000) * cost_per_m_input +
                        (output_tokens / 1_000_000) * cost_per_m_output)
            log_callback(f"   ✅ {agent_name}: Complete ({duration:.1f}s, ~{int(input_tokens)} in, ~{int(output_tokens)} out)")
            log_callback(f"   💵 Est. cost: ${est_cost:.4f}")

        return content, int(input_tokens), int(output_tokens)

    except TypeError as e:
        # Fallback: if the provider argument is not supported, try the
        # model:provider naming format instead
        if "provider" in str(e):
            if log_callback:
                log_callback(f"   ⚠️ {agent_name}: Trying model:provider format...")

            from huggingface_hub import InferenceClient

            hf_token = os.environ.get("HF_TOKEN")
            client = InferenceClient(token=hf_token)

            # Try appending the provider to the model name
            model_with_provider = f"{model}:{provider}"

            try:
                response = client.chat_completion(
                    model=model_with_provider,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=max_tokens,
                    temperature=temperature,
                )

                content = response.choices[0].message.content
                input_tokens = len(prompt.split()) * 1.3
                output_tokens = len(content.split()) * 1.3
                duration = time.time() - start_time

                cost_tracker.add_call(
                    agent_name=agent_name,
                    model=model,
                    input_tokens=int(input_tokens),
                    output_tokens=int(output_tokens),
                    cost_per_m_input=cost_per_m_input,
                    cost_per_m_output=cost_per_m_output,
                    duration=duration,
                )

                if log_callback:
                    est_cost = ((input_tokens / 1_000_000) * cost_per_m_input +
                                (output_tokens / 1_000_000) * cost_per_m_output)
                    log_callback(f"   ✅ {agent_name}: Complete ({duration:.1f}s, ~{int(input_tokens)} in, ~{int(output_tokens)} out)")
                    log_callback(f"   💵 Est. cost: ${est_cost:.4f}")

                return content, int(input_tokens), int(output_tokens)

            except Exception:
                # Final fallback: try the bare model name without a provider
                if log_callback:
                    log_callback(f"   ⚠️ {agent_name}: Trying without provider...")

                response = client.chat_completion(
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=max_tokens,
                    temperature=temperature,
                )

                content = response.choices[0].message.content
                input_tokens = len(prompt.split()) * 1.3
                output_tokens = len(content.split()) * 1.3
                duration = time.time() - start_time

                cost_tracker.add_call(
                    agent_name=agent_name,
                    model=model,
                    input_tokens=int(input_tokens),
                    output_tokens=int(output_tokens),
                    cost_per_m_input=cost_per_m_input,
                    cost_per_m_output=cost_per_m_output,
                    duration=duration,
                )

                if log_callback:
                    est_cost = ((input_tokens / 1_000_000) * cost_per_m_input +
                                (output_tokens / 1_000_000) * cost_per_m_output)
                    log_callback(f"   ✅ {agent_name}: Complete ({duration:.1f}s, ~{int(input_tokens)} in, ~{int(output_tokens)} out)")
                    log_callback(f"   💵 Est. cost: ${est_cost:.4f}")

                return content, int(input_tokens), int(output_tokens)
        else:
            raise

    except Exception as e:
        duration = time.time() - start_time
        if log_callback:
            log_callback(f"   ❌ {agent_name}: Error after {duration:.1f}s - {str(e)}")
        raise
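
# Note on the token estimates above: the 1.3x word-count heuristic is only an
# approximation. When the provider reports usage, huggingface_hub's
# chat_completion output typically carries exact counts; the snippet below is a
# hedged sketch of how they could replace the estimate, assuming the provider
# actually populates the field:
#
#     usage = getattr(response, "usage", None)
#     if usage is not None:
#         input_tokens, output_tokens = usage.prompt_tokens, usage.completion_tokens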

# =============================================================================
# ANALYSIS NODES
# =============================================================================

async def analyze_with_llm1(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """LLM 1 (Qwen) analysis node with detailed reasoning logs."""

    config = load_agent_config()
    llm1_config = config.get("stage2_llm1", {})

    model = llm1_config.get("model", "Qwen/Qwen2.5-72B-Instruct")
    provider = llm1_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 55)
        log_callback(f"🤖 LLM 1: {model}")
        log_callback("=" * 55)
        log_callback(f"   Provider: {provider}")
        log_callback(f"   💰 Cost: ${llm1_config.get('cost_per_million_input', 0.29)}/M in, ${llm1_config.get('cost_per_million_output', 0.59)}/M out")
        log_callback(f"   📝 Task: Typography, Colors, AA, Spacing analysis")
        log_callback("")

    # Build prompt
    prompt = build_analyst_prompt(
        tokens_summary=summarize_tokens(state["desktop_tokens"], state["mobile_tokens"]),
        competitors=state["competitors"],
        persona=llm1_config.get("persona", "Senior Design Systems Architect"),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="LLM 1 (Qwen)",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=llm1_config.get("max_tokens", 1500),
            temperature=llm1_config.get("temperature", 0.4),
            cost_per_m_input=llm1_config.get("cost_per_million_input", 0.29),
            cost_per_m_output=llm1_config.get("cost_per_million_output", 0.59),
            log_callback=log_callback,
        )

        # Parse JSON response
        analysis = parse_llm_response(response)
        analysis["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Log detailed findings
        if log_callback and not analysis.get("parse_error"):
            log_callback("")
            log_callback("   📊 LLM 1 FINDINGS:")
            log_callback("")

            # Typography
            typo = analysis.get("typography", {})
            if isinstance(typo, dict):
                log_callback("   TYPOGRAPHY:")
                log_callback(f"      ├─ Detected ratio: {typo.get('detected_ratio', '?')}")
                log_callback(f"      ├─ Score: {typo.get('score', '?')}/10")
                if typo.get("recommendations"):
                    for rec in typo.get("recommendations", [])[:2]:
                        log_callback(f"      └─ 💡 {rec[:60]}...")

            # Colors
            colors = analysis.get("colors", {})
            if isinstance(colors, dict):
                log_callback("")
                log_callback("   COLORS:")
                log_callback(f"      ├─ Score: {colors.get('score', '?')}/10")
                if colors.get("recommendations"):
                    for rec in colors.get("recommendations", [])[:2]:
                        log_callback(f"      └─ 💡 {rec[:60]}...")

            # Accessibility
            aa = analysis.get("accessibility", {})
            if isinstance(aa, dict):
                log_callback("")
                log_callback("   ACCESSIBILITY:")
                log_callback(f"      ├─ Score: {aa.get('score', '?')}/10")
                issues = aa.get("issues", [])
                if issues:
                    for issue in issues[:2]:
                        log_callback(f"      └─ ⚠️ {issue[:60]}...")

            # Top priorities
            priorities = analysis.get("top_3_priorities", [])
            if priorities:
                log_callback("")
                log_callback("   TOP PRIORITIES:")
                for i, p in enumerate(priorities[:3], 1):
                    log_callback(f"      {i}. {p[:70]}")

            log_callback("")
            log_callback(f"   🎯 CONFIDENCE: {analysis.get('confidence', '?')}%")

        return {"llm1_analysis": analysis, "llm1_time": time.time()}

    except Exception as e:
        return {
            "llm1_analysis": {"error": str(e)},
            "errors": state.get("errors", []) + [f"LLM1: {str(e)}"],
            "llm1_time": time.time(),
        }

async def analyze_with_llm2(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """LLM 2 (Llama) analysis node with detailed reasoning logs."""

    config = load_agent_config()
    llm2_config = config.get("stage2_llm2", {})

    model = llm2_config.get("model", "meta-llama/Llama-3.3-70B-Instruct")
    provider = llm2_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 55)
        log_callback(f"🤖 LLM 2: {model}")
        log_callback("=" * 55)
        log_callback(f"   Provider: {provider}")
        log_callback(f"   💰 Cost: ${llm2_config.get('cost_per_million_input', 0.59)}/M in, ${llm2_config.get('cost_per_million_output', 0.79)}/M out")
        log_callback(f"   📝 Task: Typography, Colors, AA, Spacing analysis")
        log_callback("")

    # Build prompt
    prompt = build_analyst_prompt(
        tokens_summary=summarize_tokens(state["desktop_tokens"], state["mobile_tokens"]),
        competitors=state["competitors"],
        persona=llm2_config.get("persona", "Senior Design Systems Architect"),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="LLM 2 (Llama)",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=llm2_config.get("max_tokens", 1500),
            temperature=llm2_config.get("temperature", 0.4),
            cost_per_m_input=llm2_config.get("cost_per_million_input", 0.59),
            cost_per_m_output=llm2_config.get("cost_per_million_output", 0.79),
            log_callback=log_callback,
        )

        # Parse JSON response
        analysis = parse_llm_response(response)
        analysis["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Log detailed findings
        if log_callback and not analysis.get("parse_error"):
            log_callback("")
            log_callback("   📊 LLM 2 FINDINGS:")
            log_callback("")

            # Typography
            typo = analysis.get("typography", {})
            if isinstance(typo, dict):
                log_callback("   TYPOGRAPHY:")
                log_callback(f"      ├─ Detected ratio: {typo.get('detected_ratio', '?')}")
                log_callback(f"      ├─ Score: {typo.get('score', '?')}/10")
                if typo.get("recommendations"):
                    for rec in typo.get("recommendations", [])[:2]:
                        log_callback(f"      └─ 💡 {rec[:60]}...")

            # Colors
            colors = analysis.get("colors", {})
            if isinstance(colors, dict):
                log_callback("")
                log_callback("   COLORS:")
                log_callback(f"      ├─ Score: {colors.get('score', '?')}/10")
                if colors.get("recommendations"):
                    for rec in colors.get("recommendations", [])[:2]:
                        log_callback(f"      └─ 💡 {rec[:60]}...")

            # Accessibility
            aa = analysis.get("accessibility", {})
            if isinstance(aa, dict):
                log_callback("")
                log_callback("   ACCESSIBILITY:")
                log_callback(f"      ├─ Score: {aa.get('score', '?')}/10")
                issues = aa.get("issues", [])
                if issues:
                    for issue in issues[:2]:
                        log_callback(f"      └─ ⚠️ {issue[:60]}...")

            # Top priorities
            priorities = analysis.get("top_3_priorities", [])
            if priorities:
                log_callback("")
                log_callback("   TOP PRIORITIES:")
                for i, p in enumerate(priorities[:3], 1):
                    log_callback(f"      {i}. {p[:70]}")

            log_callback("")
            log_callback(f"   🎯 CONFIDENCE: {analysis.get('confidence', '?')}%")

        return {"llm2_analysis": analysis, "llm2_time": time.time()}

    except Exception as e:
        return {
            "llm2_analysis": {"error": str(e)},
            "errors": state.get("errors", []) + [f"LLM2: {str(e)}"],
            "llm2_time": time.time(),
        }

def run_rule_engine(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """Rule engine node (no LLM, always runs)."""

    if log_callback:
        log_callback("")
        log_callback("⚙️ Rule Engine: Running calculations...")
        log_callback("   💰 Cost: FREE (no LLM)")

    start = time.time()

    # Calculate type scale options
    base_size = detect_base_font_size(state["desktop_tokens"])
    type_scales = {
        "1.2": generate_type_scale(base_size, 1.2),
        "1.25": generate_type_scale(base_size, 1.25),
        "1.333": generate_type_scale(base_size, 1.333),
    }

    # Calculate spacing options
    spacing_options = {
        "4px": generate_spacing_scale(4),
        "8px": generate_spacing_scale(8),
    }

    # Generate color ramps for each base color
    from core.color_utils import generate_color_ramp

    color_ramps = {}
    colors = state["desktop_tokens"].get("colors", {})
    for name, color in list(colors.items())[:8]:
        hex_val = color.get("value") if isinstance(color, dict) else str(color)
        try:
            color_ramps[name] = generate_color_ramp(hex_val)
        except Exception:
            # Skip colors the ramp generator cannot parse
            pass

    duration = time.time() - start

    if log_callback:
        log_callback(f"   ✅ Rule Engine: Complete ({duration:.2f}s)")
        log_callback(f"   Generated: {len(type_scales)} type scales, {len(spacing_options)} spacing grids, {len(color_ramps)} color ramps")

    return {
        "rule_calculations": {
            "base_font_size": base_size,
            "type_scales": type_scales,
            "spacing_options": spacing_options,
            "color_ramps": color_ramps,
        }
    }

async def compile_with_head(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """HEAD compiler node with detailed synthesis logging."""

    config = load_agent_config()
    head_config = config.get("stage2_head", {})

    model = head_config.get("model", "meta-llama/Llama-3.3-70B-Instruct")
    provider = head_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 60)
        log_callback("🧠 HEAD COMPILER: Synthesizing results...")
        log_callback("=" * 60)
        log_callback(f"   Model: {model}")
        log_callback(f"   Provider: {provider}")
        log_callback(f"   💰 Cost: ${head_config.get('cost_per_million_input', 0.59)}/M in, ${head_config.get('cost_per_million_output', 0.79)}/M out")
        log_callback("")
        log_callback("   📥 INPUT: Analyzing outputs from LLM 1 + LLM 2 + Rules...")

    # Build HEAD prompt
    prompt = build_head_prompt(
        llm1_analysis=state.get("llm1_analysis", {}),
        llm2_analysis=state.get("llm2_analysis", {}),
        rule_calculations=state.get("rule_calculations", {}),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="HEAD",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=head_config.get("max_tokens", 2000),
            temperature=head_config.get("temperature", 0.3),
            cost_per_m_input=head_config.get("cost_per_million_input", 0.59),
            cost_per_m_output=head_config.get("cost_per_million_output", 0.79),
            log_callback=log_callback,
        )

        # Parse response
        recommendations = parse_llm_response(response)
        recommendations["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Add cost summary
        recommendations["cost_summary"] = cost_tracker.to_dict()

        # Log detailed HEAD findings
        if log_callback and not recommendations.get("parse_error"):
            log_callback("")
            log_callback("   📊 HEAD SYNTHESIS:")
            log_callback("")

            # Agreements
            agreements = recommendations.get("agreements", [])
            if agreements:
                log_callback("   ✅ AGREEMENTS (High Confidence):")
                for a in agreements[:3]:
                    topic = a.get("topic", "?") if isinstance(a, dict) else str(a)[:30]
                    finding = a.get("finding", "")[:50] if isinstance(a, dict) else ""
                    log_callback(f"      ├─ {topic}: {finding}...")

            # Disagreements
            disagreements = recommendations.get("disagreements", [])
            if disagreements:
                log_callback("")
                log_callback("   🔄 DISAGREEMENTS (Resolved):")
                for d in disagreements[:3]:
                    if isinstance(d, dict):
                        topic = d.get("topic", "?")
                        resolution = d.get("resolution", "")[:60]
                        log_callback(f"      ├─ {topic}: {resolution}...")

            # Final recommendations
            final_recs = recommendations.get("final_recommendations", {})
            if final_recs:
                log_callback("")
                log_callback("   📋 FINAL RECOMMENDATIONS:")
                log_callback(f"      ├─ Type Scale: {final_recs.get('type_scale', '?')}")
                log_callback(f"      ├─ Spacing: {final_recs.get('spacing_base', '?')}")
                if final_recs.get("color_improvements"):
                    log_callback(f"      ├─ Colors: {final_recs['color_improvements'][0][:50]}...")
                if final_recs.get("accessibility_fixes"):
                    log_callback(f"      └─ AA Fixes: {final_recs['accessibility_fixes'][0][:50]}...")

            # Summary
            if recommendations.get("summary"):
                log_callback("")
                log_callback("   📝 SUMMARY:")
                summary = recommendations["summary"][:150]
                log_callback(f"   {summary}...")

            log_callback("")
            log_callback(f"   🎯 OVERALL CONFIDENCE: {recommendations.get('overall_confidence', '?')}%")

        if log_callback:
            log_callback("")
            log_callback("=" * 60)
            log_callback(f"💰 TOTAL ESTIMATED COST: ${cost_tracker.total_cost:.4f}")
            log_callback(f"   (Free tier: $0.10/mo | Pro: $2/mo)")
            log_callback("=" * 60)

        return {
            "final_recommendations": recommendations,
            "cost_tracking": cost_tracker.to_dict(),
            "head_time": time.time(),
        }

    except Exception as e:
        if log_callback:
            log_callback(f"   ❌ HEAD Error: {str(e)}")

        # Fall back to rule-based recommendations
        return {
            "final_recommendations": build_fallback_recommendations(state),
            "errors": state.get("errors", []) + [f"HEAD: {str(e)}"],
            "head_time": time.time(),
        }

# =============================================================================
# HELPER FUNCTIONS
# =============================================================================

def summarize_tokens(desktop: dict, mobile: dict) -> str:
    """Create a summary of tokens for the prompt."""
    lines = []

    # Colors
    colors = desktop.get("colors", {})
    lines.append(f"### Colors ({len(colors)} detected)")
    for name, c in list(colors.items())[:5]:
        val = c.get("value") if isinstance(c, dict) else str(c)
        lines.append(f"- {name}: {val}")

    # Typography Desktop
    typo = desktop.get("typography", {})
    lines.append(f"\n### Typography Desktop ({len(typo)} styles)")
    for name, t in list(typo.items())[:5]:
        if isinstance(t, dict):
            lines.append(f"- {name}: {t.get('font_size', '?')} / {t.get('font_weight', '?')}")

    # Typography Mobile
    mobile_typo = mobile.get("typography", {})
    lines.append(f"\n### Typography Mobile ({len(mobile_typo)} styles)")

    # Spacing
    spacing = desktop.get("spacing", {})
    lines.append(f"\n### Spacing ({len(spacing)} values)")

    return "\n".join(lines)

def build_analyst_prompt(tokens_summary: str, competitors: list[str], persona: str) -> str:
    """Build prompt for analyst LLMs."""
    return f"""You are a {persona}.

## YOUR TASK
Analyze these design tokens extracted from a website and compare against industry best practices.

## EXTRACTED TOKENS
{tokens_summary}

## COMPETITOR DESIGN SYSTEMS TO RESEARCH
{', '.join(competitors)}

## ANALYZE THE FOLLOWING:

### 1. Typography
- Is the type scale consistent? Does it follow a mathematical ratio?
- What is the detected base size?
- Compare to competitors: what ratios do they use?
- Score (1-10) and specific recommendations

### 2. Colors
- Is the color palette cohesive?
- Are semantic colors properly defined (primary, secondary, etc.)?
- Score (1-10) and specific recommendations

### 3. Accessibility (AA Compliance)
- What contrast issues might exist?
- Score (1-10)

### 4. Spacing
- Is spacing consistent? Does it follow a grid (4px, 8px)?
- Score (1-10) and specific recommendations

### 5. Overall Assessment
- Top 3 priorities for improvement

## RESPOND IN JSON FORMAT ONLY:
```json
{{
  "typography": {{"analysis": "...", "detected_ratio": 1.2, "score": 7, "recommendations": ["..."]}},
  "colors": {{"analysis": "...", "score": 6, "recommendations": ["..."]}},
  "accessibility": {{"issues": ["..."], "score": 5}},
  "spacing": {{"analysis": "...", "detected_base": 8, "score": 7, "recommendations": ["..."]}},
  "top_3_priorities": ["...", "...", "..."],
  "confidence": 85
}}
```"""

def build_head_prompt(llm1_analysis: dict, llm2_analysis: dict, rule_calculations: dict) -> str:
    """Build prompt for HEAD compiler."""
    return f"""You are a Principal Design Systems Architect compiling analyses from two expert analysts.

## ANALYST 1 FINDINGS:
{json.dumps(llm1_analysis, indent=2, default=str)[:2000]}

## ANALYST 2 FINDINGS:
{json.dumps(llm2_analysis, indent=2, default=str)[:2000]}

## RULE-BASED CALCULATIONS:
- Base font size: {rule_calculations.get('base_font_size', 16)}px
- Type scale options: 1.2, 1.25, 1.333
- Spacing options: 4px grid, 8px grid

## YOUR TASK:
1. Compare both analyst perspectives
2. Identify agreements and disagreements
3. Synthesize final recommendations

## RESPOND IN JSON FORMAT ONLY:
```json
{{
  "agreements": [{{"topic": "...", "finding": "..."}}],
  "disagreements": [{{"topic": "...", "resolution": "..."}}],
  "final_recommendations": {{
    "type_scale": "1.25",
    "type_scale_rationale": "...",
    "spacing_base": "8px",
    "spacing_rationale": "...",
    "color_improvements": ["..."],
    "accessibility_fixes": ["..."]
  }},
  "overall_confidence": 85,
  "summary": "..."
}}
```"""

def parse_llm_response(response: str) -> dict:
    """Parse JSON from an LLM response."""
    try:
        # Try to extract JSON from a markdown code block
        if "```json" in response:
            start = response.find("```json") + 7
            end = response.find("```", start)
            json_str = response[start:end].strip()
        elif "```" in response:
            start = response.find("```") + 3
            end = response.find("```", start)
            json_str = response[start:end].strip()
        else:
            json_str = response.strip()

        return json.loads(json_str)
    except Exception:
        # Keep a truncated copy of the raw text so callers can inspect failures
        return {"raw_response": response[:500], "parse_error": True}
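
# For example (illustrating the behavior above):
#   parse_llm_response('```json\n{"score": 7}\n```')  ->  {"score": 7}
#   parse_llm_response('no json here')                ->  {"raw_response": "no json here", "parse_error": True}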

def detect_base_font_size(tokens: dict) -> int:
    """Detect base font size from typography tokens."""
    typography = tokens.get("typography", {})

    sizes = []
    for t in typography.values():
        if isinstance(t, dict):
            size_str = str(t.get("font_size", "16px"))
            try:
                size = float(size_str.replace("px", "").replace("rem", "").replace("em", ""))
                if 14 <= size <= 18:
                    sizes.append(size)
            except ValueError:
                # Ignore sizes that are not plain numeric values
                pass

    if sizes:
        # The most common size in the body-text range wins
        return int(max(set(sizes), key=sizes.count))
    return 16
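
# For example, typography styles of 16px, 16px, and 14px yield 16: the most
# frequent size inside the 14-18px body-text window wins, anything outside
# that window is ignored, and the fallback is 16.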

def generate_type_scale(base: int, ratio: float) -> list[int]:
    """Generate type scale from base and ratio."""
    # 13 levels: display.2xl down to overline
    scales = []
    for i in range(8, -5, -1):
        size = base * (ratio ** i)
        # Round to even
        scales.append(int(round(size / 2) * 2))
    return scales
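
# For example, generate_type_scale(16, 1.25) yields, largest to smallest:
#   [96, 76, 62, 48, 40, 32, 24, 20, 16, 12, 10, 8, 6]
# (Python's round() half-to-even is why the 25px step lands on 24, not 26.)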

def generate_spacing_scale(base: int) -> list[int]:
    """Generate spacing scale from base."""
    # 17 steps: 0 through 16 x base (e.g., 0-128px on an 8px grid)
    return [base * i for i in range(0, 17)]

def build_fallback_recommendations(state: Stage2State) -> dict:
    """Build fallback recommendations if HEAD fails."""
    rule_calc = state.get("rule_calculations", {})

    return {
        "final_recommendations": {
            "type_scale": "1.25",
            "type_scale_rationale": "Major Third (1.25) is industry standard",
            "spacing_base": "8px",
            "spacing_rationale": "8px grid provides good visual rhythm",
            "color_improvements": ["Generate full ramps (50-950)"],
            "accessibility_fixes": ["Review contrast ratios"],
        },
        "overall_confidence": 60,
        "summary": "Recommendations based on rule-based analysis (LLM unavailable)",
        "fallback": True,
    }
874
+ # =============================================================================
875
+ # WORKFLOW BUILDER
876
+ # =============================================================================
877
+
878
+ def build_stage2_workflow():
879
+ """Build the LangGraph workflow for Stage 2."""
880
+
881
+ workflow = StateGraph(Stage2State)
882
+
883
+ # Add nodes
884
+ workflow.add_node("llm1_analyst", analyze_with_llm1)
885
+ workflow.add_node("llm2_analyst", analyze_with_llm2)
886
+ workflow.add_node("rule_engine", run_rule_engine)
887
+ workflow.add_node("head_compiler", compile_with_head)
888
+
889
+ # Parallel execution from START
890
+ workflow.add_edge(START, "llm1_analyst")
891
+ workflow.add_edge(START, "llm2_analyst")
892
+ workflow.add_edge(START, "rule_engine")
893
+
894
+ # All converge to HEAD
895
+ workflow.add_edge("llm1_analyst", "head_compiler")
896
+ workflow.add_edge("llm2_analyst", "head_compiler")
897
+ workflow.add_edge("rule_engine", "head_compiler")
898
+
899
+ # HEAD to END
900
+ workflow.add_edge("head_compiler", END)
901
+
902
+ return workflow.compile()
903
+
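
# Illustrative usage (an editor sketch assuming LangGraph's standard async API):
#
#     app = build_stage2_workflow()
#     final_state = await app.ainvoke(initial_state)
#
# Note that run_stage2_multi_agent() below does not invoke this compiled graph;
# it orchestrates the same nodes manually with asyncio.gather so that a
# log_callback can be threaded through to each node.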

# =============================================================================
# MAIN RUNNER
# =============================================================================

async def run_stage2_multi_agent(
    desktop_tokens: dict,
    mobile_tokens: dict,
    competitors: list[str],
    log_callback: Optional[Callable] = None,
) -> dict:
    """Run the Stage 2 multi-agent analysis."""

    global cost_tracker
    cost_tracker = CostTracker()  # Reset between runs

    if log_callback:
        log_callback("")
        log_callback("=" * 60)
        log_callback("🧠 STAGE 2: MULTI-AGENT ANALYSIS")
        log_callback("=" * 60)
        log_callback("")
        log_callback("📦 LLM CONFIGURATION:")

        config = load_agent_config()

        for agent_key in ["stage2_llm1", "stage2_llm2", "stage2_head"]:
            agent = config.get(agent_key, {})
            log_callback("┌─────────────────────────────────────────────────────┐")
            log_callback(f"│ {agent.get('name', agent_key)}")
            log_callback(f"│ Model: {agent.get('model', 'Unknown')}")
            log_callback(f"│ Provider: {agent.get('provider', 'novita')}")
            log_callback(f"│ 💰 Cost: ${agent.get('cost_per_million_input', 0.5)}/M in, ${agent.get('cost_per_million_output', 0.5)}/M out")
            log_callback(f"│ Task: {', '.join(agent.get('tasks', [])[:2])}")
            log_callback("└─────────────────────────────────────────────────────┘")

        log_callback("")
        log_callback("🔄 RUNNING PARALLEL ANALYSIS...")

    # Initial state
    initial_state = {
        "desktop_tokens": desktop_tokens,
        "mobile_tokens": mobile_tokens,
        "competitors": competitors,
        "llm1_analysis": None,
        "llm2_analysis": None,
        "rule_calculations": None,
        "final_recommendations": None,
        "analysis_log": [],
        "cost_tracking": {},
        "errors": [],
        "start_time": time.time(),
        "llm1_time": 0,
        "llm2_time": 0,
        "head_time": 0,
    }

    # Run parallel analysis
    try:
        # Run LLM 1, LLM 2, and the rule engine concurrently
        results = await asyncio.gather(
            analyze_with_llm1(initial_state, log_callback),
            analyze_with_llm2(initial_state, log_callback),
            asyncio.to_thread(run_rule_engine, initial_state, log_callback),
            return_exceptions=True,
        )

        # Merge the partial results into the state
        for result in results:
            if isinstance(result, dict):
                initial_state.update(result)
            elif isinstance(result, Exception):
                initial_state["errors"].append(str(result))

        # Run the HEAD compiler on the merged state
        head_result = await compile_with_head(initial_state, log_callback)
        initial_state.update(head_result)

        return initial_state

    except Exception as e:
        if log_callback:
            log_callback(f"❌ Workflow error: {str(e)}")

        initial_state["errors"].append(str(e))
        initial_state["final_recommendations"] = build_fallback_recommendations(initial_state)
        return initial_state
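
# A minimal smoke test, assuming HF_TOKEN is exported and core.color_utils is
# importable. The demo token shapes mirror what summarize_tokens() expects; the
# values and competitor names are hypothetical.
if __name__ == "__main__":
    demo_desktop = {
        "colors": {"primary": {"value": "#0055ff"}, "surface": "#ffffff"},
        "typography": {"body": {"font_size": "16px", "font_weight": 400}},
        "spacing": {"sm": "8px", "md": "16px"},
    }
    demo_mobile = {"typography": {"body": {"font_size": "15px", "font_weight": 400}}}

    final_state = asyncio.run(run_stage2_multi_agent(
        desktop_tokens=demo_desktop,
        mobile_tokens=demo_mobile,
        competitors=["stripe.com", "linear.app"],  # hypothetical list
        log_callback=print,
    ))
    print(json.dumps(final_state["final_recommendations"], indent=2, default=str))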