riazmo committed on
Commit a19099e · verified · 1 Parent(s): 43ee65d

Upload stage2_graph.py

Files changed (1)
  1. agents/stage2_graph.py +990 -0
agents/stage2_graph.py ADDED
@@ -0,0 +1,990 @@
"""
Stage 2 Multi-Agent Analysis Workflow (LangGraph)

Architecture:
┌─────────────┐   ┌─────────────┐   ┌─────────────┐
│   LLM 1     │   │   LLM 2     │   │ Rule Engine │
│   (Qwen)    │   │   (Llama)   │   │  (No LLM)   │
└──────┬──────┘   └──────┬──────┘   └──────┬──────┘
       │                 │                 │
       │     PARALLEL    │                 │
       └─────────────────┼─────────────────┘
                         │
                         ▼
                ┌─────────────────┐
                │      HEAD       │
                │   (Compiler)    │
                └─────────────────┘
"""

import asyncio
import json
import os
import time
import yaml
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Optional

from langgraph.graph import END, START, StateGraph
from typing_extensions import TypedDict

# =============================================================================
# CONFIGURATION LOADING
# =============================================================================

def load_agent_config() -> dict:
    """Load agent configuration from YAML."""
    config_path = os.path.join(os.path.dirname(__file__), "..", "config", "agents.yaml")
    if os.path.exists(config_path):
        with open(config_path, 'r') as f:
            return yaml.safe_load(f)
    return {}
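
# For orientation: a config/agents.yaml consistent with the keys read in this
# module might look like the sketch below. This is an illustrative assumption
# (values mirror the in-code defaults), not a confirmed copy of the real file.
#
#   stage2_llm1:
#     name: "LLM 1 (Qwen)"
#     model: "Qwen/Qwen2.5-72B-Instruct"
#     provider: "novita"
#     persona: "Senior Design Systems Architect"
#     max_tokens: 1500
#     temperature: 0.4
#     cost_per_million_input: 0.29
#     cost_per_million_output: 0.59
#     tasks: ["typography", "colors"]
#   stage2_llm2:
#     # ... same shape; Llama model, costs 0.59/0.79
#   stage2_head:
#     # ... same shape; temperature 0.3, max_tokens 2000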

# =============================================================================
# STATE DEFINITION
# =============================================================================

class Stage2State(TypedDict):
    """State for Stage 2 multi-agent analysis."""

    # Inputs
    desktop_tokens: dict
    mobile_tokens: dict
    competitors: list[str]

    # Parallel analysis outputs
    llm1_analysis: Optional[dict]
    llm2_analysis: Optional[dict]
    rule_calculations: Optional[dict]

    # HEAD output
    final_recommendations: Optional[dict]

    # Metadata
    analysis_log: list[str]
    cost_tracking: dict
    errors: list[str]

    # Timing
    start_time: float
    llm1_time: float
    llm2_time: float
    head_time: float

# =============================================================================
# COST TRACKING
# =============================================================================

@dataclass
class CostTracker:
    """Track LLM costs during analysis."""

    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost: float = 0.0
    calls: list = field(default_factory=list)

    def add_call(self, agent_name: str, model: str, input_tokens: int, output_tokens: int,
                 cost_per_m_input: float, cost_per_m_output: float, duration: float):
        """Record an LLM call."""
        input_cost = (input_tokens / 1_000_000) * cost_per_m_input
        output_cost = (output_tokens / 1_000_000) * cost_per_m_output
        total_cost = input_cost + output_cost

        self.total_input_tokens += input_tokens
        self.total_output_tokens += output_tokens
        self.total_cost += total_cost

        self.calls.append({
            "agent": agent_name,
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost": total_cost,
            "duration": duration,
        })

    def to_dict(self) -> dict:
        return {
            "total_input_tokens": self.total_input_tokens,
            "total_output_tokens": self.total_output_tokens,
            "total_cost": round(self.total_cost, 6),
            "calls": self.calls,
        }


# Global cost tracker
cost_tracker = CostTracker()

# =============================================================================
# LLM CLIENT
# =============================================================================

async def call_llm(
    agent_name: str,
    model: str,
    provider: str,
    prompt: str,
    max_tokens: int = 1500,
    temperature: float = 0.4,
    cost_per_m_input: float = 0.5,
    cost_per_m_output: float = 0.5,
    log_callback: Optional[Callable] = None,
) -> tuple[str, int, int]:
    """Call an LLM via HuggingFace Inference Providers."""

    start_time = time.time()

    if log_callback:
        log_callback(f"   🚀 {agent_name}: Calling {model} via {provider}...")

    try:
        from huggingface_hub import InferenceClient

        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            raise ValueError("HF_TOKEN not set")

        # Initialize client with provider
        # (the provider is set at client level, not per-call)
        client = InferenceClient(
            token=hf_token,
            provider=provider,
        )

        # Call without a provider argument (it's set at client level)
        response = client.chat_completion(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )

        # Extract response
        content = response.choices[0].message.content

        # Estimate tokens (rough word-count heuristic)
        input_tokens = len(prompt.split()) * 1.3
        output_tokens = len(content.split()) * 1.3

        duration = time.time() - start_time

        # Track cost
        cost_tracker.add_call(
            agent_name=agent_name,
            model=model,
            input_tokens=int(input_tokens),
            output_tokens=int(output_tokens),
            cost_per_m_input=cost_per_m_input,
            cost_per_m_output=cost_per_m_output,
            duration=duration,
        )

        if log_callback:
            est_cost = ((input_tokens / 1_000_000) * cost_per_m_input +
                        (output_tokens / 1_000_000) * cost_per_m_output)
            log_callback(f"   ✅ {agent_name}: Complete ({duration:.1f}s, ~{int(input_tokens)} in, ~{int(output_tokens)} out)")
            log_callback(f"   💵 Est. cost: ${est_cost:.4f}")

        return content, int(input_tokens), int(output_tokens)

    except TypeError as e:
        # Fallback: if the provider argument is not supported, try the
        # model:provider naming format instead
        if "provider" in str(e):
            if log_callback:
                log_callback(f"   ⚠️ {agent_name}: Trying model:provider format...")

            from huggingface_hub import InferenceClient

            hf_token = os.environ.get("HF_TOKEN")
            client = InferenceClient(token=hf_token)

            # Try appending the provider to the model name
            model_with_provider = f"{model}:{provider}"

            try:
                response = client.chat_completion(
                    model=model_with_provider,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=max_tokens,
                    temperature=temperature,
                )

                content = response.choices[0].message.content
                input_tokens = len(prompt.split()) * 1.3
                output_tokens = len(content.split()) * 1.3
                duration = time.time() - start_time

                cost_tracker.add_call(
                    agent_name=agent_name,
                    model=model,
                    input_tokens=int(input_tokens),
                    output_tokens=int(output_tokens),
                    cost_per_m_input=cost_per_m_input,
                    cost_per_m_output=cost_per_m_output,
                    duration=duration,
                )

                if log_callback:
                    est_cost = ((input_tokens / 1_000_000) * cost_per_m_input +
                                (output_tokens / 1_000_000) * cost_per_m_output)
                    log_callback(f"   ✅ {agent_name}: Complete ({duration:.1f}s, ~{int(input_tokens)} in, ~{int(output_tokens)} out)")
                    log_callback(f"   💵 Est. cost: ${est_cost:.4f}")

                return content, int(input_tokens), int(output_tokens)

            except Exception:
                # Final fallback: try the bare model name without a provider
                if log_callback:
                    log_callback(f"   ⚠️ {agent_name}: Trying without provider...")

                response = client.chat_completion(
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=max_tokens,
                    temperature=temperature,
                )

                content = response.choices[0].message.content
                input_tokens = len(prompt.split()) * 1.3
                output_tokens = len(content.split()) * 1.3
                duration = time.time() - start_time

                cost_tracker.add_call(
                    agent_name=agent_name,
                    model=model,
                    input_tokens=int(input_tokens),
                    output_tokens=int(output_tokens),
                    cost_per_m_input=cost_per_m_input,
                    cost_per_m_output=cost_per_m_output,
                    duration=duration,
                )

                if log_callback:
                    est_cost = ((input_tokens / 1_000_000) * cost_per_m_input +
                                (output_tokens / 1_000_000) * cost_per_m_output)
                    log_callback(f"   ✅ {agent_name}: Complete ({duration:.1f}s, ~{int(input_tokens)} in, ~{int(output_tokens)} out)")
                    log_callback(f"   💵 Est. cost: ${est_cost:.4f}")

                return content, int(input_tokens), int(output_tokens)
        else:
            raise

    except Exception as e:
        duration = time.time() - start_time
        if log_callback:
            log_callback(f"   ❌ {agent_name}: Error after {duration:.1f}s - {str(e)}")
        raise
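
# Note on the token estimates above: the 1.3x word-count heuristic is only an
# approximation. When the provider reports usage, huggingface_hub's
# chat_completion output typically carries exact counts; the snippet below is a
# hedged sketch of how they could replace the estimate, assuming the provider
# actually populates the field:
#
#     usage = getattr(response, "usage", None)
#     if usage is not None:
#         input_tokens, output_tokens = usage.prompt_tokens, usage.completion_tokens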

# =============================================================================
# ANALYSIS NODES
# =============================================================================

async def analyze_with_llm1(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """LLM 1 (Qwen) analysis node with detailed reasoning logs."""

    config = load_agent_config()
    llm1_config = config.get("stage2_llm1", {})

    model = llm1_config.get("model", "Qwen/Qwen2.5-72B-Instruct")
    provider = llm1_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 55)
        log_callback(f"🤖 LLM 1: {model}")
        log_callback("=" * 55)
        log_callback(f"   Provider: {provider}")
        log_callback(f"   💰 Cost: ${llm1_config.get('cost_per_million_input', 0.29)}/M in, ${llm1_config.get('cost_per_million_output', 0.59)}/M out")
        log_callback(f"   📝 Task: Typography, Colors, AA, Spacing analysis")
        log_callback("")

    # Build prompt
    prompt = build_analyst_prompt(
        tokens_summary=summarize_tokens(state["desktop_tokens"], state["mobile_tokens"]),
        competitors=state["competitors"],
        persona=llm1_config.get("persona", "Senior Design Systems Architect"),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="LLM 1 (Qwen)",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=llm1_config.get("max_tokens", 1500),
            temperature=llm1_config.get("temperature", 0.4),
            cost_per_m_input=llm1_config.get("cost_per_million_input", 0.29),
            cost_per_m_output=llm1_config.get("cost_per_million_output", 0.59),
            log_callback=log_callback,
        )

        # Parse JSON response
        analysis = parse_llm_response(response)
        analysis["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Log detailed findings
        if log_callback and not analysis.get("parse_error"):
            log_callback("")
            log_callback("   📊 LLM 1 FINDINGS:")
            log_callback("")

            # Typography
            typo = analysis.get("typography", {})
            if isinstance(typo, dict):
                log_callback("   TYPOGRAPHY:")
                log_callback(f"      ├─ Detected ratio: {typo.get('detected_ratio', '?')}")
                log_callback(f"      ├─ Score: {typo.get('score', '?')}/10")
                if typo.get("recommendations"):
                    for rec in typo.get("recommendations", [])[:2]:
                        log_callback(f"      └─ 💡 {rec[:60]}...")

            # Colors
            colors = analysis.get("colors", {})
            if isinstance(colors, dict):
                log_callback("")
                log_callback("   COLORS:")
                log_callback(f"      ├─ Score: {colors.get('score', '?')}/10")
                if colors.get("recommendations"):
                    for rec in colors.get("recommendations", [])[:2]:
                        log_callback(f"      └─ 💡 {rec[:60]}...")

            # Accessibility
            aa = analysis.get("accessibility", {})
            if isinstance(aa, dict):
                log_callback("")
                log_callback("   ACCESSIBILITY:")
                log_callback(f"      ├─ Score: {aa.get('score', '?')}/10")
                issues = aa.get("issues", [])
                if issues:
                    for issue in issues[:2]:
                        log_callback(f"      └─ ⚠️ {issue[:60]}...")

            # Top priorities
            priorities = analysis.get("top_3_priorities", [])
            if priorities:
                log_callback("")
                log_callback("   TOP PRIORITIES:")
                for i, p in enumerate(priorities[:3], 1):
                    log_callback(f"      {i}. {p[:70]}")

            log_callback("")
            log_callback(f"   🎯 CONFIDENCE: {analysis.get('confidence', '?')}%")

        return {"llm1_analysis": analysis, "llm1_time": time.time()}

    except Exception as e:
        return {
            "llm1_analysis": {"error": str(e)},
            "errors": state.get("errors", []) + [f"LLM1: {str(e)}"],
            "llm1_time": time.time(),
        }

async def analyze_with_llm2(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """LLM 2 (Llama) analysis node with detailed reasoning logs."""

    config = load_agent_config()
    llm2_config = config.get("stage2_llm2", {})

    model = llm2_config.get("model", "meta-llama/Llama-3.3-70B-Instruct")
    provider = llm2_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 55)
        log_callback(f"🤖 LLM 2: {model}")
        log_callback("=" * 55)
        log_callback(f"   Provider: {provider}")
        log_callback(f"   💰 Cost: ${llm2_config.get('cost_per_million_input', 0.59)}/M in, ${llm2_config.get('cost_per_million_output', 0.79)}/M out")
        log_callback(f"   📝 Task: Typography, Colors, AA, Spacing analysis")
        log_callback("")

    # Build prompt
    prompt = build_analyst_prompt(
        tokens_summary=summarize_tokens(state["desktop_tokens"], state["mobile_tokens"]),
        competitors=state["competitors"],
        persona=llm2_config.get("persona", "Senior Design Systems Architect"),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="LLM 2 (Llama)",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=llm2_config.get("max_tokens", 1500),
            temperature=llm2_config.get("temperature", 0.4),
            cost_per_m_input=llm2_config.get("cost_per_million_input", 0.59),
            cost_per_m_output=llm2_config.get("cost_per_million_output", 0.79),
            log_callback=log_callback,
        )

        # Parse JSON response
        analysis = parse_llm_response(response)
        analysis["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Log detailed findings
        if log_callback and not analysis.get("parse_error"):
            log_callback("")
            log_callback("   📊 LLM 2 FINDINGS:")
            log_callback("")

            # Typography
            typo = analysis.get("typography", {})
            if isinstance(typo, dict):
                log_callback("   TYPOGRAPHY:")
                log_callback(f"      ├─ Detected ratio: {typo.get('detected_ratio', '?')}")
                log_callback(f"      ├─ Score: {typo.get('score', '?')}/10")
                if typo.get("recommendations"):
                    for rec in typo.get("recommendations", [])[:2]:
                        log_callback(f"      └─ 💡 {rec[:60]}...")

            # Colors
            colors = analysis.get("colors", {})
            if isinstance(colors, dict):
                log_callback("")
                log_callback("   COLORS:")
                log_callback(f"      ├─ Score: {colors.get('score', '?')}/10")
                if colors.get("recommendations"):
                    for rec in colors.get("recommendations", [])[:2]:
                        log_callback(f"      └─ 💡 {rec[:60]}...")

            # Accessibility
            aa = analysis.get("accessibility", {})
            if isinstance(aa, dict):
                log_callback("")
                log_callback("   ACCESSIBILITY:")
                log_callback(f"      ├─ Score: {aa.get('score', '?')}/10")
                issues = aa.get("issues", [])
                if issues:
                    for issue in issues[:2]:
                        log_callback(f"      └─ ⚠️ {issue[:60]}...")

            # Top priorities
            priorities = analysis.get("top_3_priorities", [])
            if priorities:
                log_callback("")
                log_callback("   TOP PRIORITIES:")
                for i, p in enumerate(priorities[:3], 1):
                    log_callback(f"      {i}. {p[:70]}")

            log_callback("")
            log_callback(f"   🎯 CONFIDENCE: {analysis.get('confidence', '?')}%")

        return {"llm2_analysis": analysis, "llm2_time": time.time()}

    except Exception as e:
        return {
            "llm2_analysis": {"error": str(e)},
            "errors": state.get("errors", []) + [f"LLM2: {str(e)}"],
            "llm2_time": time.time(),
        }

def run_rule_engine(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """Rule engine node (no LLM, always runs)."""

    if log_callback:
        log_callback("")
        log_callback("⚙️ Rule Engine: Running calculations...")
        log_callback("   💰 Cost: FREE (no LLM)")

    start = time.time()

    # Calculate type scale options
    base_size = detect_base_font_size(state["desktop_tokens"])
    type_scales = {
        "1.2": generate_type_scale(base_size, 1.2),
        "1.25": generate_type_scale(base_size, 1.25),
        "1.333": generate_type_scale(base_size, 1.333),
    }

    # Calculate spacing options
    spacing_options = {
        "4px": generate_spacing_scale(4),
        "8px": generate_spacing_scale(8),
    }

    # Generate color ramps for each base color
    from core.color_utils import generate_color_ramp

    color_ramps = {}
    colors = state["desktop_tokens"].get("colors", {})
    for name, color in list(colors.items())[:8]:
        hex_val = color.get("value") if isinstance(color, dict) else str(color)
        try:
            color_ramps[name] = generate_color_ramp(hex_val)
        except Exception:
            # Skip colors the ramp generator cannot parse
            pass

    duration = time.time() - start

    if log_callback:
        log_callback(f"   ✅ Rule Engine: Complete ({duration:.2f}s)")
        log_callback(f"   Generated: {len(type_scales)} type scales, {len(spacing_options)} spacing grids, {len(color_ramps)} color ramps")

    return {
        "rule_calculations": {
            "base_font_size": base_size,
            "type_scales": type_scales,
            "spacing_options": spacing_options,
            "color_ramps": color_ramps,
        }
    }

async def compile_with_head(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """HEAD compiler node with detailed synthesis logging."""

    config = load_agent_config()
    head_config = config.get("stage2_head", {})

    model = head_config.get("model", "meta-llama/Llama-3.3-70B-Instruct")
    provider = head_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 60)
        log_callback("🧠 HEAD COMPILER: Synthesizing results...")
        log_callback("=" * 60)
        log_callback(f"   Model: {model}")
        log_callback(f"   Provider: {provider}")
        log_callback(f"   💰 Cost: ${head_config.get('cost_per_million_input', 0.59)}/M in, ${head_config.get('cost_per_million_output', 0.79)}/M out")
        log_callback("")
        log_callback("   📥 INPUT: Analyzing outputs from LLM 1 + LLM 2 + Rules...")

    # Build HEAD prompt
    prompt = build_head_prompt(
        llm1_analysis=state.get("llm1_analysis", {}),
        llm2_analysis=state.get("llm2_analysis", {}),
        rule_calculations=state.get("rule_calculations", {}),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="HEAD",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=head_config.get("max_tokens", 2000),
            temperature=head_config.get("temperature", 0.3),
            cost_per_m_input=head_config.get("cost_per_million_input", 0.59),
            cost_per_m_output=head_config.get("cost_per_million_output", 0.79),
            log_callback=log_callback,
        )

        # Parse response
        recommendations = parse_llm_response(response)
        recommendations["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Add cost summary
        recommendations["cost_summary"] = cost_tracker.to_dict()

        # Log detailed HEAD findings
        if log_callback and not recommendations.get("parse_error"):
            log_callback("")
            log_callback("   📊 HEAD SYNTHESIS:")
            log_callback("")

            # Agreements
            agreements = recommendations.get("agreements", [])
            if agreements:
                log_callback("   ✅ AGREEMENTS (High Confidence):")
                for a in agreements[:3]:
                    topic = a.get("topic", "?") if isinstance(a, dict) else str(a)[:30]
                    finding = a.get("finding", "")[:50] if isinstance(a, dict) else ""
                    log_callback(f"      ├─ {topic}: {finding}...")

            # Disagreements
            disagreements = recommendations.get("disagreements", [])
            if disagreements:
                log_callback("")
                log_callback("   🔄 DISAGREEMENTS (Resolved):")
                for d in disagreements[:3]:
                    if isinstance(d, dict):
                        topic = d.get("topic", "?")
                        resolution = d.get("resolution", "")[:60]
                        log_callback(f"      ├─ {topic}: {resolution}...")

            # Final recommendations
            final_recs = recommendations.get("final_recommendations", {})
            if final_recs:
                log_callback("")
                log_callback("   📋 FINAL RECOMMENDATIONS:")
                log_callback(f"      ├─ Type Scale: {final_recs.get('type_scale', '?')}")
                log_callback(f"      ├─ Spacing: {final_recs.get('spacing_base', '?')}")
                if final_recs.get("color_improvements"):
                    log_callback(f"      ├─ Colors: {final_recs['color_improvements'][0][:50]}...")
                if final_recs.get("accessibility_fixes"):
                    log_callback(f"      └─ AA Fixes: {final_recs['accessibility_fixes'][0][:50]}...")

            # Summary
            if recommendations.get("summary"):
                log_callback("")
                log_callback("   📝 SUMMARY:")
                summary = recommendations["summary"][:150]
                log_callback(f"   {summary}...")

            log_callback("")
            log_callback(f"   🎯 OVERALL CONFIDENCE: {recommendations.get('overall_confidence', '?')}%")

        if log_callback:
            log_callback("")
            log_callback("=" * 60)
            log_callback(f"💰 TOTAL ESTIMATED COST: ${cost_tracker.total_cost:.4f}")
            log_callback(f"   (Free tier: $0.10/mo | Pro: $2/mo)")
            log_callback("=" * 60)

        return {
            "final_recommendations": recommendations,
            "cost_tracking": cost_tracker.to_dict(),
            "head_time": time.time(),
        }

    except Exception as e:
        if log_callback:
            log_callback(f"   ❌ HEAD Error: {str(e)}")

        # Fall back to rule-based recommendations
        return {
            "final_recommendations": build_fallback_recommendations(state),
            "errors": state.get("errors", []) + [f"HEAD: {str(e)}"],
            "head_time": time.time(),
        }

# =============================================================================
# HELPER FUNCTIONS
# =============================================================================

def summarize_tokens(desktop: dict, mobile: dict) -> str:
    """Create a summary of tokens for the prompt."""
    lines = []

    # Colors
    colors = desktop.get("colors", {})
    lines.append(f"### Colors ({len(colors)} detected)")
    for name, c in list(colors.items())[:5]:
        val = c.get("value") if isinstance(c, dict) else str(c)
        lines.append(f"- {name}: {val}")

    # Typography Desktop
    typo = desktop.get("typography", {})
    lines.append(f"\n### Typography Desktop ({len(typo)} styles)")
    for name, t in list(typo.items())[:5]:
        if isinstance(t, dict):
            lines.append(f"- {name}: {t.get('font_size', '?')} / {t.get('font_weight', '?')}")

    # Typography Mobile
    mobile_typo = mobile.get("typography", {})
    lines.append(f"\n### Typography Mobile ({len(mobile_typo)} styles)")

    # Spacing
    spacing = desktop.get("spacing", {})
    lines.append(f"\n### Spacing ({len(spacing)} values)")

    return "\n".join(lines)

def build_analyst_prompt(tokens_summary: str, competitors: list[str], persona: str) -> str:
    """Build prompt for analyst LLMs."""
    return f"""You are a {persona}.

## YOUR TASK
Analyze these design tokens extracted from a website and compare against industry best practices.

## EXTRACTED TOKENS
{tokens_summary}

## COMPETITOR DESIGN SYSTEMS TO RESEARCH
{', '.join(competitors)}

## ANALYZE THE FOLLOWING:

### 1. Typography
- Is the type scale consistent? Does it follow a mathematical ratio?
- What is the detected base size?
- Compare to competitors: what ratios do they use?
- Score (1-10) and specific recommendations

### 2. Colors
- Is the color palette cohesive?
- Are semantic colors properly defined (primary, secondary, etc.)?
- Score (1-10) and specific recommendations

### 3. Accessibility (AA Compliance)
- What contrast issues might exist?
- Score (1-10)

### 4. Spacing
- Is spacing consistent? Does it follow a grid (4px, 8px)?
- Score (1-10) and specific recommendations

### 5. Overall Assessment
- Top 3 priorities for improvement

## RESPOND IN JSON FORMAT ONLY:
```json
{{
  "typography": {{"analysis": "...", "detected_ratio": 1.2, "score": 7, "recommendations": ["..."]}},
  "colors": {{"analysis": "...", "score": 6, "recommendations": ["..."]}},
  "accessibility": {{"issues": ["..."], "score": 5}},
  "spacing": {{"analysis": "...", "detected_base": 8, "score": 7, "recommendations": ["..."]}},
  "top_3_priorities": ["...", "...", "..."],
  "confidence": 85
}}
```"""

def build_head_prompt(llm1_analysis: dict, llm2_analysis: dict, rule_calculations: dict) -> str:
    """Build prompt for HEAD compiler."""
    return f"""You are a Principal Design Systems Architect compiling analyses from two expert analysts.

## ANALYST 1 FINDINGS:
{json.dumps(llm1_analysis, indent=2, default=str)[:2000]}

## ANALYST 2 FINDINGS:
{json.dumps(llm2_analysis, indent=2, default=str)[:2000]}

## RULE-BASED CALCULATIONS:
- Base font size: {rule_calculations.get('base_font_size', 16)}px
- Type scale options: 1.2, 1.25, 1.333
- Spacing options: 4px grid, 8px grid

## YOUR TASK:
1. Compare both analyst perspectives
2. Identify agreements and disagreements
3. Synthesize final recommendations

## RESPOND IN JSON FORMAT ONLY:
```json
{{
  "agreements": [{{"topic": "...", "finding": "..."}}],
  "disagreements": [{{"topic": "...", "resolution": "..."}}],
  "final_recommendations": {{
    "type_scale": "1.25",
    "type_scale_rationale": "...",
    "spacing_base": "8px",
    "spacing_rationale": "...",
    "color_improvements": ["..."],
    "accessibility_fixes": ["..."]
  }},
  "overall_confidence": 85,
  "summary": "..."
}}
```"""

def parse_llm_response(response: str) -> dict:
    """Parse JSON from an LLM response."""
    try:
        # Try to extract JSON from a markdown code block
        if "```json" in response:
            start = response.find("```json") + 7
            end = response.find("```", start)
            json_str = response[start:end].strip()
        elif "```" in response:
            start = response.find("```") + 3
            end = response.find("```", start)
            json_str = response[start:end].strip()
        else:
            json_str = response.strip()

        return json.loads(json_str)
    except Exception:
        # Keep a truncated copy of the raw text so callers can inspect failures
        return {"raw_response": response[:500], "parse_error": True}
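
# For example (illustrating the behavior above):
#   parse_llm_response('```json\n{"score": 7}\n```')  ->  {"score": 7}
#   parse_llm_response('no json here')                ->  {"raw_response": "no json here", "parse_error": True}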

def detect_base_font_size(tokens: dict) -> int:
    """Detect base font size from typography tokens."""
    typography = tokens.get("typography", {})

    sizes = []
    for t in typography.values():
        if isinstance(t, dict):
            size_str = str(t.get("font_size", "16px"))
            try:
                size = float(size_str.replace("px", "").replace("rem", "").replace("em", ""))
                if 14 <= size <= 18:
                    sizes.append(size)
            except ValueError:
                # Ignore sizes that are not plain numeric values
                pass

    if sizes:
        # The most common size in the body-text range wins
        return int(max(set(sizes), key=sizes.count))
    return 16
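
# For example, typography styles of 16px, 16px, and 14px yield 16: the most
# frequent size inside the 14-18px body-text window wins, anything outside
# that window is ignored, and the fallback is 16.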

def generate_type_scale(base: int, ratio: float) -> list[int]:
    """Generate type scale from base and ratio."""
    # 13 levels: display.2xl down to overline
    scales = []
    for i in range(8, -5, -1):
        size = base * (ratio ** i)
        # Round to even
        scales.append(int(round(size / 2) * 2))
    return scales
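
# For example, generate_type_scale(16, 1.25) yields, largest to smallest:
#   [96, 76, 62, 48, 40, 32, 24, 20, 16, 12, 10, 8, 6]
# (Python's round() half-to-even is why the 25px step lands on 24, not 26.)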

def generate_spacing_scale(base: int) -> list[int]:
    """Generate spacing scale from base."""
    # 17 steps: 0 through 16 x base (e.g., 0-128px on an 8px grid)
    return [base * i for i in range(0, 17)]

def build_fallback_recommendations(state: Stage2State) -> dict:
    """Build fallback recommendations if HEAD fails."""
    rule_calc = state.get("rule_calculations", {})

    return {
        "final_recommendations": {
            "type_scale": "1.25",
            "type_scale_rationale": "Major Third (1.25) is industry standard",
            "spacing_base": "8px",
            "spacing_rationale": "8px grid provides good visual rhythm",
            "color_improvements": ["Generate full ramps (50-950)"],
            "accessibility_fixes": ["Review contrast ratios"],
        },
        "overall_confidence": 60,
        "summary": "Recommendations based on rule-based analysis (LLM unavailable)",
        "fallback": True,
    }
874
+ # =============================================================================
875
+ # WORKFLOW BUILDER
876
+ # =============================================================================
877
+
878
+ def build_stage2_workflow():
879
+ """Build the LangGraph workflow for Stage 2."""
880
+
881
+ workflow = StateGraph(Stage2State)
882
+
883
+ # Add nodes
884
+ workflow.add_node("llm1_analyst", analyze_with_llm1)
885
+ workflow.add_node("llm2_analyst", analyze_with_llm2)
886
+ workflow.add_node("rule_engine", run_rule_engine)
887
+ workflow.add_node("head_compiler", compile_with_head)
888
+
889
+ # Parallel execution from START
890
+ workflow.add_edge(START, "llm1_analyst")
891
+ workflow.add_edge(START, "llm2_analyst")
892
+ workflow.add_edge(START, "rule_engine")
893
+
894
+ # All converge to HEAD
895
+ workflow.add_edge("llm1_analyst", "head_compiler")
896
+ workflow.add_edge("llm2_analyst", "head_compiler")
897
+ workflow.add_edge("rule_engine", "head_compiler")
898
+
899
+ # HEAD to END
900
+ workflow.add_edge("head_compiler", END)
901
+
902
+ return workflow.compile()
903
+
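
# Illustrative usage (an editor sketch assuming LangGraph's standard async API):
#
#     app = build_stage2_workflow()
#     final_state = await app.ainvoke(initial_state)
#
# Note that run_stage2_multi_agent() below does not invoke this compiled graph;
# it orchestrates the same nodes manually with asyncio.gather so that a
# log_callback can be threaded through to each node.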

# =============================================================================
# MAIN RUNNER
# =============================================================================

async def run_stage2_multi_agent(
    desktop_tokens: dict,
    mobile_tokens: dict,
    competitors: list[str],
    log_callback: Optional[Callable] = None,
) -> dict:
    """Run the Stage 2 multi-agent analysis."""

    global cost_tracker
    cost_tracker = CostTracker()  # Reset between runs

    if log_callback:
        log_callback("")
        log_callback("=" * 60)
        log_callback("🧠 STAGE 2: MULTI-AGENT ANALYSIS")
        log_callback("=" * 60)
        log_callback("")
        log_callback("📦 LLM CONFIGURATION:")

        config = load_agent_config()

        for agent_key in ["stage2_llm1", "stage2_llm2", "stage2_head"]:
            agent = config.get(agent_key, {})
            log_callback("┌─────────────────────────────────────────────────────┐")
            log_callback(f"│ {agent.get('name', agent_key)}")
            log_callback(f"│ Model: {agent.get('model', 'Unknown')}")
            log_callback(f"│ Provider: {agent.get('provider', 'novita')}")
            log_callback(f"│ 💰 Cost: ${agent.get('cost_per_million_input', 0.5)}/M in, ${agent.get('cost_per_million_output', 0.5)}/M out")
            log_callback(f"│ Task: {', '.join(agent.get('tasks', [])[:2])}")
            log_callback("└─────────────────────────────────────────────────────┘")

        log_callback("")
        log_callback("🔄 RUNNING PARALLEL ANALYSIS...")

    # Initial state
    initial_state = {
        "desktop_tokens": desktop_tokens,
        "mobile_tokens": mobile_tokens,
        "competitors": competitors,
        "llm1_analysis": None,
        "llm2_analysis": None,
        "rule_calculations": None,
        "final_recommendations": None,
        "analysis_log": [],
        "cost_tracking": {},
        "errors": [],
        "start_time": time.time(),
        "llm1_time": 0,
        "llm2_time": 0,
        "head_time": 0,
    }

    # Run parallel analysis
    try:
        # Run LLM 1, LLM 2, and the rule engine concurrently
        results = await asyncio.gather(
            analyze_with_llm1(initial_state, log_callback),
            analyze_with_llm2(initial_state, log_callback),
            asyncio.to_thread(run_rule_engine, initial_state, log_callback),
            return_exceptions=True,
        )

        # Merge the partial results into the state
        for result in results:
            if isinstance(result, dict):
                initial_state.update(result)
            elif isinstance(result, Exception):
                initial_state["errors"].append(str(result))

        # Run the HEAD compiler on the merged state
        head_result = await compile_with_head(initial_state, log_callback)
        initial_state.update(head_result)

        return initial_state

    except Exception as e:
        if log_callback:
            log_callback(f"❌ Workflow error: {str(e)}")

        initial_state["errors"].append(str(e))
        initial_state["final_recommendations"] = build_fallback_recommendations(initial_state)
        return initial_state
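
# A minimal smoke test, assuming HF_TOKEN is exported and core.color_utils is
# importable. The demo token shapes mirror what summarize_tokens() expects; the
# values and competitor names are hypothetical.
if __name__ == "__main__":
    demo_desktop = {
        "colors": {"primary": {"value": "#0055ff"}, "surface": "#ffffff"},
        "typography": {"body": {"font_size": "16px", "font_weight": 400}},
        "spacing": {"sm": "8px", "md": "16px"},
    }
    demo_mobile = {"typography": {"body": {"font_size": "15px", "font_weight": 400}}}

    final_state = asyncio.run(run_stage2_multi_agent(
        desktop_tokens=demo_desktop,
        mobile_tokens=demo_mobile,
        competitors=["stripe.com", "linear.app"],  # hypothetical list
        log_callback=print,
    ))
    print(json.dumps(final_state["final_recommendations"], indent=2, default=str))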