riazmo committed on
Commit
45326f9
·
verified ·
1 Parent(s): a19099e

Delete stage2_graph.py

Browse files
Files changed (1) hide show
  1. stage2_graph.py +0 -990
stage2_graph.py DELETED
@@ -1,990 +0,0 @@
1
- """
2
- Stage 2 Multi-Agent Analysis Workflow (LangGraph)
3
-
4
- Architecture:
5
- ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
6
- │ LLM 1 │ │ LLM 2 │ │ Rule Engine │
7
- │ (Qwen) │ │ (Llama) │ │ (No LLM) │
8
- └──────┬──────┘ └──────┬──────┘ └──────┬──────┘
9
- │ │ │
10
- │ PARALLEL │ │
11
- └───────────────────┼───────────────────┘
12
-
13
-
14
- ┌─────────────────┐
15
- │ HEAD │
16
- │ (Compiler) │
17
- └─────────────────┘
18
- """
19
-
20
- import asyncio
21
- import json
22
- import os
23
- import time
24
- import yaml
25
- from dataclasses import dataclass, field
26
- from datetime import datetime
27
- from typing import Any, Callable, Optional
28
-
29
- from langgraph.graph import END, START, StateGraph
30
- from typing_extensions import TypedDict
31
-
32
- # =============================================================================
33
- # CONFIGURATION LOADING
34
- # =============================================================================
35
-
36
def load_agent_config() -> dict:
    """Load the multi-agent configuration from ``config/agents.yaml``.

    Returns:
        The parsed YAML mapping, or an empty dict when the file is
        missing or empty.  ``yaml.safe_load`` yields ``None`` for an
        empty document, which would break the ``.get(...)`` calls every
        caller performs on the result — hence the ``or {}`` guard.
    """
    config_path = os.path.join(os.path.dirname(__file__), "..", "config", "agents.yaml")
    if os.path.exists(config_path):
        # Explicit encoding: the YAML may contain non-ASCII text.
        with open(config_path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f) or {}
    return {}
43
-
44
-
45
- # =============================================================================
46
- # STATE DEFINITION
47
- # =============================================================================
48
-
49
class Stage2State(TypedDict):
    """State for Stage 2 multi-agent analysis.

    Shared state threaded through the LangGraph workflow: the three
    parallel analyst nodes write their outputs here, and the HEAD
    compiler reads them to produce ``final_recommendations``.
    """

    # Inputs
    desktop_tokens: dict      # design tokens extracted from the desktop viewport
    mobile_tokens: dict       # design tokens extracted from the mobile viewport
    competitors: list[str]    # competitor names injected into the analyst prompts

    # Parallel analysis outputs (None until the corresponding node runs)
    llm1_analysis: Optional[dict]
    llm2_analysis: Optional[dict]
    rule_calculations: Optional[dict]

    # HEAD output
    final_recommendations: Optional[dict]

    # Metadata
    analysis_log: list[str]
    cost_tracking: dict       # snapshot of CostTracker.to_dict()
    errors: list[str]         # accumulated error strings from failed nodes

    # Timing (epoch seconds; node completion timestamps)
    start_time: float
    llm1_time: float
    llm2_time: float
    head_time: float
75
-
76
-
77
- # =============================================================================
78
- # COST TRACKING
79
- # =============================================================================
80
-
81
@dataclass
class CostTracker:
    """Accumulate LLM token usage and estimated spend across agent calls."""

    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost: float = 0.0
    calls: list = field(default_factory=list)

    def add_call(self, agent_name: str, model: str, input_tokens: int, output_tokens: int,
                 cost_per_m_input: float, cost_per_m_output: float, duration: float):
        """Record one LLM call and fold it into the running totals."""
        # Per-call cost: token counts are per-million-priced.
        input_cost = (input_tokens / 1_000_000) * cost_per_m_input
        output_cost = (output_tokens / 1_000_000) * cost_per_m_output
        call_cost = input_cost + output_cost

        record = {
            "agent": agent_name,
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost": call_cost,
            "duration": duration,
        }
        self.calls.append(record)

        self.total_input_tokens += input_tokens
        self.total_output_tokens += output_tokens
        self.total_cost += call_cost

    def to_dict(self) -> dict:
        """Return a JSON-serializable snapshot of the accumulated usage."""
        return dict(
            total_input_tokens=self.total_input_tokens,
            total_output_tokens=self.total_output_tokens,
            total_cost=round(self.total_cost, 6),
            calls=self.calls,
        )
117
-
118
-
119
# Global cost tracker; reset at the start of each run by
# run_stage2_multi_agent() so totals cover a single analysis.
cost_tracker = CostTracker()
121
-
122
-
123
- # =============================================================================
124
- # LLM CLIENT
125
- # =============================================================================
126
-
127
async def call_llm(
    agent_name: str,
    model: str,
    provider: str,
    prompt: str,
    max_tokens: int = 1500,
    temperature: float = 0.4,
    cost_per_m_input: float = 0.5,
    cost_per_m_output: float = 0.5,
    log_callback: Optional[Callable] = None,
) -> tuple[str, int, int]:
    """Call an LLM via HuggingFace Inference Providers.

    Attempts, in order (to cope with different huggingface_hub versions):
      1. ``InferenceClient(provider=...)`` — provider set at client level;
      2. the ``model:provider`` model-name convention;
      3. a plain call with no provider at all.

    Returns:
        ``(content, input_tokens, output_tokens)``; token counts are
        rough word-count estimates (~1.3 tokens/word), not tokenizer counts.

    Raises:
        ValueError: when the ``HF_TOKEN`` environment variable is unset.
        Exception: whatever the client raises once all attempts fail
            (logged via ``log_callback`` and re-raised).
    """

    start_time = time.time()

    def _finish(content: str) -> tuple[str, int, int]:
        # Shared bookkeeping for every successful attempt — previously
        # copy-pasted three times: estimate tokens, record the cost,
        # emit the completion log lines.
        input_tokens = len(prompt.split()) * 1.3  # Rough estimate
        output_tokens = len(content.split()) * 1.3
        duration = time.time() - start_time

        cost_tracker.add_call(
            agent_name=agent_name,
            model=model,
            input_tokens=int(input_tokens),
            output_tokens=int(output_tokens),
            cost_per_m_input=cost_per_m_input,
            cost_per_m_output=cost_per_m_output,
            duration=duration,
        )

        if log_callback:
            est_cost = ((input_tokens / 1_000_000) * cost_per_m_input +
                        (output_tokens / 1_000_000) * cost_per_m_output)
            log_callback(f" ✅ {agent_name}: Complete ({duration:.1f}s, ~{int(input_tokens)} in, ~{int(output_tokens)} out)")
            log_callback(f" 💵 Est. cost: ${est_cost:.4f}")

        return content, int(input_tokens), int(output_tokens)

    def _chat(client, model_id: str) -> str:
        # NOTE(review): chat_completion is synchronous, so it blocks the
        # event loop for the duration of the request — confirm callers
        # are happy with that (the workflow gathers these coroutines).
        response = client.chat_completion(
            model=model_id,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return response.choices[0].message.content

    if log_callback:
        log_callback(f" 🚀 {agent_name}: Calling {model} via {provider}...")

    try:
        from huggingface_hub import InferenceClient

        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            raise ValueError("HF_TOKEN not set")

        try:
            # Attempt 1: provider is set at client level, not per-call.
            client = InferenceClient(
                token=hf_token,
                provider=provider,
            )
            return _finish(_chat(client, model))
        except TypeError as e:
            # Older huggingface_hub versions have no ``provider`` kwarg;
            # any other TypeError is a real bug and must propagate.
            if "provider" not in str(e):
                raise

            if log_callback:
                log_callback(f" ⚠️ {agent_name}: Trying model:provider format...")

            client = InferenceClient(token=hf_token)
            try:
                # Attempt 2: append provider to the model name.
                return _finish(_chat(client, f"{model}:{provider}"))
            except Exception:
                # Attempt 3 (final fallback): plain model, no provider.
                if log_callback:
                    log_callback(f" ⚠️ {agent_name}: Trying without provider...")
                return _finish(_chat(client, model))

    except Exception as e:
        # All failures funnel here (fallback failures included, which
        # previously escaped without this log line), then re-raise.
        duration = time.time() - start_time
        if log_callback:
            log_callback(f" ❌ {agent_name}: Error after {duration:.1f}s - {str(e)}")
        raise
282
-
283
-
284
- # =============================================================================
285
- # ANALYSIS NODES
286
- # =============================================================================
287
-
288
async def analyze_with_llm1(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """LLM 1 (Qwen) analysis node with detailed reasoning logs.

    Summarizes the extracted tokens, calls the configured analyst model,
    and returns a partial state update (``llm1_analysis`` + timestamp).
    On failure the error is recorded into ``errors`` instead of raising,
    so the parallel workflow keeps the other analysts' results.
    """

    config = load_agent_config()
    llm1_config = config.get("stage2_llm1", {})

    # Defaults apply when config/agents.yaml is missing or incomplete.
    model = llm1_config.get("model", "Qwen/Qwen2.5-72B-Instruct")
    provider = llm1_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 55)
        log_callback(f"🤖 LLM 1: {model}")
        log_callback("=" * 55)
        log_callback(f" Provider: {provider}")
        log_callback(f" 💰 Cost: ${llm1_config.get('cost_per_million_input', 0.29)}/M in, ${llm1_config.get('cost_per_million_output', 0.59)}/M out")
        log_callback(f" 📝 Task: Typography, Colors, AA, Spacing analysis")
        log_callback("")

    # Build prompt
    prompt = build_analyst_prompt(
        tokens_summary=summarize_tokens(state["desktop_tokens"], state["mobile_tokens"]),
        competitors=state["competitors"],
        persona=llm1_config.get("persona", "Senior Design Systems Architect"),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="LLM 1 (Qwen)",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=llm1_config.get("max_tokens", 1500),
            temperature=llm1_config.get("temperature", 0.4),
            cost_per_m_input=llm1_config.get("cost_per_million_input", 0.29),
            cost_per_m_output=llm1_config.get("cost_per_million_output", 0.59),
            log_callback=log_callback,
        )

        # Parse JSON response; _meta records provenance for downstream use.
        analysis = parse_llm_response(response)
        analysis["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Log detailed findings (skipped when the response failed to parse)
        if log_callback and not analysis.get("parse_error"):
            log_callback("")
            log_callback(" 📊 LLM 1 FINDINGS:")
            log_callback("")

            # Typography (isinstance guards tolerate malformed LLM JSON)
            typo = analysis.get("typography", {})
            if isinstance(typo, dict):
                log_callback(" TYPOGRAPHY:")
                log_callback(f" ├─ Detected ratio: {typo.get('detected_ratio', '?')}")
                log_callback(f" ├─ Score: {typo.get('score', '?')}/10")
                if typo.get("recommendations"):
                    for rec in typo.get("recommendations", [])[:2]:
                        log_callback(f" └─ 💡 {rec[:60]}...")

            # Colors
            colors = analysis.get("colors", {})
            if isinstance(colors, dict):
                log_callback("")
                log_callback(" COLORS:")
                log_callback(f" ├─ Score: {colors.get('score', '?')}/10")
                if colors.get("recommendations"):
                    for rec in colors.get("recommendations", [])[:2]:
                        log_callback(f" └─ 💡 {rec[:60]}...")

            # Accessibility
            aa = analysis.get("accessibility", {})
            if isinstance(aa, dict):
                log_callback("")
                log_callback(" ACCESSIBILITY:")
                log_callback(f" ├─ Score: {aa.get('score', '?')}/10")
                issues = aa.get("issues", [])
                if issues:
                    for issue in issues[:2]:
                        log_callback(f" └─ ⚠️ {issue[:60]}...")

            # Top priorities
            priorities = analysis.get("top_3_priorities", [])
            if priorities:
                log_callback("")
                log_callback(" TOP PRIORITIES:")
                for i, p in enumerate(priorities[:3], 1):
                    log_callback(f" {i}. {p[:70]}")

            log_callback("")
            log_callback(f" 🎯 CONFIDENCE: {analysis.get('confidence', '?')}%")

        return {"llm1_analysis": analysis, "llm1_time": time.time()}

    except Exception as e:
        # Swallow and record: one failed analyst must not abort the run.
        return {
            "llm1_analysis": {"error": str(e)},
            "errors": state.get("errors", []) + [f"LLM1: {str(e)}"],
            "llm1_time": time.time(),
        }
392
-
393
-
394
async def analyze_with_llm2(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """LLM 2 (Llama) analysis node with detailed reasoning logs.

    Mirror image of analyze_with_llm1 using the ``stage2_llm2`` config:
    summarizes tokens, calls the second analyst model, and returns a
    partial state update (``llm2_analysis`` + timestamp). Failures are
    recorded into ``errors`` rather than raised.
    """

    config = load_agent_config()
    llm2_config = config.get("stage2_llm2", {})

    # Defaults apply when config/agents.yaml is missing or incomplete.
    model = llm2_config.get("model", "meta-llama/Llama-3.3-70B-Instruct")
    provider = llm2_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 55)
        log_callback(f"🤖 LLM 2: {model}")
        log_callback("=" * 55)
        log_callback(f" Provider: {provider}")
        log_callback(f" 💰 Cost: ${llm2_config.get('cost_per_million_input', 0.59)}/M in, ${llm2_config.get('cost_per_million_output', 0.79)}/M out")
        log_callback(f" 📝 Task: Typography, Colors, AA, Spacing analysis")
        log_callback("")

    # Build prompt (same analyst prompt as LLM 1, different persona config)
    prompt = build_analyst_prompt(
        tokens_summary=summarize_tokens(state["desktop_tokens"], state["mobile_tokens"]),
        competitors=state["competitors"],
        persona=llm2_config.get("persona", "Senior Design Systems Architect"),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="LLM 2 (Llama)",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=llm2_config.get("max_tokens", 1500),
            temperature=llm2_config.get("temperature", 0.4),
            cost_per_m_input=llm2_config.get("cost_per_million_input", 0.59),
            cost_per_m_output=llm2_config.get("cost_per_million_output", 0.79),
            log_callback=log_callback,
        )

        # Parse JSON response; _meta records provenance for downstream use.
        analysis = parse_llm_response(response)
        analysis["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Log detailed findings (skipped when the response failed to parse)
        if log_callback and not analysis.get("parse_error"):
            log_callback("")
            log_callback(" 📊 LLM 2 FINDINGS:")
            log_callback("")

            # Typography (isinstance guards tolerate malformed LLM JSON)
            typo = analysis.get("typography", {})
            if isinstance(typo, dict):
                log_callback(" TYPOGRAPHY:")
                log_callback(f" ├─ Detected ratio: {typo.get('detected_ratio', '?')}")
                log_callback(f" ├─ Score: {typo.get('score', '?')}/10")
                if typo.get("recommendations"):
                    for rec in typo.get("recommendations", [])[:2]:
                        log_callback(f" └─ 💡 {rec[:60]}...")

            # Colors
            colors = analysis.get("colors", {})
            if isinstance(colors, dict):
                log_callback("")
                log_callback(" COLORS:")
                log_callback(f" ├─ Score: {colors.get('score', '?')}/10")
                if colors.get("recommendations"):
                    for rec in colors.get("recommendations", [])[:2]:
                        log_callback(f" └─ 💡 {rec[:60]}...")

            # Accessibility
            aa = analysis.get("accessibility", {})
            if isinstance(aa, dict):
                log_callback("")
                log_callback(" ACCESSIBILITY:")
                log_callback(f" ├─ Score: {aa.get('score', '?')}/10")
                issues = aa.get("issues", [])
                if issues:
                    for issue in issues[:2]:
                        log_callback(f" └─ ⚠️ {issue[:60]}...")

            # Top priorities
            priorities = analysis.get("top_3_priorities", [])
            if priorities:
                log_callback("")
                log_callback(" TOP PRIORITIES:")
                for i, p in enumerate(priorities[:3], 1):
                    log_callback(f" {i}. {p[:70]}")

            log_callback("")
            log_callback(f" 🎯 CONFIDENCE: {analysis.get('confidence', '?')}%")

        return {"llm2_analysis": analysis, "llm2_time": time.time()}

    except Exception as e:
        # Swallow and record: one failed analyst must not abort the run.
        return {
            "llm2_analysis": {"error": str(e)},
            "errors": state.get("errors", []) + [f"LLM2: {str(e)}"],
            "llm2_time": time.time(),
        }
498
-
499
-
500
def run_rule_engine(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """Rule engine node (no LLM, always runs).

    Deterministically derives candidate type scales (ratios 1.2 / 1.25 /
    1.333), spacing grids (4px / 8px), and per-color ramps from the
    extracted tokens. Returns a partial state update containing
    ``rule_calculations``.
    """

    if log_callback:
        log_callback("")
        log_callback("⚙️ Rule Engine: Running calculations...")
        log_callback(" 💰 Cost: FREE (no LLM)")

    start = time.time()

    # Calculate type scale options from the detected base font size
    base_size = detect_base_font_size(state["desktop_tokens"])
    type_scales = {
        "1.2": generate_type_scale(base_size, 1.2),
        "1.25": generate_type_scale(base_size, 1.25),
        "1.333": generate_type_scale(base_size, 1.333),
    }

    # Calculate spacing options
    spacing_options = {
        "4px": generate_spacing_scale(4),
        "8px": generate_spacing_scale(8),
    }

    # Generate color ramps for (at most) the first 8 base colors
    from core.color_utils import generate_color_ramp

    color_ramps = {}
    colors = state["desktop_tokens"].get("colors", {})
    for name, color in list(colors.items())[:8]:
        hex_val = color.get("value") if isinstance(color, dict) else str(color)
        try:
            color_ramps[name] = generate_color_ramp(hex_val)
        except Exception:
            # Best-effort: skip colors that can't be parsed. Was a bare
            # ``except:``, which also swallowed SystemExit/KeyboardInterrupt.
            pass

    duration = time.time() - start

    if log_callback:
        log_callback(f" ✅ Rule Engine: Complete ({duration:.2f}s)")
        log_callback(f" Generated: {len(type_scales)} type scales, {len(spacing_options)} spacing grids, {len(color_ramps)} color ramps")

    return {
        "rule_calculations": {
            "base_font_size": base_size,
            "type_scales": type_scales,
            "spacing_options": spacing_options,
            "color_ramps": color_ramps,
        }
    }
550
-
551
-
552
async def compile_with_head(state: Stage2State, log_callback: Optional[Callable] = None) -> dict:
    """HEAD compiler node with detailed synthesis logging.

    Feeds both analysts' findings plus the rule-engine calculations to
    the HEAD model, which reconciles agreements/disagreements into
    ``final_recommendations``. On any failure it degrades to
    build_fallback_recommendations(), so the workflow always yields a
    result. Also attaches the accumulated cost summary.
    """

    config = load_agent_config()
    head_config = config.get("stage2_head", {})

    # Defaults apply when config/agents.yaml is missing or incomplete.
    model = head_config.get("model", "meta-llama/Llama-3.3-70B-Instruct")
    provider = head_config.get("provider", "novita")

    if log_callback:
        log_callback("")
        log_callback("=" * 60)
        log_callback("🧠 HEAD COMPILER: Synthesizing results...")
        log_callback("=" * 60)
        log_callback(f" Model: {model}")
        log_callback(f" Provider: {provider}")
        log_callback(f" 💰 Cost: ${head_config.get('cost_per_million_input', 0.59)}/M in, ${head_config.get('cost_per_million_output', 0.79)}/M out")
        log_callback("")
        log_callback(" 📥 INPUT: Analyzing outputs from LLM 1 + LLM 2 + Rules...")

    # Build HEAD prompt from whatever the parallel nodes produced
    # (missing pieces default to {} so a failed analyst doesn't block HEAD).
    prompt = build_head_prompt(
        llm1_analysis=state.get("llm1_analysis", {}),
        llm2_analysis=state.get("llm2_analysis", {}),
        rule_calculations=state.get("rule_calculations", {}),
    )

    try:
        response, in_tokens, out_tokens = await call_llm(
            agent_name="HEAD",
            model=model,
            provider=provider,
            prompt=prompt,
            max_tokens=head_config.get("max_tokens", 2000),
            temperature=head_config.get("temperature", 0.3),
            cost_per_m_input=head_config.get("cost_per_million_input", 0.59),
            cost_per_m_output=head_config.get("cost_per_million_output", 0.79),
            log_callback=log_callback,
        )

        # Parse response; _meta records provenance for downstream use.
        recommendations = parse_llm_response(response)
        recommendations["_meta"] = {
            "model": model,
            "provider": provider,
            "input_tokens": in_tokens,
            "output_tokens": out_tokens,
        }

        # Add cost summary across all agent calls this run
        recommendations["cost_summary"] = cost_tracker.to_dict()

        # Log detailed HEAD findings (skipped when parsing failed)
        if log_callback and not recommendations.get("parse_error"):
            log_callback("")
            log_callback(" 📊 HEAD SYNTHESIS:")
            log_callback("")

            # Agreements (isinstance guards tolerate malformed LLM JSON)
            agreements = recommendations.get("agreements", [])
            if agreements:
                log_callback(" ✅ AGREEMENTS (High Confidence):")
                for a in agreements[:3]:
                    topic = a.get("topic", "?") if isinstance(a, dict) else str(a)[:30]
                    finding = a.get("finding", "")[:50] if isinstance(a, dict) else ""
                    log_callback(f" ├─ {topic}: {finding}...")

            # Disagreements
            disagreements = recommendations.get("disagreements", [])
            if disagreements:
                log_callback("")
                log_callback(" 🔄 DISAGREEMENTS (Resolved):")
                for d in disagreements[:3]:
                    if isinstance(d, dict):
                        topic = d.get("topic", "?")
                        resolution = d.get("resolution", "")[:60]
                        log_callback(f" ├─ {topic}: {resolution}...")

            # Final recommendations
            final_recs = recommendations.get("final_recommendations", {})
            if final_recs:
                log_callback("")
                log_callback(" 📋 FINAL RECOMMENDATIONS:")
                log_callback(f" ├─ Type Scale: {final_recs.get('type_scale', '?')}")
                log_callback(f" ├─ Spacing: {final_recs.get('spacing_base', '?')}")
                if final_recs.get("color_improvements"):
                    log_callback(f" ├─ Colors: {final_recs['color_improvements'][0][:50]}...")
                if final_recs.get("accessibility_fixes"):
                    log_callback(f" └─ AA Fixes: {final_recs['accessibility_fixes'][0][:50]}...")

            # Summary
            if recommendations.get("summary"):
                log_callback("")
                log_callback(" 📝 SUMMARY:")
                summary = recommendations["summary"][:150]
                log_callback(f" {summary}...")

            log_callback("")
            log_callback(f" 🎯 OVERALL CONFIDENCE: {recommendations.get('overall_confidence', '?')}%")

        if log_callback:
            log_callback("")
            log_callback("=" * 60)
            log_callback(f"💰 TOTAL ESTIMATED COST: ${cost_tracker.total_cost:.4f}")
            log_callback(f" (Free tier: $0.10/mo | Pro: $2/mo)")
            log_callback("=" * 60)

        return {
            "final_recommendations": recommendations,
            "cost_tracking": cost_tracker.to_dict(),
            "head_time": time.time(),
        }

    except Exception as e:
        if log_callback:
            log_callback(f" ❌ HEAD Error: {str(e)}")

        # Fallback to rule-based recommendations so the run still
        # produces actionable output.
        return {
            "final_recommendations": build_fallback_recommendations(state),
            "errors": state.get("errors", []) + [f"HEAD: {str(e)}"],
            "head_time": time.time(),
        }
675
-
676
-
677
- # =============================================================================
678
- # HELPER FUNCTIONS
679
- # =============================================================================
680
-
681
def summarize_tokens(desktop: dict, mobile: dict) -> str:
    """Build a compact, markdown-flavored token summary for LLM prompts.

    Shows up to 5 colors and 5 desktop typography styles, plus counts
    for mobile typography and spacing values.
    """
    desktop_colors = desktop.get("colors", {})
    desktop_typo = desktop.get("typography", {})
    mobile_typo = mobile.get("typography", {})
    spacing = desktop.get("spacing", {})

    parts = [f"### Colors ({len(desktop_colors)} detected)"]
    for color_name, color in list(desktop_colors.items())[:5]:
        value = color.get("value") if isinstance(color, dict) else str(color)
        parts.append(f"- {color_name}: {value}")

    parts.append(f"\n### Typography Desktop ({len(desktop_typo)} styles)")
    parts.extend(
        f"- {style_name}: {style.get('font_size', '?')} / {style.get('font_weight', '?')}"
        for style_name, style in list(desktop_typo.items())[:5]
        if isinstance(style, dict)
    )

    parts.append(f"\n### Typography Mobile ({len(mobile_typo)} styles)")
    parts.append(f"\n### Spacing ({len(spacing)} values)")

    return "\n".join(parts)
708
-
709
-
710
def build_analyst_prompt(tokens_summary: str, competitors: list[str], persona: str) -> str:
    """Build the shared analysis prompt given to each analyst LLM.

    The prompt demands a strict-JSON reply (fenced in ```json) so that
    parse_llm_response() can extract the structured findings. Literal
    braces in the JSON template are escaped as ``{{``/``}}`` inside the
    f-string.
    """
    return f"""You are a {persona}.

## YOUR TASK
Analyze these design tokens extracted from a website and compare against industry best practices.

## EXTRACTED TOKENS
{tokens_summary}

## COMPETITOR DESIGN SYSTEMS TO RESEARCH
{', '.join(competitors)}

## ANALYZE THE FOLLOWING:

### 1. Typography
- Is the type scale consistent? Does it follow a mathematical ratio?
- What is the detected base size?
- Compare to competitors: what ratios do they use?
- Score (1-10) and specific recommendations

### 2. Colors
- Is the color palette cohesive?
- Are semantic colors properly defined (primary, secondary, etc.)?
- Score (1-10) and specific recommendations

### 3. Accessibility (AA Compliance)
- What contrast issues might exist?
- Score (1-10)

### 4. Spacing
- Is spacing consistent? Does it follow a grid (4px, 8px)?
- Score (1-10) and specific recommendations

### 5. Overall Assessment
- Top 3 priorities for improvement

## RESPOND IN JSON FORMAT ONLY:
```json
{{
"typography": {{"analysis": "...", "detected_ratio": 1.2, "score": 7, "recommendations": ["..."]}},
"colors": {{"analysis": "...", "score": 6, "recommendations": ["..."]}},
"accessibility": {{"issues": ["..."], "score": 5}},
"spacing": {{"analysis": "...", "detected_base": 8, "score": 7, "recommendations": ["..."]}},
"top_3_priorities": ["...", "...", "..."],
"confidence": 85
}}
```"""
758
-
759
-
760
def build_head_prompt(llm1_analysis: dict, llm2_analysis: dict, rule_calculations: dict) -> str:
    """Build the prompt for the HEAD compiler.

    Each analyst's JSON is truncated to 2000 characters to bound prompt
    size; ``default=str`` keeps non-JSON-serializable values (if any)
    from raising during dumps. Demands a strict-JSON reply for
    parse_llm_response().
    """
    return f"""You are a Principal Design Systems Architect compiling analyses from two expert analysts.

## ANALYST 1 FINDINGS:
{json.dumps(llm1_analysis, indent=2, default=str)[:2000]}

## ANALYST 2 FINDINGS:
{json.dumps(llm2_analysis, indent=2, default=str)[:2000]}

## RULE-BASED CALCULATIONS:
- Base font size: {rule_calculations.get('base_font_size', 16)}px
- Type scale options: 1.2, 1.25, 1.333
- Spacing options: 4px grid, 8px grid

## YOUR TASK:
1. Compare both analyst perspectives
2. Identify agreements and disagreements
3. Synthesize final recommendations

## RESPOND IN JSON FORMAT ONLY:
```json
{{
"agreements": [{{"topic": "...", "finding": "..."}}],
"disagreements": [{{"topic": "...", "resolution": "..."}}],
"final_recommendations": {{
"type_scale": "1.25",
"type_scale_rationale": "...",
"spacing_base": "8px",
"spacing_rationale": "...",
"color_improvements": ["..."],
"accessibility_fixes": ["..."]
}},
"overall_confidence": 85,
"summary": "..."
}}
```"""
797
-
798
-
799
def parse_llm_response(response: str) -> dict:
    """Parse a JSON object out of an LLM response.

    Handles responses wrapped in ```json ... ``` or plain ``` ... ```
    fences as well as bare JSON. A missing closing fence no longer
    breaks parsing (previously ``find`` returned -1, silently dropping
    the last character and usually failing the parse). On any parse
    failure, returns ``{"raw_response": ..., "parse_error": True}`` so
    callers can degrade gracefully instead of crashing.
    """
    try:
        # Try to extract JSON from a markdown code block
        if "```json" in response:
            start = response.find("```json") + 7
            end = response.find("```", start)
            # end == -1 means no closing fence: take the rest of the text
            json_str = response[start:end if end != -1 else len(response)].strip()
        elif "```" in response:
            start = response.find("```") + 3
            end = response.find("```", start)
            json_str = response[start:end if end != -1 else len(response)].strip()
        else:
            json_str = response.strip()

        return json.loads(json_str)
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError; the original bare
        # ``except:`` also swallowed SystemExit/KeyboardInterrupt.
        return {"raw_response": response[:500], "parse_error": True}
817
-
818
-
819
def detect_base_font_size(tokens: dict) -> int:
    """Detect the body base font size from typography tokens.

    Collects numeric sizes in the plausible body-text range (14-18,
    after stripping px/rem/em suffixes) and returns the most common
    one; falls back to 16 when nothing qualifies.

    NOTE(review): rem/em values are treated as raw numbers, so e.g.
    "1rem" yields 1.0 and is filtered out by the 14-18 range — confirm
    that is intended.
    """
    typography = tokens.get("typography", {})

    sizes = []
    for t in typography.values():
        if isinstance(t, dict):
            size_str = str(t.get("font_size", "16px"))
            try:
                size = float(size_str.replace("px", "").replace("rem", "").replace("em", ""))
            except ValueError:
                # Non-numeric size (e.g. "large"); was a bare ``except:``,
                # which also swallowed SystemExit/KeyboardInterrupt.
                continue
            if 14 <= size <= 18:
                sizes.append(size)

    if sizes:
        # Mode of the collected sizes (arbitrary tie-break via set order)
        return int(max(set(sizes), key=sizes.count))
    return 16
837
-
838
-
839
def generate_type_scale(base: int, ratio: float) -> list[int]:
    """Generate a 13-step type scale (largest first) from base and ratio.

    Steps run from ratio**8 (display.2xl) down to ratio**-4 (overline);
    every size is rounded to the nearest even pixel value.
    """
    exponents = range(8, -5, -1)
    return [int(round(base * ratio ** power / 2) * 2) for power in exponents]
848
-
849
-
850
def generate_spacing_scale(base: int) -> list[int]:
    """Return the 17-step spacing scale 0, base, 2*base, ..., 16*base."""
    return [step * base for step in range(17)]
853
-
854
-
855
def build_fallback_recommendations(state: "Stage2State") -> dict:
    """Build rule-of-thumb recommendations when the HEAD LLM call fails.

    Returns safe, industry-standard defaults flagged with
    ``fallback: True`` and reduced confidence. The ``state`` argument
    is kept for interface compatibility with the other node helpers
    (the original read ``rule_calculations`` into a local it never
    used; that dead assignment is removed). The annotation is a string
    forward reference so the function does not depend on Stage2State
    being defined first.
    """
    return {
        "final_recommendations": {
            "type_scale": "1.25",
            "type_scale_rationale": "Major Third (1.25) is industry standard",
            "spacing_base": "8px",
            "spacing_rationale": "8px grid provides good visual rhythm",
            "color_improvements": ["Generate full ramps (50-950)"],
            "accessibility_fixes": ["Review contrast ratios"],
        },
        "overall_confidence": 60,
        "summary": "Recommendations based on rule-based analysis (LLM unavailable)",
        "fallback": True,
    }
872
-
873
-
874
- # =============================================================================
875
- # WORKFLOW BUILDER
876
- # =============================================================================
877
-
878
def build_stage2_workflow():
    """Build the LangGraph workflow for Stage 2.

    Topology: START fans out to llm1_analyst, llm2_analyst, and
    rule_engine in parallel; all three converge on head_compiler,
    which then reaches END. Returns the compiled graph.

    NOTE(review): run_stage2_multi_agent() orchestrates the same nodes
    directly with asyncio.gather instead of invoking this graph —
    confirm which path is actually used in production.
    """

    workflow = StateGraph(Stage2State)

    # Add nodes
    workflow.add_node("llm1_analyst", analyze_with_llm1)
    workflow.add_node("llm2_analyst", analyze_with_llm2)
    workflow.add_node("rule_engine", run_rule_engine)
    workflow.add_node("head_compiler", compile_with_head)

    # Parallel execution from START
    workflow.add_edge(START, "llm1_analyst")
    workflow.add_edge(START, "llm2_analyst")
    workflow.add_edge(START, "rule_engine")

    # All converge to HEAD
    workflow.add_edge("llm1_analyst", "head_compiler")
    workflow.add_edge("llm2_analyst", "head_compiler")
    workflow.add_edge("rule_engine", "head_compiler")

    # HEAD to END
    workflow.add_edge("head_compiler", END)

    return workflow.compile()
903
-
904
-
905
- # =============================================================================
906
- # MAIN RUNNER
907
- # =============================================================================
908
-
909
async def run_stage2_multi_agent(
    desktop_tokens: dict,
    mobile_tokens: dict,
    competitors: list[str],
    log_callback: Optional[Callable] = None,
) -> dict:
    """Run the Stage 2 multi-agent analysis.

    Resets the global cost tracker, runs the two LLM analysts and the
    rule engine concurrently (the synchronous rule engine via a worker
    thread), merges their partial state updates, then runs the HEAD
    compiler. Individual node failures are collected into ``errors``;
    a whole-workflow failure still returns a state with rule-based
    fallback recommendations. Returns the final state dict.
    """

    global cost_tracker
    cost_tracker = CostTracker()  # Reset so totals cover this run only

    if log_callback:
        log_callback("")
        log_callback("=" * 60)
        log_callback("🧠 STAGE 2: MULTI-AGENT ANALYSIS")
        log_callback("=" * 60)
        log_callback("")
        log_callback("📦 LLM CONFIGURATION:")

        config = load_agent_config()

        # One banner box per configured agent (blank config → defaults shown)
        for agent_key in ["stage2_llm1", "stage2_llm2", "stage2_head"]:
            agent = config.get(agent_key, {})
            log_callback(f"┌─────────────────────────────────────────────────────┐")
            log_callback(f"│ {agent.get('name', agent_key)}")
            log_callback(f"│ Model: {agent.get('model', 'Unknown')}")
            log_callback(f"│ Provider: {agent.get('provider', 'novita')}")
            log_callback(f"│ 💰 Cost: ${agent.get('cost_per_million_input', 0.5)}/M in, ${agent.get('cost_per_million_output', 0.5)}/M out")
            log_callback(f"│ Task: {', '.join(agent.get('tasks', [])[:2])}")
            log_callback(f"└─────────────────────────────────────────────────────┘")

        log_callback("")
        log_callback("🔄 RUNNING PARALLEL ANALYSIS...")

    # Initial state (also serves as the mutable result accumulator)
    initial_state = {
        "desktop_tokens": desktop_tokens,
        "mobile_tokens": mobile_tokens,
        "competitors": competitors,
        "llm1_analysis": None,
        "llm2_analysis": None,
        "rule_calculations": None,
        "final_recommendations": None,
        "analysis_log": [],
        "cost_tracking": {},
        "errors": [],
        "start_time": time.time(),
        "llm1_time": 0,
        "llm2_time": 0,
        "head_time": 0,
    }

    # Run parallel analysis
    try:
        # Run LLM1, LLM2, and Rules in parallel; return_exceptions=True
        # so one failing node does not cancel the others.
        results = await asyncio.gather(
            analyze_with_llm1(initial_state, log_callback),
            analyze_with_llm2(initial_state, log_callback),
            asyncio.to_thread(run_rule_engine, initial_state, log_callback),
            return_exceptions=True,
        )

        # Merge partial state updates; stringify any raised exceptions
        for result in results:
            if isinstance(result, dict):
                initial_state.update(result)
            elif isinstance(result, Exception):
                initial_state["errors"].append(str(result))

        # Run HEAD compiler on the merged state
        head_result = await compile_with_head(initial_state, log_callback)
        initial_state.update(head_result)

        return initial_state

    except Exception as e:
        if log_callback:
            log_callback(f"❌ Workflow error: {str(e)}")

        # Degrade gracefully: record the error and fall back to the
        # rule-based recommendations instead of raising.
        initial_state["errors"].append(str(e))
        initial_state["final_recommendations"] = build_fallback_recommendations(initial_state)
        return initial_state