Spaces:

riazmo
/

design-system-extractor

Sleeping

App Files Files Community

riazmo committed on Jan 24

Commit

c4557ab

verified ·

1 Parent(s): b565ae1

Upload agents.yaml

Browse files

Files changed (1) hide show

config/agents.yaml +547 -0

config/agents.yaml ADDED Viewed

	@@ -0,0 +1,547 @@

+# Agent Personas & Configuration
+# Design System Extractor v2
+# =============================================================================
+# MODEL CONFIGURATION
+# =============================================================================
+# Model assignments for each agent based on task complexity
+# =============================================================================
+models:
+  # Agent 1: Crawler & Extractor — NO LLM NEEDED (Rule-based)
+  agent_crawler:
+    requires_llm: false
+    notes: "Pure rule-based extraction using Playwright + CSS parsing"
+  # Agent 2: Normalizer — LIGHT MODEL
+  agent_normalizer:
+    requires_llm: true
+    primary_model: "Qwen/Qwen2.5-7B-Instruct"
+    fallback_model: "microsoft/Phi-3-mini-4k-instruct"
+    tasks:
+      - "Suggest semantic token names from values"
+      - "Explain duplicate detection rationale"
+      - "Infer naming conventions from class names"
+    temperature: 0.2  # Low temp for consistent naming
+    max_tokens: 1024
+    why_this_model: |
+      7B model is sufficient because:
+      - Tasks are mostly pattern matching and naming
+      - Doesn't require complex reasoning
+      - Fast inference, good for iterative workflows
+  # Agent 3: Advisor — STRONG MODEL (Key agent)
+  agent_advisor:
+    requires_llm: true
+    primary_model: "Qwen/Qwen2.5-72B-Instruct"
+    fallback_model: "mistralai/Mixtral-8x7B-Instruct-v0.1"
+    budget_model: "Qwen/Qwen2.5-7B-Instruct"  # If quota constrained
+    tasks:
+      - "Analyze extracted system for patterns and anti-patterns"
+      - "Research and recommend type scales with rationale"
+      - "Suggest spacing systems based on detected patterns"
+      - "Identify accessibility issues and fixes"
+      - "Compare against industry design systems"
+    temperature: 0.4  # Moderate temp for creative recommendations
+    max_tokens: 4096
+    why_this_model: |
+      72B model is critical because:
+      - Needs deep understanding of design systems
+      - Must reason about trade-offs (pros/cons)
+      - Should know Material, Polaris, Carbon, etc.
+      - Quality of recommendations is user-facing
+  # Agent 4: Generator — LIGHT MODEL
+  agent_generator:
+    requires_llm: true
+    primary_model: "Qwen/Qwen2.5-7B-Instruct"
+    fallback_model: "mistralai/Mistral-7B-Instruct-v0.3"
+    tasks:
+      - "Format tokens into Tokens Studio JSON"
+      - "Generate CSS custom properties"
+      - "Structure metadata correctly"
+    temperature: 0.1  # Very low temp for consistent formatting
+    max_tokens: 8192  # Larger for full JSON output
+    why_this_model: |
+      7B model is sufficient because:
+      - Structured output generation is formulaic
+      - JSON schema is well-defined
+      - Speed matters for export flow
+# =============================================================================
+# FREE TIER vs PRO RECOMMENDATIONS
+# =============================================================================
+tier_recommendations:
+  free_tier:
+    description: "For users without HF Pro subscription"
+    agent2: "Qwen/Qwen2.5-7B-Instruct"
+    agent3: "mistralai/Mixtral-8x7B-Instruct-v0.1"  # Best free option for reasoning
+    agent4: "Qwen/Qwen2.5-7B-Instruct"
+    notes: "Quality will be slightly lower for Agent 3 recommendations"
+  pro_tier:
+    description: "For users with HF Pro subscription"
+    agent2: "Qwen/Qwen2.5-7B-Instruct"
+    agent3: "Qwen/Qwen2.5-72B-Instruct"  # Full quality
+    agent4: "Qwen/Qwen2.5-7B-Instruct"
+    notes: "Best quality, especially for design system recommendations"
+# =============================================================================
+# AGENT 1: Website Crawler & Extractor
+# =============================================================================
+agent_crawler:
+  name: "Design Archaeologist"
+  persona: |
+    You are a meticulous Design Archaeologist. Your job is to carefully excavate
+    design decisions buried in website code. You approach each site with curiosity
+    and systematic precision, documenting everything you find without judgment.
+    You understand that live websites are often messy — accumulated decisions from
+    multiple designers over years. Your role is to faithfully extract what exists,
+    not to fix or improve it yet.
+  responsibilities:
+    - Auto-discover pages from base URL (minimum 10)
+    - "Include key templates: homepage, listing, detail, form, marketing, auth"
+    - Scroll pages fully (above + below fold)
+    - Extract separately for Desktop (1440px) and Mobile (375px)
+  extraction_targets:
+    colors:
+      - hex values
+      - rgb/rgba values
+      - usage frequency
+      - context (background, text, border, etc.)
+    typography:
+      - font families
+      - font sizes (px, rem, em)
+      - line heights
+      - font weights
+      - letter spacing
+    spacing:
+      - margin values
+      - padding values
+      - gap values
+      - infer base system (4px, 8px)
+    other:
+      - border radius
+      - box shadows
+      - layout signals (containers, grids)
+  tools:
+    - playwright (crawling, scrolling, computed styles)
+    - css parsing (CSSOM)
+  output_format:
+    - raw extracted tokens
+    - confidence score per token
+    - desktop/mobile separated
+    - errors and warnings logged
+  guardrails:
+    - Never modify or "fix" extracted values
+    - Always preserve original CSS values
+    - Log anomalies, don't hide them
+    - Respect robots.txt (configurable)
+# =============================================================================
+# AGENT 2: Token Normalizer & Structurer
+# =============================================================================
+agent_normalizer:
+  name: "Design System Librarian"
+  persona: |
+    You are a methodical Design System Librarian. Your expertise is in organizing
+    chaotic information into structured, meaningful categories. You see patterns
+    where others see noise.
+    You are careful to distinguish between what you KNOW (detected) and what you
+    THINK (inferred). You never overwrite the truth — you annotate it.
+  responsibilities:
+    - Clean noisy extraction data
+    - Group and merge duplicates (with threshold tolerance)
+    - Infer naming patterns from class names
+    - Propose initial token names
+    - Tag confidence levels
+  naming_conventions:
+    colors:
+      pattern: "color.{role}.{shade}"
+      example: "color.primary.500"
+      roles:
+        - primary
+        - secondary
+        - neutral
+        - success
+        - warning
+        - error
+      shades:
+        - 50
+        - 100
+        - 200
+        - 300
+        - 400
+        - 500
+        - 600
+        - 700
+        - 800
+        - 900
+    typography:
+      pattern: "font.{category}.{size}"
+      example: "font.heading.lg"
+      categories:
+        - heading
+        - body
+        - label
+        - caption
+      sizes:
+        - xs
+        - sm
+        - md
+        - lg
+        - xl
+        - 2xl
+    spacing:
+      pattern: "space.{size}"
+      example: "space.4, space.8"
+      note: "based on pixel value / 4"  # i.e. 16px -> space.4, 32px -> space.8
+  tagging:
+    detected: "Directly found in CSS, high confidence"
+    inferred: "Derived from patterns, medium confidence"
+    low_confidence: "Appears rarely or inconsistently"
+  duplicate_threshold:
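+    # NOTE(review): the unit of "steps" is not defined here; color_processing.duplicate_detection
+    # uses a CIE Delta E threshold of 3 — confirm the two are meant to agree.
+    colors: 3  # hex values within 3 steps are potential duplicates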
+    spacing: 2  # pixel values within 2px are potential duplicates
+  guardrails:
+    - Never overwrite extracted truth
+    - Always mark inferred vs detected
+    - Preserve original values alongside normalized
+    - Flag conflicts, don't resolve them silently
+# =============================================================================
+# AGENT 3: Design System Best Practices Advisor
+# =============================================================================
+agent_advisor:
+  name: "Senior Staff Design Systems Architect"
+  persona: |
+    You are a Senior Staff Design Systems Architect with 15+ years of experience
+    building and scaling design systems at major tech companies. You've seen what
+    works and what doesn't.
+    Your role is to ADVISE, not DECIDE. You present options with clear rationale,
+    letting humans make the final call. You respect existing decisions while
+    offering paths to improvement.
+  responsibilities:
+    - Analyze extracted system for patterns and anti-patterns
+    - Research modern design system best practices
+    - Propose upgrade OPTIONS (never auto-apply)
+    - Ensure accessibility compliance (AA minimum)
+  research_sources:
+    - Material Design (Google)
+    - Polaris (Shopify)
+    - Carbon (IBM)
+    - Fluent (Microsoft)
+    - Primer (GitHub)
+    - Radix
+    - Tailwind CSS
+  upgrade_categories:
+    typography_scales:
+      - name: "Minor Third"
+        ratio: 1.2
+        description: "Subtle progression, good for dense UIs"
+      - name: "Major Third"
+        ratio: 1.25
+        description: "Balanced, most popular choice"
+      - name: "Perfect Fourth"
+        ratio: 1.333
+        description: "Strong hierarchy, good for marketing"
+      - name: "Golden Ratio"
+        ratio: 1.618
+        description: "Dramatic, use sparingly"
+    spacing_systems:
+      - name: "4px base"
+        scale: [4, 8, 12, 16, 20, 24, 32, 40, 48, 64]
+        description: "Fine-grained control"
+      - name: "8px base"
+        scale: [4, 8, 16, 24, 32, 48, 64, 80, 96]
+        description: "Industry standard, recommended"
+      - name: "Tailwind"
+        scale: [4, 8, 12, 16, 20, 24, 32, 40, 48, 56, 64, 80, 96]
+        description: "Utility-first, comprehensive"
+    naming_conventions:
+      - name: "T-shirt sizes"
+        example: "xs, sm, md, lg, xl, 2xl"
+        pros: "Intuitive, easy to remember"
+        cons: "Limited granularity"
+      - name: "Numeric"
+        example: "100, 200, 300, 400, 500"
+        pros: "Extensible, precise"
+        cons: "Less intuitive"
+      - name: "Semantic"
+        example: "caption, body, subhead, title, display"
+        pros: "Meaningful, self-documenting"
+        cons: "Harder to extend"
+    color_ramps:
+      shades: [50, 100, 200, 300, 400, 500, 600, 700, 800, 900]
+      aa_contrast_minimum: 4.5
+      aaa_contrast_minimum: 7.0
+  output_format:
+    - option sets (never single recommendations)
+    - rationale for each option
+    - pros and cons
+    - accessibility impact
+    - migration effort estimate
+  guardrails:
+    - Never auto-apply changes
+    - Always provide multiple options
+    - Respect existing system, suggest improvements
+    - Flag accessibility issues prominently
+# =============================================================================
+# AGENT 4: Plugin & JSON Generator
+# =============================================================================
+agent_generator:
+  name: "Automation Engineer"
+  persona: |
+    You are a precise Automation Engineer. Your job is to transform design
+    decisions into machine-readable formats that tools can consume. You care
+    deeply about compatibility, versioning, and clean output.
+    You understand that your output will be used by Figma plugins, CSS
+    preprocessors, and design tools — so format matters.
+  responsibilities:
+    - Convert finalized tokens to standard formats
+    - Generate color ramps with AA compliance
+    - Maintain viewport separation (Desktop/Mobile)
+    - Version all outputs
+  output_formats:
+    tokens_studio:
+      description: "Compatible with Tokens Studio Figma plugin"
+      extension: ".json"
+    figma_variables:
+      description: "Direct Figma Variables format"
+      extension: ".json"
+    css_variables:
+      description: "CSS custom properties"
+      extension: ".css"
+    tailwind_config:
+      description: "Tailwind CSS configuration"
+      extension: ".js"
+  token_structure:
+    colors:
+      include_ramps: true
+      ramp_shades: [50, 100, 200, 300, 400, 500, 600, 700, 800, 900]
+      include_contrast: true
+    typography:
+      include_composite: true  # font-family + size + weight + line-height
+      include_individual: true
+    spacing:
+      base: 8
+      include_negative: false
+  metadata_fields:
+    - source_url
+    - extracted_at
+    - version
+    - viewport
+    - token_source (detected/inferred/upgraded)
+  guardrails:
+    - Always include metadata
+    - Validate JSON before output
+    - Preserve source attribution on each token
+    - Warn on potential conflicts
+# =============================================================================
+# LANGGRAPH WORKFLOW CONFIGURATION
+# =============================================================================
+workflow:
+  name: "design_system_extraction"
+  checkpoints:
+    - id: "confirm_pages"
+      description: "Human confirms discovered pages before crawling"
+      required: true
+    - id: "review_extraction"
+      description: "Human reviews extracted tokens (Stage 1 UI)"
+      required: true
+    - id: "select_upgrades"
+      description: "Human selects upgrade options (Stage 2 UI)"
+      required: true
+    - id: "approve_export"
+      description: "Human approves final output (Stage 3 UI)"
+      required: true
+  parallel_nodes:
+    - name: "viewport_extraction"
+      description: "Extract Desktop and Mobile in parallel"
+      agents: ["agent_crawler"]
+    - name: "research_and_advise"
+      description: "Agent 3 can research while human reviews Stage 1"
+      agents: ["agent_advisor"]
+  error_handling:
+    retry_attempts: 3
+    retry_delay_seconds: 5
+    log_errors: true
+    show_in_ui: true
+# =============================================================================
+# UI CONFIGURATION
+# =============================================================================
+ui:
+  layout: "single_scroll"
+  stages:
+    stage1:
+      name: "Extraction Review"
+      purpose: "Trust building — see what was found"
+      components:
+        - token_tables
+        - color_swatches
+        - typography_samples
+        - viewport_toggle
+        - confidence_indicators
+        - accept_reject_controls
+    stage2:
+      name: "Upgrade Playground"
+      purpose: "Decision making through live visuals"
+      components:
+        - option_selector
+        - live_preview_iframe
+        - side_by_side_comparison
+        - existing_vs_upgraded_toggle
+    stage3:
+      name: "Final Review & Export"
+      purpose: "Confidence before export"
+      components:
+        - token_preview_readonly
+        - json_tree_view
+        - diff_view
+        - viewport_tabs
+        - download_buttons
+        - version_labeling
+  preview:
+    type: "iframe"
+    template: "specimen.html"
+    update_trigger: "on_selection"
+# =============================================================================
+# EXTRACTION SETTINGS
+# =============================================================================
+extraction:
+  viewports:
+    desktop:
+      width: 1440
+      height: 900
+      name: "Desktop"
+    mobile:
+      width: 375
+      height: 812
+      name: "Mobile"
+  crawling:
+    max_pages: 20
+    min_pages: 10
+    scroll_behavior: "smooth"
+    wait_for_network_idle: true
+    network_idle_timeout_ms: 5000
+    skip_infinite_scroll: true
+    respect_robots_txt: true
+  page_templates:
+    required:
+      - homepage
+      - listing
+      - detail
+    optional:
+      - form
+      - marketing
+      - auth
+      - checkout
+      - about
+      - contact
+# =============================================================================
+# COLOR PROCESSING
+# =============================================================================
+color_processing:
+  ramp_generation:
+    enabled: true
+    shades: [50, 100, 200, 300, 400, 500, 600, 700, 800, 900]
+    method: "oklch"  # or "hsl" or "lab"
+  accessibility:
+    check_contrast: true
+    minimum_standard: "AA"  # or "AAA"
+    contrast_pairs:
+      - ["text", "background"]
+      - ["button-text", "button-background"]
+  duplicate_detection:
+    enabled: true
+    threshold_delta_e: 3  # CIE Delta E threshold
+# =============================================================================
+# TYPOGRAPHY PROCESSING
+# =============================================================================
+typography_processing:
+  scale_options:
+    - name: "Minor Third"
+      ratio: 1.2
+    - name: "Major Third"
+      ratio: 1.25
+    - name: "Perfect Fourth"
+      ratio: 1.333
+  base_size: 16  # px
+  text_styles:
+    - display
+    - heading-xl
+    - heading-lg
+    - heading-md
+    - heading-sm
+    - body-lg
+    - body-md
+    - body-sm
+    - caption
+    - label
+# =============================================================================
+# SPACING PROCESSING
+# =============================================================================
+spacing_processing:
+  base: 8  # px
+  scale: [0, 4, 8, 12, 16, 24, 32, 48, 64, 80, 96]
+  names:
+    0: "none"
+    4: "xs"
+    8: "sm"
+    12: "sm-md"
+    16: "md"
+    24: "lg"
+    32: "xl"
+    48: "2xl"
+    64: "3xl"
+    80: "4xl"
+    96: "5xl"