# Source: Design-System-Extractor-2 / agents / benchmark_researcher.py
# Repository page header: riazmo's picture
# Upload 2 files
# a6c864a verified
"""
Benchmark Research Agent
=========================
Fetches LIVE data from design system documentation sites
using Firecrawl, with 24-hour caching.
This agent:
1. Fetches official documentation from design system sites
2. Extracts typography, spacing, color specifications using LLM
3. Caches results for 24 hours
4. Compares user's tokens to researched benchmarks
"""
import asyncio
import json
import os
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Optional, Callable
import hashlib
# =============================================================================
# DESIGN SYSTEM SOURCES (Official Documentation URLs)
# =============================================================================
# Registry of the design systems that can be researched, keyed by a stable
# identifier. Each entry holds display metadata ("name", "short_name",
# "vendor", "icon", "best_for") and the official documentation URLs grouped
# by topic under "urls". Not every system publishes every topic (Apple HIG
# has no "colors" URL), so consumers must use .get() on "urls".
#
# The icons are single emoji code points; they were previously stored as
# mis-decoded byte sequences (mojibake) and rendered as garbage in the UI.
DESIGN_SYSTEM_SOURCES = {
    "material_design_3": {
        "name": "Material Design 3",
        "short_name": "Material 3",
        "vendor": "Google",
        "urls": {
            "typography": "https://m3.material.io/styles/typography/type-scale-tokens",
            "spacing": "https://m3.material.io/foundations/layout/understanding-layout/spacing",
            "colors": "https://m3.material.io/styles/color/the-color-system/key-colors-tones",
        },
        "best_for": ["Android apps", "Web apps", "Enterprise software"],
        "icon": "🟢",
    },
    "apple_hig": {
        "name": "Apple Human Interface Guidelines",
        "short_name": "Apple HIG",
        "vendor": "Apple",
        "urls": {
            "typography": "https://developer.apple.com/design/human-interface-guidelines/typography",
            "spacing": "https://developer.apple.com/design/human-interface-guidelines/layout",
        },
        "best_for": ["iOS apps", "macOS apps", "Premium consumer products"],
        "icon": "🍎",
    },
    "shopify_polaris": {
        "name": "Shopify Polaris",
        "short_name": "Polaris",
        "vendor": "Shopify",
        "urls": {
            "typography": "https://polaris.shopify.com/design/typography",
            "spacing": "https://polaris.shopify.com/design/spacing",
            "colors": "https://polaris.shopify.com/design/colors",
        },
        "best_for": ["E-commerce", "Admin dashboards", "Merchant tools"],
        "icon": "🛒",
    },
    "atlassian_design": {
        "name": "Atlassian Design System",
        "short_name": "Atlassian",
        "vendor": "Atlassian",
        "urls": {
            "typography": "https://atlassian.design/foundations/typography",
            "spacing": "https://atlassian.design/foundations/spacing",
            "colors": "https://atlassian.design/foundations/color",
        },
        "best_for": ["Productivity tools", "Dense interfaces", "Enterprise B2B"],
        "icon": "🔵",
    },
    "ibm_carbon": {
        "name": "IBM Carbon Design System",
        "short_name": "Carbon",
        "vendor": "IBM",
        "urls": {
            "typography": "https://carbondesignsystem.com/guidelines/typography/overview",
            "spacing": "https://carbondesignsystem.com/guidelines/spacing/overview",
            "colors": "https://carbondesignsystem.com/guidelines/color/overview",
        },
        "best_for": ["Enterprise software", "Data-heavy applications", "IBM products"],
        "icon": "🔷",
    },
    "tailwind_css": {
        "name": "Tailwind CSS",
        "short_name": "Tailwind",
        "vendor": "Tailwind Labs",
        "urls": {
            "typography": "https://tailwindcss.com/docs/font-size",
            "spacing": "https://tailwindcss.com/docs/customizing-spacing",
            "colors": "https://tailwindcss.com/docs/customizing-colors",
        },
        "best_for": ["Web applications", "Startups", "Rapid prototyping"],
        "icon": "🌊",
    },
    "ant_design": {
        "name": "Ant Design",
        "short_name": "Ant Design",
        "vendor": "Ant Group",
        "urls": {
            "typography": "https://ant.design/docs/spec/font",
            "spacing": "https://ant.design/docs/spec/layout",
            "colors": "https://ant.design/docs/spec/colors",
        },
        "best_for": ["Enterprise B2B", "Admin panels", "Chinese market"],
        "icon": "🐜",
    },
    "chakra_ui": {
        "name": "Chakra UI",
        "short_name": "Chakra",
        "vendor": "Chakra UI",
        "urls": {
            "typography": "https://chakra-ui.com/docs/styled-system/theme#typography",
            "spacing": "https://chakra-ui.com/docs/styled-system/theme#spacing",
            "colors": "https://chakra-ui.com/docs/styled-system/theme#colors",
        },
        "best_for": ["React applications", "Startups", "Accessible products"],
        "icon": "⚡",
    },
}
# =============================================================================
# DATA CLASSES
# =============================================================================
@dataclass
class BenchmarkData:
    """Researched benchmark data from a design system.

    Holds the identifying metadata of one design system together with the
    typography / spacing / color specifications that were extracted for it
    (or taken from the static fallback table).
    """

    key: str
    name: str
    short_name: str
    vendor: str
    icon: str

    # Extracted specifications
    typography: dict = field(default_factory=dict)
    # Expected: {scale_ratio, base_size, sizes[], font_family, line_height_body}
    spacing: dict = field(default_factory=dict)
    # Expected: {base, scale[], grid}
    colors: dict = field(default_factory=dict)
    # Expected: {palette_size, uses_ramps, ramp_steps}

    # Metadata
    fetched_at: str = ""  # ISO-8601 timestamp of when the data was produced
    confidence: str = "low"  # high, medium, low
    source_urls: list = field(default_factory=list)
    best_for: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict of every field.

        The result is what gets written to the benchmark cache, so it has to
        contain every field the cache reads back. ``source_urls`` used to be
        left out, which meant cached entries always came back with an empty
        URL list.

        Returns:
            A new dict; the nested spec dicts and lists are the same objects
            held by this instance (no deep copy).
        """
        return {
            "key": self.key,
            "name": self.name,
            "short_name": self.short_name,
            "vendor": self.vendor,
            "icon": self.icon,
            "typography": self.typography,
            "spacing": self.spacing,
            "colors": self.colors,
            "fetched_at": self.fetched_at,
            "confidence": self.confidence,
            "source_urls": self.source_urls,
            "best_for": self.best_for,
        }
@dataclass
class BenchmarkComparison:
    """Outcome of comparing the user's tokens against one benchmark."""

    benchmark: BenchmarkData
    similarity_score: float  # Lower = more similar

    # Absolute differences per dimension
    type_ratio_diff: float
    base_size_diff: int
    spacing_grid_diff: int

    # Match percentages
    type_match_pct: float
    spacing_match_pct: float
    overall_match_pct: float

    def to_dict(self) -> dict:
        """Flatten the comparison into a plain dict with rounded numbers.

        Returns:
            A dict with the benchmark's display metadata, the per-dimension
            differences / match percentages under ``"comparison"``, and the
            benchmark's own reference values under ``"benchmark_values"``.
        """
        bench = self.benchmark

        type_ratio_part = {
            "diff": round(self.type_ratio_diff, 3),
            "match_pct": round(self.type_match_pct, 1),
        }
        base_size_part = {"diff": self.base_size_diff}
        spacing_part = {
            "diff": self.spacing_grid_diff,
            "match_pct": round(self.spacing_match_pct, 1),
        }

        reference_values = {
            "type_ratio": bench.typography.get("scale_ratio"),
            "base_size": bench.typography.get("base_size"),
            "spacing_grid": bench.spacing.get("base"),
        }

        summary = {
            "name": bench.name,
            "short_name": bench.short_name,
            "icon": bench.icon,
            "similarity_score": round(self.similarity_score, 2),
            "overall_match_pct": round(self.overall_match_pct, 1),
        }
        summary["comparison"] = {
            "type_ratio": type_ratio_part,
            "base_size": base_size_part,
            "spacing_grid": spacing_part,
        }
        summary["benchmark_values"] = reference_values
        summary["best_for"] = bench.best_for
        summary["confidence"] = bench.confidence
        return summary
# =============================================================================
# CACHE MANAGER
# =============================================================================
class BenchmarkCache:
    """Manages 24-hour caching of benchmark research results.

    All entries live in a single JSON file (``benchmark_cache.json``) mapping
    a design-system key to the dict produced by ``BenchmarkData.to_dict()``.
    Caching is best-effort: an unreadable, corrupt or unwritable cache file
    never raises, it simply behaves like an empty cache.
    """

    # Entries older than this are treated as expired.
    TTL = timedelta(hours=24)

    # Timestamp assumed when an entry has no usable "fetched_at"; it is old
    # enough that such entries always count as expired.
    _MISSING_TIMESTAMP = "2000-01-01"

    def __init__(self, cache_dir: Optional[str] = None):
        """Create the cache, defaulting to ``../storage`` next to this file.

        Args:
            cache_dir: Directory that holds the cache file. It is created if
                it does not exist.
        """
        if cache_dir is None:
            cache_dir = os.path.join(os.path.dirname(__file__), "..", "storage")
        self.cache_file = os.path.join(cache_dir, "benchmark_cache.json")
        self._ensure_cache_dir()

    def _ensure_cache_dir(self):
        """Ensure cache directory exists."""
        os.makedirs(os.path.dirname(self.cache_file), exist_ok=True)

    def _load_cache(self) -> dict:
        """Load the cache file.

        Returns:
            The cached mapping, or ``{}`` when the file is missing, cannot be
            read, is not valid JSON, or does not contain a JSON object.
        """
        try:
            with open(self.cache_file, "r", encoding="utf-8") as f:
                cache = json.load(f)
        except (OSError, ValueError):
            # Missing file, permission problem, bad encoding or broken JSON.
            return {}
        # A list/string/number at the top level is not a usable cache.
        return cache if isinstance(cache, dict) else {}

    def _save_cache(self, cache: dict):
        """Write the cache file (best-effort; failures are ignored).

        The payload is serialised before the file is opened so that a
        serialisation error cannot truncate an existing, valid cache file.
        """
        try:
            payload = json.dumps(cache, indent=2)
            with open(self.cache_file, "w", encoding="utf-8") as f:
                f.write(payload)
        except (OSError, TypeError, ValueError):
            # Persisting the cache is optional; research results are still
            # returned to the caller even when they cannot be stored.
            pass

    @classmethod
    def _entry_age(cls, entry: dict) -> timedelta:
        """Return how long ago ``entry`` was fetched.

        A missing, empty or unparseable ``fetched_at`` is treated like the
        very old sentinel timestamp, so the entry counts as expired instead
        of raising.
        """
        raw = entry.get("fetched_at") or cls._MISSING_TIMESTAMP
        try:
            fetched_at = datetime.fromisoformat(raw)
        except (TypeError, ValueError):
            fetched_at = datetime.fromisoformat(cls._MISSING_TIMESTAMP)
        # now() is taken in the timestamp's own timezone (None for naive
        # values) so naive and aware datetimes are never mixed.
        return datetime.now(fetched_at.tzinfo) - fetched_at

    def get(self, key: str) -> "Optional[BenchmarkData]":
        """Get cached benchmark if valid (at most 24 hours old).

        Args:
            key: Design-system key the entry was stored under.

        Returns:
            The reconstructed ``BenchmarkData``, or ``None`` when there is no
            entry, the entry is malformed, or it has expired. Metadata
            missing from the entry is filled from ``DESIGN_SYSTEM_SOURCES``.
        """
        entry = self._load_cache().get(key)
        if not isinstance(entry, dict):
            return None

        if self._entry_age(entry) > self.TTL:
            return None

        # Reconstruct BenchmarkData
        source = DESIGN_SYSTEM_SOURCES.get(key, {})
        return BenchmarkData(
            key=key,
            name=entry.get("name", source.get("name", key)),
            short_name=entry.get("short_name", source.get("short_name", key)),
            vendor=entry.get("vendor", source.get("vendor", "")),
            icon=entry.get("icon", source.get("icon", "📦")),
            # "or" guards against sections stored as JSON null.
            typography=entry.get("typography") or {},
            spacing=entry.get("spacing") or {},
            colors=entry.get("colors") or {},
            fetched_at=entry.get("fetched_at", ""),
            confidence=entry.get("confidence", "low"),
            source_urls=entry.get("source_urls") or [],
            best_for=entry.get("best_for", source.get("best_for", [])),
        )

    def set(self, key: str, data: "BenchmarkData"):
        """Cache benchmark data under ``key`` (read-modify-write of the file)."""
        cache = self._load_cache()
        cache[key] = data.to_dict()
        self._save_cache(cache)

    def get_cache_status(self) -> dict:
        """Get status of all cached items.

        Returns:
            One entry per key of ``DESIGN_SYSTEM_SOURCES``:
            ``{"cached": True, "valid": bool, "age_hours": float}`` for keys
            present in the cache file, ``{"cached": False, "valid": False}``
            otherwise.
        """
        cache = self._load_cache()
        status = {}
        for key in DESIGN_SYSTEM_SOURCES:
            entry = cache.get(key)
            if not isinstance(entry, dict):
                status[key] = {"cached": False, "valid": False}
                continue
            age = self._entry_age(entry)
            status[key] = {
                "cached": True,
                "valid": age <= self.TTL,
                "age_hours": round(age.total_seconds() / 3600, 1),
            }
        return status
# =============================================================================
# FALLBACK DATA (Used when research fails)
# =============================================================================
def _fallback_spec(
    scale_ratio,
    base_size,
    font_family,
    line_height_body,
    grid_base,
    spacing_scale,
    palette_size,
):
    """Build one fallback entry in the typography / spacing / colors layout.

    The "grid" label is always the spacing base followed by "px", and every
    fallback palette is recorded as using color ramps.
    """
    return {
        "typography": {
            "scale_ratio": scale_ratio,
            "base_size": base_size,
            "font_family": font_family,
            "line_height_body": line_height_body,
        },
        "spacing": {
            "base": grid_base,
            "scale": spacing_scale,
            "grid": f"{grid_base}px",
        },
        "colors": {
            "palette_size": palette_size,
            "uses_ramps": True,
        },
    }


# Static specifications used whenever live research is unavailable or fails.
# Keys mirror DESIGN_SYSTEM_SOURCES.
FALLBACK_BENCHMARKS = {
    "material_design_3": _fallback_spec(
        1.2, 16, "Roboto", 1.5,
        8, [0, 4, 8, 12, 16, 24, 32, 48, 64], 13,
    ),
    "apple_hig": _fallback_spec(
        1.19, 17, "SF Pro", 1.47,
        4, [0, 4, 8, 12, 16, 20, 24, 32, 40], 9,
    ),
    "shopify_polaris": _fallback_spec(
        1.25, 16, "Inter", 1.5,
        4, [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64], 11,
    ),
    "atlassian_design": _fallback_spec(
        1.14, 14, "Inter", 1.43,
        8, [0, 4, 8, 12, 16, 24, 32, 40, 48], 15,
    ),
    "ibm_carbon": _fallback_spec(
        1.25, 14, "IBM Plex Sans", 1.5,
        8, [0, 2, 4, 8, 12, 16, 24, 32, 40, 48], 12,
    ),
    "tailwind_css": _fallback_spec(
        1.25, 16, "system-ui", 1.5,
        4, [0, 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32], 22,
    ),
    "ant_design": _fallback_spec(
        1.14, 14, "system-ui", 1.57,
        8, [0, 4, 8, 12, 16, 20, 24, 32, 40, 48], 13,
    ),
    "chakra_ui": _fallback_spec(
        1.25, 16, "system-ui", 1.5,
        4, [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 56, 64], 15,
    ),
}
# =============================================================================
# BENCHMARK RESEARCHER
# =============================================================================
class BenchmarkResearcher:
    """
    Research agent that fetches live design system specifications.

    Uses Firecrawl to fetch documentation and an LLM to extract specs.
    Results are cached for 24 hours. Whenever fetching or extraction is
    unavailable or yields nothing usable, the static FALLBACK_BENCHMARKS
    values are used and the result is labelled with confidence "fallback".
    """

    # Spec sections carried by every benchmark.
    _SECTIONS = ("typography", "spacing", "colors")

    def __init__(self, firecrawl_client=None, hf_client=None):
        """
        Initialize researcher.

        Args:
            firecrawl_client: Firecrawl API client for fetching docs
            hf_client: HuggingFace client for LLM extraction
        """
        self.firecrawl = firecrawl_client
        self.hf_client = hf_client
        self.cache = BenchmarkCache()

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _make_logger(log_callback: Optional[Callable]) -> Callable:
        """Return a ``log(msg)`` function that is a no-op without a callback."""
        def log(msg: str):
            if log_callback:
                log_callback(msg)
        return log

    @staticmethod
    def _numeric(value, default):
        """Return ``value`` as a number, or ``default`` when it is unusable.

        LLM output may contain ``null`` or strings such as ``"16px"`` where a
        number is expected; ints and floats are passed through unchanged.
        """
        if isinstance(value, bool):
            return default
        if isinstance(value, (int, float)):
            return value
        if isinstance(value, str):
            try:
                return float(value.lower().replace("px", "").strip())
            except ValueError:
                return default
        return default

    @classmethod
    def _merge_with_fallback(cls, extracted, fallback) -> tuple:
        """Overlay extracted specs on the fallback specs, ignoring nulls.

        Args:
            extracted: Dict parsed from the LLM response (may be empty, may
                contain ``None`` sections or ``None`` values).
            fallback: Fallback entry for the same design system.

        Returns:
            ``(specs, used_extracted)`` where ``specs`` maps every section
            name to a dict and ``used_extracted`` tells whether at least one
            non-null extracted value made it into the result.
        """
        if not isinstance(extracted, dict):
            extracted = {}
        specs = {}
        used_extracted = False
        for section in cls._SECTIONS:
            merged = dict(fallback.get(section) or {})
            found = extracted.get(section)
            if isinstance(found, dict):
                usable = {k: v for k, v in found.items() if v is not None}
                if usable:
                    merged.update(usable)
                    used_extracted = True
            specs[section] = merged
        return specs, used_extracted

    # ------------------------------------------------------------------
    # Research
    # ------------------------------------------------------------------
    async def research_benchmark(
        self,
        system_key: str,
        log_callback: Optional[Callable] = None,
        force_refresh: bool = False,
    ) -> "BenchmarkData":
        """
        Research a specific design system.

        Args:
            system_key: Key from DESIGN_SYSTEM_SOURCES
            log_callback: Function to log progress
            force_refresh: Bypass cache and fetch fresh

        Returns:
            BenchmarkData with extracted specifications. ``confidence`` is
            "high" when both typography and spacing docs were fetched,
            "medium" for typography only, "low" for spacing only, and
            "fallback" when no extracted value could be used.

        Raises:
            ValueError: If ``system_key`` is not in DESIGN_SYSTEM_SOURCES.
        """
        log = self._make_logger(log_callback)

        if system_key not in DESIGN_SYSTEM_SOURCES:
            raise ValueError(f"Unknown design system: {system_key}")
        source = DESIGN_SYSTEM_SOURCES[system_key]
        fallback = FALLBACK_BENCHMARKS.get(system_key, {})

        # Check cache first (unless force refresh)
        if not force_refresh:
            cached = self.cache.get(system_key)
            if cached:
                log(f" ├─ {source['icon']} {source['short_name']}: Using cached data ✅")
                return cached

        log(f" ├─ {source['icon']} {source['short_name']}: Fetching documentation...")

        # Try to fetch the documentation pages. _fetch_url never raises; it
        # reports failures through the log and returns None.
        raw_content = ""
        confidence = "low"
        if self.firecrawl:
            typo_url = source["urls"].get("typography")
            if typo_url:
                log(f" │ ├─ Fetching {typo_url[:50]}...")
                typo_content = await self._fetch_url(typo_url, log)
                if typo_content:
                    raw_content += f"\n\n=== TYPOGRAPHY ===\n{typo_content[:4000]}"
                    confidence = "medium"

            spacing_url = source["urls"].get("spacing")
            if spacing_url:
                log(f" │ ├─ Fetching spacing docs...")
                spacing_content = await self._fetch_url(spacing_url, log)
                if spacing_content:
                    raw_content += f"\n\n=== SPACING ===\n{spacing_content[:3000]}"
                    if confidence == "medium":
                        confidence = "high"

        # Extract specs with LLM (or use fallback)
        extracted = {}
        can_extract = bool(raw_content and self.hf_client)
        if can_extract:
            log(f" │ ├─ Extracting specifications...")
            extracted = await self._extract_specs_with_llm(source["name"], raw_content, log)

        specs, used_extracted = self._merge_with_fallback(extracted, fallback)
        if not used_extracted:
            # Nothing researched ended up in the result, so the confidence
            # earned by fetching must not be reported for fallback values.
            if can_extract:
                log(f" │ ├─ Using fallback data (extraction returned nothing)")
            else:
                log(f" │ ├─ Using fallback data (fetch unavailable)")
            confidence = "fallback"

        # Build result
        result = BenchmarkData(
            key=system_key,
            name=source["name"],
            short_name=source["short_name"],
            vendor=source["vendor"],
            icon=source["icon"],
            typography=specs["typography"],
            spacing=specs["spacing"],
            colors=specs["colors"],
            fetched_at=datetime.now().isoformat(),
            confidence=confidence,
            source_urls=list(source["urls"].values()),
            # Copy so callers cannot mutate the module-level registry.
            best_for=list(source["best_for"]),
        )

        # Cache result
        self.cache.set(system_key, result)

        ratio = result.typography.get("scale_ratio", "?")
        base = result.typography.get("base_size", "?")
        grid = result.spacing.get("base", "?")
        log(f" │ └─ ✅ ratio={ratio}, base={base}px, grid={grid}px [{confidence}]")
        return result

    async def _fetch_url(self, url: str, log_callback: Optional[Callable] = None) -> Optional[str]:
        """Fetch URL content using Firecrawl.

        Args:
            url: Page to scrape.
            log_callback: Optional function that receives a one-line message
                when the scrape raises.

        Returns:
            The page's "markdown" (preferred) or "content" text, or ``None``
            when no client is configured, the scrape fails, or the response
            carries neither field.
        """
        if not self.firecrawl:
            return None

        try:
            # scrape_url is a blocking call; run it in a worker thread so the
            # event loop stays responsive while the page is fetched.
            result = await asyncio.to_thread(
                self.firecrawl.scrape_url,
                url,
                params={"formats": ["markdown"]},
            )
        except Exception as e:
            # Best-effort: a failed fetch falls back to static data, but the
            # reason is surfaced instead of being dropped silently.
            if log_callback:
                log_callback(f" │ ├─ ⚠️ Fetch error: {str(e)[:50]}")
            return None

        # Accept both dict responses and response objects exposing the same
        # fields as attributes.
        for field_name in ("markdown", "content"):
            if isinstance(result, dict):
                text = result.get(field_name)
            else:
                text = getattr(result, field_name, None)
            if text:
                return text
        return None

    async def _extract_specs_with_llm(
        self,
        system_name: str,
        raw_content: str,
        log_callback: Optional[Callable] = None,
    ) -> dict:
        """Extract structured specs from documentation using LLM.

        Args:
            system_name: Human-readable design system name for the prompt.
            raw_content: Fetched documentation; only the first 6000
                characters are sent.
            log_callback: Optional function that receives a one-line message
                when the LLM call or JSON parsing fails.

        Returns:
            The parsed JSON object, or ``{}`` when no client is configured,
            the call fails, or no JSON object can be parsed from the reply.
        """
        import re

        if not self.hf_client:
            return {}

        prompt = f"""Extract the design system specifications from this documentation.
DESIGN SYSTEM: {system_name}
DOCUMENTATION:
{raw_content[:6000]}
Return ONLY a JSON object with these exact fields (use null if not found):
{{
"typography": {{
"scale_ratio": <number like 1.2 or 1.25>,
"base_size": <number in px>,
"font_family": "<font name>",
"sizes": [<list of sizes in px>],
"line_height_body": <number like 1.5>
}},
"spacing": {{
"base": <base unit in px like 4 or 8>,
"scale": [<spacing values>],
"grid": "<description>"
}},
"colors": {{
"palette_size": <number>,
"uses_ramps": <true/false>
}}
}}
Return ONLY valid JSON, no explanation."""

        try:
            response = await self.hf_client.complete_async(
                agent_name="benchmark_extractor",
                system_prompt="You are a design system specification extractor. Extract only the factual specifications.",
                user_message=prompt,
                max_tokens=600,
                json_mode=True,
            )
            # The model may wrap the JSON in prose; take the outermost {...}.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                parsed = json.loads(json_match.group())
                if isinstance(parsed, dict):
                    return parsed
        except Exception as e:
            # Any client or parsing failure degrades to "nothing extracted".
            if log_callback:
                log_callback(f" │ ├─ ⚠️ Extraction error: {str(e)[:50]}")
        return {}

    async def research_selected_benchmarks(
        self,
        selected_keys: "list[str]",
        log_callback: Optional[Callable] = None,
    ) -> "list[BenchmarkData]":
        """
        Research multiple selected design systems.

        Args:
            selected_keys: List of system keys to research
            log_callback: Function to log progress

        Returns:
            List of BenchmarkData, one per known key in input order. Keys not
            present in DESIGN_SYSTEM_SOURCES are skipped (and logged). A key
            whose research raises is represented by its fallback data.
        """
        log = self._make_logger(log_callback)

        log("")
        log("═" * 60)
        log("🔬 LAYER 2: BENCHMARK RESEARCH (Firecrawl + Cache)")
        log("═" * 60)
        log("")
        log(f" Selected systems: {', '.join(selected_keys)}")
        log("")

        results = []
        for key in selected_keys:
            if key not in DESIGN_SYSTEM_SOURCES:
                log(f" ├─ ⚠️ Unknown design system skipped: {key}")
                continue
            try:
                result = await self.research_benchmark(key, log_callback)
                results.append(result)
            except Exception as e:
                log(f" ├─ ⚠️ Error researching {key}: {e}")
                # Use fallback
                source = DESIGN_SYSTEM_SOURCES[key]
                fallback = FALLBACK_BENCHMARKS.get(key, {})
                results.append(BenchmarkData(
                    key=key,
                    name=source["name"],
                    short_name=source["short_name"],
                    vendor=source["vendor"],
                    icon=source["icon"],
                    typography=fallback.get("typography", {}),
                    spacing=fallback.get("spacing", {}),
                    colors=fallback.get("colors", {}),
                    fetched_at=datetime.now().isoformat(),
                    confidence="fallback",
                    source_urls=list(source["urls"].values()),
                    best_for=source["best_for"],
                ))

        log("")
        log(f" ✅ Researched {len(results)}/{len(selected_keys)} design systems")
        return results

    # ------------------------------------------------------------------
    # Comparison
    # ------------------------------------------------------------------
    def compare_to_benchmarks(
        self,
        your_ratio: float,
        your_base_size: int,
        your_spacing_grid: int,
        benchmarks: "list[BenchmarkData]",
        log_callback: Optional[Callable] = None,
    ) -> "list[BenchmarkComparison]":
        """
        Compare user's tokens to researched benchmarks.

        Benchmark values that are missing, null or non-numeric are replaced
        by generic defaults (ratio 1.25, base 16px, grid 8px) so a partially
        extracted benchmark cannot crash the comparison.

        Args:
            your_ratio: Detected type scale ratio
            your_base_size: Detected base font size
            your_spacing_grid: Detected spacing grid base
            benchmarks: List of researched BenchmarkData
            log_callback: Function to log progress

        Returns:
            List of BenchmarkComparison sorted by similarity (most similar,
            i.e. lowest score, first)
        """
        log = self._make_logger(log_callback)

        log("")
        log(" 📊 BENCHMARK COMPARISON")
        log(" " + "─" * 40)
        log(f" Your values: ratio={your_ratio:.2f}, base={your_base_size}px, grid={your_spacing_grid}px")
        log("")

        comparisons = []
        for b in benchmarks:
            typography = b.typography or {}
            spacing = b.spacing or {}
            b_ratio = self._numeric(typography.get("scale_ratio"), 1.25)
            b_base = self._numeric(typography.get("base_size"), 16)
            b_grid = self._numeric(spacing.get("base"), 8)

            # Calculate differences
            ratio_diff = abs(your_ratio - b_ratio)
            base_diff = abs(your_base_size - b_base)
            grid_diff = abs(your_spacing_grid - b_grid)

            # Calculate match percentages, each floored at 0
            type_match = max(0, 100 - (ratio_diff * 100))  # 0.1 diff = 90% match
            spacing_match = max(0, 100 - (grid_diff * 10))  # 4px diff = 60% match
            # Floored like the other two so a very different base size cannot
            # subtract from the type/spacing contributions.
            base_match = max(0, 100 - (base_diff * 5))  # 2px diff = 90% match

            # Weighted similarity score (lower = more similar)
            similarity = (ratio_diff * 10) + (base_diff * 0.5) + (grid_diff * 0.3)

            # Overall match percentage
            overall_match = (type_match * 0.5) + (spacing_match * 0.3) + (base_match * 0.2)
            overall_match = max(0, min(100, overall_match))

            comparisons.append(BenchmarkComparison(
                benchmark=b,
                similarity_score=similarity,
                type_ratio_diff=ratio_diff,
                base_size_diff=base_diff,
                spacing_grid_diff=grid_diff,
                type_match_pct=type_match,
                spacing_match_pct=spacing_match,
                overall_match_pct=overall_match,
            ))

        # Sort by similarity (lower = better)
        comparisons.sort(key=lambda x: x.similarity_score)

        # Log the five closest benchmarks
        medals = ["🥇", "🥈", "🥉"]
        for i, c in enumerate(comparisons[:5]):
            medal = medals[i] if i < 3 else " "
            b = c.benchmark
            b_typography = b.typography or {}
            b_spacing = b.spacing or {}
            log(f" {medal} {b.icon} {b.short_name}: {c.overall_match_pct:.0f}% match (score: {c.similarity_score:.2f})")
            log(f" └─ ratio={b_typography.get('scale_ratio')}, base={b_typography.get('base_size')}px, grid={b_spacing.get('base')}px")
        return comparisons
# =============================================================================
# HELPER FUNCTIONS
# =============================================================================
def get_available_benchmarks() -> list[dict]:
    """List every registered design system for a UI dropdown.

    Returns:
        One dict per entry of DESIGN_SYSTEM_SOURCES, in registry order, with
        the registry key under "key" followed by the display metadata.
    """
    exposed_fields = ("name", "short_name", "icon", "vendor", "best_for")
    options = []
    for system_key, meta in DESIGN_SYSTEM_SOURCES.items():
        option = {"key": system_key}
        for field_name in exposed_fields:
            option[field_name] = meta[field_name]
        options.append(option)
    return options
def get_benchmark_choices() -> list[tuple[str, str]]:
    """Build the (label, value) pairs for a Gradio dropdown.

    Returns:
        One ``("<icon> <short name> (<vendor>)", key)`` tuple per entry of
        DESIGN_SYSTEM_SOURCES, in registry order.
    """
    choices = []
    for system_key, meta in DESIGN_SYSTEM_SOURCES.items():
        label = f"{meta['icon']} {meta['short_name']} ({meta['vendor']})"
        choices.append((label, system_key))
    return choices