| """ |
| Benchmark Research Agent |
| ========================= |
| Fetches LIVE data from design system documentation sites |
| using Firecrawl, with 24-hour caching. |
| |
| This agent: |
| 1. Fetches official documentation from design system sites |
| 2. Extracts typography, spacing, color specifications using LLM |
| 3. Caches results for 24 hours |
| 4. Compares user's tokens to researched benchmarks |
| """ |
|
|
| import asyncio |
| import json |
| import os |
| from dataclasses import dataclass, field |
| from datetime import datetime, timedelta |
| from typing import Optional, Callable |
| import hashlib |
|
|
|
|
| |
| |
| |
|
|
# Registry of the design systems that can be researched.
#
# Each entry is keyed by a stable identifier and carries display metadata
# ("name", "short_name", "vendor", "icon", "best_for") plus "urls", a mapping
# from topic ("typography", "spacing", "colors") to the documentation page
# for that topic. Not every system lists every topic: "apple_hig" has no
# "colors" URL.
DESIGN_SYSTEM_SOURCES = {
    "material_design_3": {
        "name": "Material Design 3",
        "short_name": "Material 3",
        "vendor": "Google",
        "urls": {
            "typography": "https://m3.material.io/styles/typography/type-scale-tokens",
            "spacing": "https://m3.material.io/foundations/layout/understanding-layout/spacing",
            "colors": "https://m3.material.io/styles/color/the-color-system/key-colors-tones",
        },
        "best_for": [
            "Android apps",
            "Web apps",
            "Enterprise software",
        ],
        "icon": "π’",
    },
    "apple_hig": {
        "name": "Apple Human Interface Guidelines",
        "short_name": "Apple HIG",
        "vendor": "Apple",
        "urls": {
            "typography": "https://developer.apple.com/design/human-interface-guidelines/typography",
            "spacing": "https://developer.apple.com/design/human-interface-guidelines/layout",
        },
        "best_for": [
            "iOS apps",
            "macOS apps",
            "Premium consumer products",
        ],
        "icon": "π",
    },
    "shopify_polaris": {
        "name": "Shopify Polaris",
        "short_name": "Polaris",
        "vendor": "Shopify",
        "urls": {
            "typography": "https://polaris.shopify.com/design/typography",
            "spacing": "https://polaris.shopify.com/design/spacing",
            "colors": "https://polaris.shopify.com/design/colors",
        },
        "best_for": [
            "E-commerce",
            "Admin dashboards",
            "Merchant tools",
        ],
        "icon": "π",
    },
    "atlassian_design": {
        "name": "Atlassian Design System",
        "short_name": "Atlassian",
        "vendor": "Atlassian",
        "urls": {
            "typography": "https://atlassian.design/foundations/typography",
            "spacing": "https://atlassian.design/foundations/spacing",
            "colors": "https://atlassian.design/foundations/color",
        },
        "best_for": [
            "Productivity tools",
            "Dense interfaces",
            "Enterprise B2B",
        ],
        "icon": "π΅",
    },
    "ibm_carbon": {
        "name": "IBM Carbon Design System",
        "short_name": "Carbon",
        "vendor": "IBM",
        "urls": {
            "typography": "https://carbondesignsystem.com/guidelines/typography/overview",
            "spacing": "https://carbondesignsystem.com/guidelines/spacing/overview",
            "colors": "https://carbondesignsystem.com/guidelines/color/overview",
        },
        "best_for": [
            "Enterprise software",
            "Data-heavy applications",
            "IBM products",
        ],
        "icon": "π·",
    },
    "tailwind_css": {
        "name": "Tailwind CSS",
        "short_name": "Tailwind",
        "vendor": "Tailwind Labs",
        "urls": {
            "typography": "https://tailwindcss.com/docs/font-size",
            "spacing": "https://tailwindcss.com/docs/customizing-spacing",
            "colors": "https://tailwindcss.com/docs/customizing-colors",
        },
        "best_for": [
            "Web applications",
            "Startups",
            "Rapid prototyping",
        ],
        "icon": "π",
    },
    "ant_design": {
        "name": "Ant Design",
        "short_name": "Ant Design",
        "vendor": "Ant Group",
        "urls": {
            "typography": "https://ant.design/docs/spec/font",
            "spacing": "https://ant.design/docs/spec/layout",
            "colors": "https://ant.design/docs/spec/colors",
        },
        "best_for": [
            "Enterprise B2B",
            "Admin panels",
            "Chinese market",
        ],
        "icon": "π",
    },
    "chakra_ui": {
        "name": "Chakra UI",
        "short_name": "Chakra",
        "vendor": "Chakra UI",
        # All three topics point at anchors on the same theme page.
        "urls": {
            "typography": "https://chakra-ui.com/docs/styled-system/theme#typography",
            "spacing": "https://chakra-ui.com/docs/styled-system/theme#spacing",
            "colors": "https://chakra-ui.com/docs/styled-system/theme#colors",
        },
        "best_for": [
            "React applications",
            "Startups",
            "Accessible products",
        ],
        "icon": "β‘",
    },
}
|
|
|
|
| |
| |
| |
|
|
@dataclass
class BenchmarkData:
    """Researched benchmark data from a design system.

    The three spec dictionaries are free-form; elsewhere in this file the
    keys "scale_ratio" and "base_size" are read from ``typography`` and
    "base" from ``spacing``.
    """

    # Identity / display metadata.
    key: str
    name: str
    short_name: str
    vendor: str
    icon: str

    # Typography specifications.
    typography: dict = field(default_factory=dict)

    # Spacing specifications.
    spacing: dict = field(default_factory=dict)

    # Color specifications.
    colors: dict = field(default_factory=dict)

    # Provenance: ISO-8601 timestamp string, a confidence label and the
    # documentation URLs the data is attributed to.
    fetched_at: str = ""
    confidence: str = "low"
    source_urls: list = field(default_factory=list)
    best_for: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a plain-dict form of every field.

        ``source_urls`` is included so the value survives a cache
        round-trip: BenchmarkCache.get rebuilds instances from this
        dictionary and reads the "source_urls" key.
        """
        return {
            "key": self.key,
            "name": self.name,
            "short_name": self.short_name,
            "vendor": self.vendor,
            "icon": self.icon,
            "typography": self.typography,
            "spacing": self.spacing,
            "colors": self.colors,
            "fetched_at": self.fetched_at,
            "confidence": self.confidence,
            "source_urls": self.source_urls,
            "best_for": self.best_for,
        }
|
|
|
|
@dataclass
class BenchmarkComparison:
    """Outcome of comparing the user's tokens against one benchmark.

    Holds the benchmark itself, the raw differences for type ratio, base
    font size and spacing grid, and the derived match percentages.
    """

    benchmark: BenchmarkData
    similarity_score: float

    # Raw differences between the user's values and the benchmark's.
    type_ratio_diff: float
    base_size_diff: int
    spacing_grid_diff: int

    # Derived match percentages.
    type_match_pct: float
    spacing_match_pct: float
    overall_match_pct: float

    def to_dict(self) -> dict:
        """Return a rounded, display-ready dictionary of this comparison."""
        ref = self.benchmark

        type_ratio = {
            "diff": round(self.type_ratio_diff, 3),
            "match_pct": round(self.type_match_pct, 1),
        }
        base_size = {"diff": self.base_size_diff}
        spacing_grid = {
            "diff": self.spacing_grid_diff,
            "match_pct": round(self.spacing_match_pct, 1),
        }

        # Reference values read straight from the benchmark's spec dicts;
        # a missing key yields None.
        reference = {
            "type_ratio": ref.typography.get("scale_ratio"),
            "base_size": ref.typography.get("base_size"),
            "spacing_grid": ref.spacing.get("base"),
        }

        summary = {
            "name": ref.name,
            "short_name": ref.short_name,
            "icon": ref.icon,
            "similarity_score": round(self.similarity_score, 2),
            "overall_match_pct": round(self.overall_match_pct, 1),
        }
        summary["comparison"] = {
            "type_ratio": type_ratio,
            "base_size": base_size,
            "spacing_grid": spacing_grid,
        }
        summary["benchmark_values"] = reference
        summary["best_for"] = ref.best_for
        summary["confidence"] = ref.confidence
        return summary
|
|
|
|
| |
| |
| |
|
|
class BenchmarkCache:
    """Manages 24-hour caching of benchmark research results.

    All entries live in one JSON file ("benchmark_cache.json") keyed by
    design-system key. Cache I/O is best-effort: an unreadable or corrupt
    file behaves like an empty cache, and a failed write is ignored.
    """

    # Entries older than this are treated as expired.
    _MAX_AGE = timedelta(hours=24)

    # Timestamp assumed for entries whose "fetched_at" is missing or
    # unusable; far enough in the past that such entries are always stale.
    _MISSING_TIMESTAMP = datetime(2000, 1, 1)

    def __init__(self, cache_dir: Optional[str] = None):
        """Create the cache.

        Args:
            cache_dir: Directory holding the cache file. Defaults to a
                "storage" directory next to this module's parent directory.
        """
        if cache_dir is None:
            cache_dir = os.path.join(os.path.dirname(__file__), "..", "storage")
        self.cache_file = os.path.join(cache_dir, "benchmark_cache.json")
        self._ensure_cache_dir()

    def _ensure_cache_dir(self):
        """Ensure cache directory exists."""
        os.makedirs(os.path.dirname(self.cache_file), exist_ok=True)

    def _load_cache(self) -> dict:
        """Load the cache file; return {} when it is missing or unusable."""
        try:
            with open(self.cache_file, "r", encoding="utf-8") as f:
                cache = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file, or invalid JSON (JSONDecodeError and
            # UnicodeDecodeError are both ValueError subclasses).
            return {}
        # Valid JSON that is not an object (e.g. a list) cannot be a cache.
        return cache if isinstance(cache, dict) else {}

    def _save_cache(self, cache: dict):
        """Write the cache file, ignoring failures (caching is best-effort)."""
        try:
            # Serialize before opening the file so that a serialization
            # error cannot truncate the cache already on disk.
            payload = json.dumps(cache, indent=2)
            with open(self.cache_file, "w", encoding="utf-8") as f:
                f.write(payload)
        except (OSError, TypeError, ValueError):
            pass

    @classmethod
    def _fetched_at(cls, entry) -> datetime:
        """Return the naive local fetch time recorded in a cache entry.

        Falls back to ``_MISSING_TIMESTAMP`` when the entry is not a dict or
        its "fetched_at" value is absent, empty or not ISO-8601 formatted.
        """
        raw = entry.get("fetched_at") if isinstance(entry, dict) else None
        if not isinstance(raw, str) or not raw:
            return cls._MISSING_TIMESTAMP
        try:
            stamp = datetime.fromisoformat(raw)
        except ValueError:
            return cls._MISSING_TIMESTAMP
        if stamp.tzinfo is not None:
            # Ages are computed against the naive datetime.now(); mixing
            # aware and naive values would raise TypeError.
            stamp = stamp.astimezone().replace(tzinfo=None)
        return stamp

    def get(self, key: str) -> Optional[BenchmarkData]:
        """Get cached benchmark if valid (at most 24 hours old).

        Returns:
            The cached BenchmarkData, or None when there is no usable entry
            for ``key`` or the entry has expired.
        """
        entry = self._load_cache().get(key)
        if not isinstance(entry, dict):
            return None

        if datetime.now() - self._fetched_at(entry) > self._MAX_AGE:
            return None

        # Display metadata missing from the entry is filled in from the
        # source registry.
        source = DESIGN_SYSTEM_SOURCES.get(key, {})
        return BenchmarkData(
            key=key,
            name=entry.get("name", source.get("name", key)),
            short_name=entry.get("short_name", source.get("short_name", key)),
            vendor=entry.get("vendor", source.get("vendor", "")),
            icon=entry.get("icon", source.get("icon", "π¦")),
            # "or" also covers a stored JSON null.
            typography=entry.get("typography") or {},
            spacing=entry.get("spacing") or {},
            colors=entry.get("colors") or {},
            fetched_at=entry.get("fetched_at", ""),
            confidence=entry.get("confidence", "low"),
            source_urls=entry.get("source_urls") or [],
            best_for=entry.get("best_for", source.get("best_for", [])),
        )

    def set(self, key: str, data: BenchmarkData):
        """Cache benchmark data."""
        cache = self._load_cache()
        cache[key] = data.to_dict()
        self._save_cache(cache)

    def get_cache_status(self) -> dict:
        """Get status of all cached items.

        Returns:
            A dict keyed by every key in DESIGN_SYSTEM_SOURCES. Cached
            systems map to {"cached": True, "valid": bool, "age_hours":
            float}; the rest map to {"cached": False, "valid": False}.
        """
        cache = self._load_cache()
        now = datetime.now()
        status = {}

        for key in DESIGN_SYSTEM_SOURCES:
            if key not in cache:
                status[key] = {"cached": False, "valid": False}
                continue

            age_hours = (now - self._fetched_at(cache[key])).total_seconds() / 3600
            status[key] = {
                "cached": True,
                "valid": age_hours < 24,
                "age_hours": round(age_hours, 1),
            }

        return status
|
|
|
|
| |
| |
| |
|
|
# Hard-coded specifications used when live documentation cannot be fetched
# or parsed. Keyed like DESIGN_SYSTEM_SOURCES; every entry has the sections
# "typography", "spacing" and "colors".
FALLBACK_BENCHMARKS = {
    "material_design_3": {
        "typography": {
            "scale_ratio": 1.2,
            "base_size": 16,
            "font_family": "Roboto",
            "line_height_body": 1.5,
        },
        "spacing": {
            "base": 8,
            "scale": [0, 4, 8, 12, 16, 24, 32, 48, 64],
            "grid": "8px",
        },
        "colors": {"palette_size": 13, "uses_ramps": True},
    },
    "apple_hig": {
        "typography": {
            "scale_ratio": 1.19,
            "base_size": 17,
            "font_family": "SF Pro",
            "line_height_body": 1.47,
        },
        "spacing": {
            "base": 4,
            "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40],
            "grid": "4px",
        },
        "colors": {"palette_size": 9, "uses_ramps": True},
    },
    "shopify_polaris": {
        "typography": {
            "scale_ratio": 1.25,
            "base_size": 16,
            "font_family": "Inter",
            "line_height_body": 1.5,
        },
        "spacing": {
            "base": 4,
            "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64],
            "grid": "4px",
        },
        "colors": {"palette_size": 11, "uses_ramps": True},
    },
    "atlassian_design": {
        "typography": {
            "scale_ratio": 1.14,
            "base_size": 14,
            "font_family": "Inter",
            "line_height_body": 1.43,
        },
        "spacing": {
            "base": 8,
            "scale": [0, 4, 8, 12, 16, 24, 32, 40, 48],
            "grid": "8px",
        },
        "colors": {"palette_size": 15, "uses_ramps": True},
    },
    "ibm_carbon": {
        "typography": {
            "scale_ratio": 1.25,
            "base_size": 14,
            "font_family": "IBM Plex Sans",
            "line_height_body": 1.5,
        },
        "spacing": {
            "base": 8,
            "scale": [0, 2, 4, 8, 12, 16, 24, 32, 40, 48],
            "grid": "8px",
        },
        "colors": {"palette_size": 12, "uses_ramps": True},
    },
    "tailwind_css": {
        "typography": {
            "scale_ratio": 1.25,
            "base_size": 16,
            "font_family": "system-ui",
            "line_height_body": 1.5,
        },
        "spacing": {
            "base": 4,
            "scale": [0, 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32],
            "grid": "4px",
        },
        "colors": {"palette_size": 22, "uses_ramps": True},
    },
    "ant_design": {
        "typography": {
            "scale_ratio": 1.14,
            "base_size": 14,
            "font_family": "system-ui",
            "line_height_body": 1.57,
        },
        "spacing": {
            "base": 8,
            "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48],
            "grid": "8px",
        },
        "colors": {"palette_size": 13, "uses_ramps": True},
    },
    "chakra_ui": {
        "typography": {
            "scale_ratio": 1.25,
            "base_size": 16,
            "font_family": "system-ui",
            "line_height_body": 1.5,
        },
        "spacing": {
            "base": 4,
            "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 56, 64],
            "grid": "4px",
        },
        "colors": {"palette_size": 15, "uses_ramps": True},
    },
}
|
|
|
|
| |
| |
| |
|
|
class BenchmarkResearcher:
    """
    Research agent that fetches live design system specifications.

    Uses Firecrawl to fetch documentation and an LLM to extract specs.
    Results are cached for 24 hours through BenchmarkCache. When fetching
    or extraction is unavailable, the hard-coded FALLBACK_BENCHMARKS are
    used and the result is labelled with confidence "fallback".

    NOTE(review): several log strings in this class contain glyphs that
    look mis-encoded (for example "β", "ββ", "π"). They are reproduced as
    found; the intended symbols should be restored from the original file.
    """

    # Spec sections carried by every benchmark.
    _SECTIONS = ("typography", "spacing", "colors")

    def __init__(self, firecrawl_client=None, hf_client=None):
        """
        Initialize researcher.

        Args:
            firecrawl_client: Firecrawl API client for fetching docs
            hf_client: HuggingFace client for LLM extraction
        """
        self.firecrawl = firecrawl_client
        self.hf_client = hf_client
        self.cache = BenchmarkCache()

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _logger(log_callback: Optional[Callable] = None) -> Callable:
        """Return a one-argument log function that is a no-op without a callback."""
        def log(msg: str):
            if log_callback:
                log_callback(msg)

        return log

    @staticmethod
    def _as_number(value, default):
        """Return ``value`` as a number, or ``default`` when it is not numeric.

        Ints and floats pass through unchanged; other values are tried with
        ``float()`` (so "14" becomes 14.0). None, booleans and unparsable
        values such as "16px" yield ``default``.
        """
        if isinstance(value, bool):
            return default
        if isinstance(value, (int, float)):
            return value
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @classmethod
    def _has_specs(cls, extracted) -> bool:
        """Tell whether an extraction result holds at least one non-null value."""
        if not isinstance(extracted, dict):
            return False
        for section in cls._SECTIONS:
            values = extracted.get(section)
            if isinstance(values, dict) and any(v is not None for v in values.values()):
                return True
        return False

    @classmethod
    def _merge_specs(cls, extracted, fallback) -> dict:
        """Overlay usable extracted values on top of the fallback specs.

        The extraction prompt tells the model to use null for anything it
        cannot find, so null values and non-dict sections are ignored and
        the fallback value (if any) is kept. Each returned section is a new
        dict, so callers never receive the FALLBACK_BENCHMARKS dicts
        themselves.
        """
        merged = {}
        for section in cls._SECTIONS:
            base = fallback.get(section) if isinstance(fallback, dict) else None
            values = dict(base) if isinstance(base, dict) else {}
            found = extracted.get(section) if isinstance(extracted, dict) else None
            if isinstance(found, dict):
                values.update({k: v for k, v in found.items() if v is not None})
            merged[section] = values
        return merged

    @staticmethod
    def _build_benchmark(system_key: str, specs: dict, confidence: str) -> BenchmarkData:
        """Assemble a BenchmarkData for a registered system, stamped with the current time."""
        source = DESIGN_SYSTEM_SOURCES[system_key]
        return BenchmarkData(
            key=system_key,
            name=source["name"],
            short_name=source["short_name"],
            vendor=source["vendor"],
            icon=source["icon"],
            typography=specs.get("typography", {}),
            spacing=specs.get("spacing", {}),
            colors=specs.get("colors", {}),
            fetched_at=datetime.now().isoformat(),
            confidence=confidence,
            source_urls=list(source["urls"].values()),
            best_for=source["best_for"],
        )

    @staticmethod
    def _score(your_ratio, your_base_size, your_spacing_grid, b_ratio, b_base, b_grid) -> dict:
        """Score the user's values against one benchmark's values.

        Returns a dict whose keys match the BenchmarkComparison fields
        (everything except ``benchmark``). ``similarity_score`` is a
        weighted sum of the absolute differences, so lower means closer.
        """
        ratio_diff = abs(your_ratio - b_ratio)
        base_diff = abs(your_base_size - b_base)
        grid_diff = abs(your_spacing_grid - b_grid)

        # Each component is a 0-100 percentage. The base-size component is
        # clamped like the other two so a large base-size gap cannot go
        # negative and subtract from the type and spacing contributions.
        type_match = max(0, 100 - (ratio_diff * 100))
        spacing_match = max(0, 100 - (grid_diff * 10))
        base_match = max(0, 100 - (base_diff * 5))

        similarity = (ratio_diff * 10) + (base_diff * 0.5) + (grid_diff * 0.3)

        overall_match = (type_match * 0.5) + (spacing_match * 0.3) + (base_match * 0.2)
        overall_match = max(0, min(100, overall_match))

        return {
            "similarity_score": similarity,
            "type_ratio_diff": ratio_diff,
            "base_size_diff": base_diff,
            "spacing_grid_diff": grid_diff,
            "type_match_pct": type_match,
            "spacing_match_pct": spacing_match,
            "overall_match_pct": overall_match,
        }

    # ------------------------------------------------------------------
    # Research
    # ------------------------------------------------------------------

    async def research_benchmark(
        self,
        system_key: str,
        log_callback: Optional[Callable] = None,
        force_refresh: bool = False,
    ) -> BenchmarkData:
        """
        Research a specific design system.

        Args:
            system_key: Key from DESIGN_SYSTEM_SOURCES
            log_callback: Function to log progress
            force_refresh: Bypass cache and fetch fresh

        Returns:
            BenchmarkData with extracted specifications. Values the
            extraction did not provide are filled from FALLBACK_BENCHMARKS.

        Raises:
            ValueError: If ``system_key`` is not in DESIGN_SYSTEM_SOURCES.
        """
        log = self._logger(log_callback)

        if system_key not in DESIGN_SYSTEM_SOURCES:
            raise ValueError(f"Unknown design system: {system_key}")

        source = DESIGN_SYSTEM_SOURCES[system_key]

        if not force_refresh:
            cached = self.cache.get(system_key)
            if cached:
                # NOTE(review): this literal was split across two lines
                # right after "β" in the source as received (presumably a
                # damaged check-mark glyph); it is rejoined here.
                log(f" ββ {source['icon']} {source['short_name']}: Using cached data β")
                return cached

        log(f" ββ {source['icon']} {source['short_name']}: Fetching documentation...")

        raw_content, confidence = await self._fetch_docs(source, log)

        extracted = {}
        if raw_content and self.hf_client:
            log(" β ββ Extracting specifications...")
            extracted = await self._extract_specs_with_llm(source["name"], raw_content)
            if not self._has_specs(extracted):
                # Pages were fetched but nothing usable came out of the
                # LLM, so every value below is fallback data; say so rather
                # than reporting the fetch-based confidence.
                log(" β ββ Using fallback data (extraction returned no specs)")
                confidence = "fallback"
        else:
            log(" β ββ Using fallback data (fetch unavailable)")
            confidence = "fallback"

        specs = self._merge_specs(extracted, FALLBACK_BENCHMARKS.get(system_key, {}))
        result = self._build_benchmark(system_key, specs, confidence)

        self.cache.set(system_key, result)

        ratio = result.typography.get("scale_ratio", "?")
        base = result.typography.get("base_size", "?")
        grid = result.spacing.get("base", "?")
        # NOTE(review): literal rejoined after a line split following "β".
        log(f" β ββ β ratio={ratio}, base={base}px, grid={grid}px [{confidence}]")

        return result

    async def _fetch_docs(self, source: dict, log: Callable) -> tuple[str, str]:
        """Fetch the typography and spacing pages of one design system.

        Returns:
            ``(raw_content, confidence)``. ``raw_content`` is the labelled,
            truncated page text ("" when nothing was fetched). Confidence
            starts at "low", becomes "medium" once typography content was
            fetched and "high" when spacing content was fetched as well.
        """
        raw_content = ""
        confidence = "low"

        if not self.firecrawl:
            return raw_content, confidence

        try:
            typo_url = source["urls"].get("typography")
            if typo_url:
                log(f" β ββ Fetching {typo_url[:50]}...")
                typo_content = await self._fetch_url(typo_url)
                if typo_content:
                    raw_content += f"\n\n=== TYPOGRAPHY ===\n{typo_content[:4000]}"
                    confidence = "medium"

            spacing_url = source["urls"].get("spacing")
            if spacing_url:
                log(" β ββ Fetching spacing docs...")
                spacing_content = await self._fetch_url(spacing_url)
                if spacing_content:
                    raw_content += f"\n\n=== SPACING ===\n{spacing_content[:3000]}"
                    if confidence == "medium":
                        confidence = "high"

        except Exception as e:
            # Best-effort: whatever was collected before the failure is kept.
            log(f" β ββ β οΈ Fetch error: {str(e)[:50]}")

        return raw_content, confidence

    async def _fetch_url(self, url: str) -> Optional[str]:
        """Fetch URL content using Firecrawl.

        Returns:
            The page's "markdown" value, else its "content" value, else
            None. Any failure is treated as "no content" and yields None.
        """
        if not self.firecrawl:
            return None

        try:
            # scrape_url is called synchronously by design of the client;
            # run it in a worker thread so it does not stall the event loop.
            result = await asyncio.to_thread(
                self.firecrawl.scrape_url,
                url,
                params={"formats": ["markdown"]},
            )

            if result and result.get("markdown"):
                return result["markdown"]
            if result and result.get("content"):
                return result["content"]

        except Exception:
            # Deliberately swallowed: callers fall back when nothing is fetched.
            pass

        return None

    async def _extract_specs_with_llm(self, system_name: str, raw_content: str) -> dict:
        """Extract structured specs from documentation using LLM.

        Returns:
            The JSON object found in the model's response, or {} when there
            is no LLM client, the call fails, or no JSON object can be
            parsed from the response.
        """
        if not self.hf_client:
            return {}

        # NOTE(review): the source as received has no indentation inside the
        # JSON template; the text is reproduced as found.
        prompt = f"""Extract the design system specifications from this documentation.

DESIGN SYSTEM: {system_name}

DOCUMENTATION:
{raw_content[:6000]}

Return ONLY a JSON object with these exact fields (use null if not found):
{{
"typography": {{
"scale_ratio": <number like 1.2 or 1.25>,
"base_size": <number in px>,
"font_family": "<font name>",
"sizes": [<list of sizes in px>],
"line_height_body": <number like 1.5>
}},
"spacing": {{
"base": <base unit in px like 4 or 8>,
"scale": [<spacing values>],
"grid": "<description>"
}},
"colors": {{
"palette_size": <number>,
"uses_ramps": <true/false>
}}
}}

Return ONLY valid JSON, no explanation."""

        try:
            response = await self.hf_client.complete_async(
                agent_name="benchmark_extractor",
                system_prompt="You are a design system specification extractor. Extract only the factual specifications.",
                user_message=prompt,
                max_tokens=600,
                json_mode=True,
            )

            import re

            # Take the outermost {...} span so surrounding text is tolerated.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                parsed = json.loads(json_match.group())
                if isinstance(parsed, dict):
                    return parsed

        except Exception:
            # Deliberately swallowed: client errors and unparsable output
            # both mean "no extraction"; the caller falls back.
            pass

        return {}

    async def research_selected_benchmarks(
        self,
        selected_keys: list[str],
        log_callback: Optional[Callable] = None,
    ) -> list[BenchmarkData]:
        """
        Research multiple selected design systems.

        Keys that are not in DESIGN_SYSTEM_SOURCES are skipped. A system
        whose research raises is represented by its fallback data.

        Args:
            selected_keys: List of system keys to research
            log_callback: Function to log progress

        Returns:
            List of BenchmarkData
        """
        log = self._logger(log_callback)

        log("")
        log("β" * 60)
        log("π¬ LAYER 2: BENCHMARK RESEARCH (Firecrawl + Cache)")
        log("β" * 60)
        log("")
        log(f" Selected systems: {', '.join(selected_keys)}")
        log("")

        results = []

        for key in selected_keys:
            if key not in DESIGN_SYSTEM_SOURCES:
                continue
            try:
                result = await self.research_benchmark(key, log_callback)
            except Exception as e:
                log(f" ββ β οΈ Error researching {key}: {e}")
                fallback = self._merge_specs({}, FALLBACK_BENCHMARKS.get(key, {}))
                result = self._build_benchmark(key, fallback, "fallback")
            results.append(result)

        log("")
        # NOTE(review): literal rejoined after a line split following "β".
        log(f" β Researched {len(results)}/{len(selected_keys)} design systems")

        return results

    def compare_to_benchmarks(
        self,
        your_ratio: float,
        your_base_size: int,
        your_spacing_grid: int,
        benchmarks: list[BenchmarkData],
        log_callback: Optional[Callable] = None,
    ) -> list[BenchmarkComparison]:
        """
        Compare user's tokens to researched benchmarks.

        Benchmark values that are missing or not numeric are replaced by
        the defaults 1.25 (ratio), 16 (base size) and 8 (spacing grid).

        Args:
            your_ratio: Detected type scale ratio
            your_base_size: Detected base font size
            your_spacing_grid: Detected spacing grid base
            benchmarks: List of researched BenchmarkData
            log_callback: Function to log progress

        Returns:
            List of BenchmarkComparison sorted by similarity score,
            closest (lowest score) first
        """
        log = self._logger(log_callback)

        log("")
        log(" π BENCHMARK COMPARISON")
        log(" " + "β" * 40)
        log(f" Your values: ratio={your_ratio:.2f}, base={your_base_size}px, grid={your_spacing_grid}px")
        log("")

        comparisons = []

        for b in benchmarks:
            scores = self._score(
                your_ratio,
                your_base_size,
                your_spacing_grid,
                self._as_number(b.typography.get("scale_ratio"), 1.25),
                self._as_number(b.typography.get("base_size"), 16),
                self._as_number(b.spacing.get("base"), 8),
            )
            comparisons.append(BenchmarkComparison(benchmark=b, **scores))

        comparisons.sort(key=lambda x: x.similarity_score)

        # Only the five closest benchmarks are logged; the top three get a medal.
        medals = ["π₯", "π₯", "π₯"]
        for i, c in enumerate(comparisons[:5]):
            medal = medals[i] if i < 3 else " "
            b = c.benchmark
            log(f" {medal} {b.icon} {b.short_name}: {c.overall_match_pct:.0f}% match (score: {c.similarity_score:.2f})")
            log(f" ββ ratio={b.typography.get('scale_ratio')}, base={b.typography.get('base_size')}px, grid={b.spacing.get('base')}px")

        return comparisons
|
|
|
|
| |
| |
| |
|
|
def get_available_benchmarks() -> list[dict]:
    """Describe every registered design system for the UI dropdown.

    Returns one dict per DESIGN_SYSTEM_SOURCES entry, in registry order,
    holding the system's key and its display metadata.
    """
    copied_fields = ("name", "short_name", "icon", "vendor", "best_for")

    entries = []
    for system_key, meta in DESIGN_SYSTEM_SOURCES.items():
        entry = {"key": system_key}
        for field_name in copied_fields:
            entry[field_name] = meta[field_name]
        entries.append(entry)
    return entries
|
|
|
|
def get_benchmark_choices() -> list[tuple[str, str]]:
    """Build ``(label, key)`` pairs for a Gradio dropdown.

    The label combines the system's icon, short name and vendor; the value
    is the system's key in DESIGN_SYSTEM_SOURCES.
    """
    choices = []
    for system_key, meta in DESIGN_SYSTEM_SOURCES.items():
        label = f"{meta['icon']} {meta['short_name']} ({meta['vendor']})"
        choices.append((label, system_key))
    return choices
|
|