Spaces:

riazmo
/

Design-System-Extractor-2

Running

App Files Files Community

Design-System-Extractor-2 / agents /benchmark_researcher.py

riazmo

Upload 2 files

a6c864a verified 2 months ago

raw

history blame contribute delete

26.8 kB

	"""
	Benchmark Research Agent
	=========================
	Fetches LIVE data from design system documentation sites
	using Firecrawl, with 24-hour caching.

	This agent:
	1. Fetches official documentation from design system sites
	2. Extracts typography, spacing, color specifications using LLM
	3. Caches results for 24 hours
	4. Compares user's tokens to researched benchmarks
	"""

	import asyncio
	import json
	import os
	from dataclasses import dataclass, field
	from datetime import datetime, timedelta
	from typing import Optional, Callable
	import hashlib


	# =============================================================================
	# DESIGN SYSTEM SOURCES (Official Documentation URLs)
	# =============================================================================

	# Registry of the design systems this agent can research, keyed by a stable
	# identifier. Every entry carries display metadata ("name", "short_name",
	# "vendor", "icon"), a "best_for" list of typical use cases, and a "urls"
	# mapping from topic ("typography", "spacing", and usually "colors") to the
	# official documentation page for that topic.
	DESIGN_SYSTEM_SOURCES = {
	    "material_design_3": {
	        "name": "Material Design 3",
	        "short_name": "Material 3",
	        "vendor": "Google",
	        "urls": {
	            "typography": "https://m3.material.io/styles/typography/type-scale-tokens",
	            "spacing": "https://m3.material.io/foundations/layout/understanding-layout/spacing",
	            "colors": "https://m3.material.io/styles/color/the-color-system/key-colors-tones",
	        },
	        "best_for": ["Android apps", "Web apps", "Enterprise software"],
	        "icon": "🟢",
	    },
	    # Apple HIG has no "colors" URL listed.
	    "apple_hig": {
	        "name": "Apple Human Interface Guidelines",
	        "short_name": "Apple HIG",
	        "vendor": "Apple",
	        "urls": {
	            "typography": "https://developer.apple.com/design/human-interface-guidelines/typography",
	            "spacing": "https://developer.apple.com/design/human-interface-guidelines/layout",
	        },
	        "best_for": ["iOS apps", "macOS apps", "Premium consumer products"],
	        "icon": "🍎",
	    },
	    "shopify_polaris": {
	        "name": "Shopify Polaris",
	        "short_name": "Polaris",
	        "vendor": "Shopify",
	        "urls": {
	            "typography": "https://polaris.shopify.com/design/typography",
	            "spacing": "https://polaris.shopify.com/design/spacing",
	            "colors": "https://polaris.shopify.com/design/colors",
	        },
	        "best_for": ["E-commerce", "Admin dashboards", "Merchant tools"],
	        "icon": "🛒",
	    },
	    "atlassian_design": {
	        "name": "Atlassian Design System",
	        "short_name": "Atlassian",
	        "vendor": "Atlassian",
	        "urls": {
	            "typography": "https://atlassian.design/foundations/typography",
	            "spacing": "https://atlassian.design/foundations/spacing",
	            "colors": "https://atlassian.design/foundations/color",
	        },
	        "best_for": ["Productivity tools", "Dense interfaces", "Enterprise B2B"],
	        "icon": "🔵",
	    },
	    "ibm_carbon": {
	        "name": "IBM Carbon Design System",
	        "short_name": "Carbon",
	        "vendor": "IBM",
	        "urls": {
	            "typography": "https://carbondesignsystem.com/guidelines/typography/overview",
	            "spacing": "https://carbondesignsystem.com/guidelines/spacing/overview",
	            "colors": "https://carbondesignsystem.com/guidelines/color/overview",
	        },
	        "best_for": ["Enterprise software", "Data-heavy applications", "IBM products"],
	        "icon": "🔷",
	    },
	    "tailwind_css": {
	        "name": "Tailwind CSS",
	        "short_name": "Tailwind",
	        "vendor": "Tailwind Labs",
	        "urls": {
	            "typography": "https://tailwindcss.com/docs/font-size",
	            "spacing": "https://tailwindcss.com/docs/customizing-spacing",
	            "colors": "https://tailwindcss.com/docs/customizing-colors",
	        },
	        "best_for": ["Web applications", "Startups", "Rapid prototyping"],
	        "icon": "🌊",
	    },
	    "ant_design": {
	        "name": "Ant Design",
	        "short_name": "Ant Design",
	        "vendor": "Ant Group",
	        "urls": {
	            "typography": "https://ant.design/docs/spec/font",
	            "spacing": "https://ant.design/docs/spec/layout",
	            "colors": "https://ant.design/docs/spec/colors",
	        },
	        "best_for": ["Enterprise B2B", "Admin panels", "Chinese market"],
	        "icon": "🐜",
	    },
	    "chakra_ui": {
	        "name": "Chakra UI",
	        "short_name": "Chakra",
	        "vendor": "Chakra UI",
	        "urls": {
	            "typography": "https://chakra-ui.com/docs/styled-system/theme#typography",
	            "spacing": "https://chakra-ui.com/docs/styled-system/theme#spacing",
	            "colors": "https://chakra-ui.com/docs/styled-system/theme#colors",
	        },
	        "best_for": ["React applications", "Startups", "Accessible products"],
	        "icon": "⚡",
	    },
	}


	# =============================================================================
	# DATA CLASSES
	# =============================================================================

	@dataclass
	class BenchmarkData:
	    """Researched benchmark data from a design system.

	    Holds the identifying metadata of one design system together with the
	    typography, spacing and color specifications gathered for it.
	    """
	    key: str
	    name: str
	    short_name: str
	    vendor: str
	    icon: str

	    # Extracted specifications
	    typography: dict = field(default_factory=dict)
	    # Expected: {scale_ratio, base_size, sizes[], font_family, line_height_body}

	    spacing: dict = field(default_factory=dict)
	    # Expected: {base, scale[], grid}

	    colors: dict = field(default_factory=dict)
	    # Expected: {palette_size, uses_ramps, ramp_steps}

	    # Metadata
	    fetched_at: str = ""  # ISO-8601 timestamp string; empty when never fetched
	    confidence: str = "low"  # "high", "medium", "low" or "fallback"
	    source_urls: list = field(default_factory=list)
	    best_for: list = field(default_factory=list)

	    def to_dict(self) -> dict:
	        """Return a plain-dict view of every field.

	        ``source_urls`` is included so that the dictionary can rebuild an
	        equal instance via ``BenchmarkData(**d)``; leaving it out meant the
	        URLs were dropped whenever the data was stored and read back.
	        """
	        return {
	            "key": self.key,
	            "name": self.name,
	            "short_name": self.short_name,
	            "vendor": self.vendor,
	            "icon": self.icon,
	            "typography": self.typography,
	            "spacing": self.spacing,
	            "colors": self.colors,
	            "fetched_at": self.fetched_at,
	            "confidence": self.confidence,
	            "source_urls": self.source_urls,
	            "best_for": self.best_for,
	        }


	@dataclass
	class BenchmarkComparison:
	    """Outcome of comparing the user's tokens against one benchmark."""
	    benchmark: BenchmarkData
	    similarity_score: float  # Lower = more similar

	    # Per-dimension absolute differences
	    type_ratio_diff: float
	    base_size_diff: int
	    spacing_grid_diff: int

	    # Match percentages
	    type_match_pct: float
	    spacing_match_pct: float
	    overall_match_pct: float

	    def to_dict(self) -> dict:
	        """Serialize the comparison, rounding the float values for display."""
	        bench = self.benchmark

	        type_ratio_part = {
	            "diff": round(self.type_ratio_diff, 3),
	            "match_pct": round(self.type_match_pct, 1),
	        }
	        base_size_part = {"diff": self.base_size_diff}
	        spacing_grid_part = {
	            "diff": self.spacing_grid_diff,
	            "match_pct": round(self.spacing_match_pct, 1),
	        }

	        # The benchmark's own values, shown next to the computed differences.
	        reference_values = {
	            "type_ratio": bench.typography.get("scale_ratio"),
	            "base_size": bench.typography.get("base_size"),
	            "spacing_grid": bench.spacing.get("base"),
	        }

	        payload = {
	            "name": bench.name,
	            "short_name": bench.short_name,
	            "icon": bench.icon,
	            "similarity_score": round(self.similarity_score, 2),
	            "overall_match_pct": round(self.overall_match_pct, 1),
	        }
	        payload["comparison"] = {
	            "type_ratio": type_ratio_part,
	            "base_size": base_size_part,
	            "spacing_grid": spacing_grid_part,
	        }
	        payload["benchmark_values"] = reference_values
	        payload["best_for"] = bench.best_for
	        payload["confidence"] = bench.confidence
	        return payload


	# =============================================================================
	# CACHE MANAGER
	# =============================================================================

	class BenchmarkCache:
	    """Manages 24-hour caching of benchmark research results.

	    All entries live in a single JSON file (``benchmark_cache.json``) that
	    maps a design-system key to the dictionary produced by
	    ``BenchmarkData.to_dict()``. Reading and writing are best-effort: an
	    unreadable or malformed file behaves like an empty cache, and a failed
	    write is ignored.
	    """

	    # Entries older than this are considered expired.
	    MAX_AGE = timedelta(hours=24)

	    # Timestamp assumed for entries whose "fetched_at" is missing or
	    # unparseable, so that they always count as expired.
	    _MISSING_TIMESTAMP = datetime(2000, 1, 1)

	    def __init__(self, cache_dir: str = None):
	        """Create the cache.

	        Args:
	            cache_dir: Directory holding the cache file. Defaults to the
	                ``storage`` directory next to this module's parent package.
	        """
	        if cache_dir is None:
	            cache_dir = os.path.join(os.path.dirname(__file__), "..", "storage")
	        self.cache_file = os.path.join(cache_dir, "benchmark_cache.json")
	        self._ensure_cache_dir()

	    def _ensure_cache_dir(self):
	        """Ensure cache directory exists."""
	        os.makedirs(os.path.dirname(self.cache_file), exist_ok=True)

	    def _load_cache(self) -> dict:
	        """Load cache from file.

	        Returns an empty dict when the file is missing, unreadable, not valid
	        JSON, or does not contain a JSON object at the top level.
	        """
	        try:
	            with open(self.cache_file, "r", encoding="utf-8") as f:
	                cache = json.load(f)
	        except (OSError, ValueError):
	            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
	            return {}
	        return cache if isinstance(cache, dict) else {}

	    def _save_cache(self, cache: dict):
	        """Save cache to file (best-effort).

	        The data is written to a temporary sibling file and then moved over
	        the real one, so a failed dump cannot truncate the existing cache.
	        """
	        tmp_path = f"{self.cache_file}.tmp"
	        try:
	            with open(tmp_path, "w", encoding="utf-8") as f:
	                json.dump(cache, f, indent=2)
	            os.replace(tmp_path, self.cache_file)
	        except (OSError, TypeError, ValueError):
	            # Caching is optional: drop the partial temp file and carry on.
	            try:
	                os.remove(tmp_path)
	            except OSError:
	                pass

	    def _entry_age(self, entry: dict) -> timedelta:
	        """Return how long ago ``entry`` was fetched.

	        A missing, empty or malformed ``fetched_at`` is treated as
	        ``_MISSING_TIMESTAMP`` instead of raising.
	        """
	        raw = entry.get("fetched_at")
	        fetched_at = self._MISSING_TIMESTAMP
	        if isinstance(raw, str) and raw:
	            try:
	                fetched_at = datetime.fromisoformat(raw)
	            except ValueError:
	                pass
	        # Use a "now" of the same kind (naive/aware) as the stored timestamp;
	        # subtracting a naive from an aware datetime raises TypeError.
	        return datetime.now(fetched_at.tzinfo) - fetched_at

	    def get(self, key: str) -> Optional["BenchmarkData"]:
	        """Get cached benchmark if valid (at most 24 hours old).

	        Returns:
	            The reconstructed BenchmarkData, or None when the key is not
	            cached, the entry is malformed, or the entry has expired.
	        """
	        entry = self._load_cache().get(key)
	        if not isinstance(entry, dict):
	            return None

	        # Check if expired (24 hours)
	        if self._entry_age(entry) > self.MAX_AGE:
	            return None

	        # Reconstruct BenchmarkData, filling missing metadata from the registry.
	        source = DESIGN_SYSTEM_SOURCES.get(key, {})
	        return BenchmarkData(
	            key=key,
	            name=entry.get("name", source.get("name", key)),
	            short_name=entry.get("short_name", source.get("short_name", key)),
	            vendor=entry.get("vendor", source.get("vendor", "")),
	            icon=entry.get("icon", source.get("icon", "📦")),
	            # "or {}" / "or []" also covers sections stored as JSON null.
	            typography=entry.get("typography") or {},
	            spacing=entry.get("spacing") or {},
	            colors=entry.get("colors") or {},
	            fetched_at=entry.get("fetched_at", ""),
	            confidence=entry.get("confidence", "low"),
	            source_urls=entry.get("source_urls") or [],
	            best_for=entry.get("best_for", source.get("best_for", [])),
	        )

	    def set(self, key: str, data: "BenchmarkData"):
	        """Cache benchmark data."""
	        cache = self._load_cache()
	        cache[key] = data.to_dict()
	        self._save_cache(cache)

	    def get_cache_status(self) -> dict:
	        """Get status of all cached items.

	        Returns:
	            A dict with one entry per key of DESIGN_SYSTEM_SOURCES:
	            ``{"cached": True, "valid": bool, "age_hours": float}`` for cached
	            systems, ``{"cached": False, "valid": False}`` otherwise.
	        """
	        cache = self._load_cache()
	        status = {}

	        for key in DESIGN_SYSTEM_SOURCES:
	            entry = cache.get(key)
	            if isinstance(entry, dict):
	                age = self._entry_age(entry)
	                status[key] = {
	                    "cached": True,
	                    # Same expiry rule as get().
	                    "valid": age <= self.MAX_AGE,
	                    "age_hours": round(age.total_seconds() / 3600, 1),
	                }
	            else:
	                status[key] = {"cached": False, "valid": False}

	        return status


	# =============================================================================
	# FALLBACK DATA (Used when research fails)
	# =============================================================================

	# Static specifications used when live research is unavailable, keyed like
	# DESIGN_SYSTEM_SOURCES. Each entry has "typography", "spacing" and "colors"
	# sections.
	FALLBACK_BENCHMARKS = {
	    "material_design_3": {
	        "typography": {
	            "scale_ratio": 1.2,
	            "base_size": 16,
	            "font_family": "Roboto",
	            "line_height_body": 1.5,
	        },
	        "spacing": {
	            "base": 8,
	            "scale": [0, 4, 8, 12, 16, 24, 32, 48, 64],
	            "grid": "8px",
	        },
	        "colors": {"palette_size": 13, "uses_ramps": True},
	    },
	    "apple_hig": {
	        "typography": {
	            "scale_ratio": 1.19,
	            "base_size": 17,
	            "font_family": "SF Pro",
	            "line_height_body": 1.47,
	        },
	        "spacing": {
	            "base": 4,
	            "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40],
	            "grid": "4px",
	        },
	        "colors": {"palette_size": 9, "uses_ramps": True},
	    },
	    "shopify_polaris": {
	        "typography": {
	            "scale_ratio": 1.25,
	            "base_size": 16,
	            "font_family": "Inter",
	            "line_height_body": 1.5,
	        },
	        "spacing": {
	            "base": 4,
	            "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64],
	            "grid": "4px",
	        },
	        "colors": {"palette_size": 11, "uses_ramps": True},
	    },
	    "atlassian_design": {
	        "typography": {
	            "scale_ratio": 1.14,
	            "base_size": 14,
	            "font_family": "Inter",
	            "line_height_body": 1.43,
	        },
	        "spacing": {
	            "base": 8,
	            "scale": [0, 4, 8, 12, 16, 24, 32, 40, 48],
	            "grid": "8px",
	        },
	        "colors": {"palette_size": 15, "uses_ramps": True},
	    },
	    "ibm_carbon": {
	        "typography": {
	            "scale_ratio": 1.25,
	            "base_size": 14,
	            "font_family": "IBM Plex Sans",
	            "line_height_body": 1.5,
	        },
	        "spacing": {
	            "base": 8,
	            "scale": [0, 2, 4, 8, 12, 16, 24, 32, 40, 48],
	            "grid": "8px",
	        },
	        "colors": {"palette_size": 12, "uses_ramps": True},
	    },
	    "tailwind_css": {
	        "typography": {
	            "scale_ratio": 1.25,
	            "base_size": 16,
	            "font_family": "system-ui",
	            "line_height_body": 1.5,
	        },
	        "spacing": {
	            "base": 4,
	            "scale": [0, 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32],
	            "grid": "4px",
	        },
	        "colors": {"palette_size": 22, "uses_ramps": True},
	    },
	    "ant_design": {
	        "typography": {
	            "scale_ratio": 1.14,
	            "base_size": 14,
	            "font_family": "system-ui",
	            "line_height_body": 1.57,
	        },
	        "spacing": {
	            "base": 8,
	            "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48],
	            "grid": "8px",
	        },
	        "colors": {"palette_size": 13, "uses_ramps": True},
	    },
	    "chakra_ui": {
	        "typography": {
	            "scale_ratio": 1.25,
	            "base_size": 16,
	            "font_family": "system-ui",
	            "line_height_body": 1.5,
	        },
	        "spacing": {
	            "base": 4,
	            "scale": [0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 56, 64],
	            "grid": "4px",
	        },
	        "colors": {"palette_size": 15, "uses_ramps": True},
	    },
	}


	# =============================================================================
	# BENCHMARK RESEARCHER
	# =============================================================================

	class BenchmarkResearcher:
	    """
	    Research agent that fetches live design system specifications.

	    Uses Firecrawl to fetch documentation and LLM to extract specs.
	    Results are cached for 24 hours.
	    """

	    # Sections expected in an extraction result.
	    _SPEC_SECTIONS = ("typography", "spacing", "colors")

	    def __init__(self, firecrawl_client=None, hf_client=None):
	        """
	        Initialize researcher.

	        Args:
	            firecrawl_client: Firecrawl API client for fetching docs
	            hf_client: HuggingFace client for LLM extraction
	        """
	        self.firecrawl = firecrawl_client
	        self.hf_client = hf_client
	        self.cache = BenchmarkCache()

	    @staticmethod
	    def _as_number(value, default):
	        """Return ``value`` as a number, or ``default`` if it is not numeric.

	        Ints and floats pass through unchanged. Strings such as ``"16"`` or
	        ``"16px"`` are parsed to float. ``None``, booleans and anything else
	        yield ``default``.
	        """
	        if isinstance(value, bool):
	            return default
	        if isinstance(value, (int, float)):
	            return value
	        if isinstance(value, str):
	            text = value.strip().lower().removesuffix("px").strip()
	            try:
	                return float(text)
	            except ValueError:
	                return default
	        return default

	    @staticmethod
	    def _merge_section(extracted_section, fallback_section) -> dict:
	        """Overlay the non-null extracted values on top of the fallback values.

	        The extraction prompt asks for ``null`` where a value was not found;
	        such fields (and whole sections that are not dicts) fall back to the
	        static data instead of ending up as ``None``.
	        """
	        merged = dict(fallback_section) if isinstance(fallback_section, dict) else {}
	        if isinstance(extracted_section, dict):
	            merged.update(
	                {name: value for name, value in extracted_section.items() if value is not None}
	            )
	        return merged

	    @classmethod
	    def _has_specs(cls, extracted) -> bool:
	        """Tell whether an extraction result holds at least one non-null value."""
	        if not isinstance(extracted, dict):
	            return False
	        for section_name in cls._SPEC_SECTIONS:
	            section = extracted.get(section_name)
	            if isinstance(section, dict) and any(v is not None for v in section.values()):
	                return True
	        return False

	    async def research_benchmark(
	        self,
	        system_key: str,
	        log_callback: Callable = None,
	        force_refresh: bool = False,
	    ) -> "BenchmarkData":
	        """
	        Research a specific design system.

	        Args:
	            system_key: Key from DESIGN_SYSTEM_SOURCES
	            log_callback: Function to log progress
	            force_refresh: Bypass cache and fetch fresh

	        Returns:
	            BenchmarkData with extracted specifications. Its confidence is
	            "fallback" whenever no usable values could be extracted.

	        Raises:
	            ValueError: If system_key is not in DESIGN_SYSTEM_SOURCES.
	        """
	        def log(msg: str):
	            if log_callback:
	                log_callback(msg)

	        if system_key not in DESIGN_SYSTEM_SOURCES:
	            raise ValueError(f"Unknown design system: {system_key}")

	        source = DESIGN_SYSTEM_SOURCES[system_key]
	        fallback = FALLBACK_BENCHMARKS.get(system_key, {})

	        # Check cache first (unless force refresh)
	        if not force_refresh:
	            cached = self.cache.get(system_key)
	            if cached:
	                log(f" ├─ {source['icon']} {source['short_name']}: Using cached data ✅")
	                return cached

	        log(f" ├─ {source['icon']} {source['short_name']}: Fetching documentation...")

	        raw_content, confidence = await self._fetch_docs(source, log)

	        # Extract specs with LLM (or use fallback)
	        extracted = {}
	        if raw_content and self.hf_client:
	            log(" │ ├─ Extracting specifications...")
	            extracted = await self._extract_specs_with_llm(source["name"], raw_content)
	            if not self._has_specs(extracted):
	                # Do not report fetch-based confidence for values that in
	                # fact come from the static fallback table.
	                log(" │ ├─ Using fallback data (extraction failed)")
	                extracted = {}
	                confidence = "fallback"
	        else:
	            log(" │ ├─ Using fallback data (fetch unavailable)")
	            confidence = "fallback"

	        # Build result
	        result = BenchmarkData(
	            key=system_key,
	            name=source["name"],
	            short_name=source["short_name"],
	            vendor=source["vendor"],
	            icon=source["icon"],
	            typography=self._merge_section(extracted.get("typography"), fallback.get("typography")),
	            spacing=self._merge_section(extracted.get("spacing"), fallback.get("spacing")),
	            colors=self._merge_section(extracted.get("colors"), fallback.get("colors")),
	            fetched_at=datetime.now().isoformat(),
	            confidence=confidence,
	            source_urls=list(source["urls"].values()),
	            best_for=source["best_for"],
	        )

	        # Cache result
	        self.cache.set(system_key, result)

	        ratio = result.typography.get("scale_ratio", "?")
	        base = result.typography.get("base_size", "?")
	        grid = result.spacing.get("base", "?")
	        log(f" │ └─ ✅ ratio={ratio}, base={base}px, grid={grid}px [{confidence}]")

	        return result

	    async def _fetch_docs(self, source: dict, log: Callable) -> tuple:
	        """Fetch the typography and spacing pages of ``source``.

	        Only the "typography" and "spacing" URLs are fetched; a "colors" URL,
	        if present, is not.

	        Returns:
	            ``(raw_content, confidence)``. ``raw_content`` is the concatenated,
	            truncated page text (empty when nothing was fetched). Confidence
	            starts at "low", becomes "medium" once typography was fetched and
	            "high" when spacing was fetched as well.
	        """
	        raw_content = ""
	        confidence = "low"

	        if not self.firecrawl:
	            return raw_content, confidence

	        try:
	            # Fetch typography docs
	            typo_url = source["urls"].get("typography")
	            if typo_url:
	                log(f" │ ├─ Fetching {typo_url[:50]}...")
	                typo_content = await self._fetch_url(typo_url)
	                if typo_content:
	                    raw_content += f"\n\n=== TYPOGRAPHY ===\n{typo_content[:4000]}"
	                    confidence = "medium"

	            # Fetch spacing docs
	            spacing_url = source["urls"].get("spacing")
	            if spacing_url:
	                log(" │ ├─ Fetching spacing docs...")
	                spacing_content = await self._fetch_url(spacing_url)
	                if spacing_content:
	                    raw_content += f"\n\n=== SPACING ===\n{spacing_content[:3000]}"
	                    if confidence == "medium":
	                        confidence = "high"

	        except Exception as e:
	            log(f" │ ├─ ⚠️ Fetch error: {str(e)[:50]}")

	        return raw_content, confidence

	    async def _fetch_url(self, url: str) -> Optional[str]:
	        """Fetch URL content using Firecrawl.

	        The client's ``scrape_url`` is a blocking call, so it is run in a
	        worker thread to keep the event loop responsive. The response may be
	        a dict or an object; "markdown" is preferred over "content".

	        Returns:
	            The page text, or None when no client is configured, the request
	            fails, or the response carries no text.
	        """
	        if not self.firecrawl:
	            return None

	        try:
	            # Firecrawl scrape
	            result = await asyncio.to_thread(
	                self.firecrawl.scrape_url,
	                url,
	                params={"formats": ["markdown"]},
	            )
	        except Exception:
	            # Best-effort: any client failure means "no content".
	            return None

	        for field_name in ("markdown", "content"):
	            if isinstance(result, dict):
	                text = result.get(field_name)
	            else:
	                text = getattr(result, field_name, None)
	            if text and isinstance(text, str):
	                return text

	        return None

	    async def _extract_specs_with_llm(self, system_name: str, raw_content: str) -> dict:
	        """Extract structured specs from documentation using LLM.

	        Returns:
	            The parsed JSON object from the model's reply, or an empty dict
	            when no client is configured, the call fails, or the reply holds
	            no parseable JSON object.
	        """
	        import re

	        if not self.hf_client:
	            return {}

	        # NOTE: research_benchmark can pass up to roughly 7000 characters
	        # (4000 typography + 3000 spacing + headers), so the 6000-character
	        # cut below may drop the tail of the spacing section.
	        prompt = f"""Extract the design system specifications from this documentation.

	DESIGN SYSTEM: {system_name}

	DOCUMENTATION:
	{raw_content[:6000]}

	Return ONLY a JSON object with these exact fields (use null if not found):
	{{
	"typography": {{
	"scale_ratio": <number like 1.2 or 1.25>,
	"base_size": <number in px>,
	"font_family": "<font name>",
	"sizes": [<list of sizes in px>],
	"line_height_body": <number like 1.5>
	}},
	"spacing": {{
	"base": <base unit in px like 4 or 8>,
	"scale": [<spacing values>],
	"grid": "<description>"
	}},
	"colors": {{
	"palette_size": <number>,
	"uses_ramps": <true/false>
	}}
	}}

	Return ONLY valid JSON, no explanation."""

	        try:
	            response = await self.hf_client.complete_async(
	                agent_name="benchmark_extractor",
	                system_prompt="You are a design system specification extractor. Extract only the factual specifications.",
	                user_message=prompt,
	                max_tokens=600,
	                json_mode=True,
	            )
	        except Exception:
	            # Best-effort: the caller falls back to static data on {}.
	            return {}

	        if not isinstance(response, str):
	            return {}

	        # Parse JSON from response (tolerates text around the object)
	        json_match = re.search(r'\{[\s\S]*\}', response)
	        if not json_match:
	            return {}
	        try:
	            parsed = json.loads(json_match.group())
	        except ValueError:
	            return {}
	        return parsed if isinstance(parsed, dict) else {}

	    async def research_selected_benchmarks(
	        self,
	        selected_keys: list[str],
	        log_callback: Callable = None,
	    ) -> "list[BenchmarkData]":
	        """
	        Research multiple selected design systems.

	        Keys that are not in DESIGN_SYSTEM_SOURCES are skipped with a
	        warning. If researching a known system raises, its static fallback
	        data is used instead.

	        Args:
	            selected_keys: List of system keys to research
	            log_callback: Function to log progress

	        Returns:
	            List of BenchmarkData
	        """
	        def log(msg: str):
	            if log_callback:
	                log_callback(msg)

	        log("")
	        log("═" * 60)
	        log("🔬 LAYER 2: BENCHMARK RESEARCH (Firecrawl + Cache)")
	        log("═" * 60)
	        log("")
	        log(f" Selected systems: {', '.join(selected_keys)}")
	        log("")

	        results = []

	        for key in selected_keys:
	            if key not in DESIGN_SYSTEM_SOURCES:
	                log(f" ├─ ⚠️ Skipping unknown design system: {key}")
	                continue

	            try:
	                result = await self.research_benchmark(key, log_callback)
	                results.append(result)
	            except Exception as e:
	                log(f" ├─ ⚠️ Error researching {key}: {e}")
	                # Use fallback
	                source = DESIGN_SYSTEM_SOURCES[key]
	                fallback = FALLBACK_BENCHMARKS.get(key, {})
	                results.append(BenchmarkData(
	                    key=key,
	                    name=source["name"],
	                    short_name=source["short_name"],
	                    vendor=source["vendor"],
	                    icon=source["icon"],
	                    typography=fallback.get("typography", {}),
	                    spacing=fallback.get("spacing", {}),
	                    colors=fallback.get("colors", {}),
	                    fetched_at=datetime.now().isoformat(),
	                    confidence="fallback",
	                    source_urls=list(source["urls"].values()),
	                    best_for=source["best_for"],
	                ))

	        log("")
	        log(f" ✅ Researched {len(results)}/{len(selected_keys)} design systems")

	        return results

	    def compare_to_benchmarks(
	        self,
	        your_ratio: float,
	        your_base_size: int,
	        your_spacing_grid: int,
	        benchmarks: "list[BenchmarkData]",
	        log_callback: Callable = None,
	    ) -> "list[BenchmarkComparison]":
	        """
	        Compare user's tokens to researched benchmarks.

	        Benchmark values that are missing, null or not numeric are replaced
	        by the defaults 1.25 (ratio), 16 (base size) and 8 (spacing grid).

	        Args:
	            your_ratio: Detected type scale ratio
	            your_base_size: Detected base font size
	            your_spacing_grid: Detected spacing grid base
	            benchmarks: List of researched BenchmarkData
	            log_callback: Function to log progress

	        Returns:
	            List of BenchmarkComparison sorted by similarity
	        """
	        def log(msg: str):
	            if log_callback:
	                log_callback(msg)

	        log("")
	        log(" 📊 BENCHMARK COMPARISON")
	        log(" " + "─" * 40)
	        log(f" Your values: ratio={your_ratio:.2f}, base={your_base_size}px, grid={your_spacing_grid}px")
	        log("")

	        comparisons = []

	        for b in benchmarks:
	            typography = b.typography or {}
	            spacing = b.spacing or {}
	            b_ratio = self._as_number(typography.get("scale_ratio"), 1.25)
	            b_base = self._as_number(typography.get("base_size"), 16)
	            b_grid = self._as_number(spacing.get("base"), 8)

	            # Calculate differences
	            ratio_diff = abs(your_ratio - b_ratio)
	            base_diff = abs(your_base_size - b_base)
	            grid_diff = abs(your_spacing_grid - b_grid)

	            # Calculate match percentages
	            type_match = max(0, 100 - (ratio_diff * 100))  # 0.1 diff = 90% match
	            spacing_match = max(0, 100 - (grid_diff * 10))  # 4px diff = 60% match

	            # Weighted similarity score (lower = more similar)
	            similarity = (ratio_diff * 10) + (base_diff * 0.5) + (grid_diff * 0.3)

	            # Overall match percentage, clamped to 0..100
	            overall_match = (type_match * 0.5) + (spacing_match * 0.3) + (100 - base_diff * 5) * 0.2
	            overall_match = max(0, min(100, overall_match))

	            comparisons.append(BenchmarkComparison(
	                benchmark=b,
	                similarity_score=similarity,
	                type_ratio_diff=ratio_diff,
	                base_size_diff=base_diff,
	                spacing_grid_diff=grid_diff,
	                type_match_pct=type_match,
	                spacing_match_pct=spacing_match,
	                overall_match_pct=overall_match,
	            ))

	        # Sort by similarity (lower = better)
	        comparisons.sort(key=lambda x: x.similarity_score)

	        # Log the five closest results
	        medals = ["🥇", "🥈", "🥉"]
	        for i, c in enumerate(comparisons[:5]):
	            medal = medals[i] if i < 3 else " "
	            b = c.benchmark
	            typography = b.typography or {}
	            spacing = b.spacing or {}
	            log(f" {medal} {b.icon} {b.short_name}: {c.overall_match_pct:.0f}% match (score: {c.similarity_score:.2f})")
	            log(f" └─ ratio={typography.get('scale_ratio')}, base={typography.get('base_size')}px, grid={spacing.get('base')}px")

	        return comparisons


	# =============================================================================
	# HELPER FUNCTIONS
	# =============================================================================

	def get_available_benchmarks() -> list[dict]:
	    """List every registered design system as a flat dict for a UI dropdown.

	    Each dict holds the system's "key" followed by its "name", "short_name",
	    "icon", "vendor" and "best_for" values from DESIGN_SYSTEM_SOURCES, in
	    registry order.
	    """
	    exposed_fields = ("name", "short_name", "icon", "vendor", "best_for")

	    options = []
	    for system_key, meta in DESIGN_SYSTEM_SOURCES.items():
	        option = {"key": system_key}
	        for field_name in exposed_fields:
	            option[field_name] = meta[field_name]
	        options.append(option)
	    return options


	def get_benchmark_choices() -> list[tuple[str, str]]:
	    """Build ``(label, key)`` pairs for a Gradio dropdown.

	    The label reads "<icon> <short name> (<vendor>)"; pairs follow the order
	    of DESIGN_SYSTEM_SOURCES.
	    """
	    choices = []
	    for system_key, meta in DESIGN_SYSTEM_SOURCES.items():
	        label = f"{meta['icon']} {meta['short_name']} ({meta['vendor']})"
	        choices.append((label, system_key))
	    return choices