| | """ |
| | Design System Extractor v2 — Main Application |
| | ============================================== |
| | |
| | Flow: |
| | 1. User enters URL |
| | 2. Agent 1 discovers pages → User confirms |
| | 3. Agent 1 extracts tokens (Desktop + Mobile) |
| | 4. Agent 2 normalizes tokens |
| | 5. Stage 1 UI: User reviews tokens (accept/reject, Desktop↔Mobile toggle) |
| | 6. Agent 3 proposes upgrades |
| | 7. Stage 2 UI: User selects options with live preview |
| | 8. Agent 4 generates JSON |
| | 9. Stage 3 UI: User exports |
| | """ |
| |
|
| | import os |
| | import asyncio |
| | import json |
| | import gradio as gr |
| | from datetime import datetime |
| | from typing import Optional |
| |
|
| | |
# HuggingFace token read from the environment once, at import time.
# Falls back to an empty string when HF_TOKEN is not set.
HF_TOKEN_FROM_ENV = os.getenv("HF_TOKEN", "")
| |
|
| | |
| | |
| | |
| |
|
class AppState:
    """Mutable container for one extraction session plus a bounded log."""

    # Oldest log lines are dropped once the log grows past this many entries.
    _MAX_LOG_LINES = 100

    def __init__(self):
        self.reset()

    def reset(self):
        """Return every field, including the log, to its empty starting value."""
        self.discovered_pages = []
        self.base_url = ""
        # Raw and normalized token sets, one pair per viewport.
        self.desktop_raw = None
        self.mobile_raw = None
        self.desktop_normalized = None
        self.mobile_normalized = None
        self.logs = []

    def log(self, message: str):
        """Append ``message`` to the log, prefixed with the current HH:MM:SS."""
        entry = f"[{datetime.now():%H:%M:%S}] {message}"
        self.logs.append(entry)
        if len(self.logs) > self._MAX_LOG_LINES:
            del self.logs[0]

    def get_logs(self) -> str:
        """Return the whole log as one newline-separated string."""
        return "\n".join(self.logs)
| |
|
# Single module-level AppState instance shared by all handlers in this file.
state = AppState()
| |
|
| |
|
| | |
| | |
| | |
| |
|
def get_crawler():
    """Import ``agents.crawler`` on first use and return the module."""
    import agents.crawler as crawler_module
    return crawler_module
| |
|
def get_extractor():
    """Import ``agents.extractor`` on first use and return the module."""
    import agents.extractor as extractor_module
    return extractor_module
| |
|
def get_normalizer():
    """Import ``agents.normalizer`` on first use and return the module."""
    import agents.normalizer as normalizer_module
    return normalizer_module
| |
|
def get_schema():
    """Import ``core.token_schema`` on first use and return the module."""
    import core.token_schema as schema_module
    return schema_module
| |
|
| |
|
| | |
| | |
| | |
| |
|
async def discover_pages(url: str, progress=gr.Progress()):
    """Discover pages for ``url`` and build the rows of the pages table.

    The global ``state`` is reset first, so data and log lines from a
    previous run are discarded.

    Args:
        url: Site URL entered by the user. Surrounding whitespace is ignored;
            the value must start with ``http://`` or ``https://``.
        progress: Gradio progress tracker.

    Returns:
        A ``(status, logs, rows)`` tuple. ``rows`` is a list of
        ``[selected, url, title, page_type, status]`` lists, or ``None`` when
        the URL is invalid or discovery raised.
    """
    state.reset()

    # Tolerate whitespace around a pasted URL instead of rejecting it.
    url = (url or "").strip()
    if not url.startswith(("http://", "https://")):
        return "❌ Please enter a valid URL", "", None

    state.log(f"🚀 Starting discovery for: {url}")
    progress(0.1, desc="🔍 Discovering pages...")

    try:
        discoverer = get_crawler().PageDiscoverer()
        pages = await discoverer.discover(url)

        state.discovered_pages = pages
        state.base_url = url

        state.log(f"✅ Found {len(pages)} pages")

        # Every discovered page starts out selected (first column True).
        pages_data = [
            [
                True,
                page.url,
                page.title if page.title else "(No title)",
                page.page_type.value,
                "✓" if not page.error else f"⚠ {page.error}",
            ]
            for page in pages
        ]

        progress(1.0, desc="✅ Discovery complete!")

        status = f"✅ Found {len(pages)} pages. Review and click 'Extract Tokens' to continue."

        return status, state.get_logs(), pages_data

    except Exception as e:
        import traceback
        state.log(f"❌ Error: {str(e)}")
        # Record the stack trace as well, matching extract_tokens' error path.
        state.log(traceback.format_exc())
        return f"❌ Error: {str(e)}", state.get_logs(), None
| |
|
| |
|
| | |
| | |
| | |
| |
|
def _iter_selected_urls(pages_data):
    """Yield the URL of every ticked row in the pages-table payload.

    Three payload shapes are recognised: an object exposing ``iterrows``
    (DataFrame-like), a dict carrying its rows under ``'data'``, and a plain
    list/tuple of rows. Any other shape yields nothing.
    """
    if hasattr(pages_data, 'iterrows'):
        state.log(f"📥 DataFrame with {len(pages_data)} rows, columns: {list(pages_data.columns)}")
        for _, row in pages_data.iterrows():
            try:
                # Prefer the named columns; fall back to position when absent.
                is_selected = row.get('Select', row.iloc[0] if len(row) > 0 else False)
                url = row.get('URL', row.iloc[1] if len(row) > 1 else '')
            except Exception:
                # Narrowed from a bare ``except:`` so that task cancellation
                # and interpreter-exit signals are no longer swallowed here.
                is_selected = row.iloc[0] if len(row) > 0 else False
                url = row.iloc[1] if len(row) > 1 else ''

            if is_selected and url:
                yield url
        return

    if isinstance(pages_data, dict):
        state.log(f"📥 Dict with keys: {list(pages_data.keys())}")
        rows = pages_data.get('data', [])
    elif isinstance(pages_data, (list, tuple)):
        state.log(f"📥 List with {len(pages_data)} items")
        rows = pages_data
    else:
        return

    for row in rows:
        if isinstance(row, (list, tuple)) and len(row) >= 2 and row[0]:
            yield row[1]


def _token_counts(tokens) -> str:
    """Return the 'N colors, N typography, N spacing' summary for a token set."""
    return f"{len(tokens.colors)} colors, {len(tokens.typography)} typography, {len(tokens.spacing)} spacing"


async def extract_tokens(pages_data, progress=gr.Progress()):
    """Extract and normalize tokens from the selected pages, for both viewports.

    Runs the desktop extraction first, then the mobile one, storing the raw
    and normalized results on the global ``state`` as it goes.

    Args:
        pages_data: Current value of the pages table (DataFrame-like, dict
            with a ``'data'`` key, or list of rows). ``None`` means discovery
            has not run yet.
        progress: Gradio progress tracker.

    Returns:
        A ``(status, logs, desktop_data, mobile_data)`` tuple. The two data
        items are the dicts built by ``format_tokens_for_display``; both are
        ``None`` when nothing could be extracted.
    """
    # At most this many pages are processed per run.
    max_pages = 10

    state.log(f"📥 Received pages_data type: {type(pages_data)}")

    if pages_data is None:
        return "❌ Please discover pages first", state.get_logs(), None, None

    selected_urls = []

    try:
        # Explicit loop so URLs gathered before a parsing failure are kept.
        for url in _iter_selected_urls(pages_data):
            selected_urls.append(url)
    except Exception as e:
        state.log(f"❌ Error parsing pages_data: {str(e)}")
        import traceback
        state.log(traceback.format_exc())

    state.log(f"📋 Found {len(selected_urls)} selected URLs")

    # Nothing usable came from the table: fall back to the error-free pages
    # remembered from discovery.
    if not selected_urls and state.discovered_pages:
        state.log("⚠️ No URLs from table, using all discovered pages")
        selected_urls = [p.url for p in state.discovered_pages if not p.error]

    if not selected_urls:
        return "❌ No pages selected. Please select pages or rediscover.", state.get_logs(), None, None

    selected_urls = selected_urls[:max_pages]

    state.log(f"📋 Extracting from {len(selected_urls)} pages:")
    for url in selected_urls[:3]:
        state.log(f" • {url}")
    if len(selected_urls) > 3:
        state.log(f" ... and {len(selected_urls) - 3} more")

    progress(0.05, desc="🚀 Starting extraction...")

    try:
        schema = get_schema()
        extractor_mod = get_extractor()
        normalizer_mod = get_normalizer()

        # --- Desktop pass: occupies the 0.10-0.45 span of the progress bar.
        state.log("")
        state.log("🖥️ DESKTOP EXTRACTION (1440px)")
        progress(0.1, desc="🖥️ Extracting desktop tokens...")

        desktop_extractor = extractor_mod.TokenExtractor(viewport=schema.Viewport.DESKTOP)

        def desktop_progress(p):
            progress(0.1 + (p * 0.35), desc=f"🖥️ Desktop... {int(p*100)}%")

        state.desktop_raw = await desktop_extractor.extract(selected_urls, progress_callback=desktop_progress)

        state.log(f" Raw: {_token_counts(state.desktop_raw)}")

        state.log(" Normalizing...")
        state.desktop_normalized = normalizer_mod.normalize_tokens(state.desktop_raw)
        state.log(f" Normalized: {_token_counts(state.desktop_normalized)}")

        # --- Mobile pass: occupies the 0.50-0.85 span of the progress bar.
        state.log("")
        state.log("📱 MOBILE EXTRACTION (375px)")
        progress(0.5, desc="📱 Extracting mobile tokens...")

        mobile_extractor = extractor_mod.TokenExtractor(viewport=schema.Viewport.MOBILE)

        def mobile_progress(p):
            progress(0.5 + (p * 0.35), desc=f"📱 Mobile... {int(p*100)}%")

        state.mobile_raw = await mobile_extractor.extract(selected_urls, progress_callback=mobile_progress)

        state.log(f" Raw: {_token_counts(state.mobile_raw)}")

        state.log(" Normalizing...")
        state.mobile_normalized = normalizer_mod.normalize_tokens(state.mobile_raw)
        state.log(f" Normalized: {_token_counts(state.mobile_normalized)}")

        progress(0.95, desc="📊 Preparing results...")

        desktop_data = format_tokens_for_display(state.desktop_normalized)
        mobile_data = format_tokens_for_display(state.mobile_normalized)

        state.log("")
        state.log("=" * 50)
        state.log("✅ EXTRACTION COMPLETE!")
        state.log("=" * 50)

        progress(1.0, desc="✅ Complete!")

        status = f"""## ✅ Extraction Complete!

| Viewport | Colors | Typography | Spacing |
|----------|--------|------------|---------|
| Desktop | {len(state.desktop_normalized.colors)} | {len(state.desktop_normalized.typography)} | {len(state.desktop_normalized.spacing)} |
| Mobile | {len(state.mobile_normalized.colors)} | {len(state.mobile_normalized.typography)} | {len(state.mobile_normalized.spacing)} |

**Next:** Review the tokens below. Accept or reject, then proceed to Stage 2.
"""

        return status, state.get_logs(), desktop_data, mobile_data

    except Exception as e:
        import traceback
        state.log(f"❌ Error: {str(e)}")
        state.log(traceback.format_exc())
        return f"❌ Error: {str(e)}", state.get_logs(), None, None
| |
|
| |
|
def format_tokens_for_display(normalized) -> dict:
    """Turn a normalized token set into row lists for the three review tables.

    Returns a dict with ``colors``, ``typography`` and ``spacing`` keys, each
    a list of rows whose first cell is ``True``. Colors (top 50) and
    typography (top 30) are ordered by descending frequency; spacing (first
    20) is ordered by ascending pixel value. ``None`` gives three empty lists.
    """
    if normalized is None:
        return {"colors": [], "typography": [], "spacing": []}

    def as_sequence(tokens):
        # Token containers may be dicts or already-flat sequences.
        return list(tokens.values()) if isinstance(tokens, dict) else tokens

    def confidence_label(token):
        return token.confidence.value if token.confidence else "medium"

    def descending_frequency(token):
        return -token.frequency

    color_rows = [
        [
            True,
            color.value,
            color.suggested_name or "",
            color.frequency,
            confidence_label(color),
            f"{color.contrast_white:.1f}:1" if color.contrast_white else "N/A",
            "✓" if color.wcag_aa_small_text else "✗",
            ", ".join(color.contexts[:2]) if color.contexts else "",
        ]
        for color in sorted(as_sequence(normalized.colors), key=descending_frequency)[:50]
    ]

    typography_rows = [
        [
            True,
            style.font_family,
            style.font_size,
            str(style.font_weight),
            style.line_height or "",
            style.suggested_name or "",
            style.frequency,
            confidence_label(style),
        ]
        for style in sorted(as_sequence(normalized.typography), key=descending_frequency)[:30]
    ]

    spacing_rows = [
        [
            True,
            space.value,
            f"{space.value_px}px",
            space.suggested_name or "",
            space.frequency,
            "✓" if space.fits_base_8 else "",
            confidence_label(space),
        ]
        for space in sorted(as_sequence(normalized.spacing), key=lambda token: token.value_px)[:20]
    ]

    return {
        "colors": color_rows,
        "typography": typography_rows,
        "spacing": spacing_rows,
    }
| |
|
| |
|
def switch_viewport(viewport: str):
    """Return the colors, typography and spacing rows for the chosen viewport.

    The exact label "Desktop (1440px)" selects the desktop tokens; any other
    value selects the mobile tokens.
    """
    showing_desktop = viewport == "Desktop (1440px)"
    tokens = state.desktop_normalized if showing_desktop else state.mobile_normalized
    rows = format_tokens_for_display(tokens)
    return rows["colors"], rows["typography"], rows["spacing"]
| |
|
| |
|
| | |
| | |
| | |
| |
|
def _serialize_tokens(normalized) -> dict:
    """Convert one viewport's normalized token set into plain dicts and lists."""

    def as_sequence(tokens):
        # Token containers may be dicts (as format_tokens_for_display also
        # allows); iterating a dict directly would yield its keys, not tokens.
        return list(tokens.values()) if isinstance(tokens, dict) else tokens

    return {
        "colors": [
            {"value": c.value, "name": c.suggested_name, "frequency": c.frequency,
             "confidence": c.confidence.value if c.confidence else "medium"}
            for c in as_sequence(normalized.colors)
        ],
        "typography": [
            {"font_family": t.font_family, "font_size": t.font_size,
             "font_weight": t.font_weight, "line_height": t.line_height,
             "name": t.suggested_name, "frequency": t.frequency}
            for t in as_sequence(normalized.typography)
        ],
        "spacing": [
            {"value": s.value, "value_px": s.value_px, "name": s.suggested_name,
             "frequency": s.frequency, "fits_base_8": s.fits_base_8}
            for s in as_sequence(normalized.spacing)
        ],
    }


def export_tokens_json():
    """Serialize the normalized desktop and mobile tokens to a JSON string.

    Returns:
        A 2-space-indented JSON document with ``metadata``, ``desktop`` and
        ``mobile`` keys. A viewport that has no normalized tokens in the
        global ``state`` is exported as ``null``.
    """
    result = {
        "metadata": {
            "source_url": state.base_url,
            "extracted_at": datetime.now().isoformat(),
            "version": "v1-extracted",
        },
        "desktop": None,
        "mobile": None,
    }

    if state.desktop_normalized:
        result["desktop"] = _serialize_tokens(state.desktop_normalized)

    if state.mobile_normalized:
        result["mobile"] = _serialize_tokens(state.mobile_normalized)

    # default=str stringifies any value json cannot encode natively.
    return json.dumps(result, indent=2, default=str)
| |
|
| |
|
| | |
| | |
| | |
| |
|
def create_ui():
    """Build the Gradio Blocks interface and wire up its event handlers.

    Sections, in order: token configuration, page discovery, Stage 1 token
    review (with a Desktop/Mobile toggle), a Stage 2 placeholder, and Stage 3
    JSON export.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is expected to launch it.
    """

    with gr.Blocks(
        title="Design System Extractor v2",
        theme=gr.themes.Soft(),
        css="""
        .color-swatch { display: inline-block; width: 24px; height: 24px; border-radius: 4px; margin-right: 8px; vertical-align: middle; }
        """
    ) as app:

        gr.Markdown("""
        # 🎨 Design System Extractor v2

        **Reverse-engineer design systems from live websites.**

        A semi-automated, human-in-the-loop system that extracts, normalizes, and upgrades design tokens.

        ---
        """)

        # ---- Configuration --------------------------------------------------
        # Starts collapsed when a token was already found in the environment.
        with gr.Accordion("⚙️ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):
            gr.Markdown("**HuggingFace Token** — Required for Stage 2 (AI upgrades)")
            with gr.Row():
                hf_token_input = gr.Textbox(
                    label="HF Token", placeholder="hf_xxxx", type="password",
                    scale=4, value=HF_TOKEN_FROM_ENV,
                )
                save_token_btn = gr.Button("💾 Save", scale=1)
            token_status = gr.Markdown("✅ Token loaded" if HF_TOKEN_FROM_ENV else "⏳ Enter token")

            def save_token(token):
                # Strip before validating so whitespace-only input can no
                # longer pass the length check and be stored as an empty token.
                token = (token or "").strip()
                if len(token) > 10:
                    os.environ["HF_TOKEN"] = token
                    return "✅ Token saved!"
                return "❌ Invalid token"

            save_token_btn.click(save_token, [hf_token_input], [token_status])

        # ---- Step 1: page discovery -----------------------------------------
        with gr.Accordion("🔍 Step 1: Discover Pages", open=True):
            gr.Markdown("Enter your website URL to discover pages for extraction.")

            with gr.Row():
                url_input = gr.Textbox(label="Website URL", placeholder="https://example.com", scale=4)
                discover_btn = gr.Button("🔍 Discover Pages", variant="primary", scale=1)

            discover_status = gr.Markdown("")

            with gr.Row():
                log_output = gr.Textbox(label="📋 Log", lines=8, interactive=False)

            # Hidden until a discovery run has finished.
            pages_table = gr.Dataframe(
                headers=["Select", "URL", "Title", "Type", "Status"],
                datatype=["bool", "str", "str", "str", "str"],
                label="Discovered Pages",
                interactive=True,
                visible=False,
            )

            extract_btn = gr.Button("🚀 Extract Tokens (Desktop + Mobile)", variant="primary", visible=False)

        # ---- Stage 1: token review ------------------------------------------
        with gr.Accordion("📊 Stage 1: Review Extracted Tokens", open=False) as stage1_accordion:

            extraction_status = gr.Markdown("")

            gr.Markdown("""
            **Review the extracted tokens.** Toggle between Desktop and Mobile viewports.
            Accept or reject tokens, then proceed to Stage 2 for AI-powered upgrades.
            """)

            viewport_toggle = gr.Radio(
                choices=["Desktop (1440px)", "Mobile (375px)"],
                value="Desktop (1440px)",
                label="Viewport",
            )

            with gr.Tabs():
                with gr.Tab("🎨 Colors"):
                    colors_table = gr.Dataframe(
                        headers=["Accept", "Color", "Suggested Name", "Frequency", "Confidence", "Contrast", "AA", "Context"],
                        datatype=["bool", "str", "str", "number", "str", "str", "str", "str"],
                        label="Colors",
                        interactive=True,
                    )

                with gr.Tab("📝 Typography"):
                    typography_table = gr.Dataframe(
                        headers=["Accept", "Font", "Size", "Weight", "Line Height", "Suggested Name", "Frequency", "Confidence"],
                        datatype=["bool", "str", "str", "str", "str", "str", "number", "str"],
                        label="Typography",
                        interactive=True,
                    )

                with gr.Tab("📏 Spacing"):
                    spacing_table = gr.Dataframe(
                        headers=["Accept", "Value", "Pixels", "Suggested Name", "Frequency", "Base 8", "Confidence"],
                        datatype=["bool", "str", "str", "str", "number", "str", "str"],
                        label="Spacing",
                        interactive=True,
                    )

            # No click handler is attached to this button in this function.
            proceed_stage2_btn = gr.Button("➡️ Proceed to Stage 2: AI Upgrades", variant="primary")

        # ---- Stage 2: placeholder -------------------------------------------
        with gr.Accordion("🧠 Stage 2: AI-Powered Upgrades (Coming Soon)", open=False):
            gr.Markdown("""
            **Agent 3 (Design System Advisor)** will analyze your tokens and propose:

            - **Type Scale Options:** Choose from A/B/C (1.25, 1.333, 1.414 ratios)
            - **Color Ramp Generation:** AA-compliant tints and shades
            - **Spacing System:** Aligned to 8px base grid
            - **Naming Conventions:** Semantic token names

            Each option will show a **live preview** so you can see the changes before accepting.

            *Requires HuggingFace token for LLM inference.*
            """)

        # ---- Stage 3: export ------------------------------------------------
        with gr.Accordion("📦 Stage 3: Export", open=False):
            gr.Markdown("Export your design tokens to JSON (compatible with Figma Tokens Studio).")

            export_btn = gr.Button("📥 Export JSON", variant="secondary")
            export_output = gr.Code(label="Tokens JSON", language="json", lines=20)

            export_btn.click(export_tokens_json, outputs=[export_output])

        # ---- Event wiring ---------------------------------------------------
        # Per-session holders for the formatted token rows of each viewport.
        desktop_data = gr.State({})
        mobile_data = gr.State({})

        def tables_from(data):
            # extract_tokens returns None for the token data on its error
            # paths; treat that as "no rows" rather than failing on None.get.
            data = data or {}
            return data.get("colors", []), data.get("typography", []), data.get("spacing", [])

        # Discover, then reveal the pages table and the extract button.
        discover_btn.click(
            fn=discover_pages,
            inputs=[url_input],
            outputs=[discover_status, log_output, pages_table],
        ).then(
            fn=lambda: (gr.update(visible=True), gr.update(visible=True)),
            outputs=[pages_table, extract_btn],
        )

        # Extract, fill the review tables with the desktop rows, then open
        # the Stage 1 accordion.
        extract_btn.click(
            fn=extract_tokens,
            inputs=[pages_table],
            outputs=[extraction_status, log_output, desktop_data, mobile_data],
        ).then(
            fn=tables_from,
            inputs=[desktop_data],
            outputs=[colors_table, typography_table, spacing_table],
        ).then(
            fn=lambda: gr.update(open=True),
            outputs=[stage1_accordion],
        )

        # Re-render the three tables whenever the viewport radio changes.
        viewport_toggle.change(
            fn=switch_viewport,
            inputs=[viewport_toggle],
            outputs=[colors_table, typography_table, spacing_table],
        )

        gr.Markdown("""
        ---
        **Design System Extractor v2** | Built with Playwright + Gradio + LangGraph + HuggingFace

        *A semi-automated co-pilot for design system recovery and modernization.*
        """)

    return app
| |
|
| |
|
| | |
| | |
| | |
| |
|
# Script entry point: build the UI and serve it on port 7860, bound to
# 0.0.0.0 rather than a loopback address.
if __name__ == "__main__":
    app = create_ui()
    app.launch(server_name="0.0.0.0", server_port=7860)
| |
|