""" Design System Extractor v2 — Main Application ============================================== A semi-automated, human-in-the-loop agentic system that reverse-engineers design systems from live websites. Usage: python app.py """ import os import asyncio import gradio as gr from datetime import datetime # Get HF token from environment if available HF_TOKEN_FROM_ENV = os.getenv("HF_TOKEN", "") # ============================================================================= # GLOBAL STATE # ============================================================================= current_extraction: dict = {} user_hf_token: str = "" # ============================================================================= # HF TOKEN MANAGEMENT # ============================================================================= def set_hf_token(token: str) -> str: """Set the HF token globally.""" global user_hf_token if not token or len(token) < 10: return "❌ Please enter a valid HuggingFace token" user_hf_token = token.strip() os.environ["HF_TOKEN"] = user_hf_token return "✅ Token saved! You can now use the extractor." # ============================================================================= # LAZY IMPORTS (avoid circular imports at startup) # ============================================================================= _crawler_module = None _extractor_module = None _schema_module = None def get_crawler(): global _crawler_module if _crawler_module is None: from agents import crawler as _crawler_module return _crawler_module def get_extractor(): global _extractor_module if _extractor_module is None: from agents import extractor as _extractor_module return _extractor_module def get_schema(): global _schema_module if _schema_module is None: from core import token_schema as _schema_module return _schema_module # ============================================================================= # STAGE 1: URL INPUT & PAGE DISCOVERY # ============================================================================= async def discover_site_pages(url: str, progress=gr.Progress()) -> tuple: """ Discover pages from a website URL. Returns tuple of (status_message, pages_dataframe, pages_json) """ if not url or not url.startswith(("http://", "https://")): return "❌ Please enter a valid URL starting with http:// or https://", None, None progress(0, desc="Initializing browser...") try: crawler = get_crawler() discoverer = crawler.PageDiscoverer() def update_progress(p): progress(p, desc=f"Discovering pages... ({int(p*100)}%)") pages = await discoverer.discover(url, progress_callback=update_progress) # Format for display pages_data = [] for page in pages: pages_data.append({ "Select": page.selected, "URL": page.url, "Title": page.title or "(No title)", "Type": page.page_type.value, "Status": "✓" if not page.error else f"⚠ {page.error}", }) # Store for later use current_extraction["discovered_pages"] = pages current_extraction["base_url"] = url status = f"✅ Found {len(pages)} pages. Select the pages you want to extract tokens from." return status, pages_data, [p.model_dump() for p in pages] except Exception as e: import traceback return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None, None async def start_extraction(pages_selection: list, viewport_choice: str, progress=gr.Progress()) -> tuple: """ Start token extraction from selected pages. 
# =============================================================================
# STAGE 1: URL INPUT & PAGE DISCOVERY
# =============================================================================

async def discover_site_pages(url: str, progress=gr.Progress()) -> tuple:
    """
    Discover pages from a website URL.

    Returns tuple of (status_message, pages_dataframe, pages_json)
    """
    if not url or not url.startswith(("http://", "https://")):
        return "❌ Please enter a valid URL starting with http:// or https://", None, None

    progress(0, desc="Initializing browser...")

    try:
        crawler = get_crawler()
        discoverer = crawler.PageDiscoverer()

        def update_progress(p):
            progress(p, desc=f"Discovering pages... ({int(p*100)}%)")

        pages = await discoverer.discover(url, progress_callback=update_progress)

        # Format for display
        pages_data = []
        for page in pages:
            pages_data.append({
                "Select": page.selected,
                "URL": page.url,
                "Title": page.title or "(No title)",
                "Type": page.page_type.value,
                "Status": "✓" if not page.error else f"⚠ {page.error}",
            })

        # Store for later use
        current_extraction["discovered_pages"] = pages
        current_extraction["base_url"] = url

        status = f"✅ Found {len(pages)} pages. Select the pages you want to extract tokens from."
        return status, pages_data, [p.model_dump() for p in pages]

    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None, None


async def start_extraction(pages_selection: list, viewport_choice: str, progress=gr.Progress()) -> tuple:
    """
    Start token extraction from selected pages.

    Returns tuple of (status, colors_data, typography_data, spacing_data)
    """
    # gr.Dataframe hands its value to callbacks as a pandas DataFrame by default,
    # so avoid plain truthiness checks and normalise to a list of dicts before
    # the per-row lookups below.
    if pages_selection is None or len(pages_selection) == 0:
        return "❌ Please select at least one page", None, None, None

    if hasattr(pages_selection, "to_dict"):
        pages_selection = pages_selection.to_dict("records")

    # Get selected URLs
    selected_urls = []
    for row in pages_selection:
        if row.get("Select", False):
            selected_urls.append(row["URL"])

    if not selected_urls:
        return "❌ Please select at least one page using the checkboxes", None, None, None

    # Determine viewport
    schema = get_schema()
    viewport = schema.Viewport.DESKTOP if viewport_choice == "Desktop (1440px)" else schema.Viewport.MOBILE

    progress(0, desc=f"Starting {viewport.value} extraction...")

    try:
        extractor_mod = get_extractor()
        extractor = extractor_mod.TokenExtractor(viewport=viewport)

        def update_progress(p):
            progress(p, desc=f"Extracting tokens... ({int(p*100)}%)")

        result = await extractor.extract(selected_urls, progress_callback=update_progress)

        # Store result
        current_extraction[f"{viewport.value}_tokens"] = result

        # Format colors for display
        colors_data = []
        for color in sorted(result.colors, key=lambda c: -c.frequency)[:50]:
            colors_data.append({
                "Accept": True,
                "Color": color.value,
                "Frequency": color.frequency,
                "Context": ", ".join(color.contexts[:3]),
                "Contrast (White)": f"{color.contrast_white}:1",
                "AA Text": "✓" if color.wcag_aa_small_text else "✗",
                "Confidence": color.confidence.value,
            })

        # Format typography for display
        typography_data = []
        for typo in sorted(result.typography, key=lambda t: -t.frequency)[:30]:
            typography_data.append({
                "Accept": True,
                "Font": typo.font_family,
                "Size": typo.font_size,
                "Weight": typo.font_weight,
                "Line Height": typo.line_height,
                "Elements": ", ".join(typo.elements[:3]),
                "Frequency": typo.frequency,
            })

        # Format spacing for display
        spacing_data = []
        for space in sorted(result.spacing, key=lambda s: s.value_px)[:20]:
            spacing_data.append({
                "Accept": True,
                "Value": space.value,
                "Frequency": space.frequency,
                "Context": ", ".join(space.contexts[:2]),
                "Fits 8px": "✓" if space.fits_base_8 else "",
                "Outlier": "⚠" if space.is_outlier else "",
            })

        # Summary
        status = f"""✅ Extraction Complete ({viewport.value})

**Summary:**
- Pages crawled: {len(result.pages_crawled)}
- Colors found: {len(result.colors)}
- Typography styles: {len(result.typography)}
- Spacing values: {len(result.spacing)}
- Font families: {len(result.font_families)}
- Detected spacing base: {result.spacing_base or 'Unknown'}px
- Duration: {result.extraction_duration_ms}ms
"""
        if result.warnings:
            status += f"\n⚠️ Warnings: {len(result.warnings)}"
        if result.errors:
            status += f"\n❌ Errors: {len(result.errors)}"

        return status, colors_data, typography_data, spacing_data

    except Exception as e:
        import traceback
        return f"❌ Extraction failed: {str(e)}\n\n{traceback.format_exc()}", None, None, None


def export_tokens_json():
    """Export current tokens to JSON."""
    import json

    result = {}

    if "desktop_tokens" in current_extraction:
        desktop = current_extraction["desktop_tokens"]
        result["desktop"] = {
            "colors": [c.model_dump() for c in desktop.colors],
            "typography": [t.model_dump() for t in desktop.typography],
            "spacing": [s.model_dump() for s in desktop.spacing],
            "metadata": desktop.summary(),
        }

    if "mobile_tokens" in current_extraction:
        mobile = current_extraction["mobile_tokens"]
        result["mobile"] = {
            "colors": [c.model_dump() for c in mobile.colors],
            "typography": [t.model_dump() for t in mobile.typography],
            "spacing": [s.model_dump() for s in mobile.spacing],
            "metadata": mobile.summary(),
        }

    if not result:
        return '{"error": "No tokens extracted yet. Please run extraction first."}'

    return json.dumps(result, indent=2, default=str)
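
# Illustrative shape of the JSON produced by export_tokens_json() above (a
# sketch only — the exact token fields come from each model's model_dump() in
# core.token_schema and may include more keys than the ones referenced here):
#
#   {
#     "desktop": {
#       "colors":     [{"value": "...", "frequency": 0, "contexts": [...], ...}],
#       "typography": [{"font_family": "...", "font_size": "...", "font_weight": 0, ...}],
#       "spacing":    [{"value": "...", "value_px": 0, ...}],
#       "metadata":   { ... }   # result of .summary() on the extraction result
#     },
#     "mobile": { ... same structure ... }
#   }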
# =============================================================================
# UI BUILDING
# =============================================================================

def create_ui():
    """Create the Gradio interface."""

    with gr.Blocks(
        title="Design System Extractor v2",
        theme=gr.themes.Soft(),
    ) as app:

        # Header
        gr.Markdown("""
        # 🎨 Design System Extractor v2

        **Reverse-engineer design systems from live websites.**
        Extract colors, typography, and spacing tokens from any website and export to Figma-compatible JSON.

        ---
        """)

        # =================================================================
        # CONFIGURATION SECTION
        # =================================================================
        with gr.Accordion("⚙️ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):
            gr.Markdown("""
            **HuggingFace Token** is required for AI-powered features (Agents 2-4).
            Get your token at: [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)

            *Note: Basic extraction (Agent 1) works without a token.*
            """)

            with gr.Row():
                hf_token_input = gr.Textbox(
                    label="HuggingFace Token",
                    placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
                    type="password",
                    scale=4,
                    value=HF_TOKEN_FROM_ENV if HF_TOKEN_FROM_ENV else "",
                )
                save_token_btn = gr.Button("💾 Save Token", scale=1)

            token_status = gr.Markdown(
                "✅ Token loaded from environment" if HF_TOKEN_FROM_ENV
                else "⏳ Enter your HF token to enable all features"
            )

            save_token_btn.click(
                fn=set_hf_token,
                inputs=[hf_token_input],
                outputs=[token_status],
            )

        # =================================================================
        # STAGE 1: URL Input & Discovery
        # =================================================================
        with gr.Accordion("📍 Stage 1: Website Discovery", open=True):
            gr.Markdown("""
            **Step 1:** Enter your website URL and discover pages.
            The system will automatically find and classify pages for extraction.
            """)

            with gr.Row():
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://example.com",
                    scale=4,
                )
                discover_btn = gr.Button("🔍 Discover Pages", variant="primary", scale=1)

            discovery_status = gr.Markdown("")

            pages_table = gr.Dataframe(
                headers=["Select", "URL", "Title", "Type", "Status"],
                datatype=["bool", "str", "str", "str", "str"],
                interactive=True,
                label="Discovered Pages",
                visible=False,
            )

            pages_json = gr.JSON(visible=False)

        # =================================================================
        # STAGE 2: Extraction
        # =================================================================
        with gr.Accordion("🔬 Stage 2: Token Extraction", open=False):
            gr.Markdown("""
            **Step 2:** Select pages and viewport, then extract design tokens.
""") with gr.Row(): viewport_radio = gr.Radio( choices=["Desktop (1440px)", "Mobile (375px)"], value="Desktop (1440px)", label="Viewport", ) extract_btn = gr.Button("🚀 Extract Tokens", variant="primary") extraction_status = gr.Markdown("") with gr.Tabs(): with gr.Tab("🎨 Colors"): colors_table = gr.Dataframe( headers=["Accept", "Color", "Frequency", "Context", "Contrast (White)", "AA Text", "Confidence"], datatype=["bool", "str", "number", "str", "str", "str", "str"], interactive=True, label="Extracted Colors", ) with gr.Tab("📝 Typography"): typography_table = gr.Dataframe( headers=["Accept", "Font", "Size", "Weight", "Line Height", "Elements", "Frequency"], datatype=["bool", "str", "str", "number", "str", "str", "number"], interactive=True, label="Extracted Typography", ) with gr.Tab("📏 Spacing"): spacing_table = gr.Dataframe( headers=["Accept", "Value", "Frequency", "Context", "Fits 8px", "Outlier"], datatype=["bool", "str", "number", "str", "str", "str"], interactive=True, label="Extracted Spacing", ) # ================================================================= # STAGE 3: Export # ================================================================= with gr.Accordion("📦 Stage 3: Export", open=False): gr.Markdown(""" **Step 3:** Review and export your design tokens. """) with gr.Row(): export_btn = gr.Button("📥 Export JSON", variant="secondary") export_output = gr.Code( label="Exported Tokens (JSON)", language="json", lines=20, ) # ================================================================= # EVENT HANDLERS # ================================================================= # Discovery discover_btn.click( fn=discover_site_pages, inputs=[url_input], outputs=[discovery_status, pages_table, pages_json], ).then( fn=lambda: gr.update(visible=True), outputs=[pages_table], ) # Extraction extract_btn.click( fn=start_extraction, inputs=[pages_table, viewport_radio], outputs=[extraction_status, colors_table, typography_table, spacing_table], ) # Export export_btn.click( fn=export_tokens_json, outputs=[export_output], ) # ================================================================= # FOOTER # ================================================================= gr.Markdown(""" --- **Design System Extractor v2** | Built with LangGraph + Gradio + HuggingFace *A semi-automated co-pilot for design system recovery and modernization.* **Models:** Microsoft Phi (Normalizer) • Meta Llama (Advisor) • Mistral Codestral (Generator) """) return app # ============================================================================= # MAIN # ============================================================================= if __name__ == "__main__": app = create_ui() app.launch( server_name="0.0.0.0", server_port=7860, )