# NOTE(review): the "Spaces: / Sleeping / Sleeping" lines were Hugging Face
# Spaces page chrome captured by the scrape, not part of the source file.
| """ | |
| Design System Extractor v2 β Main Application | |
| ============================================== | |
| A semi-automated, human-in-the-loop agentic system that reverse-engineers | |
| design systems from live websites. | |
| Usage: | |
| python app.py | |
| """ | |
| import os | |
| import asyncio | |
| import gradio as gr | |
| from datetime import datetime | |
| # Get HF token from environment if available | |
| HF_TOKEN_FROM_ENV = os.getenv("HF_TOKEN", "") | |
| # ============================================================================= | |
| # GLOBAL STATE | |
| # ============================================================================= | |
| current_extraction: dict = {} | |
| user_hf_token: str = "" | |
# =============================================================================
# HF TOKEN MANAGEMENT
# =============================================================================
def set_hf_token(token: str) -> str:
    """Store the user's HuggingFace token for AI-powered features.

    The token is normalized (surrounding whitespace stripped) *before*
    validation, so a padded or whitespace-only entry cannot slip past the
    length check — the original validated the raw string and only stripped
    afterwards. The accepted token is mirrored into ``os.environ`` so
    downstream HF libraries pick it up.

    Args:
        token: Raw text from the token textbox (may be None or padded).

    Returns:
        A user-facing status message (success or validation error).
    """
    global user_hf_token
    # Strip first: whitespace must not count toward the minimum length.
    token = (token or "").strip()
    if len(token) < 10:
        return "β Please enter a valid HuggingFace token"
    user_hf_token = token
    os.environ["HF_TOKEN"] = user_hf_token
    return "β Token saved! You can now use the extractor."
# =============================================================================
# LAZY IMPORTS (avoid circular imports at startup)
# =============================================================================
# Per-module caches; the matching get_*() accessor fills its slot on first use.
_crawler_module = _extractor_module = _schema_module = None
def get_crawler():
    """Return the crawler agent module, importing and caching it on first use."""
    global _crawler_module
    if _crawler_module is not None:
        return _crawler_module
    # Deferred import: avoids circular imports at application startup.
    from agents import crawler as _crawler_module
    return _crawler_module
def get_extractor():
    """Return the extractor agent module, importing and caching it on first use."""
    global _extractor_module
    if _extractor_module is not None:
        return _extractor_module
    # Deferred import: avoids circular imports at application startup.
    from agents import extractor as _extractor_module
    return _extractor_module
def get_schema():
    """Return the token-schema module, importing and caching it on first use."""
    global _schema_module
    if _schema_module is not None:
        return _schema_module
    # Deferred import: avoids circular imports at application startup.
    from core import token_schema as _schema_module
    return _schema_module
# =============================================================================
# STAGE 1: URL INPUT & PAGE DISCOVERY
# =============================================================================
async def discover_site_pages(url: str, progress=gr.Progress()) -> tuple:
    """Crawl *url* and classify its pages for later token extraction.

    Args:
        url: Base website URL; must start with http:// or https://.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        (status_markdown, table_rows, pages_json); the last two slots are
        ``None`` when validation or the crawl itself fails.
    """
    if not url or not url.startswith(("http://", "https://")):
        return "β Please enter a valid URL starting with http:// or https://", None, None

    progress(0, desc="Initializing browser...")
    try:
        discoverer = get_crawler().PageDiscoverer()

        def on_progress(fraction):
            progress(fraction, desc=f"Discovering pages... ({int(fraction * 100)}%)")

        pages = await discoverer.discover(url, progress_callback=on_progress)

        # Rows for the interactive Dataframe shown to the user.
        rows = [
            {
                "Select": page.selected,
                "URL": page.url,
                "Title": page.title or "(No title)",
                "Type": page.page_type.value,
                "Status": "β" if not page.error else f"β {page.error}",
            }
            for page in pages
        ]

        # Persist for Stage 2 (extraction) to pick up later.
        current_extraction["discovered_pages"] = pages
        current_extraction["base_url"] = url

        status = f"β Found {len(pages)} pages. Select the pages you want to extract tokens from."
        return status, rows, [p.model_dump() for p in pages]
    except Exception as e:
        import traceback
        return f"β Error: {str(e)}\n\n{traceback.format_exc()}", None, None
def _format_color_rows(colors):
    """Rows for the Colors tab: top 50 colors by descending frequency."""
    return [
        {
            "Accept": True,
            "Color": color.value,
            "Frequency": color.frequency,
            "Context": ", ".join(color.contexts[:3]),
            "Contrast (White)": f"{color.contrast_white}:1",
            "AA Text": "β" if color.wcag_aa_small_text else "β",
            "Confidence": color.confidence.value,
        }
        for color in sorted(colors, key=lambda c: -c.frequency)[:50]
    ]


def _format_typography_rows(styles):
    """Rows for the Typography tab: top 30 styles by descending frequency."""
    return [
        {
            "Accept": True,
            "Font": typo.font_family,
            "Size": typo.font_size,
            "Weight": typo.font_weight,
            "Line Height": typo.line_height,
            "Elements": ", ".join(typo.elements[:3]),
            "Frequency": typo.frequency,
        }
        for typo in sorted(styles, key=lambda t: -t.frequency)[:30]
    ]


def _format_spacing_rows(values):
    """Rows for the Spacing tab: the 20 smallest spacing values, ascending."""
    return [
        {
            "Accept": True,
            "Value": space.value,
            "Frequency": space.frequency,
            "Context": ", ".join(space.contexts[:2]),
            "Fits 8px": "β" if space.fits_base_8 else "",
            "Outlier": "β " if space.is_outlier else "",
        }
        for space in sorted(values, key=lambda s: s.value_px)[:20]
    ]


async def start_extraction(pages_selection: list, viewport_choice: str, progress=gr.Progress()) -> tuple:
    """Extract design tokens from the pages the user selected.

    Args:
        pages_selection: Rows from the discovery Dataframe; a row is used
            when its "Select" checkbox is truthy.
        viewport_choice: "Desktop (1440px)" or "Mobile (375px)".
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        (status_markdown, colors_rows, typography_rows, spacing_rows);
        the row slots are ``None`` on validation failure or error.
    """
    if not pages_selection:
        return "β Please select at least one page", None, None, None

    # Keep only rows whose "Select" checkbox is ticked.
    selected_urls = [row["URL"] for row in pages_selection if row.get("Select", False)]
    if not selected_urls:
        return "β Please select at least one page using the checkboxes", None, None, None

    schema = get_schema()
    viewport = schema.Viewport.DESKTOP if viewport_choice == "Desktop (1440px)" else schema.Viewport.MOBILE
    progress(0, desc=f"Starting {viewport.value} extraction...")
    try:
        extractor = get_extractor().TokenExtractor(viewport=viewport)

        def update_progress(p):
            progress(p, desc=f"Extracting tokens... ({int(p * 100)}%)")

        result = await extractor.extract(selected_urls, progress_callback=update_progress)
        # Stored per viewport so export_tokens_json() can pick it up later.
        current_extraction[f"{viewport.value}_tokens"] = result

        colors_data = _format_color_rows(result.colors)
        typography_data = _format_typography_rows(result.typography)
        spacing_data = _format_spacing_rows(result.spacing)

        # Fix: the original rendered "Unknownpx" when no base was detected —
        # only append "px" to an actual numeric value.
        spacing_base = f"{result.spacing_base}px" if result.spacing_base else "Unknown"
        status = f"""β Extraction Complete ({viewport.value})
**Summary:**
- Pages crawled: {len(result.pages_crawled)}
- Colors found: {len(result.colors)}
- Typography styles: {len(result.typography)}
- Spacing values: {len(result.spacing)}
- Font families: {len(result.font_families)}
- Detected spacing base: {spacing_base}
- Duration: {result.extraction_duration_ms}ms
"""
        if result.warnings:
            status += f"\nβ οΈ Warnings: {len(result.warnings)}"
        if result.errors:
            status += f"\nβ Errors: {len(result.errors)}"
        return status, colors_data, typography_data, spacing_data
    except Exception as e:
        import traceback
        return f"β Extraction failed: {str(e)}\n\n{traceback.format_exc()}", None, None, None
def export_tokens_json(extraction=None):
    """Serialize the extracted tokens to a pretty-printed JSON string.

    The desktop and mobile branches of the original were copy-paste
    duplicates; they are folded into one loop over the known viewports.

    Args:
        extraction: Optional mapping holding "<viewport>_tokens" results;
            defaults to the module-level ``current_extraction`` state
            (backward-compatible — existing zero-argument callers are
            unaffected).

    Returns:
        JSON text with one top-level key per extracted viewport, or an
        ``{"error": ...}`` payload when nothing has been extracted yet.
    """
    import json

    source = current_extraction if extraction is None else extraction
    result = {}
    for viewport in ("desktop", "mobile"):
        tokens = source.get(f"{viewport}_tokens")
        if tokens is None:
            continue
        result[viewport] = {
            "colors": [c.model_dump() for c in tokens.colors],
            "typography": [t.model_dump() for t in tokens.typography],
            "spacing": [s.model_dump() for s in tokens.spacing],
            "metadata": tokens.summary(),
        }
    if not result:
        return '{"error": "No tokens extracted yet. Please run extraction first."}'
    # default=str stringifies non-JSON types (enums, datetimes) left by model_dump.
    return json.dumps(result, indent=2, default=str)
# =============================================================================
# UI BUILDING
# =============================================================================
def create_ui():
    """Build and return the Gradio Blocks application.

    Lays out a configuration accordion plus three sequential stages
    (discovery -> extraction -> export) and wires the buttons to the
    pipeline functions defined above.
    """
    with gr.Blocks(
        title="Design System Extractor v2",
        theme=gr.themes.Soft(),
    ) as app:
        # --- Header ---
        gr.Markdown("""
        # π¨ Design System Extractor v2
        **Reverse-engineer design systems from live websites.**
        Extract colors, typography, and spacing tokens from any website and export to Figma-compatible JSON.
        ---
        """)

        # --- Configuration: HF token entry (open when no env token) ---
        with gr.Accordion("βοΈ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):
            gr.Markdown("""
            **HuggingFace Token** is required for AI-powered features (Agent 2-4).
            Get your token at: [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
            *Note: Basic extraction (Agent 1) works without a token.*
            """)
            with gr.Row():
                hf_token_input = gr.Textbox(
                    label="HuggingFace Token",
                    placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
                    type="password",
                    scale=4,
                    value=HF_TOKEN_FROM_ENV if HF_TOKEN_FROM_ENV else "",
                )
                save_token_btn = gr.Button("πΎ Save Token", scale=1)
            token_status = gr.Markdown(
                "β Token loaded from environment" if HF_TOKEN_FROM_ENV else "β³ Enter your HF token to enable all features"
            )
            save_token_btn.click(
                fn=set_hf_token,
                inputs=[hf_token_input],
                outputs=[token_status],
            )

        # --- Stage 1: URL input & page discovery ---
        with gr.Accordion("π Stage 1: Website Discovery", open=True):
            gr.Markdown("""
            **Step 1:** Enter your website URL and discover pages.
            The system will automatically find and classify pages for extraction.
            """)
            with gr.Row():
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://example.com",
                    scale=4,
                )
                discover_btn = gr.Button("π Discover Pages", variant="primary", scale=1)
            discovery_status = gr.Markdown("")
            # Hidden until discovery succeeds (revealed by the .then() chain below).
            pages_table = gr.Dataframe(
                headers=["Select", "URL", "Title", "Type", "Status"],
                datatype=["bool", "str", "str", "str", "str"],
                interactive=True,
                label="Discovered Pages",
                visible=False,
            )
            pages_json = gr.JSON(visible=False)

        # --- Stage 2: token extraction ---
        with gr.Accordion("π¬ Stage 2: Token Extraction", open=False):
            gr.Markdown("""
            **Step 2:** Select pages and viewport, then extract design tokens.
            """)
            with gr.Row():
                viewport_radio = gr.Radio(
                    choices=["Desktop (1440px)", "Mobile (375px)"],
                    value="Desktop (1440px)",
                    label="Viewport",
                )
                extract_btn = gr.Button("π Extract Tokens", variant="primary")
            extraction_status = gr.Markdown("")
            with gr.Tabs():
                with gr.Tab("π¨ Colors"):
                    colors_table = gr.Dataframe(
                        headers=["Accept", "Color", "Frequency", "Context", "Contrast (White)", "AA Text", "Confidence"],
                        datatype=["bool", "str", "number", "str", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Colors",
                    )
                with gr.Tab("π Typography"):
                    typography_table = gr.Dataframe(
                        headers=["Accept", "Font", "Size", "Weight", "Line Height", "Elements", "Frequency"],
                        datatype=["bool", "str", "str", "number", "str", "str", "number"],
                        interactive=True,
                        label="Extracted Typography",
                    )
                with gr.Tab("π Spacing"):
                    spacing_table = gr.Dataframe(
                        headers=["Accept", "Value", "Frequency", "Context", "Fits 8px", "Outlier"],
                        datatype=["bool", "str", "number", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Spacing",
                    )

        # --- Stage 3: export ---
        with gr.Accordion("π¦ Stage 3: Export", open=False):
            gr.Markdown("""
            **Step 3:** Review and export your design tokens.
            """)
            with gr.Row():
                export_btn = gr.Button("π₯ Export JSON", variant="secondary")
            export_output = gr.Code(
                label="Exported Tokens (JSON)",
                language="json",
                lines=20,
            )

        # --- Event wiring ---
        # Discovery, then reveal the pages table once rows exist.
        discover_btn.click(
            fn=discover_site_pages,
            inputs=[url_input],
            outputs=[discovery_status, pages_table, pages_json],
        ).then(
            fn=lambda: gr.update(visible=True),
            outputs=[pages_table],
        )
        extract_btn.click(
            fn=start_extraction,
            inputs=[pages_table, viewport_radio],
            outputs=[extraction_status, colors_table, typography_table, spacing_table],
        )
        export_btn.click(
            fn=export_tokens_json,
            outputs=[export_output],
        )

        # --- Footer ---
        gr.Markdown("""
        ---
        **Design System Extractor v2** | Built with LangGraph + Gradio + HuggingFace
        *A semi-automated co-pilot for design system recovery and modernization.*
        **Models:** Microsoft Phi (Normalizer) β’ Meta Llama (Advisor) β’ Mistral Codestral (Generator)
        """)
    return app
# =============================================================================
# MAIN
# =============================================================================
if __name__ == "__main__":
    # Bind to all interfaces on the standard HF Spaces port.
    demo = create_ui()
    demo.launch(server_name="0.0.0.0", server_port=7860)