"""
Design System Extractor v2 — Main Application
==============================================

Flow:
1. User enters URL
2. Agent 1 discovers pages → User confirms
3. Agent 1 extracts tokens (Desktop + Mobile)
4. Agent 2 normalizes tokens
5. Stage 1 UI: User reviews tokens (accept/reject, Desktop↔Mobile toggle)
6. Agent 3 proposes upgrades
7. Stage 2 UI: User selects options with live preview
8. Agent 4 generates JSON
9. Stage 3 UI: User exports
"""

import os
import asyncio
import json
import gradio as gr
from datetime import datetime
from typing import Optional

# Get HF token from environment
HF_TOKEN_FROM_ENV = os.getenv("HF_TOKEN", "")


# =============================================================================
# GLOBAL STATE
# =============================================================================

class AppState:
    """Mutable session data shared by the handlers through the module-level `state`."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Put every field back to its empty starting value (also clears the log)."""
        self.logs = []
        self.base_url = ""
        self.discovered_pages = []
        # Raw extraction results, one per viewport.
        self.desktop_raw = None  # ExtractedTokens
        self.mobile_raw = None  # ExtractedTokens
        # Normalized results, one per viewport.
        self.desktop_normalized = None  # NormalizedTokens
        self.mobile_normalized = None  # NormalizedTokens

    def log(self, message: str):
        """Append *message* to the log, prefixed with the current HH:MM:SS time."""
        stamp = datetime.now().strftime("%H:%M:%S")
        entry = f"[{stamp}] {message}"
        self.logs.append(entry)
        # Cap the log at 100 entries by discarding the oldest one.
        if len(self.logs) > 100:
            del self.logs[0]

    def get_logs(self) -> str:
        """Return all log entries as one newline-separated string."""
        return "\n".join(self.logs)


state = AppState()


# =============================================================================
# LAZY IMPORTS
# =============================================================================

def get_crawler():
    """Import `agents.crawler` on first use and return the module."""
    import agents.crawler as crawler_module
    return crawler_module


def get_extractor():
    """Import `agents.extractor` on first use and return the module."""
    import agents.extractor as extractor_module
    return extractor_module


def get_normalizer():
    """Import `agents.normalizer` on first use and return the module."""
    import agents.normalizer as normalizer_module
    return normalizer_module


def get_schema():
    """Import `core.token_schema` on first use and return the module."""
    import core.token_schema as schema_module
    return schema_module


# =============================================================================
# PHASE 1: DISCOVER PAGES
# =============================================================================

# NOTE(review): several log/status strings in this section contain mis-encoded
# (mojibake) emoji. They are reproduced exactly as found because not every
# intended glyph can be recovered from this file -- confirm the file encoding.

# Upper bound on the number of pages processed in one extraction run.
_MAX_PAGES = 10


def _token_items(collection):
    """Return the token objects in *collection*.

    Token groups may be a dict (the tokens are its values) or a plain
    sequence; iterating a dict directly would yield its keys instead.
    """
    if isinstance(collection, dict):
        return list(collection.values())
    return collection


def _count_summary(tokens) -> str:
    """Format the sizes of the colors/typography/spacing groups for the log."""
    return (
        f"{len(tokens.colors)} colors, "
        f"{len(tokens.typography)} typography, "
        f"{len(tokens.spacing)} spacing"
    )


async def discover_pages(url: str, progress=gr.Progress()):
    """Discover the pages reachable from *url* and format them for the table.

    Resets the global state, validates that *url* starts with http:// or
    https://, runs `PageDiscoverer.discover`, and stores the result on
    `state`.

    Returns:
        A `(status, logs, rows)` tuple. `rows` is a list of
        `[selected, url, title, page_type, status]` lists, or None when the
        URL is invalid or discovery raised.
    """
    state.reset()

    if not url or not url.startswith(("http://", "https://")):
        return "❌ Please enter a valid URL", "", None

    state.log(f"πŸš€ Starting discovery for: {url}")
    progress(0.1, desc="πŸ” Discovering pages...")

    try:
        crawler = get_crawler()
        discoverer = crawler.PageDiscoverer()
        pages = await discoverer.discover(url)

        state.discovered_pages = pages
        state.base_url = url
        state.log(f"βœ… Found {len(pages)} pages")

        # Format for display; every page starts out selected.
        pages_data = [
            [
                True,
                page.url,
                page.title if page.title else "(No title)",
                page.page_type.value,
                "βœ“" if not page.error else f"⚠ {page.error}",
            ]
            for page in pages
        ]

        progress(1.0, desc="βœ… Discovery complete!")

        status = f"βœ… Found {len(pages)} pages. Review and click 'Extract Tokens' to continue."
        return status, state.get_logs(), pages_data

    except Exception as e:
        import traceback
        state.log(f"❌ Error: {str(e)}")
        # Log the traceback as extract_tokens does; it was imported here but unused.
        state.log(traceback.format_exc())
        return f"❌ Error: {str(e)}", state.get_logs(), None


# =============================================================================
# PHASE 2: EXTRACT TOKENS
# =============================================================================

def _selected_urls_from_table(pages_data) -> list:
    """Collect the URLs of the selected rows from the pages table value.

    Handles the three shapes the handler checks for: a DataFrame-like object
    (anything with `iterrows`), a dict with a `data` list of rows, or a
    list/tuple of rows. A row counts when its first cell is truthy and its
    second cell is a non-empty string. Parsing errors are logged and whatever
    was collected up to that point is returned.
    """
    selected = []

    def keep(is_selected, url):
        # Blank/NaN cells from an edited table are not usable URLs.
        if is_selected and isinstance(url, str) and url:
            selected.append(url)

    try:
        if hasattr(pages_data, 'iterrows'):
            state.log(f"πŸ“₯ DataFrame with {len(pages_data)} rows, columns: {list(pages_data.columns)}")
            for _, row in pages_data.iterrows():
                try:
                    # Try column names first, with positional defaults.
                    is_selected = row.get('Select', row.iloc[0] if len(row) > 0 else False)
                    url = row.get('URL', row.iloc[1] if len(row) > 1 else '')
                except Exception:
                    # Fallback to positional access only.
                    is_selected = row.iloc[0] if len(row) > 0 else False
                    url = row.iloc[1] if len(row) > 1 else ''
                keep(is_selected, url)

        elif isinstance(pages_data, dict):
            state.log(f"πŸ“₯ Dict with keys: {list(pages_data.keys())}")
            for row in pages_data.get('data', []):
                if isinstance(row, (list, tuple)) and len(row) >= 2:
                    keep(row[0], row[1])

        elif isinstance(pages_data, (list, tuple)):
            state.log(f"πŸ“₯ List with {len(pages_data)} items")
            for row in pages_data:
                if isinstance(row, (list, tuple)) and len(row) >= 2:
                    keep(row[0], row[1])

    except Exception as e:
        import traceback
        state.log(f"❌ Error parsing pages_data: {str(e)}")
        state.log(traceback.format_exc())

    return selected


async def extract_tokens(pages_data, progress=gr.Progress()):
    """Extract and normalize tokens from the selected pages for both viewports.

    Falls back to every error-free discovered page when the table yields no
    URLs, and processes at most `_MAX_PAGES` pages. Raw and normalized results
    are stored on `state`.

    Returns:
        A `(status, logs, desktop_data, mobile_data)` tuple. The two data
        values are dicts from `format_tokens_for_display`, or None when
        nothing could be extracted.
    """
    state.log(f"πŸ“₯ Received pages_data type: {type(pages_data)}")

    if pages_data is None:
        return "❌ Please discover pages first", state.get_logs(), None, None

    selected_urls = _selected_urls_from_table(pages_data)
    state.log(f"πŸ“‹ Found {len(selected_urls)} selected URLs")

    # If still no URLs, try using stored discovered pages
    if not selected_urls and state.discovered_pages:
        state.log("⚠️ No URLs from table, using all discovered pages")
        selected_urls = [p.url for p in state.discovered_pages if not p.error][:_MAX_PAGES]

    if not selected_urls:
        return "❌ No pages selected. Please select pages or rediscover.", state.get_logs(), None, None

    # Limit the number of pages for performance
    selected_urls = selected_urls[:_MAX_PAGES]

    state.log(f"πŸ“‹ Extracting from {len(selected_urls)} pages:")
    for url in selected_urls[:3]:
        state.log(f" β€’ {url}")
    if len(selected_urls) > 3:
        state.log(f" ... and {len(selected_urls) - 3} more")

    progress(0.05, desc="πŸš€ Starting extraction...")

    try:
        schema = get_schema()
        extractor_mod = get_extractor()
        normalizer_mod = get_normalizer()

        # === DESKTOP EXTRACTION ===
        state.log("")
        state.log("πŸ–₯️ DESKTOP EXTRACTION (1440px)")
        progress(0.1, desc="πŸ–₯️ Extracting desktop tokens...")

        desktop_extractor = extractor_mod.TokenExtractor(viewport=schema.Viewport.DESKTOP)

        def desktop_progress(p):
            # Desktop extraction occupies the 10%-45% slice of the bar.
            progress(0.1 + (p * 0.35), desc=f"πŸ–₯️ Desktop... {int(p*100)}%")

        state.desktop_raw = await desktop_extractor.extract(selected_urls, progress_callback=desktop_progress)
        state.log(f" Raw: {_count_summary(state.desktop_raw)}")

        # Normalize desktop
        state.log(" Normalizing...")
        state.desktop_normalized = normalizer_mod.normalize_tokens(state.desktop_raw)
        state.log(f" Normalized: {_count_summary(state.desktop_normalized)}")

        # === MOBILE EXTRACTION ===
        state.log("")
        state.log("πŸ“± MOBILE EXTRACTION (375px)")
        progress(0.5, desc="πŸ“± Extracting mobile tokens...")

        mobile_extractor = extractor_mod.TokenExtractor(viewport=schema.Viewport.MOBILE)

        def mobile_progress(p):
            # Mobile extraction occupies the 50%-85% slice of the bar.
            progress(0.5 + (p * 0.35), desc=f"πŸ“± Mobile... {int(p*100)}%")

        state.mobile_raw = await mobile_extractor.extract(selected_urls, progress_callback=mobile_progress)
        state.log(f" Raw: {_count_summary(state.mobile_raw)}")

        # Normalize mobile
        state.log(" Normalizing...")
        state.mobile_normalized = normalizer_mod.normalize_tokens(state.mobile_raw)
        state.log(f" Normalized: {_count_summary(state.mobile_normalized)}")

        progress(0.95, desc="πŸ“Š Preparing results...")

        # Format results for Stage 1 UI
        desktop_data = format_tokens_for_display(state.desktop_normalized)
        mobile_data = format_tokens_for_display(state.mobile_normalized)

        state.log("")
        state.log("=" * 50)
        state.log("βœ… EXTRACTION COMPLETE!")
        state.log("=" * 50)

        progress(1.0, desc="βœ… Complete!")

        desktop = state.desktop_normalized
        mobile = state.mobile_normalized
        # One Markdown table row per line; a table on a single line does not render.
        status = "\n".join([
            "## βœ… Extraction Complete!",
            "",
            "| Viewport | Colors | Typography | Spacing |",
            "|----------|--------|------------|---------|",
            f"| Desktop | {len(desktop.colors)} | {len(desktop.typography)} | {len(desktop.spacing)} |",
            f"| Mobile | {len(mobile.colors)} | {len(mobile.typography)} | {len(mobile.spacing)} |",
            "",
            "**Next:** Review the tokens below. Accept or reject, then proceed to Stage 2.",
            "",
        ])

        return status, state.get_logs(), desktop_data, mobile_data

    except Exception as e:
        import traceback
        state.log(f"❌ Error: {str(e)}")
        state.log(traceback.format_exc())
        return f"❌ Error: {str(e)}", state.get_logs(), None, None


def format_tokens_for_display(normalized) -> dict:
    """Format normalized tokens as table rows for the Gradio review tables.

    Returns a dict with `colors`, `typography` and `spacing` row lists (all
    empty when *normalized* is None). Colors and typography are ordered by
    descending frequency and capped at 50 and 30 rows; spacing is ordered by
    ascending pixel value and capped at 20 rows. Every row starts with True,
    the pre-ticked "Accept" checkbox.
    """
    if normalized is None:
        return {"colors": [], "typography": [], "spacing": []}

    colors = [
        [
            True,  # Accept checkbox
            c.value,
            c.suggested_name or "",
            c.frequency,
            c.confidence.value if c.confidence else "medium",
            f"{c.contrast_white:.1f}:1" if c.contrast_white else "N/A",
            "βœ“" if c.wcag_aa_small_text else "βœ—",
            ", ".join(c.contexts[:2]) if c.contexts else "",
        ]
        for c in sorted(_token_items(normalized.colors), key=lambda x: -x.frequency)[:50]
    ]

    typography = [
        [
            True,  # Accept checkbox
            t.font_family,
            t.font_size,
            str(t.font_weight),
            t.line_height or "",
            t.suggested_name or "",
            t.frequency,
            t.confidence.value if t.confidence else "medium",
        ]
        for t in sorted(_token_items(normalized.typography), key=lambda x: -x.frequency)[:30]
    ]

    spacing = [
        [
            True,  # Accept checkbox
            s.value,
            f"{s.value_px}px",
            s.suggested_name or "",
            s.frequency,
            "βœ“" if s.fits_base_8 else "",
            s.confidence.value if s.confidence else "medium",
        ]
        for s in sorted(_token_items(normalized.spacing), key=lambda x: x.value_px)[:20]
    ]

    return {
        "colors": colors,
        "typography": typography,
        "spacing": spacing,
    }


def switch_viewport(viewport: str):
    """Return `(colors, typography, spacing)` rows for the chosen viewport.

    The exact label "Desktop (1440px)" selects the desktop tokens; any other
    value selects the mobile tokens.
    """
    if viewport == "Desktop (1440px)":
        data = format_tokens_for_display(state.desktop_normalized)
    else:
        data = format_tokens_for_display(state.mobile_normalized)

    return data["colors"], data["typography"], data["spacing"]


# =============================================================================
# STAGE 3: EXPORT
# =============================================================================

def _serialize_viewport(normalized) -> dict:
    """Convert one viewport's normalized tokens into JSON-ready dicts."""
    return {
        "colors": [
            {
                "value": c.value,
                "name": c.suggested_name,
                "frequency": c.frequency,
                "confidence": c.confidence.value if c.confidence else "medium",
            }
            for c in _token_items(normalized.colors)
        ],
        "typography": [
            {
                "font_family": t.font_family,
                "font_size": t.font_size,
                "font_weight": t.font_weight,
                "line_height": t.line_height,
                "name": t.suggested_name,
                "frequency": t.frequency,
            }
            for t in _token_items(normalized.typography)
        ],
        "spacing": [
            {
                "value": s.value,
                "value_px": s.value_px,
                "name": s.suggested_name,
                "frequency": s.frequency,
                "fits_base_8": s.fits_base_8,
            }
            for s in _token_items(normalized.spacing)
        ],
    }


def export_tokens_json():
    """Serialize the extracted tokens of both viewports to a JSON string.

    The result has a `metadata` section plus `desktop` and `mobile` entries;
    a viewport with no normalized tokens is exported as null. Token groups
    are read through `_token_items`, so dict-shaped groups export their token
    objects rather than their keys.
    """
    result = {
        "metadata": {
            "source_url": state.base_url,
            "extracted_at": datetime.now().isoformat(),
            "version": "v1-extracted",
        },
        "desktop": None,
        "mobile": None,
    }

    if state.desktop_normalized:
        result["desktop"] = _serialize_viewport(state.desktop_normalized)

    if state.mobile_normalized:
        result["mobile"] = _serialize_viewport(state.mobile_normalized)

    # default=str stringifies any value json cannot encode natively.
    return json.dumps(result, indent=2, default=str)


# =============================================================================
# UI BUILDING
# =============================================================================

def create_ui():
    """Create the Gradio interface and return the `gr.Blocks` app.

    Builds the configuration panel, page discovery (Step 1), the token review
    tables (Stage 1), the Stage 2 placeholder and the JSON export (Stage 3),
    then wires the buttons to `discover_pages`, `extract_tokens`,
    `switch_viewport` and `export_tokens_json`.

    NOTE(review): the source of this function had lost its indentation, so
    the nesting of components inside rows/accordions is reconstructed --
    confirm the layout. Several labels contain mis-encoded (mojibake) emoji;
    they are kept exactly as found because not every intended glyph can be
    recovered from this file.
    """
    with gr.Blocks(
        title="Design System Extractor v2",
        theme=gr.themes.Soft(),
        css="""
        .color-swatch {
            display: inline-block;
            width: 24px;
            height: 24px;
            border-radius: 4px;
            margin-right: 8px;
            vertical-align: middle;
        }
        """
    ) as app:

        gr.Markdown("""
        # 🎨 Design System Extractor v2

        **Reverse-engineer design systems from live websites.**

        A semi-automated, human-in-the-loop system that extracts, normalizes, and upgrades design tokens.

        ---
        """)

        # =================================================================
        # CONFIGURATION
        # =================================================================

        # Start expanded only when no token was supplied via the environment.
        with gr.Accordion("βš™οΈ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):
            gr.Markdown("**HuggingFace Token** β€” Required for Stage 2 (AI upgrades)")
            with gr.Row():
                hf_token_input = gr.Textbox(
                    label="HF Token",
                    placeholder="hf_xxxx",
                    type="password",
                    scale=4,
                    value=HF_TOKEN_FROM_ENV,
                )
                save_token_btn = gr.Button("πŸ’Ύ Save", scale=1)
            token_status = gr.Markdown("βœ… Token loaded" if HF_TOKEN_FROM_ENV else "⏳ Enter token")

            def save_token(token):
                """Store *token* in the HF_TOKEN environment variable if it looks valid."""
                # Strip before validating so surrounding whitespace does not
                # count toward the minimum length.
                token = (token or "").strip()
                if len(token) > 10:
                    os.environ["HF_TOKEN"] = token
                    return "βœ… Token saved!"
                return "❌ Invalid token"

            save_token_btn.click(save_token, [hf_token_input], [token_status])

        # =================================================================
        # URL INPUT & PAGE DISCOVERY
        # =================================================================

        with gr.Accordion("πŸ” Step 1: Discover Pages", open=True):
            gr.Markdown("Enter your website URL to discover pages for extraction.")

            with gr.Row():
                url_input = gr.Textbox(label="Website URL", placeholder="https://example.com", scale=4)
                discover_btn = gr.Button("πŸ” Discover Pages", variant="primary", scale=1)

            discover_status = gr.Markdown("")

            with gr.Row():
                log_output = gr.Textbox(label="πŸ“‹ Log", lines=8, interactive=False)

            # Hidden until a discovery run has finished.
            pages_table = gr.Dataframe(
                headers=["Select", "URL", "Title", "Type", "Status"],
                datatype=["bool", "str", "str", "str", "str"],
                label="Discovered Pages",
                interactive=True,
                visible=False,
            )

            extract_btn = gr.Button("πŸš€ Extract Tokens (Desktop + Mobile)", variant="primary", visible=False)

        # =================================================================
        # STAGE 1: EXTRACTION REVIEW
        # =================================================================

        with gr.Accordion("πŸ“Š Stage 1: Review Extracted Tokens", open=False) as stage1_accordion:

            extraction_status = gr.Markdown("")

            gr.Markdown("""
            **Review the extracted tokens.** Toggle between Desktop and Mobile viewports.
            Accept or reject tokens, then proceed to Stage 2 for AI-powered upgrades.
            """)

            viewport_toggle = gr.Radio(
                choices=["Desktop (1440px)", "Mobile (375px)"],
                value="Desktop (1440px)",
                label="Viewport",
            )

            with gr.Tabs():
                with gr.Tab("🎨 Colors"):
                    colors_table = gr.Dataframe(
                        headers=["Accept", "Color", "Suggested Name", "Frequency", "Confidence", "Contrast", "AA", "Context"],
                        datatype=["bool", "str", "str", "number", "str", "str", "str", "str"],
                        label="Colors",
                        interactive=True,
                    )

                with gr.Tab("πŸ“ Typography"):
                    typography_table = gr.Dataframe(
                        headers=["Accept", "Font", "Size", "Weight", "Line Height", "Suggested Name", "Frequency", "Confidence"],
                        datatype=["bool", "str", "str", "str", "str", "str", "number", "str"],
                        label="Typography",
                        interactive=True,
                    )

                with gr.Tab("πŸ“ Spacing"):
                    spacing_table = gr.Dataframe(
                        headers=["Accept", "Value", "Pixels", "Suggested Name", "Frequency", "Base 8", "Confidence"],
                        datatype=["bool", "str", "str", "str", "number", "str", "str"],
                        label="Spacing",
                        interactive=True,
                    )

            # No handler is attached to this button in this function.
            proceed_stage2_btn = gr.Button("➑️ Proceed to Stage 2: AI Upgrades", variant="primary")

        # =================================================================
        # STAGE 2: AI UPGRADES (Placeholder)
        # =================================================================

        with gr.Accordion("🧠 Stage 2: AI-Powered Upgrades (Coming Soon)", open=False):
            gr.Markdown("""
            **Agent 3 (Design System Advisor)** will analyze your tokens and propose:

            - **Type Scale Options:** Choose from A/B/C (1.25, 1.333, 1.414 ratios)
            - **Color Ramp Generation:** AA-compliant tints and shades
            - **Spacing System:** Aligned to 8px base grid
            - **Naming Conventions:** Semantic token names

            Each option will show a **live preview** so you can see the changes before accepting.

            *Requires HuggingFace token for LLM inference.*
            """)

        # =================================================================
        # STAGE 3: EXPORT
        # =================================================================

        with gr.Accordion("πŸ“¦ Stage 3: Export", open=False):
            gr.Markdown("Export your design tokens to JSON (compatible with Figma Tokens Studio).")

            export_btn = gr.Button("πŸ“₯ Export JSON", variant="secondary")
            export_output = gr.Code(label="Tokens JSON", language="json", lines=20)

            export_btn.click(export_tokens_json, outputs=[export_output])

        # =================================================================
        # EVENT HANDLERS
        # =================================================================

        # Store data for viewport toggle
        desktop_data = gr.State({})
        mobile_data = gr.State({})

        def tables_from(data):
            """Split a formatted-token dict into the three table values."""
            # extract_tokens hands back None on failure; treat it as "no rows"
            # instead of raising AttributeError on .get.
            data = data or {}
            return data.get("colors", []), data.get("typography", []), data.get("spacing", [])

        # Discover pages, then reveal the table and the extract button.
        discover_btn.click(
            fn=discover_pages,
            inputs=[url_input],
            outputs=[discover_status, log_output, pages_table],
        ).then(
            fn=lambda: (gr.update(visible=True), gr.update(visible=True)),
            outputs=[pages_table, extract_btn],
        )

        # Extract tokens, show the desktop rows, then open the review section.
        extract_btn.click(
            fn=extract_tokens,
            inputs=[pages_table],
            outputs=[extraction_status, log_output, desktop_data, mobile_data],
        ).then(
            fn=tables_from,
            inputs=[desktop_data],
            outputs=[colors_table, typography_table, spacing_table],
        ).then(
            fn=lambda: gr.update(open=True),
            outputs=[stage1_accordion],
        )

        # Viewport toggle
        viewport_toggle.change(
            fn=switch_viewport,
            inputs=[viewport_toggle],
            outputs=[colors_table, typography_table, spacing_table],
        )

        # =================================================================
        # FOOTER
        # =================================================================

        gr.Markdown("""
        ---

        **Design System Extractor v2** | Built with Playwright + Gradio + LangGraph + HuggingFace

        *A semi-automated co-pilot for design system recovery and modernization.*
        """)

    return app


# =============================================================================
# MAIN
# =============================================================================

if __name__ == "__main__":
    # Build the interface, then serve it on 0.0.0.0 at port 7860.
    app = create_ui()
    app.launch(server_port=7860, server_name="0.0.0.0")