riazmo's picture
Upload app.py
ad4e018 verified
"""
Design System Extractor v2 — Main Application
==============================================
A semi-automated, human-in-the-loop agentic system that reverse-engineers
design systems from live websites.
Usage:
python app.py
"""
import os
import asyncio
import gradio as gr
from datetime import datetime
# HuggingFace token read from the environment at import time ("" when unset)
HF_TOKEN_FROM_ENV = os.environ.get("HF_TOKEN", "")
# =============================================================================
# GLOBAL STATE
# =============================================================================
# Shared store that the discovery and extraction handlers write their results to
current_extraction: dict = {}
# Token entered through the UI; overwritten by set_hf_token()
user_hf_token: str = ""
# =============================================================================
# HF TOKEN MANAGEMENT
# =============================================================================
def set_hf_token(token: str) -> str:
    """Validate a HuggingFace token and store it for the running process.

    The token is trimmed before validation so that surrounding whitespace
    (common when pasting) neither counts toward the minimum length nor ends
    up in the stored value.

    Args:
        token: Raw text from the token input box. ``None`` and non-string
            values are treated as an empty token.

    Returns:
        A status message for the UI: an error message when the trimmed token
        is shorter than 10 characters, otherwise a confirmation message.

    Side effects:
        On success, assigns the module-level ``user_hf_token`` and sets the
        ``HF_TOKEN`` environment variable to the trimmed token.
    """
    global user_hf_token
    # Strip first: validating the raw value would let "   short   " through.
    cleaned = token.strip() if isinstance(token, str) else ""
    if len(cleaned) < 10:
        return "❌ Please enter a valid HuggingFace token"
    user_hf_token = cleaned
    os.environ["HF_TOKEN"] = cleaned
    return "βœ… Token saved! You can now use the extractor."
# =============================================================================
# LAZY IMPORTS (avoid circular imports at startup)
# =============================================================================
# Cache slots for the lazily imported modules; each stays None until the
# matching get_*() accessor performs the import.
_crawler_module = _extractor_module = _schema_module = None
def get_crawler():
    """Return ``agents.crawler``, importing it the first time it is requested."""
    global _crawler_module
    if _crawler_module is not None:
        return _crawler_module
    # Deferred to first use so the import does not run at application startup.
    from agents import crawler
    _crawler_module = crawler
    return _crawler_module
def get_extractor():
    """Return ``agents.extractor``, importing it the first time it is requested."""
    global _extractor_module
    if _extractor_module is not None:
        return _extractor_module
    # Deferred to first use so the import does not run at application startup.
    from agents import extractor
    _extractor_module = extractor
    return _extractor_module
def get_schema():
    """Return ``core.token_schema``, importing it the first time it is requested."""
    global _schema_module
    if _schema_module is not None:
        return _schema_module
    # Deferred to first use so the import does not run at application startup.
    from core import token_schema
    _schema_module = token_schema
    return _schema_module
# =============================================================================
# STAGE 1: URL INPUT & PAGE DISCOVERY
# =============================================================================
async def discover_site_pages(url: str, progress=gr.Progress()) -> tuple:
    """
    Discover pages from a website URL.

    Args:
        url: Address typed by the user. Surrounding whitespace is ignored;
            it must start with ``http://`` or ``https://``.
        progress: Gradio progress tracker used to report discovery progress.

    Returns:
        Tuple of (status_message, pages_rows, pages_json).
        ``pages_rows`` is a list of rows, each ordered as
        [Select, URL, Title, Type, Status] to match the pages table headers;
        ``pages_json`` is the list of ``model_dump()`` dicts of the pages.
        On invalid input or on any failure the last two items are ``None``
        and the status message describes the problem.

    Side effects:
        On success stores the discovered pages and the (trimmed) URL in
        ``current_extraction`` under ``"discovered_pages"`` and ``"base_url"``.
    """
    # Pasted URLs often carry stray whitespace; trim before validating so a
    # valid address is not rejected (and None does not raise).
    url = (url or "").strip()
    if not url.startswith(("http://", "https://")):
        return "❌ Please enter a valid URL starting with http:// or https://", None, None

    progress(0, desc="Initializing browser...")
    try:
        crawler = get_crawler()
        discoverer = crawler.PageDiscoverer()

        def update_progress(p):
            progress(p, desc=f"Discovering pages... ({int(p*100)}%)")

        pages = await discoverer.discover(url, progress_callback=update_progress)

        # Format for display. gr.Dataframe documents list-of-lists (not
        # list-of-dicts) as a value format, so emit rows in header order:
        # Select, URL, Title, Type, Status.
        pages_data = [
            [
                page.selected,
                page.url,
                page.title or "(No title)",
                page.page_type.value,
                "βœ“" if not page.error else f"⚠ {page.error}",
            ]
            for page in pages
        ]

        # Store for later use
        current_extraction["discovered_pages"] = pages
        current_extraction["base_url"] = url

        status = f"βœ… Found {len(pages)} pages. Select the pages you want to extract tokens from."
        return status, pages_data, [p.model_dump() for p in pages]
    except Exception as e:
        # UI boundary: report the failure in the status panel instead of
        # letting the exception escape the event handler.
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None, None
def _selection_rows(pages_selection) -> list:
    """Normalise the pages-table payload into a plain list of rows."""
    if pages_selection is None:
        return []
    if hasattr(pages_selection, "to_dict"):
        # pandas DataFrame, which is what gr.Dataframe passes by default.
        return pages_selection.to_dict("records")
    if isinstance(pages_selection, dict):
        # {"headers": [...], "data": [[...], ...]} style payload.
        headers = pages_selection.get("headers") or []
        return [dict(zip(headers, row)) for row in pages_selection.get("data") or []]
    return list(pages_selection)


def _selected_page_urls(rows: list) -> list:
    """Return the URL of every row whose "Select" cell is truthy."""
    urls = []
    for row in rows:
        if isinstance(row, dict):
            selected, page_url = row.get("Select", False), row.get("URL")
        elif isinstance(row, (str, bytes)):
            # A bare string is not a table row; ignore it.
            continue
        else:
            # Positional row in header order: Select, URL, ...
            cells = list(row)
            if len(cells) < 2:
                continue
            selected, page_url = cells[0], cells[1]
        if selected and page_url:
            urls.append(page_url)
    return urls


async def start_extraction(pages_selection: list, viewport_choice: str, progress=gr.Progress()) -> tuple:
    """
    Start token extraction from selected pages.

    Args:
        pages_selection: Contents of the pages table. Accepted shapes are a
            pandas DataFrame, a list of row lists (Select, URL, ...), a list
            of dicts keyed by column name, or a headers/data dict.
        viewport_choice: Radio label; "Desktop (1440px)" selects the desktop
            viewport, any other value selects mobile.
        progress: Gradio progress tracker used to report extraction progress.

    Returns:
        Tuple of (status, colors_data, typography_data, spacing_data). The
        three data items are lists of rows ordered like the headers of the
        corresponding result tables, or ``None`` when nothing was extracted
        (no selection, or a failure described in ``status``).

    Side effects:
        On success stores the extraction result in ``current_extraction``
        under ``"<viewport value>_tokens"``.
    """
    rows = _selection_rows(pages_selection)
    if not rows:
        return "❌ Please select at least one page", None, None, None

    # Get selected URLs
    selected_urls = _selected_page_urls(rows)
    if not selected_urls:
        return "❌ Please select at least one page using the checkboxes", None, None, None

    # Determine viewport
    schema = get_schema()
    viewport = schema.Viewport.DESKTOP if viewport_choice == "Desktop (1440px)" else schema.Viewport.MOBILE

    progress(0, desc=f"Starting {viewport.value} extraction...")
    try:
        extractor_mod = get_extractor()
        extractor = extractor_mod.TokenExtractor(viewport=viewport)

        def update_progress(p):
            progress(p, desc=f"Extracting tokens... ({int(p*100)}%)")

        result = await extractor.extract(selected_urls, progress_callback=update_progress)

        # Store result
        current_extraction[f"{viewport.value}_tokens"] = result

        # Rows below are lists in table-header order, the value format
        # gr.Dataframe documents.

        # Colors: 50 most frequent.
        # Accept, Color, Frequency, Context, Contrast (White), AA Text, Confidence
        colors_data = [
            [
                True,
                color.value,
                color.frequency,
                ", ".join(color.contexts[:3]),
                f"{color.contrast_white}:1",
                "βœ“" if color.wcag_aa_small_text else "βœ—",
                color.confidence.value,
            ]
            for color in sorted(result.colors, key=lambda c: -c.frequency)[:50]
        ]

        # Typography: 30 most frequent.
        # Accept, Font, Size, Weight, Line Height, Elements, Frequency
        typography_data = [
            [
                True,
                typo.font_family,
                typo.font_size,
                typo.font_weight,
                typo.line_height,
                ", ".join(typo.elements[:3]),
                typo.frequency,
            ]
            for typo in sorted(result.typography, key=lambda t: -t.frequency)[:30]
        ]

        # Spacing: 20 smallest values.
        # Accept, Value, Frequency, Context, Fits 8px, Outlier
        spacing_data = [
            [
                True,
                space.value,
                space.frequency,
                ", ".join(space.contexts[:2]),
                "βœ“" if space.fits_base_8 else "",
                "⚠" if space.is_outlier else "",
            ]
            for space in sorted(result.spacing, key=lambda s: s.value_px)[:20]
        ]

        # Only append the unit when a base was detected (avoids "Unknownpx").
        spacing_base = f"{result.spacing_base}px" if result.spacing_base else "Unknown"

        # Summary
        status = "\n".join([
            f"βœ… Extraction Complete ({viewport.value})",
            "**Summary:**",
            f"- Pages crawled: {len(result.pages_crawled)}",
            f"- Colors found: {len(result.colors)}",
            f"- Typography styles: {len(result.typography)}",
            f"- Spacing values: {len(result.spacing)}",
            f"- Font families: {len(result.font_families)}",
            f"- Detected spacing base: {spacing_base}",
            f"- Duration: {result.extraction_duration_ms}ms",
            "",
        ])
        if result.warnings:
            status += f"\n⚠️ Warnings: {len(result.warnings)}"
        if result.errors:
            status += f"\n❌ Errors: {len(result.errors)}"
        return status, colors_data, typography_data, spacing_data
    except Exception as e:
        # UI boundary: report the failure in the status panel instead of
        # letting the exception escape the event handler.
        import traceback
        return f"❌ Extraction failed: {str(e)}\n\n{traceback.format_exc()}", None, None, None
def export_tokens_json():
    """Serialise the stored extraction results to a JSON string.

    Looks up ``"desktop_tokens"`` and ``"mobile_tokens"`` in
    ``current_extraction`` (in that order) and, for each one present, emits
    its colors, typography and spacing as ``model_dump()`` dicts plus its
    ``summary()`` under ``"metadata"``. Returns a fixed JSON error object
    when neither result is stored.
    """
    import json

    payload = {}
    for viewport_name in ("desktop", "mobile"):
        store_key = f"{viewport_name}_tokens"
        if store_key not in current_extraction:
            continue
        tokens = current_extraction[store_key]
        payload[viewport_name] = {
            "colors": [entry.model_dump() for entry in tokens.colors],
            "typography": [entry.model_dump() for entry in tokens.typography],
            "spacing": [entry.model_dump() for entry in tokens.spacing],
            "metadata": tokens.summary(),
        }

    if payload:
        # default=str stringifies any value json cannot encode natively.
        return json.dumps(payload, indent=2, default=str)
    return '{"error": "No tokens extracted yet. Please run extraction first."}'
# =============================================================================
# UI BUILDING
# =============================================================================
def create_ui():
    """Create the Gradio interface.

    Builds a ``gr.Blocks`` app containing a token-configuration panel and
    three accordions (discovery, extraction, export), wires the buttons to
    the module-level handlers, and returns the app without launching it.
    """
    with gr.Blocks(
        title="Design System Extractor v2",
        theme=gr.themes.Soft(),
    ) as app:
        # Header
        gr.Markdown("""
# 🎨 Design System Extractor v2
**Reverse-engineer design systems from live websites.**
Extract colors, typography, and spacing tokens from any website and export to Figma-compatible JSON.
---
""")
        # =================================================================
        # CONFIGURATION SECTION
        # =================================================================
        # The panel starts expanded only when no token came from the environment.
        with gr.Accordion("βš™οΈ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):
            gr.Markdown("""
**HuggingFace Token** is required for AI-powered features (Agent 2-4).
Get your token at: [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
*Note: Basic extraction (Agent 1) works without a token.*
""")
            with gr.Row():
                # Pre-filled with the environment token when one is present.
                hf_token_input = gr.Textbox(
                    label="HuggingFace Token",
                    placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
                    type="password",
                    scale=4,
                    value=HF_TOKEN_FROM_ENV if HF_TOKEN_FROM_ENV else "",
                )
                save_token_btn = gr.Button("πŸ’Ύ Save Token", scale=1)
            token_status = gr.Markdown(
                "βœ… Token loaded from environment" if HF_TOKEN_FROM_ENV else "⏳ Enter your HF token to enable all features"
            )
            # Saving shows set_hf_token()'s status message in token_status.
            save_token_btn.click(
                fn=set_hf_token,
                inputs=[hf_token_input],
                outputs=[token_status],
            )
        # =================================================================
        # STAGE 1: URL Input & Discovery
        # =================================================================
        with gr.Accordion("πŸ“ Stage 1: Website Discovery", open=True):
            gr.Markdown("""
**Step 1:** Enter your website URL and discover pages.
The system will automatically find and classify pages for extraction.
""")
            with gr.Row():
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://example.com",
                    scale=4,
                )
                discover_btn = gr.Button("πŸ” Discover Pages", variant="primary", scale=1)
            discovery_status = gr.Markdown("")
            # Created hidden; made visible by the step chained after discovery below.
            pages_table = gr.Dataframe(
                headers=["Select", "URL", "Title", "Type", "Status"],
                datatype=["bool", "str", "str", "str", "str"],
                interactive=True,
                label="Discovered Pages",
                visible=False,
            )
            # Hidden component that receives the third output of discovery.
            pages_json = gr.JSON(visible=False)
        # =================================================================
        # STAGE 2: Extraction
        # =================================================================
        with gr.Accordion("πŸ”¬ Stage 2: Token Extraction", open=False):
            gr.Markdown("""
**Step 2:** Select pages and viewport, then extract design tokens.
""")
            with gr.Row():
                # start_extraction compares against the "Desktop (1440px)" label.
                viewport_radio = gr.Radio(
                    choices=["Desktop (1440px)", "Mobile (375px)"],
                    value="Desktop (1440px)",
                    label="Viewport",
                )
                extract_btn = gr.Button("πŸš€ Extract Tokens", variant="primary")
            extraction_status = gr.Markdown("")
            # One tab and one editable table per token category.
            with gr.Tabs():
                with gr.Tab("🎨 Colors"):
                    colors_table = gr.Dataframe(
                        headers=["Accept", "Color", "Frequency", "Context", "Contrast (White)", "AA Text", "Confidence"],
                        datatype=["bool", "str", "number", "str", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Colors",
                    )
                with gr.Tab("πŸ“ Typography"):
                    typography_table = gr.Dataframe(
                        headers=["Accept", "Font", "Size", "Weight", "Line Height", "Elements", "Frequency"],
                        datatype=["bool", "str", "str", "number", "str", "str", "number"],
                        interactive=True,
                        label="Extracted Typography",
                    )
                with gr.Tab("πŸ“ Spacing"):
                    spacing_table = gr.Dataframe(
                        headers=["Accept", "Value", "Frequency", "Context", "Fits 8px", "Outlier"],
                        datatype=["bool", "str", "number", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Spacing",
                    )
        # =================================================================
        # STAGE 3: Export
        # =================================================================
        with gr.Accordion("πŸ“¦ Stage 3: Export", open=False):
            gr.Markdown("""
**Step 3:** Review and export your design tokens.
""")
            with gr.Row():
                export_btn = gr.Button("πŸ“₯ Export JSON", variant="secondary")
            export_output = gr.Code(
                label="Exported Tokens (JSON)",
                language="json",
                lines=20,
            )
        # =================================================================
        # EVENT HANDLERS
        # =================================================================
        # Discovery: run the handler, then make the pages table visible.
        discover_btn.click(
            fn=discover_site_pages,
            inputs=[url_input],
            outputs=[discovery_status, pages_table, pages_json],
        ).then(
            fn=lambda: gr.update(visible=True),
            outputs=[pages_table],
        )
        # Extraction: reads the (user-edited) pages table and the viewport choice.
        extract_btn.click(
            fn=start_extraction,
            inputs=[pages_table, viewport_radio],
            outputs=[extraction_status, colors_table, typography_table, spacing_table],
        )
        # Export: takes no inputs; export_tokens_json reads current_extraction.
        export_btn.click(
            fn=export_tokens_json,
            outputs=[export_output],
        )
        # =================================================================
        # FOOTER
        # =================================================================
        gr.Markdown("""
---
**Design System Extractor v2** | Built with LangGraph + Gradio + HuggingFace
*A semi-automated co-pilot for design system recovery and modernization.*
**Models:** Microsoft Phi (Normalizer) β€’ Meta Llama (Advisor) β€’ Mistral Codestral (Generator)
""")
    return app
# =============================================================================
# MAIN
# =============================================================================
if __name__ == "__main__":
    # Build the interface, then serve it on all network interfaces, port 7860.
    app = create_ui()
    app.launch(server_name="0.0.0.0", server_port=7860)