riazmo committed on
Commit
48de618
·
verified ·
1 Parent(s): a0256ad

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +479 -0
app.py ADDED
@@ -0,0 +1,479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Design System Extractor v2 β€” Main Application
3
+ ==============================================
4
+
5
+ A semi-automated, human-in-the-loop agentic system that reverse-engineers
6
+ design systems from live websites.
7
+
8
+ Usage:
9
+ python app.py
10
+ """
11
+
12
+ import os
13
+ import asyncio
14
+ import gradio as gr
15
+ from datetime import datetime
16
+
17
+ # Get HF token from environment if available
18
+ HF_TOKEN_FROM_ENV = os.getenv("HF_TOKEN", "")
19
+
20
+ # =============================================================================
21
+ # GLOBAL STATE
22
+ # =============================================================================
23
+
24
+ current_extraction: dict = {}
25
+ user_hf_token: str = ""
26
+
27
+
28
+ # =============================================================================
29
+ # HF TOKEN MANAGEMENT
30
+ # =============================================================================
31
+
32
+ def set_hf_token(token: str) -> str:
33
+ """Set the HF token globally."""
34
+ global user_hf_token
35
+
36
+ if not token or len(token) < 10:
37
+ return "❌ Please enter a valid HuggingFace token"
38
+
39
+ user_hf_token = token.strip()
40
+ os.environ["HF_TOKEN"] = user_hf_token
41
+
42
+ return "βœ… Token saved! You can now use the extractor."
43
+
44
+
45
+ # =============================================================================
46
+ # LAZY IMPORTS (avoid circular imports at startup)
47
+ # =============================================================================
48
+
49
+ _crawler_module = None
50
+ _extractor_module = None
51
+ _schema_module = None
52
+
53
+ def get_crawler():
54
+ global _crawler_module
55
+ if _crawler_module is None:
56
+ from agents import crawler as _crawler_module
57
+ return _crawler_module
58
+
59
+ def get_extractor():
60
+ global _extractor_module
61
+ if _extractor_module is None:
62
+ from agents import extractor as _extractor_module
63
+ return _extractor_module
64
+
65
+ def get_schema():
66
+ global _schema_module
67
+ if _schema_module is None:
68
+ from core import token_schema as _schema_module
69
+ return _schema_module
70
+
71
+
72
+ # =============================================================================
73
+ # STAGE 1: URL INPUT & PAGE DISCOVERY
74
+ # =============================================================================
75
+
76
+ async def discover_site_pages(url: str, progress=gr.Progress()) -> tuple:
77
+ """
78
+ Discover pages from a website URL.
79
+
80
+ Returns tuple of (status_message, pages_dataframe, pages_json)
81
+ """
82
+ if not url or not url.startswith(("http://", "https://")):
83
+ return "❌ Please enter a valid URL starting with http:// or https://", None, None
84
+
85
+ progress(0, desc="πŸš€ Initializing browser...")
86
+
87
+ try:
88
+ crawler = get_crawler()
89
+ discoverer = crawler.PageDiscoverer()
90
+
91
+ def update_progress(p):
92
+ progress(p, desc=f"πŸ” Discovering pages... ({int(p*100)}%)")
93
+
94
+ pages = await discoverer.discover(url, progress_callback=update_progress)
95
+
96
+ progress(1.0, desc="βœ… Discovery complete!")
97
+
98
+ # Format for display - ensure we return simple values, not objects
99
+ pages_data = []
100
+ for page in pages:
101
+ pages_data.append([
102
+ page.selected, # Select (bool)
103
+ page.url, # URL (str)
104
+ page.title if page.title else "(No title)", # Title (str)
105
+ page.page_type.value, # Type (str)
106
+ "βœ“" if not page.error else f"⚠ {page.error}" # Status (str)
107
+ ])
108
+
109
+ # Store for later use
110
+ current_extraction["discovered_pages"] = pages
111
+ current_extraction["base_url"] = url
112
+
113
+ status = f"βœ… Found {len(pages)} pages. Select the pages you want to extract tokens from."
114
+
115
+ return status, pages_data
116
+
117
+ except Exception as e:
118
+ import traceback
119
+ return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
120
+
121
+
122
+ async def start_extraction(pages_selection, viewport_choice: str, progress=gr.Progress()) -> tuple:
123
+ """
124
+ Start token extraction from selected pages.
125
+
126
+ Returns tuple of (status, colors_data, typography_data, spacing_data)
127
+ """
128
+ if pages_selection is None or len(pages_selection) == 0:
129
+ return "❌ Please discover pages first", None, None, None
130
+
131
+ progress(0, desc="πŸ”„ Preparing extraction...")
132
+
133
+ # Get selected URLs from the dataframe
134
+ selected_urls = []
135
+
136
+ # Handle both list of lists and list of dicts formats
137
+ for row in pages_selection:
138
+ if isinstance(row, (list, tuple)):
139
+ # Format: [Select, URL, Title, Type, Status]
140
+ if len(row) >= 2 and row[0]: # row[0] is Select checkbox
141
+ selected_urls.append(row[1]) # row[1] is URL
142
+ elif isinstance(row, dict):
143
+ if row.get("Select", False):
144
+ selected_urls.append(row.get("URL", ""))
145
+
146
+ if not selected_urls:
147
+ return "❌ Please select at least one page using the checkboxes", None, None, None
148
+
149
+ progress(0.05, desc=f"πŸ“‹ Selected {len(selected_urls)} pages for extraction...")
150
+
151
+ # Determine viewport
152
+ schema = get_schema()
153
+ viewport = schema.Viewport.DESKTOP if viewport_choice == "Desktop (1440px)" else schema.Viewport.MOBILE
154
+
155
+ try:
156
+ extractor_mod = get_extractor()
157
+ extractor = extractor_mod.TokenExtractor(viewport=viewport)
158
+
159
+ def update_progress(p):
160
+ # Scale progress from 0.1 to 0.9
161
+ scaled = 0.1 + (p * 0.8)
162
+ progress(scaled, desc=f"πŸ”¬ Extracting tokens... ({int(p*100)}%)")
163
+
164
+ progress(0.1, desc=f"🌐 Starting {viewport.value} extraction...")
165
+
166
+ result = await extractor.extract(selected_urls, progress_callback=update_progress)
167
+
168
+ progress(0.9, desc="πŸ“Š Processing results...")
169
+
170
+ # Store result
171
+ current_extraction[f"{viewport.value}_tokens"] = result
172
+
173
+ # Format colors for display - use list of lists for Gradio
174
+ colors_data = []
175
+ for color in sorted(result.colors, key=lambda c: -c.frequency)[:50]:
176
+ colors_data.append([
177
+ True, # Accept
178
+ color.value, # Color
179
+ color.frequency, # Frequency
180
+ ", ".join(color.contexts[:3]) if color.contexts else "", # Context
181
+ f"{color.contrast_white:.1f}:1", # Contrast
182
+ "βœ“" if color.wcag_aa_small_text else "βœ—", # AA Text
183
+ color.confidence.value if color.confidence else "low" # Confidence
184
+ ])
185
+
186
+ progress(0.93, desc="πŸ“ Processing typography...")
187
+
188
+ # Format typography for display
189
+ typography_data = []
190
+ for typo in sorted(result.typography, key=lambda t: -t.frequency)[:30]:
191
+ typography_data.append([
192
+ True, # Accept
193
+ typo.font_family, # Font
194
+ typo.font_size, # Size
195
+ typo.font_weight, # Weight
196
+ typo.line_height if typo.line_height else "", # Line Height
197
+ ", ".join(typo.elements[:3]) if typo.elements else "", # Elements
198
+ typo.frequency # Frequency
199
+ ])
200
+
201
+ progress(0.96, desc="πŸ“ Processing spacing...")
202
+
203
+ # Format spacing for display
204
+ spacing_data = []
205
+ for space in sorted(result.spacing, key=lambda s: s.value_px)[:20]:
206
+ spacing_data.append([
207
+ True, # Accept
208
+ space.value, # Value
209
+ space.frequency, # Frequency
210
+ ", ".join(space.contexts[:2]) if space.contexts else "", # Context
211
+ "βœ“" if space.fits_base_8 else "", # Fits 8px
212
+ "⚠" if space.is_outlier else "" # Outlier
213
+ ])
214
+
215
+ progress(1.0, desc="βœ… Extraction complete!")
216
+
217
+ # Summary
218
+ status = f"""βœ… **Extraction Complete** ({viewport.value})
219
+
220
+ ### πŸ“Š Summary
221
+ | Metric | Value |
222
+ |--------|-------|
223
+ | Pages crawled | {len(result.pages_crawled)} |
224
+ | Colors found | {len(result.colors)} |
225
+ | Typography styles | {len(result.typography)} |
226
+ | Spacing values | {len(result.spacing)} |
227
+ | Font families | {len(result.font_families)} |
228
+ | Spacing base | {result.spacing_base or 'Unknown'}px |
229
+ | Duration | {result.extraction_duration_ms}ms |
230
+ """
231
+
232
+ if result.warnings:
233
+ status += f"\n⚠️ **Warnings:** {len(result.warnings)}"
234
+ if result.errors:
235
+ status += f"\n❌ **Errors:** {len(result.errors)}"
236
+ for err in result.errors[:3]:
237
+ status += f"\n- {err}"
238
+
239
+ return status, colors_data, typography_data, spacing_data
240
+
241
+ except Exception as e:
242
+ import traceback
243
+ return f"❌ Extraction failed: {str(e)}\n\n```\n{traceback.format_exc()}\n```", None, None, None
244
+
245
+
246
+ def export_tokens_json():
247
+ """Export current tokens to JSON."""
248
+ import json
249
+
250
+ result = {}
251
+
252
+ if "desktop_tokens" in current_extraction:
253
+ desktop = current_extraction["desktop_tokens"]
254
+ result["desktop"] = {
255
+ "colors": [c.model_dump() for c in desktop.colors],
256
+ "typography": [t.model_dump() for t in desktop.typography],
257
+ "spacing": [s.model_dump() for s in desktop.spacing],
258
+ "metadata": desktop.summary(),
259
+ }
260
+
261
+ if "mobile_tokens" in current_extraction:
262
+ mobile = current_extraction["mobile_tokens"]
263
+ result["mobile"] = {
264
+ "colors": [c.model_dump() for c in mobile.colors],
265
+ "typography": [t.model_dump() for t in mobile.typography],
266
+ "spacing": [s.model_dump() for s in mobile.spacing],
267
+ "metadata": mobile.summary(),
268
+ }
269
+
270
+ if not result:
271
+ return '{"error": "No tokens extracted yet. Please run extraction first."}'
272
+
273
+ return json.dumps(result, indent=2, default=str)
274
+
275
+
276
+ # =============================================================================
277
+ # UI BUILDING
278
+ # =============================================================================
279
+
280
+ def create_ui():
281
+ """Create the Gradio interface."""
282
+
283
+ with gr.Blocks(
284
+ title="Design System Extractor v2",
285
+ theme=gr.themes.Soft(),
286
+ ) as app:
287
+
288
+ # Header
289
+ gr.Markdown("""
290
+ # 🎨 Design System Extractor v2
291
+
292
+ **Reverse-engineer design systems from live websites.**
293
+
294
+ Extract colors, typography, and spacing tokens from any website and export to Figma-compatible JSON.
295
+
296
+ ---
297
+ """)
298
+
299
+ # =================================================================
300
+ # CONFIGURATION SECTION
301
+ # =================================================================
302
+
303
+ with gr.Accordion("βš™οΈ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):
304
+
305
+ gr.Markdown("""
306
+ **HuggingFace Token** is required for AI-powered features (Agent 2-4).
307
+ Get your token at: [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
308
+
309
+ *Note: Basic extraction (Agent 1) works without a token.*
310
+ """)
311
+
312
+ with gr.Row():
313
+ hf_token_input = gr.Textbox(
314
+ label="HuggingFace Token",
315
+ placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
316
+ type="password",
317
+ scale=4,
318
+ value=HF_TOKEN_FROM_ENV if HF_TOKEN_FROM_ENV else "",
319
+ )
320
+ save_token_btn = gr.Button("πŸ’Ύ Save Token", scale=1)
321
+
322
+ token_status = gr.Markdown(
323
+ "βœ… Token loaded from environment" if HF_TOKEN_FROM_ENV else "⏳ Enter your HF token to enable all features"
324
+ )
325
+
326
+ save_token_btn.click(
327
+ fn=set_hf_token,
328
+ inputs=[hf_token_input],
329
+ outputs=[token_status],
330
+ )
331
+
332
+ # =================================================================
333
+ # STAGE 1: URL Input & Discovery
334
+ # =================================================================
335
+
336
+ with gr.Accordion("πŸ“ Stage 1: Website Discovery", open=True):
337
+
338
+ gr.Markdown("""
339
+ **Step 1:** Enter your website URL and discover pages.
340
+ The system will automatically find and classify pages for extraction.
341
+ """)
342
+
343
+ with gr.Row():
344
+ url_input = gr.Textbox(
345
+ label="Website URL",
346
+ placeholder="https://example.com",
347
+ scale=4,
348
+ )
349
+ discover_btn = gr.Button("πŸ” Discover Pages", variant="primary", scale=1)
350
+
351
+ discovery_status = gr.Markdown("")
352
+
353
+ pages_table = gr.Dataframe(
354
+ headers=["Select", "URL", "Title", "Type", "Status"],
355
+ datatype=["bool", "str", "str", "str", "str"],
356
+ interactive=True,
357
+ label="Discovered Pages",
358
+ visible=False,
359
+ col_count=(5, "fixed"),
360
+ )
361
+
362
+ # =================================================================
363
+ # STAGE 2: Extraction
364
+ # =================================================================
365
+
366
+ with gr.Accordion("πŸ”¬ Stage 2: Token Extraction", open=False):
367
+
368
+ gr.Markdown("""
369
+ **Step 2:** Select pages and viewport, then extract design tokens.
370
+ """)
371
+
372
+ with gr.Row():
373
+ viewport_radio = gr.Radio(
374
+ choices=["Desktop (1440px)", "Mobile (375px)"],
375
+ value="Desktop (1440px)",
376
+ label="Viewport",
377
+ )
378
+ extract_btn = gr.Button("πŸš€ Extract Tokens", variant="primary")
379
+
380
+ extraction_status = gr.Markdown("")
381
+
382
+ with gr.Tabs():
383
+ with gr.Tab("🎨 Colors"):
384
+ colors_table = gr.Dataframe(
385
+ headers=["Accept", "Color", "Frequency", "Context", "Contrast (White)", "AA Text", "Confidence"],
386
+ datatype=["bool", "str", "number", "str", "str", "str", "str"],
387
+ interactive=True,
388
+ label="Extracted Colors",
389
+ )
390
+
391
+ with gr.Tab("πŸ“ Typography"):
392
+ typography_table = gr.Dataframe(
393
+ headers=["Accept", "Font", "Size", "Weight", "Line Height", "Elements", "Frequency"],
394
+ datatype=["bool", "str", "str", "number", "str", "str", "number"],
395
+ interactive=True,
396
+ label="Extracted Typography",
397
+ )
398
+
399
+ with gr.Tab("πŸ“ Spacing"):
400
+ spacing_table = gr.Dataframe(
401
+ headers=["Accept", "Value", "Frequency", "Context", "Fits 8px", "Outlier"],
402
+ datatype=["bool", "str", "number", "str", "str", "str"],
403
+ interactive=True,
404
+ label="Extracted Spacing",
405
+ )
406
+
407
+ # =================================================================
408
+ # STAGE 3: Export
409
+ # =================================================================
410
+
411
+ with gr.Accordion("πŸ“¦ Stage 3: Export", open=False):
412
+
413
+ gr.Markdown("""
414
+ **Step 3:** Review and export your design tokens.
415
+ """)
416
+
417
+ with gr.Row():
418
+ export_btn = gr.Button("πŸ“₯ Export JSON", variant="secondary")
419
+
420
+ export_output = gr.Code(
421
+ label="Exported Tokens (JSON)",
422
+ language="json",
423
+ lines=20,
424
+ )
425
+
426
+ # =================================================================
427
+ # EVENT HANDLERS
428
+ # =================================================================
429
+
430
+ # Discovery
431
+ discover_btn.click(
432
+ fn=discover_site_pages,
433
+ inputs=[url_input],
434
+ outputs=[discovery_status, pages_table],
435
+ ).then(
436
+ fn=lambda: gr.update(visible=True),
437
+ outputs=[pages_table],
438
+ )
439
+
440
+ # Extraction
441
+ extract_btn.click(
442
+ fn=start_extraction,
443
+ inputs=[pages_table, viewport_radio],
444
+ outputs=[extraction_status, colors_table, typography_table, spacing_table],
445
+ )
446
+
447
+ # Export
448
+ export_btn.click(
449
+ fn=export_tokens_json,
450
+ outputs=[export_output],
451
+ )
452
+
453
+ # =================================================================
454
+ # FOOTER
455
+ # =================================================================
456
+
457
+ gr.Markdown("""
458
+ ---
459
+
460
+ **Design System Extractor v2** | Built with LangGraph + Gradio + HuggingFace
461
+
462
+ *A semi-automated co-pilot for design system recovery and modernization.*
463
+
464
+ **Models:** Microsoft Phi (Normalizer) β€’ Meta Llama (Advisor) β€’ Mistral Codestral (Generator)
465
+ """)
466
+
467
+ return app
468
+
469
+
470
+ # =============================================================================
471
+ # MAIN
472
+ # =============================================================================
473
+
474
+ if __name__ == "__main__":
475
+ app = create_ui()
476
+ app.launch(
477
+ server_name="0.0.0.0",
478
+ server_port=7860,
479
+ )