riazmo committed on
Commit
38d9cec
·
verified ·
1 Parent(s): bcbb324

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +482 -0
app.py ADDED
@@ -0,0 +1,482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Design System Extractor v2 β€” Main Application
3
+ ==============================================
4
+
5
+ A semi-automated, human-in-the-loop agentic system that reverse-engineers
6
+ design systems from live websites.
7
+
8
+ Usage:
9
+ python app.py
10
+ """
11
+
12
+ import os
13
+ import asyncio
14
+ import gradio as gr
15
+ from datetime import datetime
16
+
17
# Get HF token from environment if available; used to pre-fill the UI token
# field and decide whether the Configuration accordion starts open.
HF_TOKEN_FROM_ENV = os.getenv("HF_TOKEN", "")
19
+
20
+ # =============================================================================
21
+ # GLOBAL STATE
22
+ # =============================================================================
23
+
24
# Session-wide extraction state shared by the Gradio handlers. Known keys:
# "discovered_pages", "base_url", and "<viewport>_tokens"
# (e.g. "desktop_tokens" / "mobile_tokens").
current_extraction: dict = {}
# Token entered via the UI; mirrored into os.environ["HF_TOKEN"] by set_hf_token.
user_hf_token: str = ""
26
+
27
+
28
+ # =============================================================================
29
+ # HF TOKEN MANAGEMENT
30
+ # =============================================================================
31
+
32
def set_hf_token(token: str) -> str:
    """Persist the user's HuggingFace token for later AI-agent calls.

    The token is stored both in module-level state and in the process
    environment (``HF_TOKEN``) so downstream libraries can pick it up.
    Returns a status message for display in the UI.
    """
    global user_hf_token

    # Reject empty or implausibly short tokens before touching any state.
    looks_valid = bool(token) and len(token) >= 10
    if not looks_valid:
        return "❌ Please enter a valid HuggingFace token"

    user_hf_token = token.strip()
    os.environ["HF_TOKEN"] = user_hf_token
    return "βœ… Token saved! You can now use the extractor."
43
+
44
+
45
+ # =============================================================================
46
+ # LAZY IMPORTS (avoid circular imports at startup)
47
+ # =============================================================================
48
+
49
# Cached module handles; populated on first use to avoid circular imports
# at application startup.
_crawler_module = None
_extractor_module = None
_schema_module = None

def get_crawler():
    """Return the ``agents.crawler`` module, importing it on first call."""
    global _crawler_module
    if _crawler_module is not None:
        return _crawler_module
    import agents.crawler
    _crawler_module = agents.crawler
    return _crawler_module
59
+
60
def get_extractor():
    """Return the ``agents.extractor`` module, importing it on first call."""
    global _extractor_module
    if _extractor_module is not None:
        return _extractor_module
    import agents.extractor
    _extractor_module = agents.extractor
    return _extractor_module
66
+
67
def get_schema():
    """Return the ``core.token_schema`` module, importing it on first call."""
    global _schema_module
    if _schema_module is not None:
        return _schema_module
    import core.token_schema
    _schema_module = core.token_schema
    return _schema_module
73
+
74
+
75
+ # =============================================================================
76
+ # STAGE 1: URL INPUT & PAGE DISCOVERY
77
+ # =============================================================================
78
+
79
async def discover_site_pages(url: str, progress=gr.Progress()) -> tuple:
    """
    Discover pages from a website URL.

    Args:
        url: Fully-qualified site URL; must start with http:// or https://.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        Tuple of (status_message, pages_rows) where ``pages_rows`` is a list
        of [Select, URL, Title, Type, Status] rows for the pages Dataframe,
        or ``None`` on failure.
    """
    if not url or not url.startswith(("http://", "https://")):
        # BUG FIX: this branch previously returned three values while the
        # click handler has only two outputs (status + table) and every other
        # branch returns two; the mismatch broke the validation path.
        return "❌ Please enter a valid URL starting with http:// or https://", None

    progress(0, desc="πŸš€ Initializing browser...")

    try:
        crawler = get_crawler()
        discoverer = crawler.PageDiscoverer()

        def update_progress(p):
            progress(p, desc=f"πŸ” Discovering pages... ({int(p*100)}%)")

        pages = await discoverer.discover(url, progress_callback=update_progress)

        progress(1.0, desc="βœ… Discovery complete!")

        # Flatten page objects into plain values; Gradio Dataframes cannot
        # render arbitrary objects.
        pages_data = []
        for page in pages:
            pages_data.append([
                page.selected,                                   # Select (bool)
                page.url,                                        # URL (str)
                page.title if page.title else "(No title)",      # Title (str)
                page.page_type.value,                            # Type (str)
                "βœ“" if not page.error else f"⚠ {page.error}"  # Status (str)
            ])

        # Stash results for the extraction stage.
        current_extraction["discovered_pages"] = pages
        current_extraction["base_url"] = url

        status = f"βœ… Found {len(pages)} pages. Select the pages you want to extract tokens from."

        return status, pages_data

    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
123
+
124
+
125
async def start_extraction(pages_selection, viewport_choice: str, progress=gr.Progress()) -> tuple:
    """
    Start token extraction from selected pages.

    Args:
        pages_selection: Rows from the discovery Dataframe; either a list of
            [Select, URL, Title, Type, Status] lists or a list of dicts with
            "Select"/"URL" keys (Gradio can deliver either shape).
        viewport_choice: Radio label, "Desktop (1440px)" or "Mobile (375px)".
        progress: Gradio progress tracker (injected by the UI).

    Returns tuple of (status, colors_data, typography_data, spacing_data);
    the three data items are list-of-lists rows for the result Dataframes,
    or None on early exit/failure.
    """
    if pages_selection is None or len(pages_selection) == 0:
        return "❌ Please discover pages first", None, None, None

    progress(0, desc="πŸ”„ Preparing extraction...")

    # Get selected URLs from the dataframe
    selected_urls = []

    # Handle both list of lists and list of dicts formats
    for row in pages_selection:
        if isinstance(row, (list, tuple)):
            # Format: [Select, URL, Title, Type, Status]
            if len(row) >= 2 and row[0]:  # row[0] is the Select checkbox
                selected_urls.append(row[1])  # row[1] is the URL
        elif isinstance(row, dict):
            if row.get("Select", False):
                selected_urls.append(row.get("URL", ""))

    if not selected_urls:
        return "❌ Please select at least one page using the checkboxes", None, None, None

    progress(0.05, desc=f"πŸ“‹ Selected {len(selected_urls)} pages for extraction...")

    # Determine viewport from the radio label (exact string match).
    schema = get_schema()
    viewport = schema.Viewport.DESKTOP if viewport_choice == "Desktop (1440px)" else schema.Viewport.MOBILE

    try:
        extractor_mod = get_extractor()
        extractor = extractor_mod.TokenExtractor(viewport=viewport)

        def update_progress(p):
            # Scale the extractor's 0..1 progress into the 0.1..0.9 band so
            # pre/post-processing steps own the remaining range.
            scaled = 0.1 + (p * 0.8)
            progress(scaled, desc=f"πŸ”¬ Extracting tokens... ({int(p*100)}%)")

        progress(0.1, desc=f"🌐 Starting {viewport.value} extraction...")

        result = await extractor.extract(selected_urls, progress_callback=update_progress)

        progress(0.9, desc="πŸ“Š Processing results...")

        # Store result keyed by viewport so desktop/mobile runs coexist.
        current_extraction[f"{viewport.value}_tokens"] = result

        # Format colors for display - top 50 by frequency, as list of lists
        # (Gradio Dataframes cannot render arbitrary objects).
        colors_data = []
        for color in sorted(result.colors, key=lambda c: -c.frequency)[:50]:
            colors_data.append([
                True,  # Accept
                color.value,  # Color
                color.frequency,  # Frequency
                ", ".join(color.contexts[:3]) if color.contexts else "",  # Context
                f"{color.contrast_white:.1f}:1",  # Contrast vs white
                "βœ“" if color.wcag_aa_small_text else "βœ—",  # AA Text
                color.confidence.value if color.confidence else "low"  # Confidence
            ])

        progress(0.93, desc="πŸ“ Processing typography...")

        # Format typography for display - top 30 by frequency.
        typography_data = []
        for typo in sorted(result.typography, key=lambda t: -t.frequency)[:30]:
            typography_data.append([
                True,  # Accept
                typo.font_family,  # Font
                typo.font_size,  # Size
                typo.font_weight,  # Weight
                typo.line_height if typo.line_height else "",  # Line Height
                ", ".join(typo.elements[:3]) if typo.elements else "",  # Elements
                typo.frequency  # Frequency
            ])

        progress(0.96, desc="πŸ“ Processing spacing...")

        # Format spacing for display - smallest 20 values, ascending.
        spacing_data = []
        for space in sorted(result.spacing, key=lambda s: s.value_px)[:20]:
            spacing_data.append([
                True,  # Accept
                space.value,  # Value
                space.frequency,  # Frequency
                ", ".join(space.contexts[:2]) if space.contexts else "",  # Context
                "βœ“" if space.fits_base_8 else "",  # Fits 8px grid
                "⚠" if space.is_outlier else ""  # Outlier
            ])

        progress(1.0, desc="βœ… Extraction complete!")

        # Markdown summary rendered into the status panel.
        status = f"""βœ… **Extraction Complete** ({viewport.value})

### πŸ“Š Summary
| Metric | Value |
|--------|-------|
| Pages crawled | {len(result.pages_crawled)} |
| Colors found | {len(result.colors)} |
| Typography styles | {len(result.typography)} |
| Spacing values | {len(result.spacing)} |
| Font families | {len(result.font_families)} |
| Spacing base | {result.spacing_base or 'Unknown'}px |
| Duration | {result.extraction_duration_ms}ms |
"""

        if result.warnings:
            status += f"\n⚠️ **Warnings:** {len(result.warnings)}"
        if result.errors:
            status += f"\n❌ **Errors:** {len(result.errors)}"
            # Show at most the first three errors to keep the panel readable.
            for err in result.errors[:3]:
                status += f"\n- {err}"

        return status, colors_data, typography_data, spacing_data

    except Exception as e:
        import traceback
        return f"❌ Extraction failed: {str(e)}\n\n```\n{traceback.format_exc()}\n```", None, None, None
247
+
248
+
249
def export_tokens_json():
    """Serialize the extracted tokens to a pretty-printed JSON string.

    Includes a top-level section per viewport ("desktop"/"mobile") that has
    completed extraction; returns an error JSON if nothing was extracted yet.
    """
    import json

    result = {}

    # Both viewports share the same export shape, so build them in one loop.
    for viewport_name in ("desktop", "mobile"):
        store_key = f"{viewport_name}_tokens"
        if store_key not in current_extraction:
            continue
        tokens = current_extraction[store_key]
        result[viewport_name] = {
            "colors": [c.model_dump() for c in tokens.colors],
            "typography": [t.model_dump() for t in tokens.typography],
            "spacing": [s.model_dump() for s in tokens.spacing],
            "metadata": tokens.summary(),
        }

    if not result:
        return '{"error": "No tokens extracted yet. Please run extraction first."}'

    # default=str keeps non-JSON-native values (enums, datetimes) exportable.
    return json.dumps(result, indent=2, default=str)
277
+
278
+
279
+ # =============================================================================
280
+ # UI BUILDING
281
+ # =============================================================================
282
+
283
def create_ui():
    """Create the Gradio interface.

    Builds a Blocks app with a configuration accordion plus three workflow
    stages (discovery, extraction, export) and wires their event handlers.
    Returns the Blocks instance; the caller launches it.
    """

    with gr.Blocks(
        title="Design System Extractor v2",
        theme=gr.themes.Soft(),
    ) as app:

        # Header
        gr.Markdown("""
        # 🎨 Design System Extractor v2

        **Reverse-engineer design systems from live websites.**

        Extract colors, typography, and spacing tokens from any website and export to Figma-compatible JSON.

        ---
        """)

        # =================================================================
        # CONFIGURATION SECTION
        # =================================================================

        # Accordion starts collapsed when a token was already found in env.
        with gr.Accordion("βš™οΈ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):

            gr.Markdown("""
            **HuggingFace Token** is required for AI-powered features (Agent 2-4).
            Get your token at: [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)

            *Note: Basic extraction (Agent 1) works without a token.*
            """)

            with gr.Row():
                hf_token_input = gr.Textbox(
                    label="HuggingFace Token",
                    placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
                    type="password",
                    scale=4,
                    value=HF_TOKEN_FROM_ENV if HF_TOKEN_FROM_ENV else "",
                )
                save_token_btn = gr.Button("πŸ’Ύ Save Token", scale=1)

            token_status = gr.Markdown(
                "βœ… Token loaded from environment" if HF_TOKEN_FROM_ENV else "⏳ Enter your HF token to enable all features"
            )

            save_token_btn.click(
                fn=set_hf_token,
                inputs=[hf_token_input],
                outputs=[token_status],
            )

        # =================================================================
        # STAGE 1: URL Input & Discovery
        # =================================================================

        with gr.Accordion("πŸ“ Stage 1: Website Discovery", open=True):

            gr.Markdown("""
            **Step 1:** Enter your website URL and discover pages.
            The system will automatically find and classify pages for extraction.
            """)

            with gr.Row():
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://example.com",
                    scale=4,
                )
                discover_btn = gr.Button("πŸ” Discover Pages", variant="primary", scale=1)

            discovery_status = gr.Markdown("")

            # Hidden until discovery succeeds (see the .then() handler below).
            pages_table = gr.Dataframe(
                headers=["Select", "URL", "Title", "Type", "Status"],
                datatype=["bool", "str", "str", "str", "str"],
                interactive=True,
                label="Discovered Pages",
                visible=False,
                col_count=(5, "fixed"),
            )

        # =================================================================
        # STAGE 2: Extraction
        # =================================================================

        with gr.Accordion("πŸ”¬ Stage 2: Token Extraction", open=False):

            gr.Markdown("""
            **Step 2:** Select pages and viewport, then extract design tokens.
            """)

            with gr.Row():
                # Labels must match the exact strings checked in start_extraction.
                viewport_radio = gr.Radio(
                    choices=["Desktop (1440px)", "Mobile (375px)"],
                    value="Desktop (1440px)",
                    label="Viewport",
                )
                extract_btn = gr.Button("πŸš€ Extract Tokens", variant="primary")

            extraction_status = gr.Markdown("")

            with gr.Tabs():
                with gr.Tab("🎨 Colors"):
                    colors_table = gr.Dataframe(
                        headers=["Accept", "Color", "Frequency", "Context", "Contrast (White)", "AA Text", "Confidence"],
                        datatype=["bool", "str", "number", "str", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Colors",
                    )

                with gr.Tab("πŸ“ Typography"):
                    typography_table = gr.Dataframe(
                        headers=["Accept", "Font", "Size", "Weight", "Line Height", "Elements", "Frequency"],
                        datatype=["bool", "str", "str", "number", "str", "str", "number"],
                        interactive=True,
                        label="Extracted Typography",
                    )

                with gr.Tab("πŸ“ Spacing"):
                    spacing_table = gr.Dataframe(
                        headers=["Accept", "Value", "Frequency", "Context", "Fits 8px", "Outlier"],
                        datatype=["bool", "str", "number", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Spacing",
                    )

        # =================================================================
        # STAGE 3: Export
        # =================================================================

        with gr.Accordion("πŸ“¦ Stage 3: Export", open=False):

            gr.Markdown("""
            **Step 3:** Review and export your design tokens.
            """)

            with gr.Row():
                export_btn = gr.Button("πŸ“₯ Export JSON", variant="secondary")

            export_output = gr.Code(
                label="Exported Tokens (JSON)",
                language="json",
                lines=20,
            )

        # =================================================================
        # EVENT HANDLERS
        # =================================================================

        # Discovery: populate the table, then reveal it.
        discover_btn.click(
            fn=discover_site_pages,
            inputs=[url_input],
            outputs=[discovery_status, pages_table],
        ).then(
            fn=lambda: gr.update(visible=True),
            outputs=[pages_table],
        )

        # Extraction
        extract_btn.click(
            fn=start_extraction,
            inputs=[pages_table, viewport_radio],
            outputs=[extraction_status, colors_table, typography_table, spacing_table],
        )

        # Export
        export_btn.click(
            fn=export_tokens_json,
            outputs=[export_output],
        )

        # =================================================================
        # FOOTER
        # =================================================================

        gr.Markdown("""
        ---

        **Design System Extractor v2** | Built with LangGraph + Gradio + HuggingFace

        *A semi-automated co-pilot for design system recovery and modernization.*

        **Models:** Microsoft Phi (Normalizer) β€’ Meta Llama (Advisor) β€’ Mistral Codestral (Generator)
        """)

    return app
471
+
472
+
473
+ # =============================================================================
474
+ # MAIN
475
+ # =============================================================================
476
+
477
if __name__ == "__main__":
    app = create_ui()
    # Bind to all interfaces on port 7860 (the conventional Gradio /
    # HuggingFace Spaces port).
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )