riazmo commited on
Commit
ad4e018
Β·
verified Β·
1 Parent(s): 17a7a81

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +453 -0
app.py ADDED
@@ -0,0 +1,453 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Design System Extractor v2 β€” Main Application
3
+ ==============================================
4
+
5
+ A semi-automated, human-in-the-loop agentic system that reverse-engineers
6
+ design systems from live websites.
7
+
8
+ Usage:
9
+ python app.py
10
+ """
11
+
12
+ import os
13
+ import asyncio
14
+ import gradio as gr
15
+ from datetime import datetime
16
+
17
+ # Get HF token from environment if available
18
+ HF_TOKEN_FROM_ENV = os.getenv("HF_TOKEN", "")
19
+
20
+ # =============================================================================
21
+ # GLOBAL STATE
22
+ # =============================================================================
23
+
24
+ current_extraction: dict = {}
25
+ user_hf_token: str = ""
26
+
27
+
28
+ # =============================================================================
29
+ # HF TOKEN MANAGEMENT
30
+ # =============================================================================
31
+
32
def set_hf_token(token: str) -> str:
    """Store the HuggingFace token globally and expose it via HF_TOKEN.

    Args:
        token: Raw token string entered by the user.

    Returns:
        A status message suitable for display in the UI.
    """
    global user_hf_token

    # Strip BEFORE validating: the original checked len() on the raw input,
    # so a whitespace-padded value could pass the length check and an empty
    # string would end up saved as the token.
    token = (token or "").strip()

    if len(token) < 10:
        return "❌ Please enter a valid HuggingFace token"

    user_hf_token = token
    # Mirror into the environment so downstream libraries pick it up.
    os.environ["HF_TOKEN"] = user_hf_token

    return "✅ Token saved! You can now use the extractor."
44
+
45
# =============================================================================
# LAZY IMPORTS (avoid circular imports at startup)
# =============================================================================

# Cached module references, populated on first use by the get_* helpers below.
_crawler_module = None
_extractor_module = None
_schema_module = None
52
+
53
def get_crawler():
    """Return the crawler agent module, importing it on first use."""
    global _crawler_module
    if _crawler_module is not None:
        return _crawler_module
    from agents import crawler as _crawler_module
    return _crawler_module
58
+
59
def get_extractor():
    """Return the extractor agent module, importing it on first use."""
    global _extractor_module
    if _extractor_module is not None:
        return _extractor_module
    from agents import extractor as _extractor_module
    return _extractor_module
64
+
65
def get_schema():
    """Return the token-schema module, importing it on first use."""
    global _schema_module
    if _schema_module is not None:
        return _schema_module
    from core import token_schema as _schema_module
    return _schema_module
70
+
71
+
72
+ # =============================================================================
73
+ # STAGE 1: URL INPUT & PAGE DISCOVERY
74
+ # =============================================================================
75
+
76
async def discover_site_pages(url: str, progress=gr.Progress()) -> tuple:
    """
    Discover pages from a website URL.

    Returns tuple of (status_message, pages_dataframe, pages_json)
    """
    # Guard: reject anything that is not an absolute http(s) URL.
    if not url or not url.startswith(("http://", "https://")):
        return "❌ Please enter a valid URL starting with http:// or https://", None, None

    progress(0, desc="Initializing browser...")

    try:
        discoverer = get_crawler().PageDiscoverer()

        def on_progress(fraction):
            progress(fraction, desc=f"Discovering pages... ({int(fraction*100)}%)")

        pages = await discoverer.discover(url, progress_callback=on_progress)

        # One display row per discovered page, in discovery order.
        pages_data = [
            {
                "Select": page.selected,
                "URL": page.url,
                "Title": page.title or "(No title)",
                "Type": page.page_type.value,
                "Status": "✓" if not page.error else f"⚠ {page.error}",
            }
            for page in pages
        ]

        # Keep the raw results around for the extraction stage.
        current_extraction["discovered_pages"] = pages
        current_extraction["base_url"] = url

        status = f"✅ Found {len(pages)} pages. Select the pages you want to extract tokens from."

        return status, pages_data, [p.model_dump() for p in pages]

    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None, None
118
+
119
+
120
async def start_extraction(pages_selection: list, viewport_choice: str, progress=gr.Progress()) -> tuple:
    """
    Start token extraction from selected pages.

    Returns tuple of (status, colors_data, typography_data, spacing_data)
    """
    if not pages_selection:
        return "❌ Please select at least one page", None, None, None

    # Collect the URLs whose "Select" checkbox is ticked.
    selected_urls = [row["URL"] for row in pages_selection if row.get("Select", False)]

    if not selected_urls:
        return "❌ Please select at least one page using the checkboxes", None, None, None

    # Map the radio label onto the schema's viewport enum.
    schema = get_schema()
    if viewport_choice == "Desktop (1440px)":
        viewport = schema.Viewport.DESKTOP
    else:
        viewport = schema.Viewport.MOBILE

    progress(0, desc=f"Starting {viewport.value} extraction...")

    try:
        extractor = get_extractor().TokenExtractor(viewport=viewport)

        def on_progress(fraction):
            progress(fraction, desc=f"Extracting tokens... ({int(fraction*100)}%)")

        result = await extractor.extract(selected_urls, progress_callback=on_progress)

        # Keep the result for the export stage.
        current_extraction[f"{viewport.value}_tokens"] = result

        # Top 50 colors, most frequent first.
        colors_data = [
            {
                "Accept": True,
                "Color": color.value,
                "Frequency": color.frequency,
                "Context": ", ".join(color.contexts[:3]),
                "Contrast (White)": f"{color.contrast_white}:1",
                "AA Text": "✓" if color.wcag_aa_small_text else "✗",
                "Confidence": color.confidence.value,
            }
            for color in sorted(result.colors, key=lambda c: c.frequency, reverse=True)[:50]
        ]

        # Top 30 typography styles, most frequent first.
        typography_data = [
            {
                "Accept": True,
                "Font": typo.font_family,
                "Size": typo.font_size,
                "Weight": typo.font_weight,
                "Line Height": typo.line_height,
                "Elements": ", ".join(typo.elements[:3]),
                "Frequency": typo.frequency,
            }
            for typo in sorted(result.typography, key=lambda t: t.frequency, reverse=True)[:30]
        ]

        # Smallest 20 spacing values, ascending by pixel size.
        spacing_data = [
            {
                "Accept": True,
                "Value": space.value,
                "Frequency": space.frequency,
                "Context": ", ".join(space.contexts[:2]),
                "Fits 8px": "✓" if space.fits_base_8 else "",
                "Outlier": "⚠" if space.is_outlier else "",
            }
            for space in sorted(result.spacing, key=lambda s: s.value_px)[:20]
        ]

        # Summary
        status = f"""✅ Extraction Complete ({viewport.value})

**Summary:**
- Pages crawled: {len(result.pages_crawled)}
- Colors found: {len(result.colors)}
- Typography styles: {len(result.typography)}
- Spacing values: {len(result.spacing)}
- Font families: {len(result.font_families)}
- Detected spacing base: {result.spacing_base or 'Unknown'}px
- Duration: {result.extraction_duration_ms}ms
"""

        if result.warnings:
            status += f"\n⚠️ Warnings: {len(result.warnings)}"
        if result.errors:
            status += f"\n❌ Errors: {len(result.errors)}"

        return status, colors_data, typography_data, spacing_data

    except Exception as e:
        import traceback
        return f"❌ Extraction failed: {str(e)}\n\n{traceback.format_exc()}", None, None, None
217
+
218
+
219
def export_tokens_json():
    """Export current tokens to JSON.

    Serializes whichever of the desktop/mobile extraction results are
    present in ``current_extraction``.

    Returns:
        A JSON string of the exported tokens, or a JSON error object when
        nothing has been extracted yet.
    """
    import json

    result = {}

    # The desktop and mobile results have an identical shape, so serialize
    # them with one loop instead of two duplicated field-for-field branches.
    for viewport_key in ("desktop", "mobile"):
        tokens = current_extraction.get(f"{viewport_key}_tokens")
        if tokens is None:
            continue
        result[viewport_key] = {
            "colors": [c.model_dump() for c in tokens.colors],
            "typography": [t.model_dump() for t in tokens.typography],
            "spacing": [s.model_dump() for s in tokens.spacing],
            "metadata": tokens.summary(),
        }

    if not result:
        return '{"error": "No tokens extracted yet. Please run extraction first."}'

    # default=str keeps non-JSON-native fields (enums, datetimes) exportable.
    return json.dumps(result, indent=2, default=str)
247
+
248
+
249
+ # =============================================================================
250
+ # UI BUILDING
251
+ # =============================================================================
252
+
253
def create_ui():
    """Create the Gradio interface.

    Builds the three-stage workflow (discover -> extract -> export) plus the
    HF-token configuration section, and wires up the event handlers.
    """

    with gr.Blocks(
        title="Design System Extractor v2",
        theme=gr.themes.Soft(),
    ) as app:

        # Header
        gr.Markdown("""
        # 🎨 Design System Extractor v2

        **Reverse-engineer design systems from live websites.**

        Extract colors, typography, and spacing tokens from any website and export to Figma-compatible JSON.

        ---
        """)

        # =================================================================
        # CONFIGURATION SECTION
        # =================================================================

        # Opens automatically when no token was found in the environment,
        # prompting the user to supply one.
        with gr.Accordion("⚙️ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):

            gr.Markdown("""
            **HuggingFace Token** is required for AI-powered features (Agent 2-4).
            Get your token at: [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)

            *Note: Basic extraction (Agent 1) works without a token.*
            """)

            with gr.Row():
                hf_token_input = gr.Textbox(
                    label="HuggingFace Token",
                    placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
                    type="password",
                    scale=4,
                    value=HF_TOKEN_FROM_ENV if HF_TOKEN_FROM_ENV else "",
                )
                save_token_btn = gr.Button("💾 Save Token", scale=1)

            token_status = gr.Markdown(
                "✅ Token loaded from environment" if HF_TOKEN_FROM_ENV else "⏳ Enter your HF token to enable all features"
            )

            save_token_btn.click(
                fn=set_hf_token,
                inputs=[hf_token_input],
                outputs=[token_status],
            )

        # =================================================================
        # STAGE 1: URL Input & Discovery
        # =================================================================

        with gr.Accordion("📍 Stage 1: Website Discovery", open=True):

            gr.Markdown("""
            **Step 1:** Enter your website URL and discover pages.
            The system will automatically find and classify pages for extraction.
            """)

            with gr.Row():
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://example.com",
                    scale=4,
                )
                discover_btn = gr.Button("🔍 Discover Pages", variant="primary", scale=1)

            discovery_status = gr.Markdown("")

            # Hidden until discovery succeeds (see the .then() handler below).
            pages_table = gr.Dataframe(
                headers=["Select", "URL", "Title", "Type", "Status"],
                datatype=["bool", "str", "str", "str", "str"],
                interactive=True,
                label="Discovered Pages",
                visible=False,
            )

            pages_json = gr.JSON(visible=False)

        # =================================================================
        # STAGE 2: Extraction
        # =================================================================

        with gr.Accordion("🔬 Stage 2: Token Extraction", open=False):

            gr.Markdown("""
            **Step 2:** Select pages and viewport, then extract design tokens.
            """)

            with gr.Row():
                viewport_radio = gr.Radio(
                    choices=["Desktop (1440px)", "Mobile (375px)"],
                    value="Desktop (1440px)",
                    label="Viewport",
                )
                extract_btn = gr.Button("🚀 Extract Tokens", variant="primary")

            extraction_status = gr.Markdown("")

            with gr.Tabs():
                with gr.Tab("🎨 Colors"):
                    colors_table = gr.Dataframe(
                        headers=["Accept", "Color", "Frequency", "Context", "Contrast (White)", "AA Text", "Confidence"],
                        datatype=["bool", "str", "number", "str", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Colors",
                    )

                with gr.Tab("📏 Typography"):
                    typography_table = gr.Dataframe(
                        headers=["Accept", "Font", "Size", "Weight", "Line Height", "Elements", "Frequency"],
                        datatype=["bool", "str", "str", "number", "str", "str", "number"],
                        interactive=True,
                        label="Extracted Typography",
                    )

                with gr.Tab("📐 Spacing"):
                    spacing_table = gr.Dataframe(
                        headers=["Accept", "Value", "Frequency", "Context", "Fits 8px", "Outlier"],
                        datatype=["bool", "str", "number", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Spacing",
                    )

        # =================================================================
        # STAGE 3: Export
        # =================================================================

        with gr.Accordion("📦 Stage 3: Export", open=False):

            gr.Markdown("""
            **Step 3:** Review and export your design tokens.
            """)

            with gr.Row():
                export_btn = gr.Button("📥 Export JSON", variant="secondary")

            export_output = gr.Code(
                label="Exported Tokens (JSON)",
                language="json",
                lines=20,
            )

        # =================================================================
        # EVENT HANDLERS
        # =================================================================

        # Discovery: run the crawl, then reveal the (initially hidden) table.
        discover_btn.click(
            fn=discover_site_pages,
            inputs=[url_input],
            outputs=[discovery_status, pages_table, pages_json],
        ).then(
            fn=lambda: gr.update(visible=True),
            outputs=[pages_table],
        )

        # Extraction
        extract_btn.click(
            fn=start_extraction,
            inputs=[pages_table, viewport_radio],
            outputs=[extraction_status, colors_table, typography_table, spacing_table],
        )

        # Export
        export_btn.click(
            fn=export_tokens_json,
            outputs=[export_output],
        )

        # =================================================================
        # FOOTER
        # =================================================================

        gr.Markdown("""
        ---

        **Design System Extractor v2** | Built with LangGraph + Gradio + HuggingFace

        *A semi-automated co-pilot for design system recovery and modernization.*

        **Models:** Microsoft Phi (Normalizer) • Meta Llama (Advisor) • Mistral Codestral (Generator)
        """)

    return app
442
+
443
+
444
# =============================================================================
# MAIN
# =============================================================================

if __name__ == "__main__":
    # Bind on all interfaces at the conventional Gradio/HF Spaces port.
    demo = create_ui()
    demo.launch(server_name="0.0.0.0", server_port=7860)