riazmo commited on
Commit
bcbb324
·
verified ·
1 Parent(s): 421902e

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -482
app.py DELETED
@@ -1,482 +0,0 @@
1
- """
2
- Design System Extractor v2 — Main Application
3
- ==============================================
4
-
5
- A semi-automated, human-in-the-loop agentic system that reverse-engineers
6
- design systems from live websites.
7
-
8
- Usage:
9
- python app.py
10
- """
11
-
12
- import os
13
- import asyncio
14
- import gradio as gr
15
- from datetime import datetime
16
-
17
# Get HF token from environment if available — pre-populates the UI field
# and lets the Configuration accordion start collapsed when deployed with
# a token already set (e.g. as a Space secret).
HF_TOKEN_FROM_ENV = os.getenv("HF_TOKEN", "")

# =============================================================================
# GLOBAL STATE
# =============================================================================

# Per-run results keyed by stage: "discovered_pages", "base_url",
# "desktop_tokens", "mobile_tokens".
# NOTE(review): module-level state is shared by every Gradio session —
# concurrent users would overwrite each other's results; confirm this app
# is intended for single-user deployments.
current_extraction: dict = {}
# Most recently saved HuggingFace token (also mirrored into os.environ).
user_hf_token: str = ""
26
-
27
-
28
- # =============================================================================
29
- # HF TOKEN MANAGEMENT
30
- # =============================================================================
31
-
32
def set_hf_token(token: str) -> str:
    """Validate and store a HuggingFace token globally.

    The token is kept in the module-level ``user_hf_token`` and mirrored
    into ``os.environ["HF_TOKEN"]`` so downstream agent modules can read it.

    Args:
        token: Raw token string as typed by the user (may be None/empty).

    Returns:
        A human-readable status message (success or validation error).
    """
    global user_hf_token

    # BUG FIX: strip BEFORE validating. Previously the length check ran on
    # the raw input, so e.g. eleven spaces passed validation and an empty
    # string was stored and exported as HF_TOKEN.
    token = (token or "").strip()
    if len(token) < 10:
        return "❌ Please enter a valid HuggingFace token"

    user_hf_token = token
    os.environ["HF_TOKEN"] = user_hf_token

    return "✅ Token saved! You can now use the extractor."
43
-
44
-
45
# =============================================================================
# LAZY IMPORTS (avoid circular imports at startup)
# =============================================================================

# Caches for lazily imported project modules. The imports are deferred to
# the get_* accessors below so that importing this module never triggers a
# circular import with the agents/core packages.
_crawler_module = None
_extractor_module = None
_schema_module = None
52
-
53
def get_crawler():
    """Return the ``agents.crawler`` module, importing it on first call."""
    global _crawler_module
    if _crawler_module is not None:
        return _crawler_module
    from agents import crawler
    _crawler_module = crawler
    return _crawler_module
59
-
60
def get_extractor():
    """Return the ``agents.extractor`` module, importing it on first call."""
    global _extractor_module
    if _extractor_module is not None:
        return _extractor_module
    from agents import extractor
    _extractor_module = extractor
    return _extractor_module
66
-
67
def get_schema():
    """Return the ``core.token_schema`` module, importing it on first call."""
    global _schema_module
    if _schema_module is not None:
        return _schema_module
    from core import token_schema
    _schema_module = token_schema
    return _schema_module
73
-
74
-
75
- # =============================================================================
76
- # STAGE 1: URL INPUT & PAGE DISCOVERY
77
- # =============================================================================
78
-
79
async def discover_site_pages(url: str, progress=gr.Progress()) -> tuple:
    """Discover pages from a website URL.

    Args:
        url: Base URL to crawl; must start with http:// or https://.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        Tuple of ``(status_message, pages_data)`` — exactly two values,
        matching the two Gradio outputs wired to this handler
        (``discovery_status``, ``pages_table``). ``pages_data`` is a list
        of ``[Select, URL, Title, Type, Status]`` rows, or None on error.
    """
    # BUG FIX: this branch previously returned THREE values while every
    # other path returns two — mismatching the handler's two outputs.
    if not url or not url.startswith(("http://", "https://")):
        return "❌ Please enter a valid URL starting with http:// or https://", None

    progress(0, desc="🚀 Initializing browser...")

    try:
        crawler = get_crawler()
        discoverer = crawler.PageDiscoverer()

        def update_progress(p):
            progress(p, desc=f"🔍 Discovering pages... ({int(p*100)}%)")

        pages = await discoverer.discover(url, progress_callback=update_progress)

        progress(1.0, desc="✅ Discovery complete!")

        # Format for display — emit plain scalars only (bool/str), never
        # objects, so Gradio's Dataframe can render the rows.
        pages_data = []
        for page in pages:
            pages_data.append([
                page.selected,                                   # Select (bool)
                page.url,                                        # URL (str)
                page.title if page.title else "(No title)",      # Title (str)
                page.page_type.value,                            # Type (str)
                "✓" if not page.error else f"⚠ {page.error}"     # Status (str)
            ])

        # Stash results for the extraction stage.
        current_extraction["discovered_pages"] = pages
        current_extraction["base_url"] = url

        status = f"✅ Found {len(pages)} pages. Select the pages you want to extract tokens from."

        return status, pages_data

    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
123
-
124
-
125
async def start_extraction(pages_selection, viewport_choice: str, progress=gr.Progress()) -> tuple:
    """
    Start token extraction from selected pages.

    Args:
        pages_selection: Rows from the discovery Dataframe. Accepts either
            list-of-lists ([Select, URL, Title, Type, Status]) or
            list-of-dicts, depending on the Gradio version.
        viewport_choice: Radio label, "Desktop (1440px)" or "Mobile (375px)".
        progress: Gradio progress tracker (injected by the UI).

    Returns tuple of (status, colors_data, typography_data, spacing_data) —
    the last three are list-of-lists for the result Dataframes, or None on
    error.
    """
    if pages_selection is None or len(pages_selection) == 0:
        return "❌ Please discover pages first", None, None, None

    progress(0, desc="🔄 Preparing extraction...")

    # Get selected URLs from the dataframe
    selected_urls = []

    # Handle both list of lists and list of dicts formats
    for row in pages_selection:
        if isinstance(row, (list, tuple)):
            # Format: [Select, URL, Title, Type, Status]
            if len(row) >= 2 and row[0]:  # row[0] is Select checkbox
                selected_urls.append(row[1])  # row[1] is URL
        elif isinstance(row, dict):
            if row.get("Select", False):
                selected_urls.append(row.get("URL", ""))

    if not selected_urls:
        return "❌ Please select at least one page using the checkboxes", None, None, None

    progress(0.05, desc=f"📋 Selected {len(selected_urls)} pages for extraction...")

    # Determine viewport — any choice other than the desktop label falls
    # through to mobile.
    schema = get_schema()
    viewport = schema.Viewport.DESKTOP if viewport_choice == "Desktop (1440px)" else schema.Viewport.MOBILE

    try:
        extractor_mod = get_extractor()
        extractor = extractor_mod.TokenExtractor(viewport=viewport)

        def update_progress(p):
            # Scale progress from 0.1 to 0.9 so the 0-0.1 and 0.9-1.0 bands
            # are reserved for setup/post-processing messages.
            scaled = 0.1 + (p * 0.8)
            progress(scaled, desc=f"🔬 Extracting tokens... ({int(p*100)}%)")

        progress(0.1, desc=f"🌐 Starting {viewport.value} extraction...")

        result = await extractor.extract(selected_urls, progress_callback=update_progress)

        progress(0.9, desc="📊 Processing results...")

        # Store result under "desktop_tokens" / "mobile_tokens" for export.
        current_extraction[f"{viewport.value}_tokens"] = result

        # Format colors for display - use list of lists for Gradio.
        # Top 50 by descending frequency.
        colors_data = []
        for color in sorted(result.colors, key=lambda c: -c.frequency)[:50]:
            colors_data.append([
                True,  # Accept
                color.value,  # Color
                color.frequency,  # Frequency
                ", ".join(color.contexts[:3]) if color.contexts else "",  # Context
                f"{color.contrast_white:.1f}:1",  # Contrast (vs. white)
                "✓" if color.wcag_aa_small_text else "✗",  # AA Text
                color.confidence.value if color.confidence else "low"  # Confidence
            ])

        progress(0.93, desc="📝 Processing typography...")

        # Format typography for display — top 30 by descending frequency.
        typography_data = []
        for typo in sorted(result.typography, key=lambda t: -t.frequency)[:30]:
            typography_data.append([
                True,  # Accept
                typo.font_family,  # Font
                typo.font_size,  # Size
                typo.font_weight,  # Weight
                typo.line_height if typo.line_height else "",  # Line Height
                ", ".join(typo.elements[:3]) if typo.elements else "",  # Elements
                typo.frequency  # Frequency
            ])

        progress(0.96, desc="📏 Processing spacing...")

        # Format spacing for display — first 20 sorted ascending by pixel
        # value (unlike colors/typography, which sort by frequency).
        spacing_data = []
        for space in sorted(result.spacing, key=lambda s: s.value_px)[:20]:
            spacing_data.append([
                True,  # Accept
                space.value,  # Value
                space.frequency,  # Frequency
                ", ".join(space.contexts[:2]) if space.contexts else "",  # Context
                "✓" if space.fits_base_8 else "",  # Fits 8px grid
                "⚠" if space.is_outlier else ""  # Outlier
            ])

        progress(1.0, desc="✅ Extraction complete!")

        # Markdown summary table rendered into the status component.
        status = f"""✅ **Extraction Complete** ({viewport.value})

### 📊 Summary
| Metric | Value |
|--------|-------|
| Pages crawled | {len(result.pages_crawled)} |
| Colors found | {len(result.colors)} |
| Typography styles | {len(result.typography)} |
| Spacing values | {len(result.spacing)} |
| Font families | {len(result.font_families)} |
| Spacing base | {result.spacing_base or 'Unknown'}px |
| Duration | {result.extraction_duration_ms}ms |
"""

        if result.warnings:
            status += f"\n⚠️ **Warnings:** {len(result.warnings)}"
        if result.errors:
            status += f"\n❌ **Errors:** {len(result.errors)}"
            # Show at most the first three errors to keep the status short.
            for err in result.errors[:3]:
                status += f"\n- {err}"

        return status, colors_data, typography_data, spacing_data

    except Exception as e:
        import traceback
        return f"❌ Extraction failed: {str(e)}\n\n```\n{traceback.format_exc()}\n```", None, None, None
247
-
248
-
249
def export_tokens_json():
    """Serialize extracted tokens to a pretty-printed JSON string.

    Reads the per-viewport extraction results from the module-level
    ``current_extraction`` dict.

    Returns:
        JSON string with a "desktop" and/or "mobile" section, each holding
        colors/typography/spacing dumps plus summary metadata; or a JSON
        error object when nothing has been extracted yet.
    """
    import json

    result = {}

    # Both viewports share the exact same shape — build each section in one
    # loop instead of two copy-pasted branches.
    for viewport in ("desktop", "mobile"):
        tokens = current_extraction.get(f"{viewport}_tokens")
        if tokens is None:
            continue
        result[viewport] = {
            "colors": [c.model_dump() for c in tokens.colors],
            "typography": [t.model_dump() for t in tokens.typography],
            "spacing": [s.model_dump() for s in tokens.spacing],
            "metadata": tokens.summary(),
        }

    if not result:
        return '{"error": "No tokens extracted yet. Please run extraction first."}'

    # default=str stringifies non-JSON types (datetimes, enums) on export.
    return json.dumps(result, indent=2, default=str)
277
-
278
-
279
- # =============================================================================
280
- # UI BUILDING
281
- # =============================================================================
282
-
283
def create_ui():
    """Create the Gradio interface.

    Builds three sequential stages (discovery → extraction → export) plus a
    configuration accordion for the HuggingFace token, wires each button to
    the module-level handlers, and returns the Blocks app (launched by the
    ``__main__`` guard).
    """

    with gr.Blocks(
        title="Design System Extractor v2",
        theme=gr.themes.Soft(),
    ) as app:

        # Header
        gr.Markdown("""
        # 🎨 Design System Extractor v2

        **Reverse-engineer design systems from live websites.**

        Extract colors, typography, and spacing tokens from any website and export to Figma-compatible JSON.

        ---
        """)

        # =================================================================
        # CONFIGURATION SECTION
        # =================================================================

        # Starts collapsed when a token was already found in the environment.
        with gr.Accordion("⚙️ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):

            gr.Markdown("""
            **HuggingFace Token** is required for AI-powered features (Agent 2-4).
            Get your token at: [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)

            *Note: Basic extraction (Agent 1) works without a token.*
            """)

            with gr.Row():
                hf_token_input = gr.Textbox(
                    label="HuggingFace Token",
                    placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
                    type="password",
                    scale=4,
                    value=HF_TOKEN_FROM_ENV if HF_TOKEN_FROM_ENV else "",
                )
                save_token_btn = gr.Button("💾 Save Token", scale=1)

            token_status = gr.Markdown(
                "✅ Token loaded from environment" if HF_TOKEN_FROM_ENV else "⏳ Enter your HF token to enable all features"
            )

            save_token_btn.click(
                fn=set_hf_token,
                inputs=[hf_token_input],
                outputs=[token_status],
            )

        # =================================================================
        # STAGE 1: URL Input & Discovery
        # =================================================================

        with gr.Accordion("📍 Stage 1: Website Discovery", open=True):

            gr.Markdown("""
            **Step 1:** Enter your website URL and discover pages.
            The system will automatically find and classify pages for extraction.
            """)

            with gr.Row():
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://example.com",
                    scale=4,
                )
                discover_btn = gr.Button("🔍 Discover Pages", variant="primary", scale=1)

            discovery_status = gr.Markdown("")

            # Hidden until discovery succeeds (made visible by the .then()
            # callback on discover_btn below).
            pages_table = gr.Dataframe(
                headers=["Select", "URL", "Title", "Type", "Status"],
                datatype=["bool", "str", "str", "str", "str"],
                interactive=True,
                label="Discovered Pages",
                visible=False,
                col_count=(5, "fixed"),
            )

        # =================================================================
        # STAGE 2: Extraction
        # =================================================================

        with gr.Accordion("🔬 Stage 2: Token Extraction", open=False):

            gr.Markdown("""
            **Step 2:** Select pages and viewport, then extract design tokens.
            """)

            with gr.Row():
                viewport_radio = gr.Radio(
                    choices=["Desktop (1440px)", "Mobile (375px)"],
                    value="Desktop (1440px)",
                    label="Viewport",
                )
                extract_btn = gr.Button("🚀 Extract Tokens", variant="primary")

            extraction_status = gr.Markdown("")

            # One results tab per token category; column order must match
            # the row layout produced by start_extraction().
            with gr.Tabs():
                with gr.Tab("🎨 Colors"):
                    colors_table = gr.Dataframe(
                        headers=["Accept", "Color", "Frequency", "Context", "Contrast (White)", "AA Text", "Confidence"],
                        datatype=["bool", "str", "number", "str", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Colors",
                    )

                with gr.Tab("📝 Typography"):
                    typography_table = gr.Dataframe(
                        headers=["Accept", "Font", "Size", "Weight", "Line Height", "Elements", "Frequency"],
                        datatype=["bool", "str", "str", "number", "str", "str", "number"],
                        interactive=True,
                        label="Extracted Typography",
                    )

                with gr.Tab("📏 Spacing"):
                    spacing_table = gr.Dataframe(
                        headers=["Accept", "Value", "Frequency", "Context", "Fits 8px", "Outlier"],
                        datatype=["bool", "str", "number", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Spacing",
                    )

        # =================================================================
        # STAGE 3: Export
        # =================================================================

        with gr.Accordion("📦 Stage 3: Export", open=False):

            gr.Markdown("""
            **Step 3:** Review and export your design tokens.
            """)

            with gr.Row():
                export_btn = gr.Button("📥 Export JSON", variant="secondary")

            export_output = gr.Code(
                label="Exported Tokens (JSON)",
                language="json",
                lines=20,
            )

        # =================================================================
        # EVENT HANDLERS
        # =================================================================

        # Discovery: run the crawl, then reveal the results table.
        discover_btn.click(
            fn=discover_site_pages,
            inputs=[url_input],
            outputs=[discovery_status, pages_table],
        ).then(
            fn=lambda: gr.update(visible=True),
            outputs=[pages_table],
        )

        # Extraction: one status message plus the three token tables.
        extract_btn.click(
            fn=start_extraction,
            inputs=[pages_table, viewport_radio],
            outputs=[extraction_status, colors_table, typography_table, spacing_table],
        )

        # Export: dump whatever is in current_extraction as JSON.
        export_btn.click(
            fn=export_tokens_json,
            outputs=[export_output],
        )

        # =================================================================
        # FOOTER
        # =================================================================

        gr.Markdown("""
        ---

        **Design System Extractor v2** | Built with LangGraph + Gradio + HuggingFace

        *A semi-automated co-pilot for design system recovery and modernization.*

        **Models:** Microsoft Phi (Normalizer) • Meta Llama (Advisor) • Mistral Codestral (Generator)
        """)

    return app
471
-
472
-
473
# =============================================================================
# MAIN
# =============================================================================

if __name__ == "__main__":
    app = create_ui()
    # 0.0.0.0 binds all interfaces (required inside containers / HF Spaces);
    # 7860 is Gradio's conventional default port.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )