riazmo committed on
Commit
b451543
·
verified ·
1 Parent(s): 9131d5e

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -479
app.py DELETED
@@ -1,479 +0,0 @@
1
- """
2
- Design System Extractor v2 — Main Application
3
- ==============================================
4
-
5
- A semi-automated, human-in-the-loop agentic system that reverse-engineers
6
- design systems from live websites.
7
-
8
- Usage:
9
- python app.py
10
- """
11
-
12
- import os
13
- import asyncio
14
- import gradio as gr
15
- from datetime import datetime
16
-
17
# Get HF token from environment if available, so that a token configured as a
# deployment secret works without the user pasting it into the UI.
HF_TOKEN_FROM_ENV = os.getenv("HF_TOKEN", "")
19
-
20
- # =============================================================================
21
- # GLOBAL STATE
22
- # =============================================================================
23
-
24
# Per-session extraction state, keyed by stage: "discovered_pages",
# "base_url", and "<viewport>_tokens" (e.g. "desktop_tokens").
current_extraction: dict = {}
# Token supplied via the UI; also mirrored into os.environ["HF_TOKEN"].
user_hf_token: str = ""
26
-
27
-
28
- # =============================================================================
29
- # HF TOKEN MANAGEMENT
30
- # =============================================================================
31
-
32
def set_hf_token(token: str) -> str:
    """Store the user's HuggingFace token globally and in the environment.

    Args:
        token: Raw token string as typed by the user. Surrounding
            whitespace is ignored.

    Returns:
        A human-readable status message for display in the UI.
    """
    global user_hf_token

    # Strip *before* validating so whitespace padding cannot make an
    # otherwise-too-short token pass the length check (previously the
    # strip happened after validation).
    token = (token or "").strip()
    if len(token) < 10:
        return "❌ Please enter a valid HuggingFace token"

    user_hf_token = token
    # Export so downstream libraries that read HF_TOKEN pick it up.
    os.environ["HF_TOKEN"] = user_hf_token

    return "✅ Token saved! You can now use the extractor."
43
-
44
-
45
- # =============================================================================
46
- # LAZY IMPORTS (avoid circular imports at startup)
47
- # =============================================================================
48
-
49
# Module-level caches for lazily-imported dependencies. They start as None
# and are populated on first use to avoid circular imports at startup.
_crawler_module = None
_extractor_module = None
_schema_module = None
52
-
53
def get_crawler():
    """Return the crawler agent module, importing and caching it on first use."""
    global _crawler_module
    if _crawler_module is not None:
        return _crawler_module
    # First call: bind the import result into the module-level cache.
    from agents import crawler as _crawler_module
    return _crawler_module
58
-
59
def get_extractor():
    """Return the extractor agent module, importing and caching it on first use."""
    global _extractor_module
    if _extractor_module is not None:
        return _extractor_module
    # First call: bind the import result into the module-level cache.
    from agents import extractor as _extractor_module
    return _extractor_module
64
-
65
def get_schema():
    """Return the token schema module, importing and caching it on first use."""
    global _schema_module
    if _schema_module is not None:
        return _schema_module
    # First call: bind the import result into the module-level cache.
    from core import token_schema as _schema_module
    return _schema_module
70
-
71
-
72
- # =============================================================================
73
- # STAGE 1: URL INPUT & PAGE DISCOVERY
74
- # =============================================================================
75
-
76
async def discover_site_pages(url: str, progress=gr.Progress()) -> tuple:
    """Discover pages from a website URL.

    Args:
        url: Base URL of the site (must start with http:// or https://).
        progress: Gradio progress tracker, injected by Gradio at call time.

    Returns:
        A 2-tuple ``(status_message, pages_data)`` matching the two Gradio
        outputs wired to this handler. ``pages_data`` is a list of rows
        ``[Select, URL, Title, Type, Status]``, or ``None`` on failure.
    """
    if not url or not url.startswith(("http://", "https://")):
        # BUG FIX: this branch previously returned three values while every
        # other path — and the UI wiring (two outputs) — expects two.
        return "❌ Please enter a valid URL starting with http:// or https://", None

    progress(0, desc="🚀 Initializing browser...")

    try:
        crawler = get_crawler()
        discoverer = crawler.PageDiscoverer()

        def update_progress(p):
            progress(p, desc=f"🔍 Discovering pages... ({int(p*100)}%)")

        pages = await discoverer.discover(url, progress_callback=update_progress)

        progress(1.0, desc="✅ Discovery complete!")

        # Format for display - ensure we return simple values, not objects
        pages_data = []
        for page in pages:
            pages_data.append([
                page.selected,                                 # Select (bool)
                page.url,                                      # URL (str)
                page.title if page.title else "(No title)",    # Title (str)
                page.page_type.value,                          # Type (str)
                "✓" if not page.error else f"⚠ {page.error}"   # Status (str)
            ])

        # Store for later use by the extraction stage
        current_extraction["discovered_pages"] = pages
        current_extraction["base_url"] = url

        status = f"✅ Found {len(pages)} pages. Select the pages you want to extract tokens from."

        return status, pages_data

    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None
120
-
121
-
122
async def start_extraction(pages_selection, viewport_choice: str, progress=gr.Progress()) -> tuple:
    """
    Start token extraction from selected pages.

    Args:
        pages_selection: Rows from the pages Dataframe. Gradio may deliver
            these either as lists ([Select, URL, Title, Type, Status]) or as
            dicts keyed by header name, so both shapes are handled below.
        viewport_choice: Radio value; "Desktop (1440px)" selects the desktop
            viewport, anything else falls back to mobile.
        progress: Gradio progress tracker, injected at call time.

    Returns tuple of (status, colors_data, typography_data, spacing_data)
    where the three *_data values are list-of-list tables for Gradio, or
    None on failure.
    """
    if pages_selection is None or len(pages_selection) == 0:
        return "❌ Please discover pages first", None, None, None

    progress(0, desc="🔄 Preparing extraction...")

    # Get selected URLs from the dataframe
    selected_urls = []

    # Handle both list of lists and list of dicts formats
    for row in pages_selection:
        if isinstance(row, (list, tuple)):
            # Format: [Select, URL, Title, Type, Status]
            if len(row) >= 2 and row[0]:  # row[0] is Select checkbox
                selected_urls.append(row[1])  # row[1] is URL
        elif isinstance(row, dict):
            if row.get("Select", False):
                selected_urls.append(row.get("URL", ""))

    if not selected_urls:
        return "❌ Please select at least one page using the checkboxes", None, None, None

    progress(0.05, desc=f"📋 Selected {len(selected_urls)} pages for extraction...")

    # Determine viewport (result is stored under "<viewport>_tokens" below)
    schema = get_schema()
    viewport = schema.Viewport.DESKTOP if viewport_choice == "Desktop (1440px)" else schema.Viewport.MOBILE

    try:
        extractor_mod = get_extractor()
        extractor = extractor_mod.TokenExtractor(viewport=viewport)

        def update_progress(p):
            # Scale progress from 0.1 to 0.9 so the 0-0.1 and 0.9-1.0 ranges
            # remain available for the setup / post-processing phases.
            scaled = 0.1 + (p * 0.8)
            progress(scaled, desc=f"🔬 Extracting tokens... ({int(p*100)}%)")

        progress(0.1, desc=f"🌐 Starting {viewport.value} extraction...")

        result = await extractor.extract(selected_urls, progress_callback=update_progress)

        progress(0.9, desc="📊 Processing results...")

        # Store result so export_tokens_json can pick it up later
        current_extraction[f"{viewport.value}_tokens"] = result

        # Format colors for display - use list of lists for Gradio.
        # Top 50 by descending frequency.
        colors_data = []
        for color in sorted(result.colors, key=lambda c: -c.frequency)[:50]:
            colors_data.append([
                True,  # Accept
                color.value,  # Color
                color.frequency,  # Frequency
                ", ".join(color.contexts[:3]) if color.contexts else "",  # Context
                f"{color.contrast_white:.1f}:1",  # Contrast
                "✓" if color.wcag_aa_small_text else "✗",  # AA Text
                color.confidence.value if color.confidence else "low"  # Confidence
            ])

        progress(0.93, desc="📝 Processing typography...")

        # Format typography for display (top 30 by descending frequency)
        typography_data = []
        for typo in sorted(result.typography, key=lambda t: -t.frequency)[:30]:
            typography_data.append([
                True,  # Accept
                typo.font_family,  # Font
                typo.font_size,  # Size
                typo.font_weight,  # Weight
                typo.line_height if typo.line_height else "",  # Line Height
                ", ".join(typo.elements[:3]) if typo.elements else "",  # Elements
                typo.frequency  # Frequency
            ])

        progress(0.96, desc="📏 Processing spacing...")

        # Format spacing for display (first 20 by ascending pixel value)
        spacing_data = []
        for space in sorted(result.spacing, key=lambda s: s.value_px)[:20]:
            spacing_data.append([
                True,  # Accept
                space.value,  # Value
                space.frequency,  # Frequency
                ", ".join(space.contexts[:2]) if space.contexts else "",  # Context
                "✓" if space.fits_base_8 else "",  # Fits 8px
                "⚠" if space.is_outlier else ""  # Outlier
            ])

        progress(1.0, desc="✅ Extraction complete!")

        # Summary rendered as a Markdown table in the status component
        status = f"""✅ **Extraction Complete** ({viewport.value})

### 📊 Summary
| Metric | Value |
|--------|-------|
| Pages crawled | {len(result.pages_crawled)} |
| Colors found | {len(result.colors)} |
| Typography styles | {len(result.typography)} |
| Spacing values | {len(result.spacing)} |
| Font families | {len(result.font_families)} |
| Spacing base | {result.spacing_base or 'Unknown'}px |
| Duration | {result.extraction_duration_ms}ms |
"""

        if result.warnings:
            status += f"\n⚠️ **Warnings:** {len(result.warnings)}"
        if result.errors:
            status += f"\n❌ **Errors:** {len(result.errors)}"
            # Show at most the first three errors to keep the status compact
            for err in result.errors[:3]:
                status += f"\n- {err}"

        return status, colors_data, typography_data, spacing_data

    except Exception as e:
        import traceback
        return f"❌ Extraction failed: {str(e)}\n\n```\n{traceback.format_exc()}\n```", None, None, None
244
-
245
-
246
def export_tokens_json():
    """Export the current extraction results as a pretty-printed JSON string.

    Serializes whichever viewports ("desktop", then "mobile") have been
    extracted into ``current_extraction``. Returns a JSON error object when
    nothing has been extracted yet.
    """
    import json

    result = {}

    # Both viewports serialize identically, so drive them from one loop
    # instead of duplicating the dict literal per viewport.
    for viewport_name in ("desktop", "mobile"):
        tokens = current_extraction.get(f"{viewport_name}_tokens")
        if tokens is None:
            continue
        result[viewport_name] = {
            "colors": [c.model_dump() for c in tokens.colors],
            "typography": [t.model_dump() for t in tokens.typography],
            "spacing": [s.model_dump() for s in tokens.spacing],
            "metadata": tokens.summary(),
        }

    if not result:
        return '{"error": "No tokens extracted yet. Please run extraction first."}'

    # default=str is deliberate: token models may contain values json can't
    # serialize natively (enums, datetimes); they are stringified on export.
    return json.dumps(result, indent=2, default=str)
274
-
275
-
276
- # =============================================================================
277
- # UI BUILDING
278
- # =============================================================================
279
-
280
def create_ui():
    """Create the Gradio interface.

    Builds a three-stage workflow (discovery -> extraction -> export) plus a
    configuration accordion for the HuggingFace token, and wires the click
    handlers to the module-level functions.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """

    with gr.Blocks(
        title="Design System Extractor v2",
        theme=gr.themes.Soft(),
    ) as app:

        # Header
        gr.Markdown("""
# 🎨 Design System Extractor v2

**Reverse-engineer design systems from live websites.**

Extract colors, typography, and spacing tokens from any website and export to Figma-compatible JSON.

---
""")

        # =================================================================
        # CONFIGURATION SECTION
        # =================================================================

        # Open the accordion automatically only when no token came from the
        # environment, so users without a secret are prompted for one.
        with gr.Accordion("⚙️ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):

            gr.Markdown("""
**HuggingFace Token** is required for AI-powered features (Agent 2-4).
Get your token at: [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)

*Note: Basic extraction (Agent 1) works without a token.*
""")

            with gr.Row():
                hf_token_input = gr.Textbox(
                    label="HuggingFace Token",
                    placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
                    type="password",
                    scale=4,
                    value=HF_TOKEN_FROM_ENV if HF_TOKEN_FROM_ENV else "",
                )
                save_token_btn = gr.Button("💾 Save Token", scale=1)

            token_status = gr.Markdown(
                "✅ Token loaded from environment" if HF_TOKEN_FROM_ENV else "⏳ Enter your HF token to enable all features"
            )

            # Saving the token updates only the status message component
            save_token_btn.click(
                fn=set_hf_token,
                inputs=[hf_token_input],
                outputs=[token_status],
            )

        # =================================================================
        # STAGE 1: URL Input & Discovery
        # =================================================================

        with gr.Accordion("📍 Stage 1: Website Discovery", open=True):

            gr.Markdown("""
**Step 1:** Enter your website URL and discover pages.
The system will automatically find and classify pages for extraction.
""")

            with gr.Row():
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://example.com",
                    scale=4,
                )
                discover_btn = gr.Button("🔍 Discover Pages", variant="primary", scale=1)

            discovery_status = gr.Markdown("")

            # Hidden until discovery succeeds (made visible by the .then()
            # step of the discover handler below).
            pages_table = gr.Dataframe(
                headers=["Select", "URL", "Title", "Type", "Status"],
                datatype=["bool", "str", "str", "str", "str"],
                interactive=True,
                label="Discovered Pages",
                visible=False,
                col_count=(5, "fixed"),
            )

        # =================================================================
        # STAGE 2: Extraction
        # =================================================================

        with gr.Accordion("🔬 Stage 2: Token Extraction", open=False):

            gr.Markdown("""
**Step 2:** Select pages and viewport, then extract design tokens.
""")

            with gr.Row():
                viewport_radio = gr.Radio(
                    choices=["Desktop (1440px)", "Mobile (375px)"],
                    value="Desktop (1440px)",
                    label="Viewport",
                )
                extract_btn = gr.Button("🚀 Extract Tokens", variant="primary")

            extraction_status = gr.Markdown("")

            # One result table per token category; column layouts mirror the
            # row shapes produced by start_extraction.
            with gr.Tabs():
                with gr.Tab("🎨 Colors"):
                    colors_table = gr.Dataframe(
                        headers=["Accept", "Color", "Frequency", "Context", "Contrast (White)", "AA Text", "Confidence"],
                        datatype=["bool", "str", "number", "str", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Colors",
                    )

                with gr.Tab("📝 Typography"):
                    typography_table = gr.Dataframe(
                        headers=["Accept", "Font", "Size", "Weight", "Line Height", "Elements", "Frequency"],
                        datatype=["bool", "str", "str", "number", "str", "str", "number"],
                        interactive=True,
                        label="Extracted Typography",
                    )

                with gr.Tab("📏 Spacing"):
                    spacing_table = gr.Dataframe(
                        headers=["Accept", "Value", "Frequency", "Context", "Fits 8px", "Outlier"],
                        datatype=["bool", "str", "number", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Spacing",
                    )

        # =================================================================
        # STAGE 3: Export
        # =================================================================

        with gr.Accordion("📦 Stage 3: Export", open=False):

            gr.Markdown("""
**Step 3:** Review and export your design tokens.
""")

            with gr.Row():
                export_btn = gr.Button("📥 Export JSON", variant="secondary")

            export_output = gr.Code(
                label="Exported Tokens (JSON)",
                language="json",
                lines=20,
            )

        # =================================================================
        # EVENT HANDLERS
        # =================================================================

        # Discovery: populate the table, then reveal it
        discover_btn.click(
            fn=discover_site_pages,
            inputs=[url_input],
            outputs=[discovery_status, pages_table],
        ).then(
            fn=lambda: gr.update(visible=True),
            outputs=[pages_table],
        )

        # Extraction: fills the status plus all three token tables
        extract_btn.click(
            fn=start_extraction,
            inputs=[pages_table, viewport_radio],
            outputs=[extraction_status, colors_table, typography_table, spacing_table],
        )

        # Export: takes no inputs; reads module-level extraction state
        export_btn.click(
            fn=export_tokens_json,
            outputs=[export_output],
        )

        # =================================================================
        # FOOTER
        # =================================================================

        gr.Markdown("""
---

**Design System Extractor v2** | Built with LangGraph + Gradio + HuggingFace

*A semi-automated co-pilot for design system recovery and modernization.*

**Models:** Microsoft Phi (Normalizer) • Meta Llama (Advisor) • Mistral Codestral (Generator)
""")

    return app
468
-
469
-
470
- # =============================================================================
471
- # MAIN
472
- # =============================================================================
473
-
474
if __name__ == "__main__":
    # Build the UI and serve it on all interfaces, port 7860.
    demo = create_ui()
    demo.launch(server_name="0.0.0.0", server_port=7860)