riazmo committed on
Commit
a0256ad
·
verified ·
1 Parent(s): 67ae0a7

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -453
app.py DELETED
@@ -1,453 +0,0 @@
1
- """
2
- Design System Extractor v2 — Main Application
3
- ==============================================
4
-
5
- A semi-automated, human-in-the-loop agentic system that reverse-engineers
6
- design systems from live websites.
7
-
8
- Usage:
9
- python app.py
10
- """
11
-
12
- import os
13
- import asyncio
14
- import gradio as gr
15
- from datetime import datetime
16
-
17
- # Get HF token from environment if available
18
- HF_TOKEN_FROM_ENV = os.getenv("HF_TOKEN", "")
19
-
20
- # =============================================================================
21
- # GLOBAL STATE
22
- # =============================================================================
23
-
24
- current_extraction: dict = {}
25
- user_hf_token: str = ""
26
-
27
-
28
- # =============================================================================
29
- # HF TOKEN MANAGEMENT
30
- # =============================================================================
31
-
32
def set_hf_token(token: str) -> str:
    """Store the HuggingFace token globally and in the process environment.

    Args:
        token: The token string entered by the user (may be None or padded
            with whitespace).

    Returns:
        A status message suitable for display in the UI.
    """
    global user_hf_token

    # Strip BEFORE validating: the original checked len() on the raw input,
    # so a short token padded with whitespace slipped past the length check.
    token = (token or "").strip()
    # Crude sanity check; real HF tokens are far longer than 10 characters.
    if len(token) < 10:
        return "❌ Please enter a valid HuggingFace token"

    user_hf_token = token
    # Mirror into the environment so downstream libraries pick it up.
    os.environ["HF_TOKEN"] = user_hf_token

    return "✅ Token saved! You can now use the extractor."
43
-
44
-
45
- # =============================================================================
46
- # LAZY IMPORTS (avoid circular imports at startup)
47
- # =============================================================================
48
-
49
# Modules are imported on first use so that app startup does not trigger
# circular imports between the UI layer and the agent/core packages.
_crawler_module = None
_extractor_module = None
_schema_module = None


def get_crawler():
    """Return the lazily imported ``agents.crawler`` module."""
    global _crawler_module
    if _crawler_module is None:
        from agents import crawler
        _crawler_module = crawler
    return _crawler_module


def get_extractor():
    """Return the lazily imported ``agents.extractor`` module."""
    global _extractor_module
    if _extractor_module is None:
        from agents import extractor
        _extractor_module = extractor
    return _extractor_module


def get_schema():
    """Return the lazily imported ``core.token_schema`` module."""
    global _schema_module
    if _schema_module is None:
        from core import token_schema
        _schema_module = token_schema
    return _schema_module
71
-
72
- # =============================================================================
73
- # STAGE 1: URL INPUT & PAGE DISCOVERY
74
- # =============================================================================
75
-
76
async def discover_site_pages(url: str, progress=gr.Progress()) -> tuple:
    """Discover pages from a website URL.

    Args:
        url: Base URL of the site to crawl (must start with http/https).
        progress: Gradio progress tracker (injected by the framework).

    Returns:
        Tuple of (status_message, pages_dataframe_rows, pages_json).
        On failure the last two elements are None.
    """
    if not url or not url.startswith(("http://", "https://")):
        return "❌ Please enter a valid URL starting with http:// or https://", None, None

    progress(0, desc="Initializing browser...")

    try:
        discoverer = get_crawler().PageDiscoverer()

        def on_progress(fraction):
            # Relay crawler progress into the Gradio progress bar.
            progress(fraction, desc=f"Discovering pages... ({int(fraction*100)}%)")

        pages = await discoverer.discover(url, progress_callback=on_progress)

        # One row per discovered page, shaped for the pages Dataframe.
        rows = [
            {
                "Select": page.selected,
                "URL": page.url,
                "Title": page.title or "(No title)",
                "Type": page.page_type.value,
                "Status": "✓" if not page.error else f"⚠ {page.error}",
            }
            for page in pages
        ]

        # Stash results for the extraction stage.
        current_extraction["discovered_pages"] = pages
        current_extraction["base_url"] = url

        status = f"✅ Found {len(pages)} pages. Select the pages you want to extract tokens from."

        return status, rows, [p.model_dump() for p in pages]

    except Exception as e:
        import traceback
        # Surface the full traceback in the UI for debugging.
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None, None
118
-
119
-
120
async def start_extraction(pages_selection: list, viewport_choice: str, progress=gr.Progress()) -> tuple:
    """Start token extraction from the pages selected in the discovery table.

    Args:
        pages_selection: Rows from the pages Dataframe; a row is used when
            its "Select" value is truthy.
        viewport_choice: Radio label, either "Desktop (1440px)" or the
            mobile option.
        progress: Gradio progress tracker (injected by the framework).

    Returns:
        Tuple of (status, colors_data, typography_data, spacing_data).
        On failure the last three elements are None.
    """
    if not pages_selection:
        return "❌ Please select at least one page", None, None, None

    # Keep only the rows the user ticked.
    chosen_urls = [row["URL"] for row in pages_selection if row.get("Select", False)]

    if not chosen_urls:
        return "❌ Please select at least one page using the checkboxes", None, None, None

    # Map the radio label onto the schema's Viewport enum.
    schema = get_schema()
    if viewport_choice == "Desktop (1440px)":
        viewport = schema.Viewport.DESKTOP
    else:
        viewport = schema.Viewport.MOBILE

    progress(0, desc=f"Starting {viewport.value} extraction...")

    try:
        extractor = get_extractor().TokenExtractor(viewport=viewport)

        def on_progress(fraction):
            progress(fraction, desc=f"Extracting tokens... ({int(fraction*100)}%)")

        result = await extractor.extract(chosen_urls, progress_callback=on_progress)

        # Stash per-viewport results for the export stage.
        current_extraction[f"{viewport.value}_tokens"] = result

        # Top 50 colors by frequency, shaped for the colors Dataframe.
        colors_data = [
            {
                "Accept": True,
                "Color": color.value,
                "Frequency": color.frequency,
                "Context": ", ".join(color.contexts[:3]),
                "Contrast (White)": f"{color.contrast_white}:1",
                "AA Text": "✓" if color.wcag_aa_small_text else "✗",
                "Confidence": color.confidence.value,
            }
            for color in sorted(result.colors, key=lambda c: -c.frequency)[:50]
        ]

        # Top 30 typography styles by frequency.
        typography_data = [
            {
                "Accept": True,
                "Font": typo.font_family,
                "Size": typo.font_size,
                "Weight": typo.font_weight,
                "Line Height": typo.line_height,
                "Elements": ", ".join(typo.elements[:3]),
                "Frequency": typo.frequency,
            }
            for typo in sorted(result.typography, key=lambda t: -t.frequency)[:30]
        ]

        # Smallest 20 spacing values, ascending.
        spacing_data = [
            {
                "Accept": True,
                "Value": space.value,
                "Frequency": space.frequency,
                "Context": ", ".join(space.contexts[:2]),
                "Fits 8px": "✓" if space.fits_base_8 else "",
                "Outlier": "⚠" if space.is_outlier else "",
            }
            for space in sorted(result.spacing, key=lambda s: s.value_px)[:20]
        ]

        # Markdown summary for the status panel.
        status = f"""✅ Extraction Complete ({viewport.value})

**Summary:**
- Pages crawled: {len(result.pages_crawled)}
- Colors found: {len(result.colors)}
- Typography styles: {len(result.typography)}
- Spacing values: {len(result.spacing)}
- Font families: {len(result.font_families)}
- Detected spacing base: {result.spacing_base or 'Unknown'}px
- Duration: {result.extraction_duration_ms}ms
"""

        if result.warnings:
            status += f"\n⚠️ Warnings: {len(result.warnings)}"
        if result.errors:
            status += f"\n❌ Errors: {len(result.errors)}"

        return status, colors_data, typography_data, spacing_data

    except Exception as e:
        import traceback
        # Surface the full traceback in the UI for debugging.
        return f"❌ Extraction failed: {str(e)}\n\n{traceback.format_exc()}", None, None, None
218
-
219
def export_tokens_json():
    """Serialize the current extraction results to a pretty-printed JSON string.

    Emits one section per viewport ("desktop"/"mobile") for every viewport
    extracted in this session; returns an error payload when nothing has
    been extracted yet.
    """
    import json

    def _section(tokens):
        # One viewport's worth of tokens, flattened to plain dicts.
        # (The original duplicated this shape for desktop and mobile.)
        return {
            "colors": [c.model_dump() for c in tokens.colors],
            "typography": [t.model_dump() for t in tokens.typography],
            "spacing": [s.model_dump() for s in tokens.spacing],
            "metadata": tokens.summary(),
        }

    result = {}
    # start_extraction() stores results under "<viewport>_tokens" keys;
    # export whichever viewports exist.
    for viewport in ("desktop", "mobile"):
        tokens = current_extraction.get(f"{viewport}_tokens")
        if tokens is not None:
            result[viewport] = _section(tokens)

    if not result:
        return '{"error": "No tokens extracted yet. Please run extraction first."}'

    # default=str is a deliberate fallback for non-JSON types (datetimes,
    # enums) that may appear in the metadata summaries.
    return json.dumps(result, indent=2, default=str)
247
-
248
-
249
- # =============================================================================
250
- # UI BUILDING
251
- # =============================================================================
252
-
253
def create_ui():
    """Create the Gradio interface.

    Builds a three-stage pipeline UI (discovery -> extraction -> export)
    plus a configuration section for the HuggingFace token, and wires the
    event handlers to the module-level functions above. Returns the
    gr.Blocks app (not yet launched).
    """

    with gr.Blocks(
        title="Design System Extractor v2",
        theme=gr.themes.Soft(),
    ) as app:

        # Header
        gr.Markdown("""
        # 🎨 Design System Extractor v2

        **Reverse-engineer design systems from live websites.**

        Extract colors, typography, and spacing tokens from any website and export to Figma-compatible JSON.

        ---
        """)

        # =================================================================
        # CONFIGURATION SECTION
        # =================================================================

        # Collapsed by default when a token is already present in the env.
        with gr.Accordion("⚙️ Configuration", open=not bool(HF_TOKEN_FROM_ENV)):

            gr.Markdown("""
            **HuggingFace Token** is required for AI-powered features (Agent 2-4).
            Get your token at: [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)

            *Note: Basic extraction (Agent 1) works without a token.*
            """)

            with gr.Row():
                hf_token_input = gr.Textbox(
                    label="HuggingFace Token",
                    placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
                    type="password",
                    scale=4,
                    # Pre-fill from the environment when available.
                    value=HF_TOKEN_FROM_ENV if HF_TOKEN_FROM_ENV else "",
                )
                save_token_btn = gr.Button("💾 Save Token", scale=1)

            token_status = gr.Markdown(
                "✅ Token loaded from environment" if HF_TOKEN_FROM_ENV else "⏳ Enter your HF token to enable all features"
            )

            save_token_btn.click(
                fn=set_hf_token,
                inputs=[hf_token_input],
                outputs=[token_status],
            )

        # =================================================================
        # STAGE 1: URL Input & Discovery
        # =================================================================

        with gr.Accordion("📍 Stage 1: Website Discovery", open=True):

            gr.Markdown("""
            **Step 1:** Enter your website URL and discover pages.
            The system will automatically find and classify pages for extraction.
            """)

            with gr.Row():
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://example.com",
                    scale=4,
                )
                discover_btn = gr.Button("🔍 Discover Pages", variant="primary", scale=1)

            discovery_status = gr.Markdown("")

            # Hidden until discovery succeeds (revealed by the .then() below).
            pages_table = gr.Dataframe(
                headers=["Select", "URL", "Title", "Type", "Status"],
                datatype=["bool", "str", "str", "str", "str"],
                interactive=True,
                label="Discovered Pages",
                visible=False,
            )

            # Raw page dicts kept around for later stages; never shown.
            pages_json = gr.JSON(visible=False)

        # =================================================================
        # STAGE 2: Extraction
        # =================================================================

        with gr.Accordion("🔬 Stage 2: Token Extraction", open=False):

            gr.Markdown("""
            **Step 2:** Select pages and viewport, then extract design tokens.
            """)

            with gr.Row():
                viewport_radio = gr.Radio(
                    choices=["Desktop (1440px)", "Mobile (375px)"],
                    value="Desktop (1440px)",
                    label="Viewport",
                )
                extract_btn = gr.Button("🚀 Extract Tokens", variant="primary")

            extraction_status = gr.Markdown("")

            # One tab per token category, each backed by an editable table.
            with gr.Tabs():
                with gr.Tab("🎨 Colors"):
                    colors_table = gr.Dataframe(
                        headers=["Accept", "Color", "Frequency", "Context", "Contrast (White)", "AA Text", "Confidence"],
                        datatype=["bool", "str", "number", "str", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Colors",
                    )

                with gr.Tab("📝 Typography"):
                    typography_table = gr.Dataframe(
                        headers=["Accept", "Font", "Size", "Weight", "Line Height", "Elements", "Frequency"],
                        datatype=["bool", "str", "str", "number", "str", "str", "number"],
                        interactive=True,
                        label="Extracted Typography",
                    )

                with gr.Tab("📏 Spacing"):
                    spacing_table = gr.Dataframe(
                        headers=["Accept", "Value", "Frequency", "Context", "Fits 8px", "Outlier"],
                        datatype=["bool", "str", "number", "str", "str", "str"],
                        interactive=True,
                        label="Extracted Spacing",
                    )

        # =================================================================
        # STAGE 3: Export
        # =================================================================

        with gr.Accordion("📦 Stage 3: Export", open=False):

            gr.Markdown("""
            **Step 3:** Review and export your design tokens.
            """)

            with gr.Row():
                export_btn = gr.Button("📥 Export JSON", variant="secondary")

            export_output = gr.Code(
                label="Exported Tokens (JSON)",
                language="json",
                lines=20,
            )

        # =================================================================
        # EVENT HANDLERS
        # =================================================================

        # Discovery: populate the table/JSON, then reveal the hidden table.
        discover_btn.click(
            fn=discover_site_pages,
            inputs=[url_input],
            outputs=[discovery_status, pages_table, pages_json],
        ).then(
            fn=lambda: gr.update(visible=True),
            outputs=[pages_table],
        )

        # Extraction: reads the (possibly user-edited) pages table.
        extract_btn.click(
            fn=start_extraction,
            inputs=[pages_table, viewport_radio],
            outputs=[extraction_status, colors_table, typography_table, spacing_table],
        )

        # Export: no inputs — reads module-level current_extraction.
        export_btn.click(
            fn=export_tokens_json,
            outputs=[export_output],
        )

        # =================================================================
        # FOOTER
        # =================================================================

        gr.Markdown("""
        ---

        **Design System Extractor v2** | Built with LangGraph + Gradio + HuggingFace

        *A semi-automated co-pilot for design system recovery and modernization.*

        **Models:** Microsoft Phi (Normalizer) • Meta Llama (Advisor) • Mistral Codestral (Generator)
        """)

    return app
442
-
443
-
444
- # =============================================================================
445
- # MAIN
446
- # =============================================================================
447
-
448
- if __name__ == "__main__":
449
- app = create_ui()
450
- app.launch(
451
- server_name="0.0.0.0",
452
- server_port=7860,
453
- )