simoncck committed on
Commit
4e5febb
·
verified ·
1 Parent(s): 6492bd4

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +398 -0
  2. browser_automation_ui.html +270 -0
  3. requirements.txt +39 -0
app.py ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import json
4
+ import logging
5
+ import os
6
+ import time
7
+ import uuid
8
+ from datetime import datetime
9
+ from io import BytesIO
10
+ from typing import Dict, List, Optional, Any
11
+ from contextlib import asynccontextmanager
12
+
13
+ import gradio as gr
14
+ import uvicorn
15
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
16
+ from fastapi.middleware.cors import CORSMiddleware
17
+ from fastapi.responses import JSONResponse
18
+ from pydantic import BaseModel
19
+ from playwright.async_api import async_playwright, Browser, BrowserContext, Page
20
+ from selenium import webdriver
21
+ from selenium.webdriver.chrome.options import Options
22
+ from selenium.webdriver.chrome.service import Service
23
+ from selenium.webdriver.common.by import By
24
+ from selenium.webdriver.support.ui import WebDriverWait
25
+ from selenium.webdriver.support import expected_conditions as EC
26
+ from selenium.common.exceptions import TimeoutException, WebDriverException
27
+ from bs4 import BeautifulSoup
28
+ from PIL import Image
29
+
30
+ # Configure logging
31
# Emit INFO-and-above records; `logger` is the module-level logger used below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Registry of live sessions keyed by session id. Each value is the dict built
# by launch_browser (Playwright context/page, Selenium driver, creation time,
# launch config).
browser_instances: Dict[str, Dict] = {}

# Both are assigned by the lifespan handler at startup and stay None until then.
playwright_instance = None
browser_pool = None
38
+
39
+ # Pydantic models
40
class BrowserLaunchRequest(BaseModel):
    """Request body for launching a new browser session."""

    # NOTE(review): launch_browser never reads `headless`; both browsers are
    # started headless regardless of this value.
    headless: bool = True
    # Viewport / window size in pixels.
    width: int = 1920
    height: int = 1080
    # Optional User-Agent override applied to both browsers.
    user_agent: Optional[str] = None
45
+
46
class NavigateRequest(BaseModel):
    """Request body for navigating an existing session to a URL."""

    # Id returned by the launch endpoint.
    session_id: str
    # Address to load in both the Playwright page and the Selenium driver.
    url: str
    # Forwarded unchanged to Playwright's `page.goto(wait_until=...)`.
    wait_until: str = "networkidle"
50
+
51
class ScreenshotRequest(BaseModel):
    """Request body for capturing a screenshot of a session's page."""

    # Id returned by the launch endpoint.
    session_id: str
    # Capture the whole scrollable page; only used when `selector` is unset.
    full_page: bool = False
    # When given, capture only the first element matching this selector.
    selector: Optional[str] = None
55
+
56
class ElementActionRequest(BaseModel):
    """Request body for performing an interaction on a page element."""

    # Id returned by the launch endpoint.
    session_id: str
    # Selector of the target element; the first match is used.
    selector: str
    # One of: click, type, scroll, hover.
    action: str
    # Text to enter; only read for the "type" action.
    value: Optional[str] = None
61
+
62
class ScrapeRequest(BaseModel):
    """Request body for extracting content from a session's current page."""

    # Id returned by the launch endpoint.
    session_id: str
    # CSS selectors whose text should be returned; ignored if `extract_all`.
    selectors: Optional[List[str]] = None
    # When true, return title, headings, paragraphs, links, images and forms.
    extract_all: bool = False
66
+
67
class AIExtractionRequest(BaseModel):
    """Request body for a prompt-driven extraction.

    NOTE(review): no endpoint in the visible part of this file consumes this
    model; confirm whether it is still needed.
    """

    session_id: str
    prompt: str
    target_elements: Optional[List[str]] = None
71
+
72
+ # Global lifespan manager
73
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the shared Playwright browser for the lifetime of the app.

    Startup: start Playwright, launch the shared headless Chromium instance
    and start the periodic session-cleanup loop.
    Shutdown: stop the cleanup loop, close every remaining session, then close
    the browser and stop Playwright.

    The cleanup loop is started here because FastAPI does not call
    ``@app.on_event("startup")`` handlers when a ``lifespan`` is supplied.
    """
    global playwright_instance, browser_pool

    async def _cleanup_loop():
        # Reap expired sessions every 5 minutes; one failing pass must not
        # end the loop.
        while True:
            try:
                cleanup_old_sessions()
            except Exception as e:
                logger.error(f"Error during session cleanup: {e}")
            await asyncio.sleep(300)

    # Startup
    playwright_instance = await async_playwright().start()
    try:
        browser_pool = await playwright_instance.chromium.launch(
            headless=True,
            args=['--no-sandbox', '--disable-dev-shm-usage', '--disable-gpu']
        )
    except Exception:
        # Do not leave the Playwright driver process running if Chromium
        # cannot be launched.
        await playwright_instance.stop()
        playwright_instance = None
        raise
    logger.info("Browser pool initialized")

    cleanup_task = asyncio.create_task(_cleanup_loop())

    try:
        yield
    finally:
        # Shutdown
        cleanup_task.cancel()
        try:
            await cleanup_task
        except asyncio.CancelledError:
            pass

        # Close sessions first so Selenium drivers and Playwright contexts are
        # released before the shared browser goes away.
        for session_id in list(browser_instances):
            await close_browser_session(session_id)

        if browser_pool:
            await browser_pool.close()
        if playwright_instance:
            await playwright_instance.stop()
        logger.info("Browser instances cleaned up")
93
+
94
+ # Initialize FastAPI app
95
# Application object; startup/shutdown is delegated to `lifespan`.
app = FastAPI(
    title="Web Scraping API Service",
    description="Headless browser automation with Playwright and Selenium",
    version="1.0.0",
    lifespan=lifespan,
)

# NOTE(review): every origin, method and header is allowed, with credentials
# enabled. Confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
109
+
110
+ # Utility functions
111
def get_chrome_options():
    """Return the Selenium Chrome ``Options`` used for every session."""
    chrome_flags = (
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--window-size=1920,1080',
    )
    options = Options()
    for flag in chrome_flags:
        options.add_argument(flag)
    return options
120
+
121
def cleanup_old_sessions():
    """Schedule the closing of every session created more than an hour ago.

    Closing is scheduled with ``asyncio.create_task``, so this must be called
    while an event loop is running.
    """
    now = time.time()
    # Collect ids first; the registry is mutated when the sessions close.
    stale_ids = [
        sid
        for sid, data in browser_instances.items()
        if now - data.get('created_at', 0) > 3600  # 1 hour
    ]

    for sid in stale_ids:
        asyncio.create_task(close_browser_session(sid))
132
+
133
async def close_browser_session(session_id: str):
    """Close one browser session and release everything it holds.

    Does nothing if ``session_id`` is not registered. Errors raised while
    closing an individual resource are logged and do not prevent the remaining
    resources from being closed.
    """
    # Remove the entry before the first await. A concurrent close of the same
    # id then finds nothing, instead of racing to a KeyError on a later `del`.
    session = browser_instances.pop(session_id, None)
    if session is None:
        return

    # Close the Playwright page and context independently, so a failure on the
    # page does not leave the context open.
    page = session.get('playwright_page')
    if page is not None:
        try:
            await page.close()
        except Exception as e:
            logger.error(f"Error closing Playwright session {session_id}: {e}")

    context = session.get('playwright_context')
    if context is not None:
        try:
            await context.close()
        except Exception as e:
            logger.error(f"Error closing Playwright session {session_id}: {e}")

    # Close Selenium session
    driver = session.get('selenium_driver')
    if driver is not None:
        try:
            driver.quit()
        except Exception as e:
            logger.error(f"Error closing Selenium session {session_id}: {e}")

    logger.info(f"Closed browser session: {session_id}")
155
+
156
+ # API Endpoints
157
+
158
@app.get("/health")
async def health_check():
    """Report that the service is up, with the current server time."""
    now_iso = datetime.now().isoformat()
    return {"status": "healthy", "timestamp": now_iso}
162
+
163
@app.post("/api/browser/launch")
async def launch_browser(request: BrowserLaunchRequest):
    """Launch a new browser session and register it.

    A session consists of a Playwright context/page on the shared browser and
    a separate Selenium Chrome driver. Returns the new session id.

    Raises:
        HTTPException: 500 if either browser cannot be started. Anything that
            was already started is closed before the error is returned.
    """
    session_id = str(uuid.uuid4())
    context = None
    selenium_driver = None

    try:
        # Launch Playwright browser
        context = await browser_pool.new_context(
            viewport={'width': request.width, 'height': request.height},
            user_agent=request.user_agent
        )
        page = await context.new_page()

        # Launch Selenium browser
        chrome_options = get_chrome_options()
        if request.user_agent:
            chrome_options.add_argument(f'--user-agent={request.user_agent}')

        selenium_driver = webdriver.Chrome(options=chrome_options)
        selenium_driver.set_window_size(request.width, request.height)

        # Store session
        browser_instances[session_id] = {
            'playwright_context': context,
            'playwright_page': page,
            'selenium_driver': selenium_driver,
            'created_at': time.time(),
            'config': request.dict()
        }

        logger.info(f"Launched browser session: {session_id}")
        return {"session_id": session_id, "status": "launched"}

    except Exception as e:
        logger.error(f"Error launching browser: {e}")

        # The session was never registered, so nothing else will ever close
        # these resources; release them here.
        if selenium_driver is not None:
            try:
                selenium_driver.quit()
            except Exception as cleanup_error:
                logger.error(f"Error releasing Selenium driver: {cleanup_error}")
        if context is not None:
            try:
                await context.close()
            except Exception as cleanup_error:
                logger.error(f"Error releasing Playwright context: {cleanup_error}")

        raise HTTPException(status_code=500, detail=str(e)) from e
199
+
200
@app.post("/api/browser/navigate")
async def navigate_to_url(request: NavigateRequest):
    """Load ``request.url`` in both browsers of an existing session.

    Raises:
        HTTPException: 404 for an unknown session, 500 if navigation fails.
    """
    session = browser_instances.get(request.session_id)
    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")

    try:
        # Playwright first, honouring the requested wait condition.
        playwright_page = session['playwright_page']
        await playwright_page.goto(request.url, wait_until=request.wait_until)

        # Then Selenium. NOTE(review): this is a synchronous call made from a
        # coroutine; confirm that blocking here is acceptable.
        selenium_driver = session['selenium_driver']
        selenium_driver.get(request.url)
    except Exception as e:
        logger.error(f"Error navigating to {request.url}: {e}")
        raise HTTPException(status_code=500, detail=str(e))

    return {"status": "navigated", "url": request.url}
220
+
221
@app.post("/api/browser/screenshot")
async def take_screenshot(request: ScreenshotRequest):
    """Capture a PNG screenshot of the session's Playwright page.

    With ``request.selector`` set, only the first matching element is
    captured; otherwise the viewport is captured, or the full page when
    ``request.full_page`` is true. The image is returned base64-encoded.

    Raises:
        HTTPException: 404 for an unknown session, 500 if the capture fails.
    """
    if request.session_id not in browser_instances:
        raise HTTPException(status_code=404, detail="Session not found")

    session = browser_instances[request.session_id]

    try:
        page = session['playwright_page']

        if request.selector:
            # `Locator.first` is a plain property returning another Locator.
            # It is not awaitable; only the screenshot call is.
            element = page.locator(request.selector).first
            screenshot_bytes = await element.screenshot()
        else:
            # Viewport or full-page screenshot with Playwright
            screenshot_bytes = await page.screenshot(
                full_page=request.full_page
            )

        # Convert to base64
        screenshot_b64 = base64.b64encode(screenshot_bytes).decode()

        return {
            "screenshot": screenshot_b64,
            "format": "png",
            "timestamp": datetime.now().isoformat()
        }

    except Exception as e:
        logger.error(f"Error taking screenshot: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
252
+
253
@app.post("/api/elements/action")
async def perform_element_action(request: ElementActionRequest):
    """Perform ``request.action`` on the first element matching the selector.

    Supported actions are ``click``, ``type`` (fills ``request.value``, or an
    empty string when no value is given), ``scroll`` and ``hover``.

    Raises:
        HTTPException: 404 for an unknown session, 400 for an unsupported
            action, 500 if the browser interaction fails.
    """
    if request.session_id not in browser_instances:
        raise HTTPException(status_code=404, detail="Session not found")

    # Reject unknown actions before entering the try block, so the 400 is not
    # caught by the generic handler below and reported as a 500.
    if request.action not in ("click", "type", "scroll", "hover"):
        raise HTTPException(status_code=400, detail="Invalid action")

    session = browser_instances[request.session_id]

    try:
        page = session['playwright_page']
        element = page.locator(request.selector).first

        if request.action == "click":
            await element.click()
        elif request.action == "type":
            await element.fill(request.value or "")
        elif request.action == "scroll":
            await element.scroll_into_view_if_needed()
        else:  # "hover"
            await element.hover()

        return {"status": "completed", "action": request.action}

    except Exception as e:
        logger.error(f"Error performing action {request.action}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
281
+
282
def _css_path(elem):
    """Build a CSS selector addressing ``elem`` by its position in the tree."""
    steps = []
    node = elem
    # The BeautifulSoup root object has no parent, so the walk stops below it.
    while node is not None and node.parent is not None:
        # :nth-of-type counts same-tag siblings, so count those preceding us.
        position = len(node.find_previous_siblings(node.name)) + 1
        steps.append(f"{node.name}:nth-of-type({position})")
        node = node.parent
    return " > ".join(reversed(steps))


@app.get("/api/elements/inspect/{session_id}")
async def inspect_page_elements(session_id: str):
    """List the interactive elements on the session's current page.

    Each entry carries the tag name, a positional CSS selector for that
    element, up to 100 characters of its text, its attributes and its
    ``type`` attribute (``'N/A'`` when absent). Each element is reported once.

    Raises:
        HTTPException: 404 for an unknown session, 500 if inspection fails.
    """
    if session_id not in browser_instances:
        raise HTTPException(status_code=404, detail="Session not found")

    session = browser_instances[session_id]

    try:
        page = session['playwright_page']

        # Get page content
        content = await page.content()
        soup = BeautifulSoup(content, 'html.parser')

        # Find interactive elements
        interactive_selectors = [
            'a', 'button', 'input', 'select', 'textarea',
            '[onclick]', '[href]', '[role="button"]'
        ]

        # One combined query reports an element matching several of the
        # selectors (e.g. both 'a' and '[href]') only once.
        found_elements = soup.select(", ".join(interactive_selectors))

        elements = [
            {
                'tag': elem.name,
                # The index within the result list is not a valid
                # :nth-of-type argument, so derive the selector from the
                # element's actual position in the document.
                'selector': _css_path(elem),
                'text': elem.get_text(strip=True)[:100],
                'attributes': dict(elem.attrs),
                'type': elem.get('type', 'N/A')
            }
            for elem in found_elements
        ]

        return {"elements": elements, "total_count": len(elements)}

    except Exception as e:
        logger.error(f"Error inspecting elements: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
321
+
322
@app.post("/api/scrape/content")
async def scrape_content(request: ScrapeRequest):
    """Extract content from the session's current page.

    With ``extract_all`` the title, headings, paragraphs, links, images and
    forms are returned. Otherwise the text of every element matching each of
    ``selectors`` is returned, keyed by selector. With neither, the data is an
    empty dict.

    Raises:
        HTTPException: 404 for an unknown session, 500 if scraping fails.
    """
    session = browser_instances.get(request.session_id)
    if session is None:
        raise HTTPException(status_code=404, detail="Session not found")

    try:
        html = await session['playwright_page'].content()
        soup = BeautifulSoup(html, 'html.parser')

        if request.extract_all:
            # Extract common elements
            heading_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
            headings = [h.get_text(strip=True) for h in soup.find_all(heading_tags)]
            paragraphs = [p.get_text(strip=True) for p in soup.find_all('p')]
            links = [
                {'text': a.get_text(strip=True), 'href': a.get('href')}
                for a in soup.find_all('a', href=True)
            ]
            images = [
                {'src': img.get('src'), 'alt': img.get('alt')}
                for img in soup.find_all('img')
            ]
            forms = [
                {'action': form.get('action'), 'method': form.get('method')}
                for form in soup.find_all('form')
            ]
            scraped_data = {
                'title': soup.title.string if soup.title else None,
                'headings': headings,
                'paragraphs': paragraphs,
                'links': links,
                'images': images,
                'forms': forms,
            }
        elif request.selectors:
            # Extract specific selectors
            scraped_data = {
                selector: [elem.get_text(strip=True) for elem in soup.select(selector)]
                for selector in request.selectors
            }
        else:
            scraped_data = {}

        return {"data": scraped_data, "timestamp": datetime.now().isoformat()}

    except Exception as e:
        logger.error(f"Error scraping content: {e}")
        raise HTTPException(status_code=500, detail=str(e))
358
+
359
@app.delete("/api/browser/close/{session_id}")
async def close_browser(session_id: str):
    """Close the given session; responds 404 if it is not registered."""
    known = session_id in browser_instances
    if not known:
        raise HTTPException(status_code=404, detail="Session not found")

    await close_browser_session(session_id)
    return {"status": "closed", "session_id": session_id}
367
+
368
@app.get("/api/sessions")
async def list_sessions():
    """Return id, creation time (ISO 8601) and launch config per session."""
    sessions = [
        {
            'session_id': sid,
            'created_at': datetime.fromtimestamp(data['created_at']).isoformat(),
            'config': data['config'],
        }
        for sid, data in browser_instances.items()
    ]
    return {"sessions": sessions, "total_count": len(sessions)}
380
+
381
+ # Background task to cleanup old sessions
382
@app.on_event("startup")
async def startup_event():
    """Start a background loop that reaps expired sessions every 5 minutes.

    NOTE(review): this app is constructed with ``lifespan=``; per the FastAPI
    documentation, ``on_event`` handlers are not called in that case, so this
    handler appears never to run. Confirm and move the loop if so.
    """
    interval_seconds = 300  # Clean up every 5 minutes

    async def cleanup_task():
        while True:
            cleanup_old_sessions()
            await asyncio.sleep(interval_seconds)

    asyncio.create_task(cleanup_task())
390
+
391
if __name__ == "__main__":
    # Serve the app on all interfaces, port 7860, as a single worker and
    # without auto-reload.
    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "reload": False,
        "workers": 1,
    }
    uvicorn.run("app:app", **server_options)
browser_automation_ui.html ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>🌐 Web Scraping Server UI</title>
7
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Source+Code+Pro:wght@400;500&display=swap" rel="stylesheet">
8
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/lucide/0.263.1/lucide.min.css" rel="stylesheet">
9
+ <style>
10
+ * {
11
+ margin: 0;
12
+ padding: 0;
13
+ box-sizing: border-box;
14
+ }
15
+
16
+ body {
17
+ font-family: 'Inter', sans-serif;
18
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
19
+ min-height: 100vh;
20
+ color: #2d3748;
21
+ overflow-x: hidden;
22
+ }
23
+
24
+ .container {
25
+ max-width: 1400px;
26
+ margin: 0 auto;
27
+ padding: 20px;
28
+ }
29
+
30
+ .header {
31
+ text-align: center;
32
+ margin-bottom: 30px;
33
+ color: white;
34
+ }
35
+
36
+ .header h1 {
37
+ font-size: 2.5rem;
38
+ font-weight: 700;
39
+ margin-bottom: 10px;
40
+ text-shadow: 0 2px 4px rgba(0,0,0,0.1);
41
+ }
42
+
43
+ .header p {
44
+ font-size: 1.1rem;
45
+ opacity: 0.9;
46
+ }
47
+
48
+ .status-bar {
49
+ background: rgba(255, 255, 255, 0.95);
50
+ backdrop-filter: blur(10px);
51
+ border-radius: 12px;
52
+ padding: 16px 24px;
53
+ margin-bottom: 24px;
54
+ display: flex;
55
+ justify-content: space-between;
56
+ align-items: center;
57
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
58
+ border: 1px solid rgba(255, 255, 255, 0.2);
59
+ }
60
+
61
+ .status-item {
62
+ display: flex;
63
+ align-items: center;
64
+ gap: 8px;
65
+ }
66
+
67
+ .status-indicator {
68
+ width: 12px;
69
+ height: 12px;
70
+ border-radius: 50%;
71
+ animation: pulse 2s infinite;
72
+ }
73
+
74
+ .status-online {
75
+ background: #10b981;
76
+ }
77
+
78
+ .status-offline {
79
+ background: #ef4444;
80
+ }
81
+
82
+ @keyframes pulse {
83
+ 0%, 100% { opacity: 1; }
84
+ 50% { opacity: 0.5; }
85
+ }
86
+
87
+ .main-content {
88
+ background: rgba(255, 255, 255, 0.95);
89
+ backdrop-filter: blur(10px);
90
+ border-radius: 16px;
91
+ box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
92
+ border: 1px solid rgba(255, 255, 255, 0.2);
93
+ overflow: hidden;
94
+ }
95
+
96
+ .tab-nav {
97
+ display: flex;
98
+ background: rgba(248, 250, 252, 0.8);
99
+ border-bottom: 1px solid #e2e8f0;
100
+ }
101
+
102
+ .tab-button {
103
+ flex: 1;
104
+ padding: 16px 24px;
105
+ background: none;
106
+ border: none;
107
+ cursor: pointer;
108
+ font-family: 'Inter', sans-serif;
109
+ font-size: 1rem;
110
+ font-weight: 500;
111
+ color: #64748b;
112
+ transition: all 0.3s ease;
113
+ position: relative;
114
+ }
115
+
116
+ .tab-button.active {
117
+ color: #4f46e5;
118
+ background: rgba(79, 70, 229, 0.05);
119
+ }
120
+
121
+ .tab-button.active::after {
122
+ content: '';
123
+ position: absolute;
124
+ bottom: 0;
125
+ left: 0;
126
+ right: 0;
127
+ height: 3px;
128
+ background: #4f46e5;
129
+ border-radius: 3px 3px 0 0;
130
+ }
131
+
132
+ .tab-button:hover {
133
+ background: rgba(79, 70, 229, 0.05);
134
+ color: #4f46e5;
135
+ }
136
+
137
+ .tab-content {
138
+ padding: 32px;
139
+ min-height: 600px;
140
+ }
141
+
142
+ .tab-pane {
143
+ display: none;
144
+ }
145
+
146
+ .tab-pane.active {
147
+ display: block;
148
+ animation: fadeIn 0.3s ease;
149
+ }
150
+
151
+ @keyframes fadeIn {
152
+ from { opacity: 0; transform: translateY(10px); }
153
+ to { opacity: 1; transform: translateY(0); }
154
+ }
155
+
156
+ .api-section {
157
+ margin-bottom: 40px;
158
+ background: #f8fafc;
159
+ border-radius: 12px;
160
+ border: 1px solid #e2e8f0;
161
+ overflow: hidden;
162
+ }
163
+
164
+ .api-header {
165
+ background: linear-gradient(135deg, #4f46e5, #7c3aed);
166
+ color: white;
167
+ padding: 20px 24px;
168
+ display: flex;
169
+ align-items: center;
170
+ gap: 12px;
171
+ }
172
+
173
+ .api-header i {
174
+ font-size: 1.2rem;
175
+ }
176
+
177
+ .api-header h3 {
178
+ font-size: 1.25rem;
179
+ font-weight: 600;
180
+ }
181
+
182
+ .api-body {
183
+ padding: 24px;
184
+ }
185
+
186
+ .form-group {
187
+ margin-bottom: 20px;
188
+ }
189
+
190
+ .form-label {
191
+ display: block;
192
+ margin-bottom: 8px;
193
+ font-weight: 500;
194
+ color: #374151;
195
+ }
196
+
197
+ .form-input, .form-textarea, .form-select {
198
+ width: 100%;
199
+ padding: 12px 16px;
200
+ border: 2px solid #e5e7eb;
201
+ border-radius: 8px;
202
+ font-family: 'Inter', sans-serif;
203
+ font-size: 0.95rem;
204
+ transition: all 0.3s ease;
205
+ }
206
+
207
+ .form-input:focus, .form-textarea:focus, .form-select:focus {
208
+ outline: none;
209
+ border-color: #4f46e5;
210
+ box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.1);
211
+ }
212
+
213
+ .form-textarea {
214
+ min-height: 120px;
215
+ resize: vertical;
216
+ font-family: 'Source Code Pro', monospace;
217
+ }
218
+
219
+ .btn {
220
+ padding: 12px 24px;
221
+ border: none;
222
+ border-radius: 8px;
223
+ cursor: pointer;
224
+ font-family: 'Inter', sans-serif;
225
+ font-size: 0.95rem;
226
+ font-weight: 500;
227
+ transition: all 0.3s ease;
228
+ display: inline-flex;
229
+ align-items: center;
230
+ gap: 8px;
231
+ }
232
+
233
+ .btn-primary {
234
+ background: linear-gradient(135deg, #4f46e5, #7c3aed);
235
+ color: white;
236
+ box-shadow: 0 4px 12px rgba(79, 70, 229, 0.3);
237
+ }
238
+
239
+ .btn-primary:hover {
240
+ transform: translateY(-2px);
241
+ box-shadow: 0 6px 20px rgba(79, 70, 229, 0.4);
242
+ }
243
+
244
+ .btn-secondary {
245
+ background: #f1f5f9;
246
+ color: #475569;
247
+ border: 1px solid #e2e8f0;
248
+ }
249
+
250
+ .btn-secondary:hover {
251
+ background: #e2e8f0;
252
+ }
253
+
254
+ .response-area {
255
+ margin-top: 24px;
256
+ background: #1e293b;
257
+ border-radius: 8px;
258
+ padding: 20px;
259
+ font-family: 'Source Code Pro', monospace;
260
+ color: #e2e8f0;
261
+ white-space: pre-wrap;
262
+ max-height: 400px;
263
+ overflow-y: auto;
264
+ border: 1px solid #334155;
265
+ }
266
+
267
+ .element-inspector {
268
+ background: #f8fafc;
269
+ border-radius: 12px;
270
+ border
requirements.txt ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Web framework
2
+ fastapi==0.104.1
3
+ uvicorn[standard]==0.24.0
4
+ gradio==4.8.0
5
+
6
+ # Browser automation
7
+ playwright==1.40.0
8
+ selenium==4.15.2
9
+ webdriver-manager==4.0.1
10
+
11
+ # Web scraping and parsing
12
+ beautifulsoup4==4.12.2
13
+ lxml==4.9.3
14
+ requests==2.31.0
15
+ aiohttp==3.9.1
16
+
17
+ # Image processing
18
+ Pillow==10.1.0
19
+
20
+ # Data processing
21
+ pandas==2.1.4
22
+ numpy==1.25.2
23
+
24
+ # Async support
25
+ asyncio-throttle==1.0.2
26
+ aiofiles==23.2.1
27
+
28
+ # Utilities
29
+ python-multipart==0.0.6
30
+ python-dotenv==1.0.0
31
+ pydantic==2.5.0
32
+
33
+ # AI/ML for content understanding (optional)
34
+ openai==1.3.7
35
+ anthropic==0.7.8
36
+
37
+ # Development
38
+ pytest==7.4.3
39
+ pytest-asyncio==0.21.1