sheikhcoders committed on
Commit
c028a11
·
verified ·
1 Parent(s): 252373e

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +798 -0
app.py ADDED
@@ -0,0 +1,798 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hybrid Browser Automation Tool
3
+ Combines Gradio UI with FastAPI backend
4
+ """
5
+
6
+ import gradio as gr
7
+ import asyncio
8
+ import aiohttp
9
+ import json
10
+ import base64
11
+ import uuid
12
+ from typing import Dict, List, Any, Optional
13
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
14
+ from fastapi.responses import StreamingResponse
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from pydantic import BaseModel
17
+ from datetime import datetime
18
+ from selenium import webdriver
19
+ from selenium.webdriver.common.by import By
20
+ from selenium.webdriver.chrome.options import Options
21
+ import threading
22
+ import uvicorn
23
+
24
+ # ============================================================================
25
+ # FastAPI Backend
26
+ # ============================================================================
27
+
28
+ # FastAPI App Setup
29
# Central FastAPI application; every REST/SSE endpoint in this file is
# registered on it.
api_app = FastAPI(
    title="Browser Automation API",
    description="Browser automation with Gradio UI and REST API",
    version="2.0.0",
)

# Add CORS for Gradio integration.
# Credentialed CORS is disabled: a wildcard origin must not be combined with
# allow_credentials=True, and no endpoint visible in this file reads cookies
# or authentication headers.
api_app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
43
+
44
+ # ============================================================================
45
+ # Data Models
46
+ # ============================================================================
47
+
48
class NavigateRequest(BaseModel):
    """Request body for ``POST /sessions/{session_id}/navigate``."""

    # Address the browser should load.
    url: str
    # Seconds to pause after the page-load call returns. Typed as float so
    # fractional values (such as those a UI slider can produce) validate;
    # integer payloads remain valid.
    wait_time: Optional[float] = 2
51
+
52
class ClickRequest(BaseModel):
    """Request body for ``POST /sessions/{session_id}/click``."""

    # Locator string identifying the element to click.
    selector: str
    # Locator strategy; the click endpoint maps "css", "xpath", "id", "class".
    selector_type: Optional[str] = "css"
55
+
56
class TypeRequest(BaseModel):
    """Request body for ``POST /sessions/{session_id}/type``."""

    # Locator string identifying the input element.
    selector: str
    # Text sent to the element after it has been cleared.
    text: str
    # Locator strategy; the type endpoint maps "css", "xpath", "id", "class".
    selector_type: Optional[str] = "css"
60
+
61
class ExtractRequest(BaseModel):
    """Request body for ``POST /sessions/{session_id}/extract``."""

    # Locator string matching the elements to read.
    selector: str
    # "text" reads the element text; any other value is looked up as an
    # element attribute by the extract endpoint.
    attribute: Optional[str] = "text"
    # Locator strategy; the extract endpoint maps "css", "xpath", "id", "class".
    selector_type: Optional[str] = "css"
65
+
66
class TaskRequest(BaseModel):
    """Request body for ``POST /tasks/execute``."""

    # Caller-chosen task identifier; the endpoint generates a UUID when absent.
    task_id: Optional[str] = None
    # Ordered steps; each is a dict whose "type" key selects the action.
    actions: List[Dict[str, Any]]
69
+
70
+ # ============================================================================
71
+ # Browser Session Manager
72
+ # ============================================================================
73
+
74
class BrowserSession:
    """Registry of headless Chrome drivers keyed by session id, plus task records."""

    def __init__(self):
        # session id -> live Selenium driver
        self.sessions: Dict[str, webdriver.Chrome] = {}
        # task id -> status/progress record maintained by the task endpoints
        self.task_status: Dict[str, Dict] = {}
        # Not referenced by any other code visible in this file.
        self.api_clients: Dict[str, Any] = {}

    def create_session(self, session_id: Optional[str] = None) -> str:
        """Start a headless Chrome driver and register it.

        Args:
            session_id: Identifier to register the driver under; a random
                UUID4 string is generated when omitted or empty.

        Returns:
            The session id the driver was stored under.

        Raises:
            HTTPException: 500 when the driver cannot be started.
        """
        if not session_id:
            session_id = str(uuid.uuid4())

        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_options.add_argument("--disable-gpu")

        try:
            driver = webdriver.Chrome(options=chrome_options)
        except Exception as e:
            print(f"Error creating session: {e}")
            raise HTTPException(
                status_code=500,
                detail=f"Failed to create browser session: {e}",
            ) from e

        # list_sessions() reads this attribute; previously nothing set it, so
        # every session reported "Unknown".
        driver.created_at = datetime.utcnow().isoformat()
        self.sessions[session_id] = driver
        return session_id

    def get_session(self, session_id: str) -> "webdriver.Chrome":
        """Return the driver for ``session_id`` or raise a 404 HTTPException."""
        if session_id not in self.sessions:
            raise HTTPException(status_code=404, detail="Session not found")
        return self.sessions[session_id]

    def close_session(self, session_id: str):
        """Unregister ``session_id`` and quit its driver; unknown ids are ignored.

        The entry is removed before quitting so that a failing ``quit()`` can
        no longer leave a dead driver registered.
        """
        driver = self.sessions.pop(session_id, None)
        if driver is None:
            return
        try:
            driver.quit()
        except Exception as e:
            # Best effort: the session is already unregistered; just report it.
            print(f"Error closing session {session_id}: {e}")

    def get_screenshot(self, session_id: str) -> str:
        """Return a base64-encoded PNG screenshot of the session's page.

        Raises:
            HTTPException: 404 for an unknown session, 500 when capture fails.
        """
        driver = self.get_session(session_id)
        try:
            screenshot = driver.get_screenshot_as_png()
            return base64.b64encode(screenshot).decode()
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Failed to capture screenshot: {e}",
            ) from e

    def list_sessions(self) -> List[Dict]:
        """List all active sessions"""
        session_list = []
        for session_id, driver in self.sessions.items():
            try:
                session_info = {
                    "session_id": session_id,
                    "url": driver.current_url,
                    "title": driver.title,
                    "created_at": getattr(driver, "created_at", "Unknown"),
                }
            except Exception:
                # The driver could not be queried; report a placeholder
                # entry instead of failing the whole listing.
                session_info = {
                    "session_id": session_id,
                    "url": "Error getting URL",
                    "title": "Error",
                    "created_at": "Unknown",
                }
            session_list.append(session_info)
        return session_list


# Process-wide manager shared by the API endpoints.
browser_manager = BrowserSession()
141
+
142
+ # ============================================================================
143
+ # API Endpoints
144
+ # ============================================================================
145
+
146
@api_app.get("/")
async def root():
    """Describe the service: name, version, feature summary and endpoint map."""
    capabilities = {
        "gradio_ui": "User-friendly web interface",
        "rest_api": "Programmatic API access",
        "sse_streaming": "Real-time updates",
        "mcp_server": "AI agent integration",
        "session_management": "Multiple concurrent sessions",
    }
    routes = {
        "sessions": "/sessions",
        "mcp": "/mcp",
        "sse": "/stream/{session_id}",
        "tasks": "/tasks",
        "health": "/health",
    }
    return {
        "name": "Hybrid Browser Automation API",
        "version": "2.0.0",
        "features": capabilities,
        "endpoints": routes,
    }
166
+
167
@api_app.get("/health")
async def health_check():
    """Report liveness together with session and task counts.

    ``active_tasks`` counts only records whose status is still "running";
    the task table also retains completed and failed tasks, which are not
    active.
    """
    running_tasks = sum(
        1
        for record in browser_manager.task_status.values()
        if record.get("status") == "running"
    )
    return {
        "status": "healthy",
        "active_sessions": len(browser_manager.sessions),
        "active_tasks": running_tasks,
        "timestamp": datetime.utcnow().isoformat(),
    }
175
+
176
@api_app.post("/sessions/create")
async def create_session():
    """Open a new browser session and return its id with a UTC timestamp."""
    new_id = browser_manager.create_session()
    stamp = datetime.utcnow().isoformat()
    return {"session_id": new_id, "created_at": stamp}
183
+
184
@api_app.get("/sessions")
async def list_sessions():
    """Return a summary of every registered session plus the session count."""
    summaries = browser_manager.list_sessions()
    total = len(browser_manager.sessions)
    return {"sessions": summaries, "count": total}
190
+
191
@api_app.post("/sessions/{session_id}/navigate")
async def navigate(session_id: str, request: NavigateRequest):
    """Load ``request.url`` in the session's browser, then pause briefly.

    Returns:
        ``{"status": "success", "url", "title"}`` on success, or
        ``{"status": "error", "error"}`` when the driver rejects the
        navigation (same shape the click/type/extract endpoints use).

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    driver = browser_manager.get_session(session_id)

    try:
        driver.get(request.url)
    except Exception as e:
        return {"status": "error", "error": str(e)}

    # wait_time is Optional: an explicit null must not reach asyncio.sleep,
    # which raises TypeError on None.
    pause = request.wait_time or 0
    if pause > 0:
        await asyncio.sleep(pause)

    return {
        "status": "success",
        "url": driver.current_url,
        "title": driver.title,
    }
202
+
203
@api_app.post("/sessions/{session_id}/click")
async def click_element(session_id: str, request: ClickRequest):
    """Click the first element matching the request's selector.

    Returns:
        ``{"status": "success", "selector"}`` or ``{"status": "error", "error"}``.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    driver = browser_manager.get_session(session_id)

    by_type = {
        "css": By.CSS_SELECTOR,
        "xpath": By.XPATH,
        "id": By.ID,
        "class": By.CLASS_NAME,
    }

    # Validate explicitly so an unknown strategy yields a readable message
    # rather than the bare KeyError text the lookup used to produce.
    locator = by_type.get(request.selector_type)
    if locator is None:
        return {
            "status": "error",
            "error": (
                f"Unsupported selector_type: {request.selector_type!r}; "
                f"expected one of {sorted(by_type)}"
            ),
        }

    try:
        element = driver.find_element(locator, request.selector)
        element.click()
        return {"status": "success", "selector": request.selector}
    except Exception as e:
        return {"status": "error", "error": str(e)}
220
+
221
@api_app.post("/sessions/{session_id}/type")
async def type_text(session_id: str, request: TypeRequest):
    """Clear the first element matching the selector and type ``request.text``.

    Returns:
        ``{"status": "success", "text"}`` or ``{"status": "error", "error"}``.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    driver = browser_manager.get_session(session_id)

    by_type = {
        "css": By.CSS_SELECTOR,
        "xpath": By.XPATH,
        "id": By.ID,
        "class": By.CLASS_NAME,
    }

    # Validate explicitly so an unknown strategy yields a readable message
    # rather than the bare KeyError text the lookup used to produce.
    locator = by_type.get(request.selector_type)
    if locator is None:
        return {
            "status": "error",
            "error": (
                f"Unsupported selector_type: {request.selector_type!r}; "
                f"expected one of {sorted(by_type)}"
            ),
        }

    try:
        element = driver.find_element(locator, request.selector)
        element.clear()
        element.send_keys(request.text)
        return {"status": "success", "text": request.text}
    except Exception as e:
        return {"status": "error", "error": str(e)}
239
+
240
@api_app.post("/sessions/{session_id}/extract")
async def extract_data(session_id: str, request: ExtractRequest):
    """Read text or an attribute from up to 50 elements matching the selector.

    Returns:
        ``{"status": "success", "data", "count"}`` or
        ``{"status": "error", "error"}``.

    Raises:
        HTTPException: 404 when the session does not exist.
    """
    driver = browser_manager.get_session(session_id)

    by_type = {
        "css": By.CSS_SELECTOR,
        "xpath": By.XPATH,
        "id": By.ID,
        "class": By.CLASS_NAME,
    }

    # Validate explicitly so an unknown strategy yields a readable message
    # rather than the bare KeyError text the lookup used to produce.
    locator = by_type.get(request.selector_type)
    if locator is None:
        return {
            "status": "error",
            "error": (
                f"Unsupported selector_type: {request.selector_type!r}; "
                f"expected one of {sorted(by_type)}"
            ),
        }

    try:
        elements = driver.find_elements(locator, request.selector)
        want_text = request.attribute == "text"
        # Limit to 50 elements
        data = [
            elem.text if want_text else elem.get_attribute(request.attribute)
            for elem in elements[:50]
        ]
        return {"status": "success", "data": data, "count": len(data)}
    except Exception as e:
        return {"status": "error", "error": str(e)}
263
+
264
@api_app.get("/sessions/{session_id}/screenshot")
async def get_screenshot(session_id: str):
    """Return a base64 PNG of the session's page, or an error payload.

    Every failure, including an unknown session, is reported in the body as
    ``{"status": "error", "error"}`` rather than as an HTTP error status.
    """
    try:
        encoded = browser_manager.get_screenshot(session_id)
    except Exception as exc:
        return {"status": "error", "error": str(exc)}
    return {"status": "success", "screenshot": encoded}
271
+
272
@api_app.delete("/sessions/{session_id}")
async def close_session(session_id: str):
    """Close the given session; replies with success even for unknown ids."""
    browser_manager.close_session(session_id)
    reply = {"status": "success", "message": "Session closed"}
    return reply
276
+
277
+ # ============================================================================
278
+ # SSE Streaming
279
+ # ============================================================================
280
+
281
@api_app.get("/stream/{session_id}")
async def stream_events(session_id: str):
    """Stream the session's URL and title as server-sent events every 2 seconds.

    The stream ends with a ``closed`` event once the session is no longer
    registered, or after an error event when the driver cannot be queried.
    """

    def as_sse(payload) -> str:
        # One SSE frame: a single "data:" line followed by a blank line.
        return f"data: {json.dumps(payload)}\n\n"

    async def event_generator():
        try:
            browser = browser_manager.get_session(session_id)

            while session_id in browser_manager.sessions:
                try:
                    snapshot = {
                        "timestamp": datetime.utcnow().isoformat(),
                        "url": browser.current_url,
                        "title": browser.title,
                        "session_id": session_id,
                    }
                    yield as_sse(snapshot)
                    await asyncio.sleep(2)
                except Exception as exc:
                    failure = {
                        "error": str(exc),
                        "timestamp": datetime.utcnow().isoformat(),
                    }
                    yield as_sse(failure)
                    break

            yield as_sse({'status': 'closed', 'session_id': session_id})

        except Exception as exc:
            yield as_sse({'error': f'Stream error: {exc}'})

    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
    }
    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers=sse_headers,
    )
320
+
321
+ # ============================================================================
322
+ # Task Execution
323
+ # ============================================================================
324
+
325
@api_app.post("/tasks/execute")
async def execute_task(request: TaskRequest, background_tasks: BackgroundTasks):
    """Start a multi-step browser task in the background.

    A fresh browser session is created for the task and the steps run after
    the response is sent. Progress is recorded in
    ``browser_manager.task_status[task_id]``.

    Supported action types: "navigate" (``url``), "click" (``selector``),
    "type" (``selector``, ``text``) and "wait" (optional ``seconds``).

    Returns:
        ``{"task_id", "session_id", "status": "started"}``.

    Raises:
        HTTPException: 409 when a task with the same id is still running;
            500 when the browser session cannot be created.
    """
    task_id = request.task_id or str(uuid.uuid4())

    # Refuse to clobber the record of a task that is still in flight; check
    # before creating a browser so a rejected request does not leak one.
    previous = browser_manager.task_status.get(task_id)
    if previous is not None and previous.get("status") == "running":
        raise HTTPException(status_code=409, detail=f"Task {task_id} is already running")

    session_id = browser_manager.create_session()
    actions = request.actions

    status = {
        "status": "running",
        "session_id": session_id,
        "progress": 0,
        "current_step": 0,
        "total_steps": len(actions),
        "started_at": datetime.utcnow().isoformat(),
    }
    browser_manager.task_status[task_id] = status

    async def run_task():
        try:
            # Looked up inside the try block so a session closed before the
            # task starts is recorded as an error, not left "running" forever.
            driver = browser_manager.get_session(session_id)

            for step, action in enumerate(actions, start=1):
                status["current_step"] = step
                status["progress"] = step / len(actions) * 100

                action_type = action.get("type")

                if action_type == "navigate":
                    driver.get(action["url"])
                elif action_type == "click":
                    elem = driver.find_element(By.CSS_SELECTOR, action["selector"])
                    elem.click()
                elif action_type == "type":
                    elem = driver.find_element(By.CSS_SELECTOR, action["selector"])
                    elem.send_keys(action["text"])
                elif action_type == "wait":
                    await asyncio.sleep(action.get("seconds", 1))
                else:
                    # Previously skipped silently, which let a task with a
                    # misspelled action finish as "completed".
                    raise ValueError(f"Unsupported action type: {action_type!r}")

                # Short pause between steps.
                await asyncio.sleep(0.5)

            status["status"] = "completed"
            status["completed_at"] = datetime.utcnow().isoformat()

        except Exception as e:
            status["status"] = "error"
            status["error"] = str(e)
            status["error_at"] = datetime.utcnow().isoformat()

    background_tasks.add_task(run_task)

    return {
        "task_id": task_id,
        "session_id": session_id,
        "status": "started",
    }
377
+
378
@api_app.get("/tasks/{task_id}/status")
async def get_task_status(task_id: str):
    """Return the stored status record for ``task_id`` (404 when unknown)."""
    known_tasks = browser_manager.task_status
    if task_id in known_tasks:
        return known_tasks[task_id]
    raise HTTPException(status_code=404, detail="Task not found")
383
+
384
@api_app.get("/tasks/{task_id}/stream")
async def stream_task_progress(task_id: str):
    """Stream a task's status record as server-sent events every 0.5 seconds.

    The stream stops after the first "completed" or "error" record, or right
    after a single error event when the task id is unknown.
    """
    finished_states = ["completed", "error"]

    async def progress_generator():
        while True:
            if task_id not in browser_manager.task_status:
                missing = json.dumps({'error': 'Task not found'})
                yield f"data: {missing}\n\n"
                return

            record = browser_manager.task_status[task_id]
            yield f"data: {json.dumps(record)}\n\n"

            if record["status"] in finished_states:
                return

            await asyncio.sleep(0.5)

    return StreamingResponse(progress_generator(), media_type="text/event-stream")
404
+
405
+ # ============================================================================
406
+ # Gradio Frontend
407
+ # ============================================================================
408
+
409
class BrowserAPIClient:
    """Small synchronous HTTP client the Gradio callbacks use to reach the API."""

    _SUPPORTED_METHODS = ("GET", "POST", "DELETE")

    def __init__(self, base_url: str = "http://localhost:8000"):
        # Prefix prepended to every endpoint path.
        self.base_url = base_url

    def make_request(self, method: str, endpoint: str, data: dict = None) -> dict:
        """Call the API and return the decoded JSON body.

        Args:
            method: "GET", "POST" or "DELETE".
            endpoint: Path appended to ``base_url`` (for example "/sessions").
            data: JSON body, sent for POST requests only.

        Returns:
            The decoded response. Any failure is reported as a dict with an
            ``"error"`` key: an unsupported method, a transport or decoding
            problem, or an HTTP error status. For an HTTP error status the
            ``"detail"`` field of the response is used as the message,
            because callers only test for the ``"error"`` key.
        """
        # Validate before touching the network stack.
        if method not in self._SUPPORTED_METHODS:
            return {"error": f"Unsupported method: {method}"}

        import requests

        url = f"{self.base_url}{endpoint}"
        try:
            body = data if method == "POST" else None
            response = requests.request(method, url, json=body)
            payload = response.json()
        except Exception as e:
            return {"error": str(e)}

        if not response.ok:
            if isinstance(payload, dict) and "error" in payload:
                return payload
            detail = payload.get("detail") if isinstance(payload, dict) else None
            return {"error": str(detail or f"HTTP {response.status_code}")}

        return payload


# Global API client
api_client = BrowserAPIClient()
433
+
434
def create_new_session():
    """Create a new browser session

    Returns:
        Six values matching the UI outputs: (status message, session id,
        url, title, extracted text, screenshot). Only the first two are
        ever filled in here.
    """
    result = api_client.make_request("POST", "/sessions/create")

    # A failed request may carry "error" or FastAPI's "detail"; either way a
    # response without a session id is a failure, not a KeyError.
    if "error" in result or "session_id" not in result:
        reason = result.get("error") or result.get("detail") or "Unexpected API response"
        return f"❌ Error: {reason}", None, None, None, None, None

    session_id = result["session_id"]
    return f"✅ Session created: {session_id}", session_id, None, None, None, None
442
+
443
def navigate_to_url(url, wait_time, session_id):
    """Navigate to a URL

    Args:
        url: Address to load.
        wait_time: Seconds the API should pause after loading.
        session_id: Session to drive; a falsy value yields a prompt to
            create one.

    Returns:
        Six values matching the UI outputs: (status message, session id,
        url, title, extracted text, screenshot).
    """
    if not session_id:
        return "❌ Please create a session first", None, None, None, None, None

    data = {"url": url, "wait_time": wait_time}
    result = api_client.make_request("POST", f"/sessions/{session_id}/navigate", data)

    if "error" in result or "url" not in result:
        reason = result.get("error") or result.get("detail") or "Unexpected API response"
        # Keep the session id: one failed navigation must not drop the
        # session from the UI state and orphan the browser.
        return f"❌ Error: {reason}", session_id, None, None, None, None

    page_url = result["url"]
    page_title = result.get("title")
    return (
        f"✅ Navigated to: {page_url} | Title: {page_title}",
        session_id,
        page_url,
        page_title,
        None,
        None,
    )
462
+
463
def extract_data_from_page(selector, attribute, session_id):
    """Extract data from the page

    Args:
        selector: CSS selector of the elements to read.
        attribute: "text" or an attribute name.
        session_id: Session to query; a falsy value yields a prompt to
            create one.

    Returns:
        Six values matching the UI outputs: (status message, session id,
        url, title, extracted text, screenshot). The extracted text lists
        at most the first 20 items as bullets.
    """
    if not session_id:
        return "❌ Please create a session first", None, None, None, None, None

    data = {"selector": selector, "attribute": attribute}
    result = api_client.make_request("POST", f"/sessions/{session_id}/extract", data)

    if "error" in result or "data" not in result:
        reason = result.get("error") or result.get("detail") or "Unexpected API response"
        # Keep the session id so a bad selector does not drop the session.
        return f"❌ Error: {reason}", session_id, None, None, None, None

    items = result["data"]
    total = result.get("count", len(items))

    shown_limit = 20
    extracted_text = "\n".join(f"• {item}" for item in items[:shown_limit])
    if total > shown_limit:
        extracted_text += f"\n... and {total - shown_limit} more items"

    return (
        f"✅ Extracted {total} items from selector: {selector}",
        session_id,
        None,
        None,
        extracted_text,
        None,
    )
486
+
487
def take_screenshot(session_id):
    """Take a screenshot of the current page

    The API returns the PNG as a base64 string. An image component cannot
    render that string directly, so it is decoded into a temporary ``.png``
    file and handed over as a file path, together with ``visible=True`` so
    the image actually shows.

    Returns:
        Six values matching the UI outputs: (status message, session id,
        url, title, extracted text, screenshot update).
    """
    if not session_id:
        return "❌ Please create a session first", None, None, None, None, None

    result = api_client.make_request("GET", f"/sessions/{session_id}/screenshot")

    if "error" in result or "screenshot" not in result:
        reason = result.get("error") or result.get("detail") or "Unexpected API response"
        # Keep the session id so a failed capture does not drop the session.
        return f"❌ Error: {reason}", session_id, None, None, None, None

    import tempfile

    try:
        png_bytes = base64.b64decode(result["screenshot"])
    except (TypeError, ValueError) as e:
        return f"❌ Error: invalid screenshot data: {e}", session_id, None, None, None, None

    # NOTE: the file is deliberately left on disk (delete=False) so the UI can
    # read it after this function returns; temp files are not cleaned up here.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as image_file:
        image_file.write(png_bytes)
        image_path = image_file.name

    return (
        "✅ Screenshot captured",
        session_id,
        None,
        None,
        None,
        gr.update(value=image_path, visible=True),
    )
505
+
506
def execute_task(actions_json, task_id):
    """Execute a multi-step task

    Args:
        actions_json: JSON text holding an array of action objects.
        task_id: Optional custom task id; empty means auto-generated.

    Returns:
        Six values matching the UI outputs: (status message, session id,
        url, title, details text, screenshot).
    """
    try:
        actions = json.loads(actions_json)
    except (json.JSONDecodeError, TypeError):
        return "❌ Invalid JSON format for actions", None, None, None, None, None

    # The API expects a list of action objects; reject anything else here
    # instead of forwarding it and surfacing a validation error.
    if not isinstance(actions, list):
        return "❌ Actions must be a JSON array", None, None, None, None, None

    data = {"actions": actions, "task_id": task_id or None}
    result = api_client.make_request("POST", "/tasks/execute", data)

    if "error" in result or "task_id" not in result:
        reason = result.get("error") or result.get("detail") or "Unexpected API response"
        return f"❌ Error: {reason}", None, None, None, None, None

    task_id = result["task_id"]
    task_session = result.get("session_id")
    return (
        f"✅ Task started: {task_id}\n\nUse the task status to monitor progress.",
        task_session,
        None,
        None,
        f"Task ID: {task_id}\nSession ID: {task_session}\n\nNote: Monitor progress via API endpoint /tasks/{task_id}/stream",
        None,
    )
528
+
529
def close_current_session(session_id):
    """Close the current session

    Returns:
        Six values matching the UI outputs: (status message, session id,
        url, title, extracted text, screenshot). The session id is kept when
        the API call fails, so the close can be retried.
    """
    if not session_id:
        return "❌ No active session to close", None, None, None, None, None

    result = api_client.make_request("DELETE", f"/sessions/{session_id}")

    # The API result used to be ignored, so success was reported even when
    # the request never reached the server.
    if "error" in result:
        return f"❌ Error: {result['error']}", session_id, None, None, None, None

    return "✅ Session closed successfully", None, None, None, None, None
536
+
537
+ # ============================================================================
538
+ # Gradio Interface
539
+ # ============================================================================
540
+
541
def create_gradio_app():
    """Build the Gradio UI that drives the REST API through the module helpers.

    Every button handler returns six values: (status message, session id,
    url, title, extracted text, screenshot). The ``outputs`` lists below map
    those positions onto components or state holders.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    with gr.Blocks(title="🌐 Browser Automation Tool", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🌐 Browser Automation Tool")
        gr.Markdown("**Hybrid Platform: Gradio UI + FastAPI Backend + REST API + SSE + MCP**")

        with gr.Tab("🔧 Session Management"):
            gr.Markdown("### Browser Session Control")

            with gr.Row():
                create_btn = gr.Button("🆕 Create New Session", variant="primary")
                close_btn = gr.Button("❌ Close Current Session", variant="secondary")

            session_info = gr.Textbox(label="Session Status", lines=3, max_lines=10)
            # State shared by the handlers in all tabs.
            session_id_state = gr.State()
            current_url_state = gr.State()
            current_title_state = gr.State()
            extracted_data_state = gr.State()
            screenshot_state = gr.State()

            create_btn.click(
                fn=create_new_session,
                outputs=[session_info, session_id_state, current_url_state, current_title_state, extracted_data_state, screenshot_state],
            )

            close_btn.click(
                fn=close_current_session,
                inputs=[session_id_state],
                outputs=[session_info, session_id_state, current_url_state, current_title_state, extracted_data_state, screenshot_state],
            )

        with gr.Tab("🌐 Navigation & Actions"):
            gr.Markdown("### Navigate and Interact")

            with gr.Row():
                url_input = gr.Textbox(
                    label="URL",
                    placeholder="https://example.com",
                    scale=3,
                )
                wait_time = gr.Slider(1, 10, value=3, label="Wait Time (seconds)", scale=1)
                navigate_btn = gr.Button("🚀 Navigate", variant="primary", scale=1)

            with gr.Row():
                selector_input = gr.Textbox(
                    label="CSS Selector",
                    placeholder=".example-class or #element-id",
                    scale=3,
                )
                attribute_dropdown = gr.Dropdown(
                    ["text", "href", "src", "value", "innerHTML"],
                    value="text",
                    label="Extract Attribute",
                    scale=1,
                )
                extract_btn = gr.Button("🔍 Extract Data", variant="secondary", scale=1)

            screenshot_btn = gr.Button("📸 Take Screenshot", variant="secondary")

            # Status outputs
            status_output = gr.Textbox(label="Status", lines=3)
            current_url_display = gr.Textbox(label="Current URL", lines=2)
            current_title_display = gr.Textbox(label="Page Title", lines=2)
            extracted_display = gr.Textbox(label="Extracted Data", lines=8)
            # Visible so a captured screenshot can actually be seen.
            screenshot_display = gr.Image(label="Screenshot")

            navigate_btn.click(
                fn=navigate_to_url,
                inputs=[url_input, wait_time, session_id_state],
                outputs=[status_output, session_id_state, current_url_display, current_title_display, extracted_data_state, screenshot_state],
            )

            # The extracted text goes to the visible textbox; it used to be
            # written only to a hidden state holder and was never shown.
            extract_btn.click(
                fn=extract_data_from_page,
                inputs=[selector_input, attribute_dropdown, session_id_state],
                outputs=[status_output, session_id_state, current_url_display, current_title_display, extracted_display, screenshot_state],
            )

            screenshot_btn.click(
                fn=take_screenshot,
                inputs=[session_id_state],
                outputs=[status_output, session_id_state, current_url_display, current_title_display, extracted_data_state, screenshot_display],
            )

        with gr.Tab("⚑ Task Execution"):
            gr.Markdown("### Multi-Step Task Automation")
            gr.Markdown("**Enter actions as JSON array:**")
            gr.Markdown("""
            ```json
            [
              {"type": "navigate", "url": "https://example.com"},
              {"type": "wait", "seconds": 2},
              {"type": "click", "selector": ".button-class"},
              {"type": "type", "selector": "input[name='search']", "text": "search term"}
            ]
            ```
            """)

            task_actions = gr.Textbox(
                label="Task Actions (JSON)",
                lines=10,
                placeholder='[{"type": "navigate", "url": "https://example.com"}]',
                value='[{"type": "navigate", "url": "https://example.com"}, {"type": "wait", "seconds": 2}]',
            )

            task_id_input = gr.Textbox(
                label="Custom Task ID (optional)",
                placeholder="Leave empty for auto-generated",
            )

            execute_task_btn = gr.Button("⚑ Execute Task", variant="primary")

            task_status = gr.Textbox(label="Task Status", lines=5)
            task_session_id = gr.Textbox(label="Task Session ID", lines=1)

            # This button previously had no handler. At call time the
            # module-level name execute_task refers to the UI helper
            # (actions_json, task_id), which returns the six-value tuple.
            execute_task_btn.click(
                fn=execute_task,
                inputs=[task_actions, task_id_input],
                outputs=[task_status, task_session_id, current_url_state, current_title_state, extracted_data_state, screenshot_state],
            )

        with gr.Tab("📊 API Information"):
            gr.Markdown("### REST API Endpoints")

            gr.Markdown("""
            **Base URL:** `http://localhost:8000`

            **Key Endpoints:**
            - `POST /sessions/create` - Create new session
            - `POST /sessions/{id}/navigate` - Navigate to URL
            - `POST /sessions/{id}/extract` - Extract data
            - `GET /sessions/{id}/screenshot` - Get screenshot
            - `POST /tasks/execute` - Execute task
            - `GET /stream/{id}` - SSE stream (real-time updates)
            - `GET /health` - Health check
            """)

            gr.Markdown("### MCP Server")
            gr.Markdown("""
            **Tools available:**
            - `browser_navigate` - Navigate to URL
            - `browser_click` - Click element
            - `browser_extract` - Extract data
            - `browser_screenshot` - Capture screenshot
            """)

            gr.Markdown("### Usage Examples")
            gr.Markdown("""
            **Python Client:**
            ```python
            import requests
            response = requests.post("http://localhost:8000/sessions/create")
            session_id = response.json()["session_id"]

            response = requests.post(
                f"http://localhost:8000/sessions/{session_id}/navigate",
                json={"url": "https://example.com"}
            )
            ```
            """)

        with gr.Tab("🎯 Advanced Features"):
            gr.Markdown("### Advanced Capabilities")

            gr.Markdown("""
            **✅ Multi-Session Management**
            - Create and manage multiple browser sessions simultaneously
            - Each session runs independently

            **✅ Real-time Streaming (SSE)**
            - Live updates of browser state
            - Task progress monitoring

            **✅ JavaScript Execution**
            - Execute custom scripts via REST API
            - Access browser internals programmatically

            **✅ MCP Integration**
            - AI agent integration via Model Context Protocol
            - Natural language browser control

            **✅ Production Ready**
            - FastAPI backend with automatic OpenAPI docs
            - CORS enabled for web integration
            - Health monitoring and error handling
            """)

            gr.Markdown("### System Information")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**API Health:** `GET /health`")
                    health_status = gr.Textbox("Click refresh to check", interactive=False)
                    refresh_btn = gr.Button("🔄 Refresh Health")
                with gr.Column():
                    gr.Markdown("**Active Sessions:** Count from API")
                    active_sessions = gr.Textbox("N/A", interactive=False)

            def check_health():
                """Return (health summary, active-session count) for the two boxes."""
                try:
                    import requests
                    response = requests.get("http://localhost:8000/health")
                    data = response.json()
                    summary = (
                        f"Status: {data.get('status', 'Unknown')}\n"
                        f"Active Sessions: {data.get('active_sessions', 0)}\n"
                        f"Active Tasks: {data.get('active_tasks', 0)}"
                    )
                    return summary, str(data.get("active_sessions", 0))
                except Exception:
                    return (
                        "❌ API not available - Make sure the API server is running on port 8000",
                        "N/A",
                    )

            # Also refresh the session counter, which was never updated before.
            refresh_btn.click(
                fn=check_health,
                outputs=[health_status, active_sessions],
            )

        # Initialize with session info
        demo.load(
            fn=lambda: "Browser Automation Tool ready! Create a session to start.",
            outputs=[session_info],
        )

    return demo
752
+
753
+ # ============================================================================
754
+ # Main Application
755
+ # ============================================================================
756
+
757
def start_api_server():
    """Start the FastAPI server on port 8000"""
    # Blocks until the server stops; main() runs this in a daemon thread.
    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "log_level": "info",
    }
    uvicorn.run(api_app, **server_options)
765
+
766
def main():
    """Main application entry point

    Starts the API server in a daemon thread, waits until it accepts
    connections (or a timeout passes), then launches the Gradio UI in the
    foreground.
    """
    import socket
    import time

    print("🚀 Starting Browser Automation Tool...")
    print("=" * 50)

    # Start API server in background
    print("🔧 Starting FastAPI server on port 8000...")
    api_thread = threading.Thread(target=start_api_server, daemon=True)
    api_thread.start()

    # Wait for API to start: poll the port instead of sleeping a fixed time,
    # which was too short on slow hosts and wasted time on fast ones.
    deadline = time.monotonic() + 15
    api_ready = False
    while time.monotonic() < deadline and api_thread.is_alive():
        try:
            with socket.create_connection(("127.0.0.1", 8000), timeout=0.5):
                api_ready = True
                break
        except OSError:
            time.sleep(0.2)
    if not api_ready:
        print("⚠️ API server did not become reachable on port 8000; the UI may report errors.")

    # Create and launch Gradio app
    print("🎨 Starting Gradio UI on port 7860...")
    print("📊 API documentation available at: http://localhost:8000/docs")
    print("🌐 Gradio interface will open at: http://localhost:7860")
    print("=" * 50)

    demo = create_gradio_app()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
        debug=False,
    )


if __name__ == "__main__":
    main()