sheikhcoders committed on
Commit
39a1b59
·
verified ·
1 Parent(s): 7ccd7d9

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +132 -759
app.py CHANGED
@@ -1,798 +1,171 @@
 
1
  """
2
- Hybrid Browser Automation Tool
3
- Combines Gradio UI with FastAPI backend
4
  """
5
 
6
  import gradio as gr
7
- import asyncio
8
- import aiohttp
9
- import json
10
- import base64
11
- import uuid
12
- from typing import Dict, List, Any, Optional
13
- from fastapi import FastAPI, HTTPException, BackgroundTasks
14
- from fastapi.responses import StreamingResponse
15
- from fastapi.middleware.cors import CORSMiddleware
16
- from pydantic import BaseModel
17
- from datetime import datetime
18
  from selenium import webdriver
19
- from selenium.webdriver.common.by import By
20
  from selenium.webdriver.chrome.options import Options
21
- import threading
22
- import uvicorn
23
-
24
- # ============================================================================
25
- # FastAPI Backend
26
- # ============================================================================
27
-
28
- # FastAPI App Setup
29
- api_app = FastAPI(
30
- title="Browser Automation API",
31
- description="Browser automation with Gradio UI and REST API",
32
- version="2.0.0"
33
- )
34
-
35
- # Add CORS for Gradio integration
36
- api_app.add_middleware(
37
- CORSMiddleware,
38
- allow_origins=["*"],
39
- allow_credentials=True,
40
- allow_methods=["*"],
41
- allow_headers=["*"],
42
- )
43
-
44
- # ============================================================================
45
- # Data Models
46
- # ============================================================================
47
-
48
- class NavigateRequest(BaseModel):
49
- url: str
50
- wait_time: Optional[int] = 2
51
-
52
- class ClickRequest(BaseModel):
53
- selector: str
54
- selector_type: Optional[str] = "css"
55
-
56
- class TypeRequest(BaseModel):
57
- selector: str
58
- text: str
59
- selector_type: Optional[str] = "css"
60
-
61
- class ExtractRequest(BaseModel):
62
- selector: str
63
- attribute: Optional[str] = "text"
64
- selector_type: Optional[str] = "css"
65
-
66
- class TaskRequest(BaseModel):
67
- task_id: Optional[str] = None
68
- actions: List[Dict[str, Any]]
69
-
70
- # ============================================================================
71
- # Browser Session Manager
72
- # ============================================================================
73
-
74
- class BrowserSession:
75
- def __init__(self):
76
- self.sessions: Dict[str, webdriver.Chrome] = {}
77
- self.task_status: Dict[str, Dict] = {}
78
- self.api_clients: Dict[str, Any] = {}
79
-
80
- def create_session(self, session_id: str = None) -> str:
81
- if not session_id:
82
- session_id = str(uuid.uuid4())
83
-
84
- chrome_options = Options()
85
  chrome_options.add_argument("--headless")
86
- chrome_options.add_argument("--no-sandbox")
87
- chrome_options.add_argument("--disable-dev-shm-usage")
88
- chrome_options.add_argument("--disable-gpu")
89
-
90
- try:
91
- driver = webdriver.Chrome(options=chrome_options)
92
- self.sessions[session_id] = driver
93
- return session_id
94
- except Exception as e:
95
- print(f"Error creating session: {e}")
96
- raise HTTPException(status_code=500, detail=f"Failed to create browser session: {e}")
97
-
98
- def get_session(self, session_id: str) -> webdriver.Chrome:
99
- if session_id not in self.sessions:
100
- raise HTTPException(status_code=404, detail="Session not found")
101
- return self.sessions[session_id]
102
-
103
- def close_session(self, session_id: str):
104
- if session_id in self.sessions:
105
- try:
106
- self.sessions[session_id].quit()
107
- del self.sessions[session_id]
108
- except:
109
- pass
110
-
111
- def get_screenshot(self, session_id: str) -> str:
112
- driver = self.get_session(session_id)
113
- try:
114
- screenshot = driver.get_screenshot_as_png()
115
- return base64.b64encode(screenshot).decode()
116
- except Exception as e:
117
- raise HTTPException(status_code=500, detail=f"Failed to capture screenshot: {e}")
118
-
119
- def list_sessions(self) -> List[Dict]:
120
- """List all active sessions"""
121
- session_list = []
122
- for session_id, driver in self.sessions.items():
123
- try:
124
- session_info = {
125
- "session_id": session_id,
126
- "url": driver.current_url,
127
- "title": driver.title,
128
- "created_at": getattr(driver, 'created_at', 'Unknown')
129
- }
130
- session_list.append(session_info)
131
- except:
132
- session_list.append({
133
- "session_id": session_id,
134
- "url": "Error getting URL",
135
- "title": "Error",
136
- "created_at": "Unknown"
137
- })
138
- return session_list
139
-
140
- browser_manager = BrowserSession()
141
-
142
- # ============================================================================
143
- # API Endpoints
144
- # ============================================================================
145
-
146
- @api_app.get("/")
147
- async def root():
148
- return {
149
- "name": "Hybrid Browser Automation API",
150
- "version": "2.0.0",
151
- "features": {
152
- "gradio_ui": "User-friendly web interface",
153
- "rest_api": "Programmatic API access",
154
- "sse_streaming": "Real-time updates",
155
- "mcp_server": "AI agent integration",
156
- "session_management": "Multiple concurrent sessions"
157
- },
158
- "endpoints": {
159
- "sessions": "/sessions",
160
- "mcp": "/mcp",
161
- "sse": "/stream/{session_id}",
162
- "tasks": "/tasks",
163
- "health": "/health"
164
- }
165
- }
166
-
167
- @api_app.get("/health")
168
- async def health_check():
169
- return {
170
- "status": "healthy",
171
- "active_sessions": len(browser_manager.sessions),
172
- "active_tasks": len(browser_manager.task_status),
173
- "timestamp": datetime.utcnow().isoformat()
174
- }
175
-
176
- @api_app.post("/sessions/create")
177
- async def create_session():
178
- session_id = browser_manager.create_session()
179
- return {
180
- "session_id": session_id,
181
- "created_at": datetime.utcnow().isoformat()
182
- }
183
-
184
- @api_app.get("/sessions")
185
- async def list_sessions():
186
- return {
187
- "sessions": browser_manager.list_sessions(),
188
- "count": len(browser_manager.sessions)
189
- }
190
-
191
- @api_app.post("/sessions/{session_id}/navigate")
192
- async def navigate(session_id: str, request: NavigateRequest):
193
- driver = browser_manager.get_session(session_id)
194
- driver.get(request.url)
195
- await asyncio.sleep(request.wait_time)
196
 
197
- return {
198
- "status": "success",
199
- "url": driver.current_url,
200
- "title": driver.title
201
- }
202
 
203
- @api_app.post("/sessions/{session_id}/click")
204
- async def click_element(session_id: str, request: ClickRequest):
205
- driver = browser_manager.get_session(session_id)
206
-
207
- by_type = {
208
- "css": By.CSS_SELECTOR,
209
- "xpath": By.XPATH,
210
- "id": By.ID,
211
- "class": By.CLASS_NAME
212
- }
213
-
214
- try:
215
- element = driver.find_element(by_type[request.selector_type], request.selector)
216
- element.click()
217
- return {"status": "success", "selector": request.selector}
218
- except Exception as e:
219
- return {"status": "error", "error": str(e)}
220
-
221
- @api_app.post("/sessions/{session_id}/type")
222
- async def type_text(session_id: str, request: TypeRequest):
223
- driver = browser_manager.get_session(session_id)
224
-
225
- by_type = {
226
- "css": By.CSS_SELECTOR,
227
- "xpath": By.XPATH,
228
- "id": By.ID,
229
- "class": By.CLASS_NAME
230
- }
231
-
232
  try:
233
- element = driver.find_element(by_type[request.selector_type], request.selector)
234
- element.clear()
235
- element.send_keys(request.text)
236
- return {"status": "success", "text": request.text}
237
- except Exception as e:
238
- return {"status": "error", "error": str(e)}
239
-
240
- @api_app.post("/sessions/{session_id}/extract")
241
- async def extract_data(session_id: str, request: ExtractRequest):
242
- driver = browser_manager.get_session(session_id)
243
-
244
- by_type = {
245
- "css": By.CSS_SELECTOR,
246
- "xpath": By.XPATH,
247
- "id": By.ID,
248
- "class": By.CLASS_NAME
249
- }
250
-
251
- try:
252
- elements = driver.find_elements(by_type[request.selector_type], request.selector)
253
- data = []
254
- for elem in elements[:50]: # Limit to 50 elements
255
- if request.attribute == "text":
256
- data.append(elem.text)
257
- else:
258
- data.append(elem.get_attribute(request.attribute))
259
 
260
- return {"status": "success", "data": data, "count": len(data)}
 
 
 
 
261
  except Exception as e:
262
- return {"status": "error", "error": str(e)}
263
 
264
- @api_app.get("/sessions/{session_id}/screenshot")
265
- async def get_screenshot(session_id: str):
266
  try:
267
- screenshot = browser_manager.get_screenshot(session_id)
268
- return {"status": "success", "screenshot": screenshot}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  except Exception as e:
270
- return {"status": "error", "error": str(e)}
271
-
272
- @api_app.delete("/sessions/{session_id}")
273
- async def close_session(session_id: str):
274
- browser_manager.close_session(session_id)
275
- return {"status": "success", "message": "Session closed"}
276
-
277
- # ============================================================================
278
- # SSE Streaming
279
- # ============================================================================
280
 
281
- @api_app.get("/stream/{session_id}")
282
- async def stream_events(session_id: str):
283
- async def event_generator():
284
- try:
285
- driver = browser_manager.get_session(session_id)
286
-
287
- while session_id in browser_manager.sessions:
288
- try:
289
- event_data = {
290
- "timestamp": datetime.utcnow().isoformat(),
291
- "url": driver.current_url,
292
- "title": driver.title,
293
- "session_id": session_id
294
- }
295
-
296
- yield f"data: {json.dumps(event_data)}\n\n"
297
- await asyncio.sleep(2)
298
-
299
- except Exception as e:
300
- error_data = {
301
- "error": str(e),
302
- "timestamp": datetime.utcnow().isoformat()
303
- }
304
- yield f"data: {json.dumps(error_data)}\n\n"
305
- break
306
 
307
- yield f"data: {json.dumps({'status': 'closed', 'session_id': session_id})}\n\n"
308
-
309
- except Exception as e:
310
- yield f"data: {json.dumps({'error': f'Stream error: {e}'})}\n\n"
311
-
312
- return StreamingResponse(
313
- event_generator(),
314
- media_type="text/event-stream",
315
- headers={
316
- "Cache-Control": "no-cache",
317
- "Connection": "keep-alive",
318
- }
319
- )
320
-
321
- # ============================================================================
322
- # Task Execution
323
- # ============================================================================
324
-
325
- @api_app.post("/tasks/execute")
326
- async def execute_task(request: TaskRequest, background_tasks: BackgroundTasks):
327
- task_id = request.task_id or str(uuid.uuid4())
328
- session_id = browser_manager.create_session()
329
-
330
- browser_manager.task_status[task_id] = {
331
- "status": "running",
332
- "session_id": session_id,
333
- "progress": 0,
334
- "current_step": 0,
335
- "total_steps": len(request.actions),
336
- "started_at": datetime.utcnow().isoformat()
337
- }
338
-
339
- async def run_task():
340
- driver = browser_manager.get_session(session_id)
341
-
342
  try:
343
- for i, action in enumerate(request.actions):
344
- browser_manager.task_status[task_id]["current_step"] = i + 1
345
- browser_manager.task_status[task_id]["progress"] = (i + 1) / len(request.actions) * 100
346
-
347
- action_type = action.get("type")
348
-
349
- if action_type == "navigate":
350
- driver.get(action["url"])
351
- elif action_type == "click":
352
- elem = driver.find_element(By.CSS_SELECTOR, action["selector"])
353
- elem.click()
354
- elif action_type == "type":
355
- elem = driver.find_element(By.CSS_SELECTOR, action["selector"])
356
- elem.send_keys(action["text"])
357
- elif action_type == "wait":
358
- await asyncio.sleep(action.get("seconds", 1))
359
-
360
- await asyncio.sleep(0.5)
361
-
362
- browser_manager.task_status[task_id]["status"] = "completed"
363
- browser_manager.task_status[task_id]["completed_at"] = datetime.utcnow().isoformat()
364
 
365
- except Exception as e:
366
- browser_manager.task_status[task_id]["status"] = "error"
367
- browser_manager.task_status[task_id]["error"] = str(e)
368
- browser_manager.task_status[task_id]["error_at"] = datetime.utcnow().isoformat()
369
-
370
- background_tasks.add_task(run_task)
371
-
372
- return {
373
- "task_id": task_id,
374
- "session_id": session_id,
375
- "status": "started"
376
- }
377
-
378
- @api_app.get("/tasks/{task_id}/status")
379
- async def get_task_status(task_id: str):
380
- if task_id not in browser_manager.task_status:
381
- raise HTTPException(status_code=404, detail="Task not found")
382
- return browser_manager.task_status[task_id]
383
-
384
- @api_app.get("/tasks/{task_id}/stream")
385
- async def stream_task_progress(task_id: str):
386
- async def progress_generator():
387
- while True:
388
- if task_id not in browser_manager.task_status:
389
- yield f"data: {json.dumps({'error': 'Task not found'})}\n\n"
390
- break
391
-
392
- status = browser_manager.task_status[task_id]
393
- yield f"data: {json.dumps(status)}\n\n"
394
-
395
- if status["status"] in ["completed", "error"]:
396
- break
397
 
398
- await asyncio.sleep(0.5)
399
-
400
- return StreamingResponse(
401
- progress_generator(),
402
- media_type="text/event-stream"
403
- )
404
-
405
- # ============================================================================
406
- # Gradio Frontend
407
- # ============================================================================
408
-
409
- class BrowserAPIClient:
410
- def __init__(self, base_url: str = "http://localhost:8000"):
411
- self.base_url = base_url
412
-
413
- def make_request(self, method: str, endpoint: str, data: dict = None) -> dict:
414
- import requests
415
-
416
- url = f"{self.base_url}{endpoint}"
417
- try:
418
- if method == "GET":
419
- response = requests.get(url)
420
- elif method == "POST":
421
- response = requests.post(url, json=data)
422
- elif method == "DELETE":
423
- response = requests.delete(url)
424
- else:
425
- raise ValueError(f"Unsupported method: {method}")
426
 
427
- return response.json()
428
  except Exception as e:
429
- return {"error": str(e)}
430
-
431
- # Global API client
432
- api_client = BrowserAPIClient()
433
-
434
- def create_new_session():
435
- """Create a new browser session"""
436
- result = api_client.make_request("POST", "/sessions/create")
437
- if "error" in result:
438
- return f"❌ Error: {result['error']}", None, None, None, None, None
439
-
440
- session_id = result["session_id"]
441
- return f"✅ Session created: {session_id}", session_id, None, None, None, None
442
-
443
- def navigate_to_url(url, wait_time, session_id):
444
- """Navigate to a URL"""
445
- if not session_id:
446
- return "❌ Please create a session first", None, None, None, None, None
447
-
448
- data = {"url": url, "wait_time": wait_time}
449
- result = api_client.make_request("POST", f"/sessions/{session_id}/navigate", data)
450
-
451
- if "error" in result:
452
- return f"❌ Error: {result['error']}", None, None, None, None, None
453
-
454
- return (
455
- f"✅ Navigated to: {result['url']} | Title: {result['title']}",
456
- session_id,
457
- result["url"],
458
- result["title"],
459
- None,
460
- None
461
- )
462
-
463
- def extract_data_from_page(selector, attribute, session_id):
464
- """Extract data from the page"""
465
- if not session_id:
466
- return "❌ Please create a session first", None, None, None, None, None
467
-
468
- data = {"selector": selector, "attribute": attribute}
469
- result = api_client.make_request("POST", f"/sessions/{session_id}/extract", data)
470
-
471
- if "error" in result:
472
- return f"❌ Error: {result['error']}", None, None, None, None, None
473
-
474
- extracted_text = "\n".join([f"• {item}" for item in result["data"][:20]])
475
- if result["count"] > 20:
476
- extracted_text += f"\n... and {result['count'] - 20} more items"
477
-
478
- return (
479
- f"✅ Extracted {result['count']} items from selector: {selector}",
480
- session_id,
481
- None,
482
- None,
483
- extracted_text,
484
- None
485
- )
486
-
487
- def take_screenshot(session_id):
488
- """Take a screenshot of the current page"""
489
- if not session_id:
490
- return "❌ Please create a session first", None, None, None, None, None
491
-
492
- result = api_client.make_request("GET", f"/sessions/{session_id}/screenshot")
493
-
494
- if "error" in result:
495
- return f"❌ Error: {result['error']}", None, None, None, None, None
496
 
497
- return (
498
- f"✅ Screenshot captured",
499
- session_id,
500
- None,
501
- None,
502
- None,
503
- result["screenshot"]
504
- )
505
 
506
- def execute_task(actions_json, task_id):
507
- """Execute a multi-step task"""
508
- try:
509
- actions = json.loads(actions_json)
510
- except json.JSONDecodeError:
511
- return "❌ Invalid JSON format for actions", None, None, None, None, None
512
-
513
- data = {"actions": actions, "task_id": task_id or None}
514
- result = api_client.make_request("POST", "/tasks/execute", data)
515
-
516
- if "error" in result:
517
- return f"❌ Error: {result['error']}", None, None, None, None, None
518
-
519
- task_id = result["task_id"]
520
- return (
521
- f"✅ Task started: {task_id}\n\nUse the task status to monitor progress.",
522
- result["session_id"],
523
- None,
524
- None,
525
- f"Task ID: {task_id}\nSession ID: {result['session_id']}\n\nNote: Monitor progress via API endpoint /tasks/{task_id}/stream",
526
- None
527
- )
528
-
529
- def close_current_session(session_id):
530
- """Close the current session"""
531
- if not session_id:
532
- return "❌ No active session to close", None, None, None, None, None
533
-
534
- result = api_client.make_request("DELETE", f"/sessions/{session_id}")
535
- return "✅ Session closed successfully", None, None, None, None, None
536
-
537
- # ============================================================================
538
  # Gradio Interface
539
- # ============================================================================
540
-
541
- def create_gradio_app():
542
- with gr.Blocks(title="🌐 Browser Automation Tool", theme=gr.themes.Soft()) as demo:
543
  gr.Markdown("# 🌐 Browser Automation Tool")
544
- gr.Markdown("**Hybrid Platform: Gradio UI + FastAPI Backend + REST API + SSE + MCP**")
545
 
546
- with gr.Tab("🔧 Session Management"):
547
- gr.Markdown("### Browser Session Control")
548
-
549
  with gr.Row():
550
- create_btn = gr.Button("🆕 Create New Session", variant="primary")
551
- close_btn = gr.Button(" Close Current Session", variant="secondary")
552
-
553
- session_info = gr.Textbox(label="Session Status", lines=3, max_lines=10)
554
- session_id_state = gr.State()
555
- current_url_state = gr.State()
556
- current_title_state = gr.State()
557
- extracted_data_state = gr.State()
558
- screenshot_state = gr.State()
559
-
560
- create_btn.click(
561
- fn=create_new_session,
562
- outputs=[session_info, session_id_state, current_url_state, current_title_state, extracted_data_state, screenshot_state]
563
- )
564
-
565
- close_btn.click(
566
- fn=close_current_session,
567
- inputs=[session_id_state],
568
- outputs=[session_info, session_id_state, current_url_state, current_title_state, extracted_data_state, screenshot_state]
569
- )
570
-
571
- with gr.Tab("🌐 Navigation & Actions"):
572
- gr.Markdown("### Navigate and Interact")
573
-
574
- with gr.Row():
575
- url_input = gr.Textbox(
576
- label="URL",
577
- placeholder="https://example.com",
578
- scale=3
579
- )
580
- wait_time = gr.Slider(1, 10, value=3, label="Wait Time (seconds)", scale=1)
581
- navigate_btn = gr.Button("🚀 Navigate", variant="primary", scale=1)
582
-
583
- with gr.Row():
584
- selector_input = gr.Textbox(
585
- label="CSS Selector",
586
- placeholder=".example-class or #element-id",
587
- scale=3
588
- )
589
- attribute_dropdown = gr.Dropdown(
590
- ["text", "href", "src", "value", "innerHTML"],
591
- value="text",
592
- label="Extract Attribute",
593
- scale=1
594
- )
595
- extract_btn = gr.Button("🔍 Extract Data", variant="secondary", scale=1)
596
-
597
- screenshot_btn = gr.Button("📸 Take Screenshot", variant="secondary")
598
-
599
- # Status outputs
600
- status_output = gr.Textbox(label="Status", lines=3)
601
- current_url_display = gr.Textbox(label="Current URL", lines=2)
602
- current_title_display = gr.Textbox(label="Page Title", lines=2)
603
- extracted_display = gr.Textbox(label="Extracted Data", lines=8)
604
- screenshot_display = gr.Image(label="Screenshot", visible=False)
605
-
606
- navigate_btn.click(
607
- fn=navigate_to_url,
608
- inputs=[url_input, wait_time, session_id_state],
609
- outputs=[status_output, session_id_state, current_url_display, current_title_display, extracted_data_state, screenshot_state]
610
- )
611
-
612
- extract_btn.click(
613
- fn=extract_data_from_page,
614
- inputs=[selector_input, attribute_dropdown, session_id_state],
615
- outputs=[status_output, session_id_state, current_url_display, current_title_display, extracted_data_state, screenshot_state]
616
- )
617
-
618
- screenshot_btn.click(
619
- fn=take_screenshot,
620
- inputs=[session_id_state],
621
- outputs=[status_output, session_id_state, current_url_display, current_title_display, extracted_data_state, screenshot_display]
622
- )
623
-
624
- with gr.Tab("⚡ Task Execution"):
625
- gr.Markdown("### Multi-Step Task Automation")
626
- gr.Markdown("**Enter actions as JSON array:**")
627
- gr.Markdown("""
628
- ```json
629
- [
630
- {"type": "navigate", "url": "https://example.com"},
631
- {"type": "wait", "seconds": 2},
632
- {"type": "click", "selector": ".button-class"},
633
- {"type": "type", "selector": "input[name='search']", "text": "search term"}
634
- ]
635
- ```
636
- """)
637
-
638
- task_actions = gr.Textbox(
639
- label="Task Actions (JSON)",
640
- lines=10,
641
- placeholder='[{"type": "navigate", "url": "https://example.com"}]',
642
- value='[{"type": "navigate", "url": "https://example.com"}, {"type": "wait", "seconds": 2}]'
643
- )
644
-
645
- task_id_input = gr.Textbox(
646
- label="Custom Task ID (optional)",
647
- placeholder="Leave empty for auto-generated"
648
- )
649
-
650
- execute_task_btn = gr.Button("⚡ Execute Task", variant="primary")
651
-
652
- task_status = gr.Textbox(label="Task Status", lines=5)
653
- task_session_id = gr.Textbox(label="Task Session ID", lines=1)
654
-
655
- with gr.Tab("📊 API Information"):
656
- gr.Markdown("### REST API Endpoints")
657
-
658
- gr.Markdown("""
659
- **Base URL:** `http://localhost:8000`
660
-
661
- **Key Endpoints:**
662
- - `POST /sessions/create` - Create new session
663
- - `POST /sessions/{id}/navigate` - Navigate to URL
664
- - `POST /sessions/{id}/extract` - Extract data
665
- - `GET /sessions/{id}/screenshot` - Get screenshot
666
- - `POST /tasks/execute` - Execute task
667
- - `GET /stream/{id}` - SSE stream (real-time updates)
668
- - `GET /health` - Health check
669
- """)
670
-
671
- gr.Markdown("### MCP Server")
672
- gr.Markdown("""
673
- **Tools available:**
674
- - `browser_navigate` - Navigate to URL
675
- - `browser_click` - Click element
676
- - `browser_extract` - Extract data
677
- - `browser_screenshot` - Capture screenshot
678
- """)
679
-
680
- gr.Markdown("### Usage Examples")
681
- gr.Markdown("""
682
- **Python Client:**
683
- ```python
684
- import requests
685
- response = requests.post("http://localhost:8000/sessions/create")
686
- session_id = response.json()["session_id"]
687
 
688
- response = requests.post(
689
- f"http://localhost:8000/sessions/{session_id}/navigate",
690
- json={"url": "https://example.com"}
691
- )
692
- ```
693
- """)
694
 
695
- with gr.Tab("🎯 Advanced Features"):
696
- gr.Markdown("### Advanced Capabilities")
697
-
698
- gr.Markdown("""
699
- **✅ Multi-Session Management**
700
- - Create and manage multiple browser sessions simultaneously
701
- - Each session runs independently
702
-
703
- **✅ Real-time Streaming (SSE)**
704
- - Live updates of browser state
705
- - Task progress monitoring
706
-
707
- **✅ JavaScript Execution**
708
- - Execute custom scripts via REST API
709
- - Access browser internals programmatically
710
-
711
- **✅ MCP Integration**
712
- - AI agent integration via Model Context Protocol
713
- - Natural language browser control
714
-
715
- **✅ Production Ready**
716
- - FastAPI backend with automatic OpenAPI docs
717
- - CORS enabled for web integration
718
- - Health monitoring and error handling
719
- """)
720
-
721
- gr.Markdown("### System Information")
722
  with gr.Row():
723
  with gr.Column():
724
- gr.Markdown("**API Health:** `GET /health`")
725
- health_status = gr.Textbox("Click refresh to check", interactive=False)
726
- refresh_btn = gr.Button("🔄 Refresh Health")
 
 
 
 
 
727
  with gr.Column():
728
- gr.Markdown("**Active Sessions:** Count from API")
729
- active_sessions = gr.Textbox("N/A", interactive=False)
730
-
731
- def check_health():
732
- try:
733
- import requests
734
- response = requests.get("http://localhost:8000/health")
735
- data = response.json()
736
- return f"Status: {data.get('status', 'Unknown')}\nActive Sessions: {data.get('active_sessions', 0)}\nActive Tasks: {data.get('active_tasks', 0)}"
737
- except:
738
- return "❌ API not available - Make sure the API server is running on port 8000"
739
-
740
- refresh_btn.click(
741
- fn=check_health,
742
- outputs=[health_status]
743
- )
744
 
745
- # Initialize with session info
746
- demo.load(
747
- fn=lambda: "Browser Automation Tool ready! Create a session to start.",
748
- outputs=[session_info]
 
749
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
750
 
751
  return demo
752
 
753
- # ============================================================================
754
- # Main Application
755
- # ============================================================================
756
-
757
- def start_api_server():
758
- """Start the FastAPI server on port 8000"""
759
- uvicorn.run(
760
- api_app,
761
- host="0.0.0.0",
762
- port=8000,
763
- log_level="info"
764
- )
765
-
766
- def main():
767
- """Main application entry point"""
768
- import threading
769
- import time
770
-
771
- print("🚀 Starting Browser Automation Tool...")
772
- print("=" * 50)
773
-
774
- # Start API server in background
775
- print("🔧 Starting FastAPI server on port 8000...")
776
- api_thread = threading.Thread(target=start_api_server, daemon=True)
777
- api_thread.start()
778
-
779
- # Wait for API to start
780
- time.sleep(3)
781
-
782
- # Create and launch Gradio app
783
- print("🎨 Starting Gradio UI on port 7860...")
784
- print("📊 API documentation available at: http://localhost:8000/docs")
785
- print("🌐 Gradio interface will open at: http://localhost:7860")
786
- print("=" * 50)
787
-
788
- demo = create_gradio_app()
789
- demo.launch(
790
- server_name="0.0.0.0",
791
- server_port=7860,
792
- share=False,
793
- show_error=True,
794
- debug=False
795
- )
796
-
797
  if __name__ == "__main__":
798
- main()
 
 
1
+ #!/usr/bin/env python3
2
  """
3
+ Simple Browser Automation Tool for HuggingFace Spaces
 
4
  """
5
 
6
  import gradio as gr
7
+ import requests
8
+ import time
 
 
 
 
 
 
 
 
 
9
  from selenium import webdriver
 
10
  from selenium.webdriver.chrome.options import Options
11
+ from selenium.webdriver.common.by import By
12
+ from selenium.webdriver.support.ui import WebDriverWait
13
+ from selenium.webdriver.support import expected_conditions as EC
14
+ import io
15
+ from PIL import Image
16
+
17
# Global driver storage.
# NOTE(review): nothing in the visible code reads or writes this dict; each
# handler creates and quits its own driver — confirm whether it is still needed.
active_drivers = {}
19
+
20
def setup_driver(headless=True, window_size="1920,1080"):
    """Create a Chrome WebDriver configured with the requested options.

    Args:
        headless: When truthy, ``--headless`` is added to the Chrome flags.
        window_size: Value placed verbatim after ``--window-size=``.

    Returns:
        A new ``webdriver.Chrome`` instance.
    """
    # Flags are applied in this exact order; --headless (if any) comes first.
    flags = ["--headless"] if headless else []
    flags += [
        "--no-sandbox",
        "--disable-dev-shm-usage",
        f"--window-size={window_size}",
        "--disable-gpu",
    ]

    opts = Options()
    for flag in flags:
        opts.add_argument(flag)

    return webdriver.Chrome(options=opts)
 
 
 
32
 
33
def navigate_and_screenshot(url, headless=True, window_size="1920,1080"):
    """Open *url* in a fresh Chrome session and capture a screenshot.

    Args:
        url: Address to load.
        headless: Forwarded to ``setup_driver``.
        window_size: Forwarded to ``setup_driver``.

    Returns:
        The PNG screenshot as ``bytes`` on success, or the string
        ``"Error: <message>"`` if any step raised.
    """
    # NOTE(review): this value is wired to a gr.Image output; raw bytes and an
    # error string may not be accepted by that component — confirm.
    driver = None
    try:
        driver = setup_driver(headless, window_size)
        driver.get(url)
        time.sleep(3)  # Wait for page to load
        return driver.get_screenshot_as_png()
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Always shut the browser down so a failure in get() or the screenshot
        # call cannot leave a Chrome process running.
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                pass  # best-effort cleanup; the return value is already decided
47
 
48
def extract_text_content(url, headless=True):
    """Load *url* and return its title plus a text excerpt of the page.

    Args:
        url: Address to load.
        headless: Forwarded to ``setup_driver``.

    Returns:
        ``"Title: <title>\\n\\nContent:\\n<text>"`` where ``<text>`` is the
        page text with HTML tags removed, cut to 1000 characters followed by
        ``"..."`` when longer. On any failure, ``"Error: <message>"``.
    """
    driver = None
    try:
        driver = setup_driver(headless)
        driver.get(url)
        time.sleep(2)

        title = driver.title
        html = driver.page_source

        # Simple text extraction (remove HTML tags)
        from bs4 import BeautifulSoup
        full_text = BeautifulSoup(html, 'html.parser').get_text()  # parsed once

        if len(full_text) > 1000:
            text = full_text[:1000] + "..."
        else:
            text = full_text

        return f"Title: {title}\n\nContent:\n{text}"
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Quit even when loading or parsing failed, so no Chrome process leaks.
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                pass  # best-effort cleanup; the return value is already decided
 
 
 
 
 
 
 
 
 
71
 
72
def batch_navigate(urls, headless=True):
    """Visit each URL in a newline-separated string and collect page info.

    Args:
        urls: Text with one URL per line; blank lines are skipped. ``None``
            or an empty string yields an empty result.
        headless: Forwarded to ``setup_driver`` for every URL.

    Returns:
        A list with one dict per non-blank line, in input order. On success
        the dict has ``"url"``, ``"title"``, ``"current_url"`` and
        ``"screenshot"`` (PNG bytes); on failure it has ``"url"`` and
        ``"error"``.
    """
    results = []
    for line in (urls or "").split('\n'):
        url = line.strip()
        if not url:
            continue

        driver = None
        try:
            driver = setup_driver(headless)
            driver.get(url)
            time.sleep(2)

            # NOTE(review): "screenshot" holds raw bytes, which presumably
            # cannot be serialized by the gr.JSON output — confirm.
            results.append({
                "url": url,
                "title": driver.title,
                "current_url": driver.current_url,
                "screenshot": driver.get_screenshot_as_png()
            })
        except Exception as e:
            results.append({
                "url": url,
                "error": str(e)
            })
        finally:
            # One browser per URL: always quit it so a failing URL does not
            # leave a Chrome process behind for the rest of the batch.
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    pass  # best-effort cleanup; the entry is already recorded

    return results
 
 
 
 
 
 
 
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  # Gradio Interface
107
def main():
    """Build the Gradio Blocks UI and return it without launching.

    Returns:
        The ``gr.Blocks`` object; the caller is responsible for ``launch()``.
    """
    # NOTE(review): the nesting below is reconstructed from a view that
    # dropped indentation — confirm component placement against app.py.
    with gr.Blocks(title="Browser Automation Tool") as demo:
        gr.Markdown("# 🌐 Browser Automation Tool")

        # Tab 1: one URL, with screenshot and text-extraction actions.
        with gr.Tab("Single URL"):
            with gr.Row():
                with gr.Column():
                    url_input = gr.Textbox(label="URL to visit", placeholder="https://example.com")
                    headless = gr.Checkbox(label="Headless mode", value=True)
                    window_size = gr.Textbox(label="Window size", value="1920,1080")

                with gr.Column():
                    navigate_btn = gr.Button("Navigate & Screenshot", variant="primary")
                    extract_btn = gr.Button("Extract Content")

            screenshot_output = gr.Image(label="Screenshot")
            content_output = gr.Textbox(label="Content", lines=10)

        # Tab 2: several URLs at once, results shown as JSON.
        with gr.Tab("Batch Processing"):
            with gr.Row():
                with gr.Column():
                    urls_input = gr.Textbox(
                        label="URLs (one per line)",
                        placeholder="https://example.com\nhttps://google.com",
                        lines=5
                    )
                    batch_headless = gr.Checkbox(label="Headless mode", value=True)
                    batch_btn = gr.Button("Process URLs", variant="primary")

                with gr.Column():
                    batch_results = gr.JSON(label="Results")

        # Button handlers
        # Screenshot button: feeds URL, headless flag and window size to the handler.
        navigate_btn.click(
            fn=navigate_and_screenshot,
            inputs=[url_input, headless, window_size],
            outputs=[screenshot_output]
        )

        # Extract button: shares the URL and headless inputs with the screenshot button.
        extract_btn.click(
            fn=extract_text_content,
            inputs=[url_input, headless],
            outputs=[content_output]
        )

        # Batch button: uses its own headless checkbox, separate from tab 1.
        batch_btn.click(
            fn=batch_navigate,
            inputs=[urls_input, batch_headless],
            outputs=[batch_results]
        )

        gr.Markdown("""
        ## Features
        - 🌐 **Web Browser Control**: Navigate websites programmatically
        - 📸 **Screenshot Capture**: Take screenshots of any webpage
        - 🔍 **Content Extraction**: Extract text content from HTML
        - ⚡ **Batch Processing**: Process multiple URLs at once
        - 🔧 **Configurable Options**: Headless mode, window sizes
        """)

    return demo
168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
# When run as a script: build the UI and serve it on all interfaces, port 7860.
if __name__ == "__main__":
    demo = main()
    demo.launch(server_name="0.0.0.0", server_port=7860)