Spaces:

sheikhcoders
/

browser-automation-tool

Sleeping

App Files Community

sheikhcoders committed on Nov 6, 2025

Commit

39a1b59

verified ·

1 Parent(s): 7ccd7d9

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +132 -759

app.py CHANGED Viewed

@@ -1,798 +1,171 @@
 """
-Hybrid Browser Automation Tool
-Combines Gradio UI with FastAPI backend
 """
 import gradio as gr
-import asyncio
-import aiohttp
-import json
-import base64
-import uuid
-from typing import Dict, List, Any, Optional
-from fastapi import FastAPI, HTTPException, BackgroundTasks
-from fastapi.responses import StreamingResponse
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-from datetime import datetime
 from selenium import webdriver
-from selenium.webdriver.common.by import By
 from selenium.webdriver.chrome.options import Options
-import threading
-import uvicorn
-# ============================================================================
-# FastAPI Backend
-# ============================================================================
-# FastAPI App Setup
-api_app = FastAPI(
-    title="Browser Automation API",
-    description="Browser automation with Gradio UI and REST API",
-    version="2.0.0"
-)
-# Add CORS for Gradio integration
-api_app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# ============================================================================
-# Data Models
-# ============================================================================
-class NavigateRequest(BaseModel):
-    url: str
-    wait_time: Optional[int] = 2
-class ClickRequest(BaseModel):
-    selector: str
-    selector_type: Optional[str] = "css"
-class TypeRequest(BaseModel):
-    selector: str
-    text: str
-    selector_type: Optional[str] = "css"
-class ExtractRequest(BaseModel):
-    selector: str
-    attribute: Optional[str] = "text"
-    selector_type: Optional[str] = "css"
-class TaskRequest(BaseModel):
-    task_id: Optional[str] = None
-    actions: List[Dict[str, Any]]
-# ============================================================================
-# Browser Session Manager
-# ============================================================================
-class BrowserSession:
-    def __init__(self):
-        self.sessions: Dict[str, webdriver.Chrome] = {}
-        self.task_status: Dict[str, Dict] = {}
-        self.api_clients: Dict[str, Any] = {}
-    def create_session(self, session_id: str = None) -> str:
-        if not session_id:
-            session_id = str(uuid.uuid4())
-        chrome_options = Options()
         chrome_options.add_argument("--headless")
-        chrome_options.add_argument("--no-sandbox")
-        chrome_options.add_argument("--disable-dev-shm-usage")
-        chrome_options.add_argument("--disable-gpu")
-        try:
-            driver = webdriver.Chrome(options=chrome_options)
-            self.sessions[session_id] = driver
-            return session_id
-        except Exception as e:
-            print(f"Error creating session: {e}")
-            raise HTTPException(status_code=500, detail=f"Failed to create browser session: {e}")
-    def get_session(self, session_id: str) -> webdriver.Chrome:
-        if session_id not in self.sessions:
-            raise HTTPException(status_code=404, detail="Session not found")
-        return self.sessions[session_id]
-    def close_session(self, session_id: str):
-        if session_id in self.sessions:
-            try:
-                self.sessions[session_id].quit()
-                del self.sessions[session_id]
-            except:
-                pass
-    def get_screenshot(self, session_id: str) -> str:
-        driver = self.get_session(session_id)
-        try:
-            screenshot = driver.get_screenshot_as_png()
-            return base64.b64encode(screenshot).decode()
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=f"Failed to capture screenshot: {e}")
-    def list_sessions(self) -> List[Dict]:
-        """List all active sessions"""
-        session_list = []
-        for session_id, driver in self.sessions.items():
-            try:
-                session_info = {
-                    "session_id": session_id,
-                    "url": driver.current_url,
-                    "title": driver.title,
-                    "created_at": getattr(driver, 'created_at', 'Unknown')
-                }
-                session_list.append(session_info)
-            except:
-                session_list.append({
-                    "session_id": session_id,
-                    "url": "Error getting URL",
-                    "title": "Error",
-                    "created_at": "Unknown"
-                })
-        return session_list
-browser_manager = BrowserSession()
-# ============================================================================
-# API Endpoints
-# ============================================================================
-@api_app.get("/")
-async def root():
-    return {
-        "name": "Hybrid Browser Automation API",
-        "version": "2.0.0",
-        "features": {
-            "gradio_ui": "User-friendly web interface",
-            "rest_api": "Programmatic API access",
-            "sse_streaming": "Real-time updates",
-            "mcp_server": "AI agent integration",
-            "session_management": "Multiple concurrent sessions"
-        },
-        "endpoints": {
-            "sessions": "/sessions",
-            "mcp": "/mcp",
-            "sse": "/stream/{session_id}",
-            "tasks": "/tasks",
-            "health": "/health"
-        }
-    }
-@api_app.get("/health")
-async def health_check():
-    return {
-        "status": "healthy",
-        "active_sessions": len(browser_manager.sessions),
-        "active_tasks": len(browser_manager.task_status),
-        "timestamp": datetime.utcnow().isoformat()
-    }
-@api_app.post("/sessions/create")
-async def create_session():
-    session_id = browser_manager.create_session()
-    return {
-        "session_id": session_id,
-        "created_at": datetime.utcnow().isoformat()
-    }
-@api_app.get("/sessions")
-async def list_sessions():
-    return {
-        "sessions": browser_manager.list_sessions(),
-        "count": len(browser_manager.sessions)
-    }
-@api_app.post("/sessions/{session_id}/navigate")
-async def navigate(session_id: str, request: NavigateRequest):
-    driver = browser_manager.get_session(session_id)
-    driver.get(request.url)
-    await asyncio.sleep(request.wait_time)
-    return {
-        "status": "success",
-        "url": driver.current_url,
-        "title": driver.title
-    }
-@api_app.post("/sessions/{session_id}/click")
-async def click_element(session_id: str, request: ClickRequest):
-    driver = browser_manager.get_session(session_id)
-    by_type = {
-        "css": By.CSS_SELECTOR,
-        "xpath": By.XPATH,
-        "id": By.ID,
-        "class": By.CLASS_NAME
-    }
-    try:
-        element = driver.find_element(by_type[request.selector_type], request.selector)
-        element.click()
-        return {"status": "success", "selector": request.selector}
-    except Exception as e:
-        return {"status": "error", "error": str(e)}
-@api_app.post("/sessions/{session_id}/type")
-async def type_text(session_id: str, request: TypeRequest):
-    driver = browser_manager.get_session(session_id)
-    by_type = {
-        "css": By.CSS_SELECTOR,
-        "xpath": By.XPATH,
-        "id": By.ID,
-        "class": By.CLASS_NAME
-    }
     try:
-        element = driver.find_element(by_type[request.selector_type], request.selector)
-        element.clear()
-        element.send_keys(request.text)
-        return {"status": "success", "text": request.text}
-    except Exception as e:
-        return {"status": "error", "error": str(e)}
-@api_app.post("/sessions/{session_id}/extract")
-async def extract_data(session_id: str, request: ExtractRequest):
-    driver = browser_manager.get_session(session_id)
-    by_type = {
-        "css": By.CSS_SELECTOR,
-        "xpath": By.XPATH,
-        "id": By.ID,
-        "class": By.CLASS_NAME
-    }
-    try:
-        elements = driver.find_elements(by_type[request.selector_type], request.selector)
-        data = []
-        for elem in elements[:50]:  # Limit to 50 elements
-            if request.attribute == "text":
-                data.append(elem.text)
-            else:
-                data.append(elem.get_attribute(request.attribute))
-        return {"status": "success", "data": data, "count": len(data)}
     except Exception as e:
-        return {"status": "error", "error": str(e)}
-@api_app.get("/sessions/{session_id}/screenshot")
-async def get_screenshot(session_id: str):
     try:
-        screenshot = browser_manager.get_screenshot(session_id)
-        return {"status": "success", "screenshot": screenshot}
     except Exception as e:
-        return {"status": "error", "error": str(e)}
-@api_app.delete("/sessions/{session_id}")
-async def close_session(session_id: str):
-    browser_manager.close_session(session_id)
-    return {"status": "success", "message": "Session closed"}
-# ============================================================================
-# SSE Streaming
-# ============================================================================
-@api_app.get("/stream/{session_id}")
-async def stream_events(session_id: str):
-    async def event_generator():
-        try:
-            driver = browser_manager.get_session(session_id)
-            while session_id in browser_manager.sessions:
-                try:
-                    event_data = {
-                        "timestamp": datetime.utcnow().isoformat(),
-                        "url": driver.current_url,
-                        "title": driver.title,
-                        "session_id": session_id
-                    }
-                    yield f"data: {json.dumps(event_data)}\n\n"
-                    await asyncio.sleep(2)
-                except Exception as e:
-                    error_data = {
-                        "error": str(e),
-                        "timestamp": datetime.utcnow().isoformat()
-                    }
-                    yield f"data: {json.dumps(error_data)}\n\n"
-                    break
-            yield f"data: {json.dumps({'status': 'closed', 'session_id': session_id})}\n\n"
-        except Exception as e:
-            yield f"data: {json.dumps({'error': f'Stream error: {e}'})}\n\n"
-    return StreamingResponse(
-        event_generator(),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-        }
-    )
-# ============================================================================
-# Task Execution
-# ============================================================================
-@api_app.post("/tasks/execute")
-async def execute_task(request: TaskRequest, background_tasks: BackgroundTasks):
-    task_id = request.task_id or str(uuid.uuid4())
-    session_id = browser_manager.create_session()
-    browser_manager.task_status[task_id] = {
-        "status": "running",
-        "session_id": session_id,
-        "progress": 0,
-        "current_step": 0,
-        "total_steps": len(request.actions),
-        "started_at": datetime.utcnow().isoformat()
-    }
-    async def run_task():
-        driver = browser_manager.get_session(session_id)
         try:
-            for i, action in enumerate(request.actions):
-                browser_manager.task_status[task_id]["current_step"] = i + 1
-                browser_manager.task_status[task_id]["progress"] = (i + 1) / len(request.actions) * 100
-                action_type = action.get("type")
-                if action_type == "navigate":
-                    driver.get(action["url"])
-                elif action_type == "click":
-                    elem = driver.find_element(By.CSS_SELECTOR, action["selector"])
-                    elem.click()
-                elif action_type == "type":
-                    elem = driver.find_element(By.CSS_SELECTOR, action["selector"])
-                    elem.send_keys(action["text"])
-                elif action_type == "wait":
-                    await asyncio.sleep(action.get("seconds", 1))
-                await asyncio.sleep(0.5)
-            browser_manager.task_status[task_id]["status"] = "completed"
-            browser_manager.task_status[task_id]["completed_at"] = datetime.utcnow().isoformat()
-        except Exception as e:
-            browser_manager.task_status[task_id]["status"] = "error"
-            browser_manager.task_status[task_id]["error"] = str(e)
-            browser_manager.task_status[task_id]["error_at"] = datetime.utcnow().isoformat()
-    background_tasks.add_task(run_task)
-    return {
-        "task_id": task_id,
-        "session_id": session_id,
-        "status": "started"
-    }
-@api_app.get("/tasks/{task_id}/status")
-async def get_task_status(task_id: str):
-    if task_id not in browser_manager.task_status:
-        raise HTTPException(status_code=404, detail="Task not found")
-    return browser_manager.task_status[task_id]
-@api_app.get("/tasks/{task_id}/stream")
-async def stream_task_progress(task_id: str):
-    async def progress_generator():
-        while True:
-            if task_id not in browser_manager.task_status:
-                yield f"data: {json.dumps({'error': 'Task not found'})}\n\n"
-                break
-            status = browser_manager.task_status[task_id]
-            yield f"data: {json.dumps(status)}\n\n"
-            if status["status"] in ["completed", "error"]:
-                break
-            await asyncio.sleep(0.5)
-    return StreamingResponse(
-        progress_generator(),
-        media_type="text/event-stream"
-    )
-# ============================================================================
-# Gradio Frontend
-# ============================================================================
-class BrowserAPIClient:
-    def __init__(self, base_url: str = "http://localhost:8000"):
-        self.base_url = base_url
-    def make_request(self, method: str, endpoint: str, data: dict = None) -> dict:
-        import requests
-        url = f"{self.base_url}{endpoint}"
-        try:
-            if method == "GET":
-                response = requests.get(url)
-            elif method == "POST":
-                response = requests.post(url, json=data)
-            elif method == "DELETE":
-                response = requests.delete(url)
-            else:
-                raise ValueError(f"Unsupported method: {method}")
-            return response.json()
         except Exception as e:
-            return {"error": str(e)}
-# Global API client
-api_client = BrowserAPIClient()
-def create_new_session():
-    """Create a new browser session"""
-    result = api_client.make_request("POST", "/sessions/create")
-    if "error" in result:
-        return f"❌ Error: {result['error']}", None, None, None, None, None
-    session_id = result["session_id"]
-    return f"✅ Session created: {session_id}", session_id, None, None, None, None
-def navigate_to_url(url, wait_time, session_id):
-    """Navigate to a URL"""
-    if not session_id:
-        return "❌ Please create a session first", None, None, None, None, None
-    data = {"url": url, "wait_time": wait_time}
-    result = api_client.make_request("POST", f"/sessions/{session_id}/navigate", data)
-    if "error" in result:
-        return f"❌ Error: {result['error']}", None, None, None, None, None
-    return (
-        f"✅ Navigated to: {result['url']} | Title: {result['title']}",
-        session_id,
-        result["url"],
-        result["title"],
-        None,
-        None
-    )
-def extract_data_from_page(selector, attribute, session_id):
-    """Extract data from the page"""
-    if not session_id:
-        return "❌ Please create a session first", None, None, None, None, None
-    data = {"selector": selector, "attribute": attribute}
-    result = api_client.make_request("POST", f"/sessions/{session_id}/extract", data)
-    if "error" in result:
-        return f"❌ Error: {result['error']}", None, None, None, None, None
-    extracted_text = "\n".join([f"• {item}" for item in result["data"][:20]])
-    if result["count"] > 20:
-        extracted_text += f"\n... and {result['count'] - 20} more items"
-    return (
-        f"✅ Extracted {result['count']} items from selector: {selector}",
-        session_id,
-        None,
-        None,
-        extracted_text,
-        None
-    )
-def take_screenshot(session_id):
-    """Take a screenshot of the current page"""
-    if not session_id:
-        return "❌ Please create a session first", None, None, None, None, None
-    result = api_client.make_request("GET", f"/sessions/{session_id}/screenshot")
-    if "error" in result:
-        return f"❌ Error: {result['error']}", None, None, None, None, None
-    return (
-        f"✅ Screenshot captured",
-        session_id,
-        None,
-        None,
-        None,
-        result["screenshot"]
-    )
-def execute_task(actions_json, task_id):
-    """Execute a multi-step task"""
-    try:
-        actions = json.loads(actions_json)
-    except json.JSONDecodeError:
-        return "❌ Invalid JSON format for actions", None, None, None, None, None
-    data = {"actions": actions, "task_id": task_id or None}
-    result = api_client.make_request("POST", "/tasks/execute", data)
-    if "error" in result:
-        return f"❌ Error: {result['error']}", None, None, None, None, None
-    task_id = result["task_id"]
-    return (
-        f"✅ Task started: {task_id}\n\nUse the task status to monitor progress.",
-        result["session_id"],
-        None,
-        None,
-        f"Task ID: {task_id}\nSession ID: {result['session_id']}\n\nNote: Monitor progress via API endpoint /tasks/{task_id}/stream",
-        None
-    )
-def close_current_session(session_id):
-    """Close the current session"""
-    if not session_id:
-        return "❌ No active session to close", None, None, None, None, None
-    result = api_client.make_request("DELETE", f"/sessions/{session_id}")
-    return "✅ Session closed successfully", None, None, None, None, None
-# ============================================================================
 # Gradio Interface
-# ============================================================================
-def create_gradio_app():
-    with gr.Blocks(title="🌐 Browser Automation Tool", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🌐 Browser Automation Tool")
-        gr.Markdown("**Hybrid Platform: Gradio UI + FastAPI Backend + REST API + SSE + MCP**")
-        with gr.Tab("🔧 Session Management"):
-            gr.Markdown("### Browser Session Control")
             with gr.Row():
-                create_btn = gr.Button("🆕 Create New Session", variant="primary")
-                close_btn = gr.Button("❌ Close Current Session", variant="secondary")
-            session_info = gr.Textbox(label="Session Status", lines=3, max_lines=10)
-            session_id_state = gr.State()
-            current_url_state = gr.State()
-            current_title_state = gr.State()
-            extracted_data_state = gr.State()
-            screenshot_state = gr.State()
-            create_btn.click(
-                fn=create_new_session,
-                outputs=[session_info, session_id_state, current_url_state, current_title_state, extracted_data_state, screenshot_state]
-            )
-            close_btn.click(
-                fn=close_current_session,
-                inputs=[session_id_state],
-                outputs=[session_info, session_id_state, current_url_state, current_title_state, extracted_data_state, screenshot_state]
-            )
-        with gr.Tab("🌐 Navigation & Actions"):
-            gr.Markdown("### Navigate and Interact")
-            with gr.Row():
-                url_input = gr.Textbox(
-                    label="URL",
-                    placeholder="https://example.com",
-                    scale=3
-                )
-                wait_time = gr.Slider(1, 10, value=3, label="Wait Time (seconds)", scale=1)
-                navigate_btn = gr.Button("🚀 Navigate", variant="primary", scale=1)
-            with gr.Row():
-                selector_input = gr.Textbox(
-                    label="CSS Selector",
-                    placeholder=".example-class or #element-id",
-                    scale=3
-                )
-                attribute_dropdown = gr.Dropdown(
-                    ["text", "href", "src", "value", "innerHTML"],
-                    value="text",
-                    label="Extract Attribute",
-                    scale=1
-                )
-                extract_btn = gr.Button("🔍 Extract Data", variant="secondary", scale=1)
-            screenshot_btn = gr.Button("📸 Take Screenshot", variant="secondary")
-            # Status outputs
-            status_output = gr.Textbox(label="Status", lines=3)
-            current_url_display = gr.Textbox(label="Current URL", lines=2)
-            current_title_display = gr.Textbox(label="Page Title", lines=2)
-            extracted_display = gr.Textbox(label="Extracted Data", lines=8)
-            screenshot_display = gr.Image(label="Screenshot", visible=False)
-            navigate_btn.click(
-                fn=navigate_to_url,
-                inputs=[url_input, wait_time, session_id_state],
-                outputs=[status_output, session_id_state, current_url_display, current_title_display, extracted_data_state, screenshot_state]
-            )
-            extract_btn.click(
-                fn=extract_data_from_page,
-                inputs=[selector_input, attribute_dropdown, session_id_state],
-                outputs=[status_output, session_id_state, current_url_display, current_title_display, extracted_data_state, screenshot_state]
-            )
-            screenshot_btn.click(
-                fn=take_screenshot,
-                inputs=[session_id_state],
-                outputs=[status_output, session_id_state, current_url_display, current_title_display, extracted_data_state, screenshot_display]
-            )
-        with gr.Tab("⚡ Task Execution"):
-            gr.Markdown("### Multi-Step Task Automation")
-            gr.Markdown("**Enter actions as JSON array:**")
-            gr.Markdown("""
-            ```json
-            [
-                {"type": "navigate", "url": "https://example.com"},
-                {"type": "wait", "seconds": 2},
-                {"type": "click", "selector": ".button-class"},
-                {"type": "type", "selector": "input[name='search']", "text": "search term"}
-            ]
-            ```
-            """)
-            task_actions = gr.Textbox(
-                label="Task Actions (JSON)",
-                lines=10,
-                placeholder='[{"type": "navigate", "url": "https://example.com"}]',
-                value='[{"type": "navigate", "url": "https://example.com"}, {"type": "wait", "seconds": 2}]'
-            )
-            task_id_input = gr.Textbox(
-                label="Custom Task ID (optional)",
-                placeholder="Leave empty for auto-generated"
-            )
-            execute_task_btn = gr.Button("⚡ Execute Task", variant="primary")
-            task_status = gr.Textbox(label="Task Status", lines=5)
-            task_session_id = gr.Textbox(label="Task Session ID", lines=1)
-        with gr.Tab("📊 API Information"):
-            gr.Markdown("### REST API Endpoints")
-            gr.Markdown("""
-            **Base URL:** `http://localhost:8000`
-            **Key Endpoints:**
-            - `POST /sessions/create` - Create new session
-            - `POST /sessions/{id}/navigate` - Navigate to URL
-            - `POST /sessions/{id}/extract` - Extract data
-            - `GET /sessions/{id}/screenshot` - Get screenshot
-            - `POST /tasks/execute` - Execute task
-            - `GET /stream/{id}` - SSE stream (real-time updates)
-            - `GET /health` - Health check
-            """)
-            gr.Markdown("### MCP Server")
-            gr.Markdown("""
-            **Tools available:**
-            - `browser_navigate` - Navigate to URL
-            - `browser_click` - Click element
-            - `browser_extract` - Extract data
-            - `browser_screenshot` - Capture screenshot
-            """)
-            gr.Markdown("### Usage Examples")
-            gr.Markdown("""
-            **Python Client:**
-            ```python
-            import requests
-            response = requests.post("http://localhost:8000/sessions/create")
-            session_id = response.json()["session_id"]
-            response = requests.post(
-                f"http://localhost:8000/sessions/{session_id}/navigate",
-                json={"url": "https://example.com"}
-            )
-            ```
-            """)
-        with gr.Tab("🎯 Advanced Features"):
-            gr.Markdown("### Advanced Capabilities")
-            gr.Markdown("""
-            **✅ Multi-Session Management**
-            - Create and manage multiple browser sessions simultaneously
-            - Each session runs independently
-            **✅ Real-time Streaming (SSE)**
-            - Live updates of browser state
-            - Task progress monitoring
-            **✅ JavaScript Execution**
-            - Execute custom scripts via REST API
-            - Access browser internals programmatically
-            **✅ MCP Integration**
-            - AI agent integration via Model Context Protocol
-            - Natural language browser control
-            **✅ Production Ready**
-            - FastAPI backend with automatic OpenAPI docs
-            - CORS enabled for web integration
-            - Health monitoring and error handling
-            """)
-            gr.Markdown("### System Information")
             with gr.Row():
                 with gr.Column():
-                    gr.Markdown("**API Health:** `GET /health`")
-                    health_status = gr.Textbox("Click refresh to check", interactive=False)
-                    refresh_btn = gr.Button("🔄 Refresh Health")
                 with gr.Column():
-                    gr.Markdown("**Active Sessions:** Count from API")
-                    active_sessions = gr.Textbox("N/A", interactive=False)
-            def check_health():
-                try:
-                    import requests
-                    response = requests.get("http://localhost:8000/health")
-                    data = response.json()
-                    return f"Status: {data.get('status', 'Unknown')}\nActive Sessions: {data.get('active_sessions', 0)}\nActive Tasks: {data.get('active_tasks', 0)}"
-                except:
-                    return "❌ API not available - Make sure the API server is running on port 8000"
-            refresh_btn.click(
-                fn=check_health,
-                outputs=[health_status]
-            )
-        # Initialize with session info
-        demo.load(
-            fn=lambda: "Browser Automation Tool ready! Create a session to start.",
-            outputs=[session_info]
         )
     return demo
-# ============================================================================
-# Main Application
-# ============================================================================
-def start_api_server():
-    """Start the FastAPI server on port 8000"""
-    uvicorn.run(
-        api_app,
-        host="0.0.0.0",
-        port=8000,
-        log_level="info"
-    )
-def main():
-    """Main application entry point"""
-    import threading
-    import time
-    print("🚀 Starting Browser Automation Tool...")
-    print("=" * 50)
-    # Start API server in background
-    print("🔧 Starting FastAPI server on port 8000...")
-    api_thread = threading.Thread(target=start_api_server, daemon=True)
-    api_thread.start()
-    # Wait for API to start
-    time.sleep(3)
-    # Create and launch Gradio app
-    print("🎨 Starting Gradio UI on port 7860...")
-    print("📊 API documentation available at: http://localhost:8000/docs")
-    print("🌐 Gradio interface will open at: http://localhost:7860")
-    print("=" * 50)
-    demo = create_gradio_app()
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False,
-        show_error=True,
-        debug=False
-    )
 if __name__ == "__main__":
-    main()

+#!/usr/bin/env python3
 """
+Simple Browser Automation Tool for HuggingFace Spaces
 """
 import gradio as gr
+import requests
+import time
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import io
+from PIL import Image
+# Global driver storage
+active_drivers = {}
+def setup_driver(headless=True, window_size="1920,1080"):
+    """Setup Chrome driver with options"""
+    chrome_options = Options()
+    if headless:
         chrome_options.add_argument("--headless")
+    chrome_options.add_argument("--no-sandbox")
+    chrome_options.add_argument("--disable-dev-shm-usage")
+    chrome_options.add_argument(f"--window-size={window_size}")
+    chrome_options.add_argument("--disable-gpu")
+    driver = webdriver.Chrome(options=chrome_options)
+    return driver
+def navigate_and_screenshot(url, headless=True, window_size="1920,1080"):
+    """Navigate to URL and return screenshot"""
     try:
+        driver = setup_driver(headless, window_size)
+        driver.get(url)
+        time.sleep(3)  # Wait for page to load
+        # Take screenshot
+        screenshot = driver.get_screenshot_as_png()
+        driver.quit()
+        return screenshot
     except Exception as e:
+        return f"Error: {str(e)}"
+def extract_text_content(url, headless=True):
+    """Extract text content from URL"""
     try:
+        driver = setup_driver(headless)
+        driver.get(url)
+        time.sleep(2)
+        # Get page title
+        title = driver.title
+        # Get page source and extract text
+        html = driver.page_source
+        # Simple text extraction (remove HTML tags)
+        from bs4 import BeautifulSoup
+        soup = BeautifulSoup(html, 'html.parser')
+        text = soup.get_text()[:1000] + "..." if len(soup.get_text()) > 1000 else soup.get_text()
+        driver.quit()
+        return f"Title: {title}\n\nContent:\n{text}"
     except Exception as e:
+        return f"Error: {str(e)}"
+def batch_navigate(urls, headless=True):
+    """Navigate to multiple URLs and return results"""
+    results = []
+    for i, url in enumerate(urls.split('\n')):
+        url = url.strip()
+        if not url:
+            continue
         try:
+            driver = setup_driver(headless)
+            driver.get(url)
+            time.sleep(2)
+            # Get basic info
+            title = driver.title
+            current_url = driver.current_url
+            screenshot = driver.get_screenshot_as_png()
+            results.append({
+                "url": url,
+                "title": title,
+                "current_url": current_url,
+                "screenshot": screenshot
+            })
+            driver.quit()
         except Exception as e:
+            results.append({
+                "url": url,
+                "error": str(e)
+            })
+    return results
 # Gradio Interface
+def main():
+    with gr.Blocks(title="Browser Automation Tool") as demo:
         gr.Markdown("# 🌐 Browser Automation Tool")
+        with gr.Tab("Single URL"):
             with gr.Row():
+                with gr.Column():
+                    url_input = gr.Textbox(label="URL to visit", placeholder="https://example.com")
+                    headless = gr.Checkbox(label="Headless mode", value=True)
+                    window_size = gr.Textbox(label="Window size", value="1920,1080")
+                with gr.Column():
+                    navigate_btn = gr.Button("Navigate & Screenshot", variant="primary")
+                    extract_btn = gr.Button("Extract Content")
+            screenshot_output = gr.Image(label="Screenshot")
+            content_output = gr.Textbox(label="Content", lines=10)
+        with gr.Tab("Batch Processing"):
             with gr.Row():
                 with gr.Column():
+                    urls_input = gr.Textbox(
+                        label="URLs (one per line)",
+                        placeholder="https://example.com\nhttps://google.com",
+                        lines=5
+                    )
+                    batch_headless = gr.Checkbox(label="Headless mode", value=True)
+                    batch_btn = gr.Button("Process URLs", variant="primary")
                 with gr.Column():
+                    batch_results = gr.JSON(label="Results")
+        # Button handlers
+        navigate_btn.click(
+            fn=navigate_and_screenshot,
+            inputs=[url_input, headless, window_size],
+            outputs=[screenshot_output]
         )
+        extract_btn.click(
+            fn=extract_text_content,
+            inputs=[url_input, headless],
+            outputs=[content_output]
+        )
+        batch_btn.click(
+            fn=batch_navigate,
+            inputs=[urls_input, batch_headless],
+            outputs=[batch_results]
+        )
+        gr.Markdown("""
+        ## Features
+        - 🌐 **Web Browser Control**: Navigate websites programmatically
+        - 📸 **Screenshot Capture**: Take screenshots of any webpage
+        - 🔍 **Content Extraction**: Extract text content from HTML
+        - ⚡ **Batch Processing**: Process multiple URLs at once
+        - 🔧 **Configurable Options**: Headless mode, window sizes
+        """)
     return demo
 if __name__ == "__main__":
+    demo = main()
+    demo.launch(server_name="0.0.0.0", server_port=7860)