Spaces:
Sleeping
Sleeping
"""
Browser MCP Server

This module provides MCP server functionality for browser automation and interaction.
It handles tasks such as web scraping, form submission, and automated browsing.

Main functions:
- browser_use: Runs a browser-use agent on a natural-language task and returns its result
- main: Loads environment variables and starts the MCP server over stdio
"""
| import json | |
| import os | |
| import sys | |
| import traceback | |
| from browser_use import Agent | |
| from browser_use.agent.views import AgentHistoryList | |
| from browser_use.browser.browser import Browser, BrowserConfig | |
| from browser_use.browser.context import BrowserContext, BrowserContextConfig | |
| from dotenv import load_dotenv | |
| from langchain_openai import ChatOpenAI | |
| from mcp.server.fastmcp import FastMCP | |
| from pydantic import Field | |
| from aworld.logs.util import logger | |
# FastMCP server instance for this module; main() starts it over the stdio
# transport.
mcp = FastMCP("browser-server")

# Additional system-prompt text passed to the browser-use Agent through its
# extend_system_message argument in browser_use(). This is runtime text sent
# to the LLM, so it is kept verbatim.
browser_system_prompt = """
===== NAVIGATION STRATEGY =====
1. START: Navigate to the most authoritative source for this information
- For general queries: Use Google with specific search terms
- For known sources: Go directly to the relevant website
2. EVALUATE: Assess each page methodically
- Scan headings and highlighted text first
- Look for data tables, charts, or official statistics
- Check publication dates for timeliness
3. EXTRACT: Capture exactly what's needed
- Take screenshots of visual evidence (charts, tables, etc.)
- Copy precise text that answers the query
- Note source URLs for citation
4. DOWNLOAD: Save the most relevant file to local path for further processing
- Save the text if possible for futher text reading and analysis
- Save the image if possible for futher image reasoning analysis
- Save the pdf if possible for futher pdf reading and analysis
5. ROBOT DETECTION:
- If the page is a robot detection page, abort immediately
- Navigate to the most authoritative source for similar information instead
===== EFFICIENCY GUIDELINES =====
- Use specific search queries with key terms from the task
- Avoid getting distracted by tangential information
- If blocked by paywalls, try archive.org or similar alternatives
- Document each significant finding clearly and concisely
Your goal is to extract precisely the information needed with minimal browsing steps.
"""
@mcp.tool(description="Perform browser actions using the browser-use package.")
async def browser_use(
    task: str = Field(description="The task to perform using the browser."),
) -> str:
    """
    Perform browser actions using the browser-use package.

    A browser-use ``Agent`` is created for ``task``, driven for at most 50
    steps, and its final result is returned. Failures are reported as a
    message string rather than raised, so the caller always gets text back.

    Environment variables read:
        LLM_MODEL_NAME, LLM_API_KEY, LLM_BASE_URL: LLM connection settings.
        LOG_FILE_PATH: optional directory; when set, the browser context is
            configured with ``<LOG_FILE_PATH>/browser_trace.log`` as its
            trace path. When unset, no trace path is configured.

    Args:
        task (str): The task to perform using the browser.

    Returns:
        str: The agent's final result on success, otherwise a message that
        starts with "Browser execution failed for task:".
    """
    browser = Browser(
        config=BrowserConfig(
            headless=False,
            new_context_config=BrowserContextConfig(
                disable_security=True,
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
                minimum_wait_page_load_time=10,
                maximum_wait_page_load_time=30,
            ),
        )
    )

    # Look the directory up first and append the file name afterwards.
    # Concatenating inside os.getenv() would query a variable literally named
    # "LOG_FILE_PATH/browser_trace.log", which is never set.
    log_dir = os.getenv("LOG_FILE_PATH")
    trace_path = os.path.join(log_dir, "browser_trace.log") if log_dir else None

    browser_context = BrowserContext(
        config=BrowserContextConfig(trace_path=trace_path),
        browser=browser,
    )

    llm_model = os.getenv("LLM_MODEL_NAME")
    llm_api_key = os.getenv("LLM_API_KEY")
    llm_base_url = os.getenv("LLM_BASE_URL")

    agent = Agent(
        task=task,
        # Each setting is supplied under two keyword spellings with the same
        # value (presumably for compatibility across langchain-openai
        # versions -- TODO confirm whether both are still required).
        llm=ChatOpenAI(
            model=llm_model,
            api_key=llm_api_key,
            base_url=llm_base_url,
            model_name=llm_model,
            openai_api_base=llm_base_url,
            openai_api_key=llm_api_key,
            temperature=1.0,
        ),
        browser_context=browser_context,
        extend_system_message=browser_system_prompt,
    )

    try:
        browser_execution: AgentHistoryList = await agent.run(max_steps=50)

        succeeded = (
            browser_execution is not None
            and browser_execution.is_done()
            and browser_execution.is_successful()
        )
        if not succeeded:
            return f"Browser execution failed for task: {task}"

        exec_trace = browser_execution.extracted_content()
        logger.info(
            ">>> 🌏 Browse Execution Succeed!\n"
            f">>> 💡 Result: {json.dumps(exec_trace, ensure_ascii=False, indent=4)}\n"
            ">>> 🌏 Browse Execution Succeed!\n"
        )
        return browser_execution.final_result()
    except Exception as e:
        logger.error(f"Browser execution failed: {traceback.format_exc()}")
        return f"Browser execution failed for task: {task} due to {str(e)}"
    finally:
        # The context was created here and handed to the agent, so it is
        # released here as well, before the browser that owns it. Closing is
        # best-effort: a failure must not replace the result being returned.
        try:
            await browser_context.close()
        except Exception:
            logger.warning(
                f"Browser context close failed: {traceback.format_exc()}"
            )
        await browser.close()
        logger.info("Browser Closed!")
def main():
    """Load ``.env`` settings, announce startup, and serve MCP over stdio."""
    # Environment is populated first so the tool can read its LLM settings
    # with os.getenv() when it is invoked.
    load_dotenv()

    # The banner is written to stderr (stdout is presumably reserved for the
    # stdio transport's protocol traffic).
    startup_banner = "Starting Browser MCP Server..."
    print(startup_banner, file=sys.stderr)

    mcp.run(transport="stdio")
# Expose a module-level ``__call__`` that forwards to main().
# NOTE(review): Python resolves ``obj()`` by looking up ``__call__`` on
# ``type(obj)``, not on the instance, so setting this attribute on the module
# object does not make ``module()`` work; only an explicit
# ``module.__call__()`` reaches it. Confirm what the launcher actually invokes.
def __call__():
    """
    Start the server by delegating to main().

    Intended as the hook for making the module callable for uvx; it can be
    invoked explicitly as ``module.__call__()``.
    """
    main()


# The ``def`` above already binds ``__call__`` in the module namespace, so
# this assignment re-binds the same function to the same attribute.
sys.modules[__name__].__call__ = __call__
# Start the server only when this file is executed as a script; importing the
# module does not start it.
if __name__ == "__main__":
    main()