#!/usr/bin/env python3 """Browser MCP v2 — Playwright Chromium + content cache + resource blocking.""" import asyncio, base64, json, os, subprocess, shutil, threading from pathlib import Path from typing import Optional from cachetools import TTLCache SCRIPTS_DIR = Path("/scripts") SCRIPTS_DIR.mkdir(exist_ok=True) # Content cache: URL → text/html (5-min TTL, max 128 pages) _page_cache: TTLCache = TTLCache(maxsize=128, ttl=300) _cache_lock = asyncio.Lock() import mcp.types as types from mcp.server import Server from mcp.server.sse import SseServerTransport from starlette.applications import Starlette from starlette.requests import Request from starlette.responses import Response from starlette.routing import Mount, Route import uvicorn from playwright.async_api import async_playwright, Browser, BrowserContext, Page mcp_server = Server("browser-mcp") sse = SseServerTransport("/browser/messages/") # Optimized Chrome flags — sourced from Playwright Docker + headless benchmarks _CHROME_FLAGS = [ "--headless=new", "--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", # critical in Docker: /dev/shm is only 64 MB "--disable-gpu", "--disable-software-rasterizer", "--disable-extensions", "--disable-background-networking", # stops background DNS/resource prefetch "--disable-backgrounding-occluded-windows", "--disable-renderer-backgrounding", "--disable-background-timer-throttling", "--no-first-run", "--no-zygote", "--mute-audio", "--hide-scrollbars", "--window-size=1280,720", ] # Resource types to block when fast_mode=True on navigate _BLOCK_TYPES = {"image", "media", "font", "stylesheet"} class BrowserManager: def __init__(self): self.pw = None self.browser: Optional[Browser] = None self.context: Optional[BrowserContext] = None self.tabs: list[Page] = [] self.current: int = 0 self.console_msgs: list[str] = [] self._fast_mode: bool = False # block images/fonts/css self._ready = threading.Event() async def init(self): self.pw = await async_playwright().start() self.browser = await self.pw.chromium.launch( headless=True, args=_CHROME_FLAGS, ) self.context = await self.browser.new_context( viewport={"width": 1280, "height": 800}, user_agent=( "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" ), ) await self._setup_routing(self.context) page = await self.context.new_page() page.on("console", lambda m: self.console_msgs.append(f"[{m.type}] {m.text}")) self.tabs = [page] self.current = 0 self._ready.set() async def _setup_routing(self, ctx: BrowserContext): async def route_handler(route): if self._fast_mode and route.request.resource_type in _BLOCK_TYPES: await route.abort() else: await route.continue_() await ctx.route("**/*", route_handler) async def page(self) -> Page: if not self.tabs: await self.init() return self.tabs[self.current] async def new_tab(self, url: str = "about:blank") -> Page: p = await self.context.new_page() p.on("console", lambda m: self.console_msgs.append(f"[{m.type}] {m.text}")) if url and url != "about:blank": await p.goto(url, timeout=30000, wait_until="domcontentloaded") self.tabs.append(p) self.current = len(self.tabs) - 1 return p bm = BrowserManager() def ok(t): return [types.TextContent(type="text", text=str(t))] def img(data: bytes): return [types.ImageContent(type="image", data=base64.b64encode(data).decode(), mimeType="image/png")] @mcp_server.list_tools() async def list_tools(): T = types.Tool return [ T(name="navigate", description=( "Navigate to a URL. Default wait: domcontentloaded (fast). " "Use networkidle only if page needs JS to fully render. " "fast_mode=true blocks images/fonts/CSS — 30-50% faster for text extraction." ), inputSchema={"type": "object", "properties": { "url": {"type": "string"}, "wait_until": {"type": "string", "default": "domcontentloaded", "enum": ["load", "domcontentloaded", "networkidle"]}, "fast_mode": {"type": "boolean", "default": False, "description": "Block images/fonts/CSS to speed up text-only tasks"}, }, "required": ["url"]}), T(name="screenshot", description="Take a screenshot of the current page or a specific element. Returns a PNG image.", inputSchema={"type": "object", "properties": { "full_page": {"type": "boolean", "default": False}, "selector": {"type": "string", "description": "CSS selector to screenshot just that element"} }, "required": []}), T(name="get_text", description="Get visible text of the page or element. Cached 5 min per URL.", inputSchema={"type": "object", "properties": { "selector": {"type": "string", "default": "body"}, "no_cache": {"type": "boolean", "default": False}, }, "required": []}), T(name="get_html", description="Get HTML source of the page or element. Cached 5 min per URL.", inputSchema={"type": "object", "properties": { "selector": {"type": "string", "default": "html"}, "outer": {"type": "boolean", "default": True}, "no_cache": {"type": "boolean", "default": False}, }, "required": []}), T(name="click", description="Click an element by CSS selector.", inputSchema={"type": "object", "properties": { "selector": {"type": "string"}, "button": {"type": "string", "default": "left", "enum": ["left", "right", "middle"]}, "count": {"type": "integer", "default": 1} }, "required": ["selector"]}), T(name="type_text", description="Type text into an input element.", inputSchema={"type": "object", "properties": { "selector": {"type": "string"}, "text": {"type": "string"}, "clear": {"type": "boolean", "default": False} }, "required": ["selector", "text"]}), T(name="fill", description="Fill a form field (clears first then sets value).", inputSchema={"type": "object", "properties": { "selector": {"type": "string"}, "value": {"type": "string"} }, "required": ["selector", "value"]}), T(name="select_option", description="Select an option in a