File size: 8,007 Bytes
db4810d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 |
"""
Playwright browser on steroids.
"""
import asyncio
import gc
import logging
from dataclasses import dataclass, field
from playwright._impl._api_structures import ProxySettings
from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import (
Playwright,
async_playwright,
)
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from browser_use.utils import time_execution_async
logger = logging.getLogger(__name__)
@dataclass
class BrowserConfig:
    r"""
    Configuration for the Browser.

    When several connection options are set, Browser tries them in this
    order: ``cdp_url``, ``wss_url``, ``chrome_instance_path``; if none is set
    a fresh Chromium is launched.

    Default values:
        headless: False
            Whether to run browser in headless mode

        disable_security: True
            Disable browser security features

        extra_chromium_args: []
            Extra arguments to pass to the browser

        chrome_instance_path: None
            Path to a Chrome instance to use to connect to your normal browser
            e.g. '/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome'

        wss_url: None
            Connect to a browser instance via WebSocket

        cdp_url: None
            Connect to a browser instance via CDP

        proxy: None
            Proxy settings handed to Playwright when a fresh Chromium is launched

        new_context_config: BrowserContextConfig()
            Context configuration stored alongside the browser settings
            (NOTE(review): presumably the default for new contexts - confirm
            against the code that reads it)

        _force_keep_browser_alive: False
            When True, Browser.close() does not close the browser or stop Playwright
    """

    headless: bool = False
    disable_security: bool = True
    # default_factory gives every config its own list instead of a shared one
    extra_chromium_args: list[str] = field(default_factory=list)
    chrome_instance_path: str | None = None
    wss_url: str | None = None
    cdp_url: str | None = None

    proxy: ProxySettings | None = None
    new_context_config: BrowserContextConfig = field(default_factory=BrowserContextConfig)

    _force_keep_browser_alive: bool = False
# @singleton: TODO - think about if singleton makes sense here
# @dev By default this is a singleton, but you can create multiple instances if you need to.
class Browser:
    """
    Playwright browser on steroids.

    This is a persistent browser factory that can spawn multiple browser contexts.
    It is recommended to use only one instance of Browser per your application (RAM usage will grow otherwise).
    """

    # Remote-debugging endpoint used when driving a user-supplied Chrome binary.
    _CHROME_DEBUG_PORT = 9222
    _CHROME_DEBUG_URL = f'http://localhost:{_CHROME_DEBUG_PORT}'

    def __init__(
        self,
        config: BrowserConfig | None = None,
    ):
        """
        Store the configuration; the browser itself is started lazily.

        Args:
            config: Browser settings. When omitted, a fresh BrowserConfig is
                created for this instance (a default evaluated in the signature
                would be shared, and mutated, across all Browser objects).
        """
        logger.debug('Initializing new browser')
        self.config = config if config is not None else BrowserConfig()
        self.playwright: Playwright | None = None
        self.playwright_browser: PlaywrightBrowser | None = None

        self.disable_security_args: list[str] = []
        if self.config.disable_security:
            self.disable_security_args = [
                '--disable-web-security',
                '--disable-site-isolation-trials',
                '--disable-features=IsolateOrigins,site-per-process',
            ]

    async def new_context(self, config: BrowserContextConfig | None = None) -> BrowserContext:
        """
        Create a browser context bound to this browser.

        Args:
            config: Context settings. When omitted, a fresh
                BrowserContextConfig is created for this call.
        """
        if config is None:
            config = BrowserContextConfig()
        return BrowserContext(config=config, browser=self)

    async def get_playwright_browser(self) -> PlaywrightBrowser:
        """Return the underlying Playwright browser, starting it on first use."""
        if self.playwright_browser is None:
            return await self._init()

        return self.playwright_browser

    @time_execution_async('--init (browser)')
    async def _init(self):
        """
        Start Playwright and set up the browser described by the config.

        Returns:
            The connected or launched Playwright browser.

        Raises:
            Exception: whatever `_setup_browser` raised; the Playwright driver
                started here is stopped again before the error propagates.
        """
        playwright = await async_playwright().start()
        try:
            browser = await self._setup_browser(playwright)
        except Exception:
            # Do not leak the freshly started driver when the browser setup fails.
            try:
                await playwright.stop()
            except Exception as stop_error:
                logger.debug(f'Failed to stop playwright after failed init: {stop_error}')
            raise

        self.playwright = playwright
        self.playwright_browser = browser

        return self.playwright_browser

    async def _setup_cdp(self, playwright: Playwright) -> PlaywrightBrowser:
        """
        Connect to an already running browser over CDP.

        Raises:
            ValueError: if `config.cdp_url` is not set.
        """
        if not self.config.cdp_url:
            raise ValueError('CDP URL is required')
        logger.info(f'Connecting to remote browser via CDP {self.config.cdp_url}')
        return await playwright.chromium.connect_over_cdp(self.config.cdp_url)

    async def _setup_wss(self, playwright: Playwright) -> PlaywrightBrowser:
        """
        Connect to an already running browser over a WebSocket endpoint.

        Raises:
            ValueError: if `config.wss_url` is not set.
        """
        if not self.config.wss_url:
            raise ValueError('WSS URL is required')
        logger.info(f'Connecting to remote browser via WSS {self.config.wss_url}')
        return await playwright.chromium.connect(self.config.wss_url)

    async def _chrome_debug_endpoint_ready(self) -> bool:
        """Return True when the local Chrome debugging endpoint answers with HTTP 200."""
        import requests

        def probe() -> bool:
            try:
                response = requests.get(f'{self._CHROME_DEBUG_URL}/json/version', timeout=2)
            except requests.RequestException:
                # Connection refused, timeouts, etc. all mean "not ready".
                return False
            return response.status_code == 200

        # requests is blocking; run it in a worker thread so the event loop keeps running.
        return await asyncio.to_thread(probe)

    async def _setup_browser_with_instance(self, playwright: Playwright) -> PlaywrightBrowser:
        """
        Attach to the Chrome binary at `config.chrome_instance_path`.

        Reuses a Chrome that already exposes the debugging port; otherwise
        starts the binary with remote debugging enabled, waits up to about ten
        seconds for the port to answer, and connects over CDP.

        Raises:
            ValueError: if `config.chrome_instance_path` is not set.
            RuntimeError: if connecting to the newly started Chrome fails.
        """
        if not self.config.chrome_instance_path:
            raise ValueError('Chrome instance path is required')

        import subprocess

        # Check if browser is already running
        if await self._chrome_debug_endpoint_ready():
            logger.info('Reusing existing Chrome instance')
            return await playwright.chromium.connect_over_cdp(
                endpoint_url=self._CHROME_DEBUG_URL,
                timeout=20000,  # 20 second timeout for connection
            )

        logger.debug('No existing Chrome instance found, starting a new one')

        # Start a new Chrome instance; the process is intentionally left running.
        subprocess.Popen(
            [
                self.config.chrome_instance_path,
                f'--remote-debugging-port={self._CHROME_DEBUG_PORT}',
            ]
            + self.config.extra_chromium_args,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Poll until the debugging port answers (at most 10 attempts, 1s apart).
        for _ in range(10):
            if await self._chrome_debug_endpoint_ready():
                break
            await asyncio.sleep(1)

        # Attempt to connect again after starting a new instance
        try:
            return await playwright.chromium.connect_over_cdp(
                endpoint_url=self._CHROME_DEBUG_URL,
                timeout=20000,  # 20 second timeout for connection
            )
        except Exception as e:
            logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
            raise RuntimeError(
                ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
            ) from e

    async def _setup_standard_browser(self, playwright: Playwright) -> PlaywrightBrowser:
        """
        Launch a fresh Chromium with the project's default flags.

        The flag list is extended with the security-disabling flags (when
        enabled in the config) and `config.extra_chromium_args`.
        """
        launch_args = [
            '--no-sandbox',
            '--disable-blink-features=AutomationControlled',
            '--disable-infobars',
            '--disable-background-timer-throttling',
            '--disable-popup-blocking',
            '--disable-backgrounding-occluded-windows',
            '--disable-renderer-backgrounding',
            '--disable-window-activation',
            '--disable-focus-on-load',
            '--no-first-run',
            '--no-default-browser-check',
            '--no-startup-window',
            '--window-position=0,0',
        ]
        return await playwright.chromium.launch(
            headless=self.config.headless,
            args=launch_args + self.disable_security_args + self.config.extra_chromium_args,
            proxy=self.config.proxy,
        )

    async def _setup_browser(self, playwright: Playwright) -> PlaywrightBrowser:
        """
        Pick the connection strategy from the config and return the browser.

        Precedence: `cdp_url`, then `wss_url`, then `chrome_instance_path`,
        otherwise a fresh Chromium is launched. Any failure is logged and
        re-raised unchanged.
        """
        try:
            if self.config.cdp_url:
                return await self._setup_cdp(playwright)
            if self.config.wss_url:
                return await self._setup_wss(playwright)
            if self.config.chrome_instance_path:
                return await self._setup_browser_with_instance(playwright)
            return await self._setup_standard_browser(playwright)
        except Exception as e:
            logger.error(f'Failed to initialize Playwright browser: {str(e)}')
            raise

    async def close(self):
        """
        Close the browser instance.

        Best effort: failures are logged at debug level and never raised. The
        browser and the Playwright driver are shut down independently so that
        a failing browser close does not leave the driver running. When
        `config._force_keep_browser_alive` is set nothing is closed, but the
        references held by this object are still dropped.
        """
        try:
            if not self.config._force_keep_browser_alive:
                if self.playwright_browser:
                    try:
                        await self.playwright_browser.close()
                    except Exception as e:
                        logger.debug(f'Failed to close browser properly: {e}')
                if self.playwright:
                    try:
                        await self.playwright.stop()
                    except Exception as e:
                        logger.debug(f'Failed to stop playwright properly: {e}')
        except Exception as e:
            logger.debug(f'Failed to close browser properly: {e}')
        finally:
            self.playwright_browser = None
            self.playwright = None
            gc.collect()

    def __del__(self):
        """Best-effort async cleanup when the object is destroyed."""
        try:
            # getattr: __init__ may have failed before the attributes were set.
            if not (getattr(self, 'playwright_browser', None) or getattr(self, 'playwright', None)):
                return
            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                # get_running_loop() raises when no loop is running in this
                # thread, so this is the only place the synchronous fallback
                # can be reached.
                asyncio.run(self.close())
            else:
                loop.create_task(self.close())
        except Exception as e:
            logger.debug(f'Failed to cleanup browser in destructor: {e}')
|