from __future__ import annotations

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support.wait import WebDriverWait

# User agent sent by the headless browser instead of Chrome's default one.
DEFAULT_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
)


def configure_browser(
    *,
    chrome_binary: str,
    chromedriver_path: str,
    page_timeout: int = 40,
) -> WebDriver:
    """Start a headless Chrome session and return its configured driver.

    Args:
        chrome_binary: Path to the Chrome executable to launch.
        chromedriver_path: Path to the chromedriver executable.
        page_timeout: Page-load timeout passed to the driver. The script
            timeout is the same value capped at 20.

    Returns:
        The started ``WebDriver`` with timeouts, a 6-unit implicit wait and
        the ``navigator.webdriver`` override script installed.

    Raises:
        WebDriverException: If the browser cannot be started or configured.
            When configuration fails after the browser has started, the
            session is quit before the error propagates.
    """
    options = webdriver.ChromeOptions()
    options.binary_location = chrome_binary
    options.page_load_strategy = "eager"

    # Command-line switches, applied in this exact order.
    chrome_arguments = (
        "--headless=new",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
        "--disable-background-networking",
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-renderer-backgrounding",
        "--disable-extensions",
        "--disable-default-apps",
        "--no-first-run",
        "--no-default-browser-check",
        "--mute-audio",
        "--window-size=1440,1280",
        "--lang=zh-CN",
        f"--user-agent={DEFAULT_USER_AGENT}",
        "--remote-debugging-pipe",
    )
    for argument in chrome_arguments:
        options.add_argument(argument)

    options.add_experimental_option(
        "excludeSwitches", ["enable-automation", "enable-logging"]
    )
    options.add_experimental_option("useAutomationExtension", False)

    service = ChromeService(executable_path=chromedriver_path)
    driver = webdriver.Chrome(service=service, options=options)

    # From here on a browser process exists. If any setup call fails the
    # caller never receives the driver, so it must be shut down here or the
    # Chrome/chromedriver processes are leaked.
    try:
        driver.set_page_load_timeout(page_timeout)
        driver.set_script_timeout(min(page_timeout, 20))
        driver.implicitly_wait(6)
        # Registers a script for every new document that makes
        # navigator.webdriver read as undefined.
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {
                "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined});"
            },
        )
    except BaseException:
        try:
            driver.quit()
        except WebDriverException:
            # Cleanup is best-effort; the original failure is what matters.
            pass
        raise
    return driver


def open_with_recovery(driver: WebDriver, url: str) -> bool:
    """Navigate to *url*, stopping the page load if it times out.

    Args:
        driver: The driver used for navigation.
        url: The address to open.

    Returns:
        ``False`` when ``driver.get`` returned normally, ``True`` when it
        raised ``TimeoutException`` and the load was stopped instead.
    """
    try:
        driver.get(url)
    except TimeoutException:
        # Best-effort: halt the pending load so the partially loaded page can
        # still be used; a failure to stop it is deliberately ignored.
        try:
            driver.execute_script("window.stop();")
        except WebDriverException:
            pass
        return True
    return False


def wait_for_ready(driver_wait: WebDriverWait, *, allow_interactive: bool = True) -> str:
    """Block until ``document.readyState`` reaches an acceptable value.

    Args:
        driver_wait: The wait object that polls the page.
        allow_interactive: When true, ``"interactive"`` is accepted in
            addition to ``"complete"``.

    Returns:
        The ``document.readyState`` value that satisfied the wait.

    Raises:
        TimeoutException: If no acceptable state is observed before
            *driver_wait* gives up.
    """
    acceptable_states = {"complete", "interactive"} if allow_interactive else {"complete"}

    def _observed_ready_state(web_driver: WebDriver) -> str | bool:
        # until() hands back the first truthy result, so returning the state
        # itself avoids reaching into the wait's private driver attribute and
        # a second, possibly different, readyState query.
        state = web_driver.execute_script("return document.readyState")
        return state if state in acceptable_states else False

    ready_state = driver_wait.until(
        _observed_ready_state,
        "The target page did not finish loading in time.",
    )
    return str(ready_state)