| from __future__ import annotations |
|
|
| from selenium import webdriver |
| from selenium.common.exceptions import TimeoutException, WebDriverException |
| from selenium.webdriver.chrome.service import Service as ChromeService |
| from selenium.webdriver.remote.webdriver import WebDriver |
| from selenium.webdriver.support.wait import WebDriverWait |
|
|
|
|
# User-Agent string passed to Chrome through the ``--user-agent`` switch in
# ``configure_browser``.
DEFAULT_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/124.0.0.0 Safari/537.36"
)
|
|
|
|
def configure_browser(*, chrome_binary: str, chromedriver_path: str, page_timeout: int = 40) -> WebDriver:
    """Launch a headless Chrome session and return its configured driver.

    Args:
        chrome_binary: Path assigned to ``ChromeOptions.binary_location``.
        chromedriver_path: Path handed to ``ChromeService`` as the
            chromedriver executable.
        page_timeout: Page-load timeout in seconds. The script timeout is
            set to the same value, capped at 20 seconds.

    Returns:
        The started driver, with timeouts applied and a script registered
        that makes ``navigator.webdriver`` read as ``undefined`` on every
        new document.

    Raises:
        Any exception raised while starting or configuring the session is
        propagated. If the failure happens after the browser was launched,
        the session is quit first so no browser process is left behind.
    """
    options = webdriver.ChromeOptions()
    options.binary_location = chrome_binary
    # "eager" lets navigation return before every sub-resource has loaded.
    options.page_load_strategy = "eager"

    # Command-line switches, applied in this exact order.
    chrome_arguments = (
        "--headless=new",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
        "--disable-background-networking",
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-renderer-backgrounding",
        "--disable-extensions",
        "--disable-default-apps",
        "--no-first-run",
        "--no-default-browser-check",
        "--mute-audio",
        "--window-size=1440,1280",
        "--lang=zh-CN",
        f"--user-agent={DEFAULT_USER_AGENT}",
        "--remote-debugging-pipe",
    )
    for argument in chrome_arguments:
        options.add_argument(argument)

    options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    options.add_experimental_option("useAutomationExtension", False)

    service = ChromeService(executable_path=chromedriver_path)
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.set_page_load_timeout(page_timeout)
        driver.set_script_timeout(min(page_timeout, 20))
        driver.implicitly_wait(6)
        # Runs before any page script on each new document.
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {
                "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined});"
            },
        )
    except BaseException:
        # The browser is already running at this point; shut it down so a
        # failed setup does not leak the Chrome/chromedriver processes.
        try:
            driver.quit()
        except Exception:
            # Cleanup is best-effort: the original failure is what the
            # caller needs to see, so it must not be masked here.
            pass
        raise
    return driver
|
|
|
|
def open_with_recovery(driver: WebDriver, url: str) -> bool:
    """Navigate to *url*, tolerating a page-load timeout.

    Returns:
        ``False`` when ``driver.get`` finished without a timeout, ``True``
        when it raised ``TimeoutException``. In the timeout case the page
        is asked to stop loading via ``window.stop()``; a
        ``WebDriverException`` from that call is ignored.
    """
    timed_out = False
    try:
        driver.get(url)
    except TimeoutException:
        timed_out = True
        try:
            # Best-effort: halt whatever is still loading.
            driver.execute_script("window.stop();")
        except WebDriverException:
            pass
    return timed_out
|
|
|
|
def wait_for_ready(driver_wait: WebDriverWait, *, allow_interactive: bool = True) -> str:
    """Block until the document reaches an acceptable ``readyState``.

    Args:
        driver_wait: The wait object whose ``until`` method polls the page.
        allow_interactive: When true, ``"interactive"`` is accepted in
            addition to ``"complete"``; otherwise only ``"complete"`` is.

    Returns:
        The ``document.readyState`` value that satisfied the wait, so the
        result is always one of the accepted states.

    Raises:
        Whatever ``driver_wait.until`` raises when the condition is never
        met; the failure message is
        ``"The target page did not finish loading in time."``.
    """
    acceptable_states = {"complete", "interactive"} if allow_interactive else {"complete"}

    def _accepted_state(web_driver: WebDriver) -> str | bool:
        # Return the state itself (truthy) once it is acceptable, so that
        # ``until`` hands it back. This avoids a second query and avoids
        # reaching into the wait object's private ``_driver`` attribute.
        state = str(web_driver.execute_script("return document.readyState"))
        return state if state in acceptable_states else False

    return str(
        driver_wait.until(
            _accepted_state,
            "The target page did not finish loading in time.",
        )
    )
|
|
|