| """HCaptcha solver using Playwright browser automation. |
| |
| Supports HCaptchaTaskProxyless task type. |
| Visits the target page, interacts with the hCaptcha widget, and extracts the response token. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import asyncio |
| import logging |
| from typing import Any |
|
|
| from playwright.async_api import Browser, Playwright, async_playwright |
|
|
| from ..core.config import Config |
|
|
| log = logging.getLogger(__name__) |
|
|
| _STEALTH_JS = """ |
| Object.defineProperty(navigator, 'webdriver', {get: () => undefined}); |
| Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']}); |
| Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]}); |
| window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}}; |
| """ |
|
|
| _EXTRACT_HCAPTCHA_TOKEN_JS = """ |
| () => { |
| const textarea = document.querySelector('[name="h-captcha-response"]') |
| || document.querySelector('[name="g-recaptcha-response"]'); |
| if (textarea && textarea.value && textarea.value.length > 20) { |
| return textarea.value; |
| } |
| if (window.hcaptcha && typeof window.hcaptcha.getResponse === 'function') { |
| const resp = window.hcaptcha.getResponse(); |
| if (resp && resp.length > 20) return resp; |
| } |
| return null; |
| } |
| """ |
|
|
|
|
| class HCaptchaSolver: |
| """Solves HCaptchaTaskProxyless tasks via headless Chromium.""" |
|
|
| def __init__(self, config: Config, browser: Browser | None = None) -> None: |
| self._config = config |
| self._playwright: Playwright | None = None |
| self._browser: Browser | None = browser |
| self._owns_browser = browser is None |
|
|
| async def start(self) -> None: |
| if self._browser is not None: |
| return |
| self._playwright = await async_playwright().start() |
| self._browser = await self._playwright.chromium.launch( |
| headless=self._config.browser_headless, |
| args=[ |
| "--disable-blink-features=AutomationControlled", |
| "--no-sandbox", |
| "--disable-dev-shm-usage", |
| "--disable-gpu", |
| ], |
| ) |
| log.info("HCaptchaSolver browser started") |
|
|
| async def stop(self) -> None: |
| if self._owns_browser: |
| if self._browser: |
| await self._browser.close() |
| if self._playwright: |
| await self._playwright.stop() |
| log.info("HCaptchaSolver stopped") |
|
|
| async def solve(self, params: dict[str, Any]) -> dict[str, Any]: |
| website_url = params["websiteURL"] |
| website_key = params["websiteKey"] |
|
|
| last_error: Exception | None = None |
| for attempt in range(self._config.captcha_retries): |
| try: |
| token = await self._solve_once(website_url, website_key) |
| return {"gRecaptchaResponse": token} |
| except Exception as exc: |
| last_error = exc |
| log.warning( |
| "HCaptcha attempt %d/%d failed: %s", |
| attempt + 1, |
| self._config.captcha_retries, |
| exc, |
| ) |
| if attempt < self._config.captcha_retries - 1: |
| await asyncio.sleep(2) |
|
|
| raise RuntimeError( |
| f"HCaptcha failed after {self._config.captcha_retries} attempts: {last_error}" |
| ) |
|
|
| async def _solve_once(self, website_url: str, website_key: str) -> str: |
| assert self._browser is not None |
|
|
| context = await self._browser.new_context( |
| user_agent=( |
| "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " |
| "AppleWebKit/537.36 (KHTML, like Gecko) " |
| "Chrome/131.0.0.0 Safari/537.36" |
| ), |
| viewport={"width": 1920, "height": 1080}, |
| locale="en-US", |
| ) |
| page = await context.new_page() |
| await page.add_init_script(_STEALTH_JS) |
|
|
| try: |
| timeout_ms = self._config.browser_timeout * 1000 |
| await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms) |
|
|
| await page.mouse.move(400, 300) |
| await asyncio.sleep(1) |
|
|
| |
| iframe_element = page.frame_locator( |
| 'iframe[title="Widget containing checkbox for hCaptcha security challenge"]' |
| ) |
| checkbox = iframe_element.locator("#checkbox") |
| await checkbox.click(timeout=10_000) |
|
|
| |
| for _ in range(6): |
| await asyncio.sleep(5) |
| token = await page.evaluate(_EXTRACT_HCAPTCHA_TOKEN_JS) |
| if isinstance(token, str) and len(token) > 20: |
| break |
| else: |
| token = None |
|
|
| if not isinstance(token, str) or len(token) < 20: |
| raise RuntimeError(f"Invalid hCaptcha token: {token!r}") |
|
|
| log.info("Got hCaptcha token (len=%d)", len(token)) |
| return token |
| finally: |
| await context.close() |
|
|