"""HCaptcha solver using Playwright browser automation.

Supports the HCaptchaTaskProxyless task type.
Visits the target page, interacts with the hCaptcha widget, and extracts the response token.
"""
from __future__ import annotations
import asyncio
import logging
from typing import Any
from playwright.async_api import Browser, Playwright, async_playwright
from ..core.config import Config
log = logging.getLogger(__name__)
# JavaScript source registered on each new page via ``page.add_init_script``
# in ``HCaptchaSolver._solve_once``. It redefines a few browser properties:
#   * ``navigator.webdriver`` -> getter returning ``undefined``
#   * ``navigator.languages`` -> getter returning ``['en-US', 'en']``
#   * ``navigator.plugins``   -> getter returning a five-element placeholder array
#   * ``window.chrome``       -> object with stub ``runtime``/``loadTimes``/``csi``
# NOTE(review): presumably this is meant to make the automated browser look
# less like automation to the target page — confirm with the author.
_STEALTH_JS = """
Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
"""
# JavaScript arrow function evaluated in the page to read the solved token.
# Lookup order:
#   1. The element named ``h-captcha-response``; if no such element exists,
#      the element named ``g-recaptcha-response``. Its ``value`` is returned
#      when it is longer than 20 characters.
#   2. Otherwise ``window.hcaptcha.getResponse()`` when that function exists
#      and its result is longer than 20 characters.
# Returns ``null`` when neither source yields a token.
_EXTRACT_HCAPTCHA_TOKEN_JS = """
() => {
const textarea = document.querySelector('[name="h-captcha-response"]')
|| document.querySelector('[name="g-recaptcha-response"]');
if (textarea && textarea.value && textarea.value.length > 20) {
return textarea.value;
}
if (window.hcaptcha && typeof window.hcaptcha.getResponse === 'function') {
const resp = window.hcaptcha.getResponse();
if (resp && resp.length > 20) return resp;
}
return null;
}
"""
class HCaptchaSolver:
    """Solves HCaptchaTaskProxyless tasks by driving a Chromium browser.

    The solver either launches its own Chromium instance through Playwright
    (see :meth:`start`) or reuses a browser handed to the constructor. A
    browser supplied by the caller is never closed by :meth:`stop`.
    """

    # A candidate token must be strictly longer than this to be accepted.
    _MIN_TOKEN_LENGTH = 20
    # Token polling after the checkbox click: 6 polls, 5 s apart (30 s total).
    _POLL_ATTEMPTS = 6
    _POLL_INTERVAL_SECONDS = 5

    def __init__(self, config: Config, browser: Browser | None = None) -> None:
        """Store the configuration and an optional externally managed browser.

        Args:
            config: Settings object; this class reads ``browser_headless``,
                ``browser_timeout`` and ``captcha_retries`` from it.
            browser: Optional already-running browser. When given, the solver
                uses it and does not take ownership of it.
        """
        self._config = config
        self._playwright: Playwright | None = None
        self._browser: Browser | None = browser
        # Only a browser launched by this instance is closed in stop().
        self._owns_browser = browser is None

    async def start(self) -> None:
        """Launch Chromium unless a browser is already available."""
        if self._browser is not None:
            return
        self._playwright = await async_playwright().start()
        self._browser = await self._playwright.chromium.launch(
            headless=self._config.browser_headless,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
            ],
        )
        log.info("HCaptchaSolver browser started")

    async def stop(self) -> None:
        """Close the browser and Playwright driver if this instance owns them.

        The handles are cleared before closing so that a later :meth:`start`
        launches a fresh browser instead of returning early with a closed one.
        The Playwright driver is stopped even if closing the browser raises.
        """
        if self._owns_browser:
            browser, playwright = self._browser, self._playwright
            self._browser = None
            self._playwright = None
            try:
                if browser:
                    await browser.close()
            finally:
                if playwright:
                    await playwright.stop()
        log.info("HCaptchaSolver stopped")

    async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
        """Solve the captcha described by ``params``, retrying on failure.

        Args:
            params: Task parameters; must contain ``"websiteURL"`` and
                ``"websiteKey"``.

        Returns:
            A dict with the token stored under ``"gRecaptchaResponse"``.

        Raises:
            KeyError: If a required key is missing from ``params``.
            RuntimeError: If every attempt fails. The last underlying error
                is attached as ``__cause__``.
        """
        website_url = params["websiteURL"]
        website_key = params["websiteKey"]
        retries = self._config.captcha_retries
        last_error: Exception | None = None
        for attempt in range(retries):
            try:
                token = await self._solve_once(website_url, website_key)
            except Exception as exc:
                last_error = exc
                log.warning(
                    "HCaptcha attempt %d/%d failed: %s",
                    attempt + 1,
                    retries,
                    exc,
                )
                # Pause between attempts, but not after the final one.
                if attempt < retries - 1:
                    await asyncio.sleep(2)
            else:
                return {"gRecaptchaResponse": token}
        raise RuntimeError(
            f"HCaptcha failed after {retries} attempts: {last_error}"
        ) from last_error

    async def _solve_once(self, website_url: str, website_key: str) -> str:
        """Run a single solve attempt in a fresh browser context.

        Args:
            website_url: Page to open.
            website_key: Site key of the widget. Currently unused by this
                method.

        Returns:
            The hCaptcha response token.

        Raises:
            RuntimeError: If the browser has not been started, or no valid
                token is available after polling.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if self._browser is None:
            raise RuntimeError(
                "HCaptchaSolver browser is not started; call start() first"
            )
        context = await self._browser.new_context(
            user_agent=(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/131.0.0.0 Safari/537.36"
            ),
            viewport={"width": 1920, "height": 1080},
            locale="en-US",
        )
        # Everything after context creation sits inside try/finally so the
        # context is closed even when page creation or script setup fails.
        try:
            page = await context.new_page()
            await page.add_init_script(_STEALTH_JS)
            timeout_ms = self._config.browser_timeout * 1000
            await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms)
            await page.mouse.move(400, 300)
            await asyncio.sleep(1)
            # Click only the checkbox iframe — match by specific title to
            # avoid the challenge iframe.
            checkbox = page.frame_locator(
                'iframe[title="Widget containing checkbox for hCaptcha security challenge"]'
            ).locator("#checkbox")
            await checkbox.click(timeout=10_000)
            # Wait for token — may require challenge completion.
            token: str | None = None
            for _ in range(self._POLL_ATTEMPTS):
                await asyncio.sleep(self._POLL_INTERVAL_SECONDS)
                candidate = await page.evaluate(_EXTRACT_HCAPTCHA_TOKEN_JS)
                if isinstance(candidate, str) and len(candidate) > self._MIN_TOKEN_LENGTH:
                    token = candidate
                    break
            if token is None:
                raise RuntimeError(f"Invalid hCaptcha token: {token!r}")
            log.info("Got hCaptcha token (len=%d)", len(token))
            return token
        finally:
            await context.close()
|