flow / src /services /hcaptcha.py
zbq111's picture
Upload 75 files
504b397 verified
"""HCaptcha solver using Playwright browser automation.
Supports HCaptchaTaskProxyless task type.
Visits the target page, interacts with the hCaptcha widget, and extracts the response token.
"""
from __future__ import annotations
import asyncio
import logging
from typing import Any
from playwright.async_api import Browser, Playwright, async_playwright
from ..core.config import Config
log = logging.getLogger(__name__)
_STEALTH_JS = """
Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
"""
_EXTRACT_HCAPTCHA_TOKEN_JS = """
() => {
const textarea = document.querySelector('[name="h-captcha-response"]')
|| document.querySelector('[name="g-recaptcha-response"]');
if (textarea && textarea.value && textarea.value.length > 20) {
return textarea.value;
}
if (window.hcaptcha && typeof window.hcaptcha.getResponse === 'function') {
const resp = window.hcaptcha.getResponse();
if (resp && resp.length > 20) return resp;
}
return null;
}
"""
class HCaptchaSolver:
"""Solves HCaptchaTaskProxyless tasks via headless Chromium."""
def __init__(self, config: Config, browser: Browser | None = None) -> None:
self._config = config
self._playwright: Playwright | None = None
self._browser: Browser | None = browser
self._owns_browser = browser is None
async def start(self) -> None:
if self._browser is not None:
return
self._playwright = await async_playwright().start()
self._browser = await self._playwright.chromium.launch(
headless=self._config.browser_headless,
args=[
"--disable-blink-features=AutomationControlled",
"--no-sandbox",
"--disable-dev-shm-usage",
"--disable-gpu",
],
)
log.info("HCaptchaSolver browser started")
async def stop(self) -> None:
if self._owns_browser:
if self._browser:
await self._browser.close()
if self._playwright:
await self._playwright.stop()
log.info("HCaptchaSolver stopped")
async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
website_url = params["websiteURL"]
website_key = params["websiteKey"]
last_error: Exception | None = None
for attempt in range(self._config.captcha_retries):
try:
token = await self._solve_once(website_url, website_key)
return {"gRecaptchaResponse": token}
except Exception as exc:
last_error = exc
log.warning(
"HCaptcha attempt %d/%d failed: %s",
attempt + 1,
self._config.captcha_retries,
exc,
)
if attempt < self._config.captcha_retries - 1:
await asyncio.sleep(2)
raise RuntimeError(
f"HCaptcha failed after {self._config.captcha_retries} attempts: {last_error}"
)
async def _solve_once(self, website_url: str, website_key: str) -> str:
assert self._browser is not None
context = await self._browser.new_context(
user_agent=(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/131.0.0.0 Safari/537.36"
),
viewport={"width": 1920, "height": 1080},
locale="en-US",
)
page = await context.new_page()
await page.add_init_script(_STEALTH_JS)
try:
timeout_ms = self._config.browser_timeout * 1000
await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms)
await page.mouse.move(400, 300)
await asyncio.sleep(1)
# Click only the checkbox iframe — match by specific title to avoid the challenge iframe
iframe_element = page.frame_locator(
'iframe[title="Widget containing checkbox for hCaptcha security challenge"]'
)
checkbox = iframe_element.locator("#checkbox")
await checkbox.click(timeout=10_000)
# Wait for token — may require challenge completion; poll up to 30s
for _ in range(6):
await asyncio.sleep(5)
token = await page.evaluate(_EXTRACT_HCAPTCHA_TOKEN_JS)
if isinstance(token, str) and len(token) > 20:
break
else:
token = None
if not isinstance(token, str) or len(token) < 20:
raise RuntimeError(f"Invalid hCaptcha token: {token!r}")
log.info("Got hCaptcha token (len=%d)", len(token))
return token
finally:
await context.close()