Spaces:

dragg2
/

mycaptcha

Running

App Files Files Community

dragg2 committed 25 days ago

Commit

3a04f21

verified ·

1 Parent(s): d47a09c

Upload 18 files

Browse files

Files changed (18) hide show

Dockerfile +13 -0
main.py +19 -0
src/__init__.py +1 -0
src/api/__init__.py +1 -0
src/api/routes.py +171 -0
src/core/__init__.py +1 -0
src/core/config.py +106 -0
src/main.py +132 -0
src/models/__init__.py +1 -0
src/models/task.py +70 -0
src/services/__init__.py +1 -0
src/services/classification.py +203 -0
src/services/hcaptcha.py +142 -0
src/services/recaptcha_v2.py +278 -0
src/services/recaptcha_v3.py +160 -0
src/services/recognition.py +171 -0
src/services/task_manager.py +92 -0
src/services/turnstile.py +144 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,13 @@

+# Build on the slim Python 3.11 base image.
+FROM python:3.11-slim
+WORKDIR /app
+# Copy only the dependency manifest first so the install layer below can be
+# cached independently of application source changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir --root-user-action=ignore -r requirements.txt
+# Download Chromium and the OS packages it needs for Playwright.
+RUN playwright install --with-deps chromium
+COPY . .
+EXPOSE 8000
+# main.py reads the listen port from the PORT environment variable or the app config.
+CMD ["python", "main.py"]

main.py ADDED Viewed

	@@ -0,0 +1,19 @@

+"""Service entrypoint compatible with Render-style deployment."""
+import os
+import uvicorn
+from src.main import app
+if __name__ == "__main__":
+    from src.core.config import config
+    # A PORT environment variable takes precedence over the configured port.
+    listen_port = int(os.environ.get("PORT", config.server_port))
+    # The app is passed as an import string; auto-reload stays disabled.
+    uvicorn.run("src.main:app", host=config.server_host, port=listen_port, reload=False)

src/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Application package."""

src/api/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """API package."""

src/api/routes.py ADDED Viewed

	@@ -0,0 +1,171 @@

+"""YesCaptcha / AntiCaptcha compatible HTTP routes."""
+from __future__ import annotations
+import logging
+from fastapi import APIRouter
+from ..core.config import config
+from ..models.task import (
+    CreateTaskRequest,
+    CreateTaskResponse,
+    GetBalanceRequest,
+    GetBalanceResponse,
+    GetTaskResultRequest,
+    GetTaskResultResponse,
+    SolutionObject,
+)
+from ..services.task_manager import TaskStatus, task_manager
+log = logging.getLogger(__name__)
+router = APIRouter()
+# Task types for which createTask requires both websiteURL and websiteKey.
+_BROWSER_TASK_TYPES = {
+    "RecaptchaV3TaskProxyless",
+    "RecaptchaV3TaskProxylessM1",
+    "RecaptchaV3TaskProxylessM1S7",
+    "RecaptchaV3TaskProxylessM1S9",
+    "RecaptchaV3EnterpriseTask",
+    "RecaptchaV3EnterpriseTaskM1",
+    "NoCaptchaTaskProxyless",
+    "RecaptchaV2TaskProxyless",
+    "RecaptchaV2EnterpriseTaskProxyless",
+    "HCaptchaTaskProxyless",
+    "TurnstileTaskProxyless",
+    "TurnstileTaskProxylessM1",
+}
+# Image-to-text task types; createTask requires a non-empty ``body`` for these.
+_IMAGE_TASK_TYPES = {
+    "ImageToTextTask",
+    "ImageToTextTaskMuggle",
+    "ImageToTextTaskM1",
+}
+# Classification task types; createTask requires at least one of
+# image / images / body / queries for these.
+_CLASSIFICATION_TASK_TYPES = {
+    "HCaptchaClassification",
+    "ReCaptchaV2Classification",
+    "FunCaptchaClassification",
+    "AwsClassification",
+}
+def _check_client_key(client_key: str) -> CreateTaskResponse | None:
+    """Validate the caller's clientKey against the configured key.
+
+    Authentication is skipped (every key is accepted) when no client key is
+    configured.  The comparison is constant-time so response timing does not
+    reveal how much of a guessed key is correct.
+
+    Args:
+        client_key: The ``clientKey`` value supplied in the request body.
+
+    Returns:
+        An ``ERROR_KEY_DOES_NOT_EXIST`` response when the key does not match,
+        otherwise ``None``.
+    """
+    import hmac
+    expected = config.client_key
+    if not expected:
+        return None
+    # Compare as bytes: compare_digest rejects str arguments containing non-ASCII.
+    if hmac.compare_digest(client_key.encode("utf-8"), expected.encode("utf-8")):
+        return None
+    return CreateTaskResponse(
+        errorId=1,
+        errorCode="ERROR_KEY_DOES_NOT_EXIST",
+        errorDescription="Invalid clientKey",
+    )
+# createTask: authenticate, check the task type is supported, validate the
+# fields that type needs, then hand the task to the task manager.
+@router.post("/createTask", response_model=CreateTaskResponse)
+async def create_task(request: CreateTaskRequest) -> CreateTaskResponse:
+    def _reject(code: str, description: str) -> CreateTaskResponse:
+        # Every validation failure uses errorId=1 with a specific code/description.
+        return CreateTaskResponse(
+            errorId=1, errorCode=code, errorDescription=description
+        )
+    auth_error = _check_client_key(request.clientKey)
+    if auth_error:
+        return auth_error
+    task = request.task
+    task_type = task.type
+    known_types = task_manager.supported_types()
+    if task_type not in known_types:
+        return _reject(
+            "ERROR_TASK_NOT_SUPPORTED",
+            f"Task type '{task_type}' is not supported. "
+            f"Supported: {known_types}",
+        )
+    # Browser-based tasks need both the page URL and the site key.
+    if task_type in _BROWSER_TASK_TYPES and not (task.websiteURL and task.websiteKey):
+        return _reject(
+            "ERROR_TASK_PROPERTY_EMPTY",
+            "websiteURL and websiteKey are required",
+        )
+    # ImageToText tasks need the base64 image body.
+    if task_type in _IMAGE_TASK_TYPES and not task.body:
+        return _reject(
+            "ERROR_TASK_PROPERTY_EMPTY",
+            "body (base64 image) is required",
+        )
+    # Classification tasks accept image data under any of several field names.
+    if task_type in _CLASSIFICATION_TASK_TYPES and not any(
+        (task.image, task.images, task.body, task.queries)
+    ):
+        return _reject(
+            "ERROR_TASK_PROPERTY_EMPTY",
+            "image data is required for classification tasks",
+        )
+    task_id = task_manager.create_task(task_type, task.model_dump(exclude_none=True))
+    log.info("Created task %s (type=%s)", task_id, task_type)
+    return CreateTaskResponse(errorId=0, taskId=task_id)
+@router.post("/getTaskResult", response_model=GetTaskResultResponse)
+async def get_task_result(
+    request: GetTaskResultRequest,
+) -> GetTaskResultResponse:
+    """Report the current state of a previously created task.
+
+    Returns an error response for a wrong clientKey or an unknown taskId,
+    ``status="processing"`` while the task is running, ``status="ready"``
+    with the solution once it has finished, and otherwise the task's stored
+    error (defaulting to ``ERROR_CAPTCHA_UNSOLVABLE``).
+    """
+    import hmac
+    expected_key = config.client_key
+    # Constant-time comparison (on bytes, since compare_digest rejects
+    # non-ASCII str) so timing does not leak key prefixes.
+    if expected_key and not hmac.compare_digest(
+        request.clientKey.encode("utf-8"), expected_key.encode("utf-8")
+    ):
+        return GetTaskResultResponse(
+            errorId=1,
+            errorCode="ERROR_KEY_DOES_NOT_EXIST",
+            errorDescription="Invalid clientKey",
+        )
+    task = task_manager.get_task(request.taskId)
+    if task is None:
+        return GetTaskResultResponse(
+            errorId=1,
+            errorCode="ERROR_NO_SUCH_CAPCHA_ID",
+            errorDescription="Task not found",
+        )
+    if task.status == TaskStatus.PROCESSING:
+        return GetTaskResultResponse(errorId=0, status="processing")
+    if task.status == TaskStatus.READY:
+        return GetTaskResultResponse(
+            errorId=0,
+            status="ready",
+            solution=SolutionObject(**(task.solution or {})),
+        )
+    # Any other status is reported as a failure.
+    return GetTaskResultResponse(
+        errorId=1,
+        errorCode=task.error_code or "ERROR_CAPTCHA_UNSOLVABLE",
+        errorDescription=task.error_description,
+    )
+@router.post("/getBalance", response_model=GetBalanceResponse)
+async def get_balance(request: GetBalanceRequest) -> GetBalanceResponse:
+    """Return a fixed balance of 99999.0, or errorId=1 with balance 0 for a wrong key."""
+    import hmac
+    expected_key = config.client_key
+    # Constant-time comparison (on bytes, since compare_digest rejects
+    # non-ASCII str) so timing does not leak key prefixes.
+    if expected_key and not hmac.compare_digest(
+        request.clientKey.encode("utf-8"), expected_key.encode("utf-8")
+    ):
+        return GetBalanceResponse(errorId=1, balance=0)
+    return GetBalanceResponse(errorId=0, balance=99999.0)
+# Health endpoint: reports registered task types and a few config values.
+@router.get("/api/v1/health")
+async def health() -> dict[str, object]:
+    payload: dict[str, object] = {"status": "ok"}
+    payload["supported_task_types"] = task_manager.supported_types()
+    payload["browser_headless"] = config.browser_headless
+    payload["cloud_model"] = config.cloud_model
+    payload["local_model"] = config.local_model
+    return payload

src/core/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Core application helpers."""

src/core/config.py ADDED Viewed

	@@ -0,0 +1,106 @@

+"""Environment-driven application configuration.
+Two model backends are supported:
+  Cloud model  — a remote OpenAI-compatible API (e.g. gpt-5.4 via a hosted
+                 endpoint).  Used as the powerful multimodal backbone for
+                 tasks like audio transcription.
+  Local model  — a self-hosted model served via SGLang, vLLM, or any
+                 OpenAI-compatible server (e.g. Qwen3.5-2B on localhost).
+                 Used for high-throughput image recognition / classification.
+Both backends expose ``/v1/chat/completions``; the only difference is the
+base URL, API key, and model name.
+"""
+from __future__ import annotations
+import os
+from dataclasses import dataclass
+@dataclass(frozen=True)
+class Config:
+    """Immutable application settings; populated from the environment by ``load_config``."""
+    # Host and port the HTTP server binds to.
+    server_host: str
+    server_port: int
+    # Auth: YesCaptcha clientKey (None disables the key check in the routes)
+    client_key: str | None
+    # ── Cloud model (remote API) ──
+    cloud_base_url: str
+    cloud_api_key: str
+    cloud_model: str
+    # ── Local model (self-hosted via SGLang / vLLM) ──
+    local_base_url: str
+    local_api_key: str
+    local_model: str
+    # Number of attempts solvers make before giving up.
+    captcha_retries: int
+    # NOTE(review): not referenced by the code visible here; presumably seconds — confirm.
+    captcha_timeout: int
+    # Playwright browser
+    browser_headless: bool
+    browser_timeout: int  # seconds
+    # ── Convenience aliases (backward-compat) ──
+    @property
+    def captcha_base_url(self) -> str:
+        """Alias for ``cloud_base_url``."""
+        return self.cloud_base_url
+    @property
+    def captcha_api_key(self) -> str:
+        """Alias for ``cloud_api_key``."""
+        return self.cloud_api_key
+    @property
+    def captcha_model(self) -> str:
+        """Alias for ``cloud_model``."""
+        return self.cloud_model
+    @property
+    def captcha_multimodal_model(self) -> str:
+        """Alias for ``local_model`` (note: the local model, not the cloud one)."""
+        return self.local_model
+def _env_first(names: tuple[str, ...], fallback: str) -> str:
+    """Return the value of the first variable in *names* that is set, else *fallback*."""
+    for name in names:
+        if name in os.environ:
+            return os.environ[name]
+    return fallback
+def load_config() -> Config:
+    """Build the application ``Config`` from environment variables.
+
+    The cloud and local model settings each read their own variable first
+    and then fall back to the legacy ``CAPTCHA_*`` name before using the
+    built-in default.  Integer settings raise ``ValueError`` when the
+    variable is set to something ``int()`` cannot parse.
+    """
+    env = os.environ
+    headless_flag = env.get("BROWSER_HEADLESS", "true").strip().lower()
+    return Config(
+        server_host=env.get("SERVER_HOST", "0.0.0.0"),
+        server_port=int(env.get("SERVER_PORT", "8000")),
+        # A blank CLIENT_KEY is treated the same as an unset one.
+        client_key=env.get("CLIENT_KEY", "").strip() or None,
+        # Cloud model
+        cloud_base_url=_env_first(
+            ("CLOUD_BASE_URL", "CAPTCHA_BASE_URL"),
+            "https://your-openai-compatible-endpoint/v1",
+        ),
+        cloud_api_key=_env_first(("CLOUD_API_KEY", "CAPTCHA_API_KEY"), ""),
+        cloud_model=_env_first(("CLOUD_MODEL", "CAPTCHA_MODEL"), "gpt-5.4"),
+        # Local model
+        local_base_url=_env_first(
+            ("LOCAL_BASE_URL", "CAPTCHA_BASE_URL"),
+            "http://localhost:30000/v1",
+        ),
+        local_api_key=_env_first(("LOCAL_API_KEY", "CAPTCHA_API_KEY"), "EMPTY"),
+        local_model=_env_first(
+            ("LOCAL_MODEL", "CAPTCHA_MULTIMODAL_MODEL"),
+            "Qwen/Qwen3.5-2B",
+        ),
+        captcha_retries=int(env.get("CAPTCHA_RETRIES", "3")),
+        captcha_timeout=int(env.get("CAPTCHA_TIMEOUT", "30")),
+        browser_headless=headless_flag in {"1", "true", "yes"},
+        browser_timeout=int(env.get("BROWSER_TIMEOUT", "30")),
+    )
+# Module-level singleton, built once at import time.
+config = load_config()

src/main.py ADDED Viewed

	@@ -0,0 +1,132 @@

+"""FastAPI application with Playwright lifecycle management."""
+from __future__ import annotations
+import logging
+from contextlib import asynccontextmanager
+from typing import AsyncIterator
+from fastapi import FastAPI
+from .api.routes import router
+from .core.config import config
+from .services.classification import ClassificationSolver
+from .services.hcaptcha import HCaptchaSolver
+from .services.recognition import CaptchaRecognizer
+from .services.recaptcha_v2 import RecaptchaV2Solver
+from .services.recaptcha_v3 import RecaptchaV3Solver
+from .services.task_manager import task_manager
+from .services.turnstile import TurnstileSolver
+# Configure root logging once at import time (INFO level, timestamped lines).
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
+)
+log = logging.getLogger(__name__)
+# Task-type names grouped by the solver that lifespan() registers for them.
+_RECAPTCHA_V3_TYPES = [
+    "RecaptchaV3TaskProxyless",
+    "RecaptchaV3TaskProxylessM1",
+    "RecaptchaV3TaskProxylessM1S7",
+    "RecaptchaV3TaskProxylessM1S9",
+    "RecaptchaV3EnterpriseTask",
+    "RecaptchaV3EnterpriseTaskM1",
+]
+_RECAPTCHA_V2_TYPES = [
+    "NoCaptchaTaskProxyless",
+    "RecaptchaV2TaskProxyless",
+    "RecaptchaV2EnterpriseTaskProxyless",
+]
+_HCAPTCHA_TYPES = [
+    "HCaptchaTaskProxyless",
+]
+_TURNSTILE_TYPES = [
+    "TurnstileTaskProxyless",
+    "TurnstileTaskProxylessM1",
+]
+_CLASSIFICATION_TYPES = [
+    "HCaptchaClassification",
+    "ReCaptchaV2Classification",
+    "FunCaptchaClassification",
+    "AwsClassification",
+]
+_IMAGE_TEXT_TYPES = [
+    "ImageToTextTask",
+    "ImageToTextTaskMuggle",
+    "ImageToTextTaskM1",
+]
+@asynccontextmanager
+async def lifespan(app: FastAPI) -> AsyncIterator[None]:
+    """Start the solvers, register them with the task manager, and stop them on exit.
+
+    Shutdown runs in a ``finally`` block, so solvers that were already
+    created are stopped even if a later solver fails to start or an
+    exception is thrown into the context at ``yield``.  A failure while
+    stopping one solver is logged and does not prevent the others from
+    being stopped.
+
+    Args:
+        app: The FastAPI application (unused; required by the lifespan protocol).
+    """
+    # (log label, solver class, task types) for solvers with start()/stop();
+    # the order matches both registration and shutdown order.
+    browser_backed = (
+        ("reCAPTCHA v3 solver", RecaptchaV3Solver, _RECAPTCHA_V3_TYPES),
+        ("reCAPTCHA v2 solver", RecaptchaV2Solver, _RECAPTCHA_V2_TYPES),
+        ("hCaptcha solver", HCaptchaSolver, _HCAPTCHA_TYPES),
+        ("Turnstile solver", TurnstileSolver, _TURNSTILE_TYPES),
+    )
+    # Solvers that are only constructed and registered (never started/stopped here).
+    register_only = (
+        ("image captcha recognizer", CaptchaRecognizer, _IMAGE_TEXT_TYPES),
+        ("classification solver", ClassificationSolver, _CLASSIFICATION_TYPES),
+    )
+    to_stop = []
+    try:
+        # ── startup ──
+        for label, solver_cls, task_types in browser_backed:
+            solver = solver_cls(config)
+            # Track before start() so a partially started solver is still stopped.
+            to_stop.append((label, solver))
+            await solver.start()
+            for task_type in task_types:
+                task_manager.register_solver(task_type, solver)
+            log.info("Registered %s for types: %s", label, task_types)
+        for label, solver_cls, task_types in register_only:
+            solver = solver_cls(config)
+            for task_type in task_types:
+                task_manager.register_solver(task_type, solver)
+            log.info("Registered %s for types: %s", label, task_types)
+        yield
+    finally:
+        # ── shutdown ──
+        for label, solver in to_stop:
+            try:
+                await solver.stop()
+            except Exception:
+                # Keep going so one bad stop() does not leak the remaining browsers.
+                log.exception("Failed to stop %s", label)
+# The ASGI application; lifespan() manages solver startup and shutdown.
+app = FastAPI(
+    title="Captcha Solver Service",
+    version="3.0.0",
+    description="YesCaptcha-compatible captcha solving service for flow2api.",
+    lifespan=lifespan,
+)
+# Mount the createTask / getTaskResult / getBalance / health routes.
+app.include_router(router)
+# Index endpoint: service identity, route map, and registered task types.
+@app.get("/")
+async def root() -> dict[str, object]:
+    route_map = {
+        "createTask": "/createTask",
+        "getTaskResult": "/getTaskResult",
+        "getBalance": "/getBalance",
+        "health": "/api/v1/health",
+    }
+    info: dict[str, object] = {"service": "captcha-solver", "version": "3.0.0"}
+    info["endpoints"] = route_map
+    info["supported_task_types"] = task_manager.supported_types()
+    return info

src/models/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Pydantic API models."""

src/models/task.py ADDED Viewed

	@@ -0,0 +1,70 @@

+"""YesCaptcha / AntiCaptcha compatible API models."""
+from __future__ import annotations
+from pydantic import BaseModel, Field
+# ── createTask ──────────────────────────────────────────────
+# The ``task`` payload of a createTask request. Only ``type`` is mandatory at
+# the model level; the createTask route enforces per-type required fields.
+class TaskObject(BaseModel):
+    type: str
+    # Page URL and site key; required by the route for browser-based task types.
+    websiteURL: str | None = None
+    websiteKey: str | None = None
+    pageAction: str | None = None
+    minScore: float | None = None
+    isInvisible: bool | None = None
+    # Image captcha / classification fields
+    body: str | None = None
+    image: str | None = None
+    images: list[str] | None = None
+    question: str | None = None
+    # Either question text (str) or a list of base64 images, depending on the caller.
+    queries: list[str] | str | None = None
+    project_name: str | None = None
+# Request body for POST /createTask.
+class CreateTaskRequest(BaseModel):
+    clientKey: str
+    task: TaskObject
+# Response body for POST /createTask: errorId 0 with a taskId on success,
+# errorId 1 with errorCode / errorDescription on failure.
+class CreateTaskResponse(BaseModel):
+    errorId: int = 0
+    taskId: str | None = None
+    errorCode: str | None = None
+    errorDescription: str | None = None
+# ── getTaskResult ───────────────────────────────────────────
+# Request body for POST /getTaskResult.
+class GetTaskResultRequest(BaseModel):
+    clientKey: str
+    taskId: str
+# Union of the solution fields the different solvers can return; each solver
+# fills in only the fields relevant to its task type.
+class SolutionObject(BaseModel):
+    gRecaptchaResponse: str | None = None
+    text: str | None = None
+    token: str | None = None
+    objects: list[int] | None = None
+    answer: bool | list[int] | None = None
+    userAgent: str | None = None
+# Response body for POST /getTaskResult. The route sets ``status`` to
+# "processing" or "ready"; ``solution`` is only set when ready.
+class GetTaskResultResponse(BaseModel):
+    errorId: int = 0
+    status: str | None = None
+    solution: SolutionObject | None = None
+    errorCode: str | None = None
+    errorDescription: str | None = None
+# ── getBalance ──────────────────────────────────────────────
+# Request body for POST /getBalance.
+class GetBalanceRequest(BaseModel):
+    clientKey: str
+# Response body for POST /getBalance; the balance defaults to a fixed placeholder value.
+class GetBalanceResponse(BaseModel):
+    errorId: int = 0
+    balance: float = 99999.0

src/services/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Service layer package."""

src/services/classification.py ADDED Viewed

	@@ -0,0 +1,203 @@

+"""Image classification solvers for various captcha types.
+Supports HCaptchaClassification, ReCaptchaV2Classification,
+FunCaptchaClassification, and AwsClassification task types.
+All classification tasks send images + question text to an OpenAI-compatible
+vision model for analysis and return structured coordinate/index results.
+"""
+from __future__ import annotations
+import base64
+import io
+import json
+import logging
+import re
+from typing import Any
+from openai import AsyncOpenAI
+from PIL import Image
+from ..core.config import Config
+log = logging.getLogger(__name__)
+# System prompts, one per classification task type. Each instructs the model
+# to reply with a bare JSON object.
+# HCaptcha: answers under the "answer" key (bool for one image, index list for a grid).
+HCAPTCHA_SYSTEM_PROMPT = """\
+You are an image classification assistant for HCaptcha challenges.
+Given a question and one or more base64-encoded images, determine which images match the question.
+Return STRICT JSON only. No markdown, no extra text.
+For single-image questions (is this image X?):
+{"answer": true}  or  {"answer": false}
+For multi-image grid questions (select all images containing X):
+{"answer": [0, 2, 5]}
+where numbers are 0-indexed positions of matching images.
+Rules:
+- Return ONLY the JSON object, nothing else.
+- Be precise with your classification.
+"""
+# reCAPTCHA v2: matching grid cells under the "objects" key.
+RECAPTCHA_V2_SYSTEM_PROMPT = """\
+You are an image classification assistant for reCAPTCHA v2 challenges.
+Given a question and a grid image (3x3 or 4x4), identify which cells match the question.
+The image cells are numbered 0-8 (3x3) or 0-15 (4x4), left-to-right, top-to-bottom.
+Return STRICT JSON only:
+{"objects": [0, 3, 6]}
+where numbers are 0-indexed positions of matching cells.
+Rules:
+- Return ONLY the JSON object, nothing else.
+- If no cells match, return {"objects": []}.
+"""
+# FunCaptcha: the chosen cell under the "objects" key.
+FUNCAPTCHA_SYSTEM_PROMPT = """\
+You are an image classification assistant for FunCaptcha challenges.
+Given a question and a grid image (typically 2x3 = 6 cells), identify which cell
+is the correct answer.
+Cells are numbered 0-5, left-to-right, top-to-bottom.
+Return STRICT JSON only:
+{"objects": [3]}
+where the number is the 0-indexed position of the correct cell.
+Rules:
+- Return ONLY the JSON object, nothing else.
+- Usually only one cell is correct.
+"""
+# AWS CAPTCHA: the matching image index under the "objects" key.
+AWS_SYSTEM_PROMPT = """\
+You are an image classification assistant for AWS CAPTCHA challenges.
+Given a question and one or more images, identify the correct answer.
+Return STRICT JSON only:
+{"objects": [1]}
+where the number is the 0-indexed position of the matching image.
+Rules:
+- Return ONLY the JSON object, nothing else.
+"""
+class ClassificationSolver:
+    """Solves image classification captchas using a vision model.
+
+    Images and the question text are sent to the configured local
+    OpenAI-compatible endpoint; the model's JSON reply is returned as the
+    task solution.
+    """
+    def __init__(self, config: Config) -> None:
+        """Create the API client from the local-model settings in *config*."""
+        self._config = config
+        self._client = AsyncOpenAI(
+            base_url=config.local_base_url,
+            api_key=config.local_api_key,
+        )
+    async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Classify the images in *params* and return the parsed model answer.
+
+        Args:
+            params: Task fields; reads ``type``, ``question``, ``queries``,
+                ``image``, ``images`` and ``body``.
+
+        Returns:
+            The JSON object produced by the model.
+
+        Raises:
+            ValueError: If no image data can be found in *params*.
+            RuntimeError: If every classification attempt fails.
+        """
+        system_prompt = self._get_system_prompt(params.get("type", ""))
+        question = self._extract_question(params)
+        # Handle different image field names across task types
+        images = self._extract_images(params)
+        if not images:
+            raise ValueError("No image data provided")
+        return await self._classify(system_prompt, question, images)
+    @staticmethod
+    def _get_system_prompt(task_type: str) -> str:
+        """Return the system prompt for *task_type* (reCAPTCHA v2 prompt if unknown)."""
+        prompts = {
+            "HCaptchaClassification": HCAPTCHA_SYSTEM_PROMPT,
+            "ReCaptchaV2Classification": RECAPTCHA_V2_SYSTEM_PROMPT,
+            "FunCaptchaClassification": FUNCAPTCHA_SYSTEM_PROMPT,
+            "AwsClassification": AWS_SYSTEM_PROMPT,
+        }
+        return prompts.get(task_type, RECAPTCHA_V2_SYSTEM_PROMPT)
+    @staticmethod
+    def _extract_question(params: dict[str, Any]) -> str:
+        """Return the question text, or ``""`` when none is supplied.
+
+        ``question`` wins; a string-valued ``queries`` is the fallback.  A
+        list-valued ``queries`` holds images (see ``_extract_images``) and
+        must not be sent to the model as the text prompt.
+        """
+        question = params.get("question")
+        if isinstance(question, str) and question:
+            return question
+        queries = params.get("queries")
+        if isinstance(queries, str):
+            return queries
+        return ""
+    @staticmethod
+    def _extract_images(params: dict[str, Any]) -> list[str]:
+        """Extract base64 image(s) from various param formats.
+
+        Order is ``image``, then ``images``, then ``body`` (only when the
+        first two yielded nothing), then a list-valued ``queries``.  Empty
+        and non-string entries are skipped.
+        """
+        images: list[str] = []
+        def _add(candidate: Any) -> None:
+            # Accept either a single string or a list of strings.
+            if isinstance(candidate, str):
+                candidate = [candidate]
+            if isinstance(candidate, list):
+                images.extend(
+                    item for item in candidate if isinstance(item, str) and item
+                )
+        _add(params.get("image"))
+        _add(params.get("images"))
+        if not images:
+            _add(params.get("body"))
+        # HCaptcha queries format: list of base64 strings
+        queries = params.get("queries")
+        if isinstance(queries, list):
+            _add(queries)
+        return images
+    @staticmethod
+    def _prepare_image(b64_data: str) -> str:
+        """Ensure image is properly formatted as a data URL.
+
+        Data URLs are returned unchanged.  Otherwise the payload is decoded
+        only to detect its format for the MIME type.
+        """
+        if b64_data.startswith("data:image"):
+            return b64_data
+        try:
+            img_bytes = base64.b64decode(b64_data)
+            img = Image.open(io.BytesIO(img_bytes))
+            fmt = img.format or "PNG"
+            mime = f"image/{fmt.lower()}"
+            return f"data:{mime};base64,{b64_data}"
+        except Exception:
+            # Deliberate best-effort: if the format cannot be detected, assume PNG.
+            return f"data:image/png;base64,{b64_data}"
+    async def _classify(
+        self, system_prompt: str, question: str, images: list[str]
+    ) -> dict[str, Any]:
+        """Send the images and question to the model, retrying on any failure.
+
+        Raises:
+            RuntimeError: After ``captcha_retries`` failed attempts; the last
+                underlying error is attached as the cause.
+        """
+        content: list[dict[str, Any]] = [
+            {
+                "type": "image_url",
+                "image_url": {"url": self._prepare_image(img_b64), "detail": "high"},
+            }
+            for img_b64 in images
+        ]
+        user_text = question if question else "Classify this captcha image."
+        content.append({"type": "text", "text": user_text})
+        last_error: Exception | None = None
+        for attempt in range(self._config.captcha_retries):
+            try:
+                response = await self._client.chat.completions.create(
+                    model=self._config.captcha_multimodal_model,
+                    temperature=0.05,
+                    max_tokens=512,
+                    messages=[
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": content},
+                    ],
+                )
+                raw = response.choices[0].message.content or ""
+                return self._parse_json(raw)
+            except Exception as exc:
+                # Covers API errors and unparseable replies alike; try again.
+                last_error = exc
+                log.warning("Classification attempt %d failed: %s", attempt + 1, exc)
+        raise RuntimeError(
+            f"Classification failed after {self._config.captcha_retries} attempts: {last_error}"
+        ) from last_error
+    @staticmethod
+    def _parse_json(text: str) -> dict[str, Any]:
+        """Parse the model reply into a JSON object.
+
+        Accepts bare JSON, JSON inside a Markdown code fence, or a JSON
+        object surrounded by extra prose.
+
+        Raises:
+            ValueError: If no JSON can be parsed or the result is not an object.
+        """
+        match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
+        cleaned = match.group(1) if match else text.strip()
+        try:
+            data = json.loads(cleaned)
+        except json.JSONDecodeError:
+            # Fall back to the outermost {...} span when the model added prose.
+            start, end = cleaned.find("{"), cleaned.rfind("}")
+            if start == -1 or end <= start:
+                raise
+            data = json.loads(cleaned[start : end + 1])
+        if not isinstance(data, dict):
+            raise ValueError(f"Expected JSON object, got {type(data).__name__}")
+        return data

src/services/hcaptcha.py ADDED Viewed

	@@ -0,0 +1,142 @@

+"""HCaptcha solver using Playwright browser automation.
+Supports HCaptchaTaskProxyless task type.
+Visits the target page, interacts with the hCaptcha widget, and extracts the response token.
+"""
+from __future__ import annotations
+import asyncio
+import logging
+from typing import Any
+from playwright.async_api import Browser, Playwright, async_playwright
+from ..core.config import Config
+log = logging.getLogger(__name__)
+# Init script added to each page before navigation; overrides
+# navigator.webdriver / languages / plugins and defines window.chrome.
+_STEALTH_JS = """
+Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
+Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
+Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
+window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
+"""
+# Page-side function returning the hCaptcha response token, or null if none
+# longer than 20 characters is available yet. Checks the response textarea
+# first, then the hcaptcha.getResponse() API.
+_EXTRACT_HCAPTCHA_TOKEN_JS = """
+() => {
+    const textarea = document.querySelector('[name="h-captcha-response"]')
+        || document.querySelector('[name="g-recaptcha-response"]');
+    if (textarea && textarea.value && textarea.value.length > 20) {
+        return textarea.value;
+    }
+    if (window.hcaptcha && typeof window.hcaptcha.getResponse === 'function') {
+        const resp = window.hcaptcha.getResponse();
+        if (resp && resp.length > 20) return resp;
+    }
+    return null;
+}
+"""
+class HCaptchaSolver:
+    """Solves HCaptchaTaskProxyless tasks via Playwright-driven Chromium.
+
+    The solver either launches and owns its own browser (``start`` /
+    ``stop``) or borrows a ``Browser`` passed to the constructor, in which
+    case ``stop`` leaves that browser open.
+    """
+    def __init__(self, config: Config, browser: Browser | None = None) -> None:
+        """Store *config* and, optionally, an externally managed *browser*."""
+        self._config = config
+        self._playwright: Playwright | None = None
+        self._browser: Browser | None = browser
+        # Only close the browser in stop() when this solver launched it.
+        self._owns_browser = browser is None
+    async def start(self) -> None:
+        """Launch Chromium unless a browser is already available."""
+        if self._browser is not None:
+            return
+        self._playwright = await async_playwright().start()
+        try:
+            self._browser = await self._playwright.chromium.launch(
+                headless=self._config.browser_headless,
+                args=[
+                    "--disable-blink-features=AutomationControlled",
+                    "--no-sandbox",
+                    "--disable-dev-shm-usage",
+                    "--disable-gpu",
+                ],
+            )
+        except Exception:
+            # Do not leave the Playwright driver running if the launch fails.
+            await self._playwright.stop()
+            self._playwright = None
+            raise
+        log.info("HCaptchaSolver browser started")
+    async def stop(self) -> None:
+        """Close the browser and Playwright driver if this solver owns them."""
+        if self._owns_browser:
+            if self._browser:
+                await self._browser.close()
+                self._browser = None
+            if self._playwright:
+                await self._playwright.stop()
+                self._playwright = None
+        log.info("HCaptchaSolver stopped")
+    async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Solve one hCaptcha task, retrying up to ``captcha_retries`` times.
+
+        Args:
+            params: Task fields; ``websiteURL`` and ``websiteKey`` are required.
+
+        Returns:
+            ``{"gRecaptchaResponse": <token>}``.
+
+        Raises:
+            KeyError: If a required field is missing from *params*.
+            RuntimeError: If every attempt fails; the last error is the cause.
+        """
+        website_url = params["websiteURL"]
+        website_key = params["websiteKey"]
+        retries = self._config.captcha_retries
+        last_error: Exception | None = None
+        for attempt in range(retries):
+            try:
+                token = await self._solve_once(website_url, website_key)
+                return {"gRecaptchaResponse": token}
+            except Exception as exc:
+                last_error = exc
+                log.warning(
+                    "HCaptcha attempt %d/%d failed: %s",
+                    attempt + 1,
+                    retries,
+                    exc,
+                )
+                # Pause between attempts, but not after the final one.
+                if attempt < retries - 1:
+                    await asyncio.sleep(2)
+        raise RuntimeError(
+            f"HCaptcha failed after {retries} attempts: {last_error}"
+        ) from last_error
+    async def _solve_once(self, website_url: str, website_key: str) -> str:
+        """Run a single solve attempt in a fresh browser context.
+
+        *website_key* is accepted for interface symmetry but is not used:
+        the widget already present on the page is clicked.
+
+        Raises:
+            RuntimeError: If the browser was never started or no token
+                appears within the polling window.
+        """
+        if self._browser is None:
+            # Explicit check instead of assert, which is stripped under -O.
+            raise RuntimeError("HCaptchaSolver.start() must be called before solving")
+        context = await self._browser.new_context(
+            user_agent=(
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/131.0.0.0 Safari/537.36"
+            ),
+            viewport={"width": 1920, "height": 1080},
+            locale="en-US",
+        )
+        try:
+            # Page setup is inside the try so the context is closed on any failure.
+            page = await context.new_page()
+            await page.add_init_script(_STEALTH_JS)
+            timeout_ms = self._config.browser_timeout * 1000
+            await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms)
+            await page.mouse.move(400, 300)
+            await asyncio.sleep(1)
+            # Click only the checkbox iframe — match by specific title to avoid the challenge iframe
+            iframe_element = page.frame_locator(
+                'iframe[title="Widget containing checkbox for hCaptcha security challenge"]'
+            )
+            checkbox = iframe_element.locator("#checkbox")
+            await checkbox.click(timeout=10_000)
+            # Wait for token — may require challenge completion; poll up to 30s
+            token: str | None = None
+            for _ in range(6):
+                await asyncio.sleep(5)
+                candidate = await page.evaluate(_EXTRACT_HCAPTCHA_TOKEN_JS)
+                if isinstance(candidate, str) and len(candidate) > 20:
+                    token = candidate
+                    break
+            if token is None:
+                raise RuntimeError(f"Invalid hCaptcha token: {token!r}")
+            log.info("Got hCaptcha token (len=%d)", len(token))
+            return token
+        finally:
+            await context.close()

src/services/recaptcha_v2.py ADDED Viewed

	@@ -0,0 +1,278 @@

+"""reCAPTCHA v2 solver using Playwright browser automation.
+Supports NoCaptchaTaskProxyless, RecaptchaV2TaskProxyless,
+and RecaptchaV2EnterpriseTaskProxyless task types.
+Strategy:
+  1. Visit the target page with a realistic browser context.
+  2. Click the reCAPTCHA checkbox.
+  3. If the challenge dialog appears (bot detected), switch to the audio
+     challenge, download the audio file, transcribe it via the configured
+     speech-to-text model, and submit the text.
+  4. Extract the gRecaptchaResponse token.
+"""
+from __future__ import annotations
+import asyncio
+import logging
+from typing import Any
+import httpx
+from playwright.async_api import Browser, Playwright, async_playwright
+from ..core.config import Config
+log = logging.getLogger(__name__)
+# Init script added to each page before navigation; overrides
+# navigator.webdriver / languages / plugins and defines window.chrome.
+_STEALTH_JS = """
+Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
+Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
+Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
+window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
+"""
+# Page-side function returning the reCAPTCHA response token, or null if none
+# longer than 20 characters is available yet. Checks the response textarea
+# first, then grecaptcha(.enterprise).getResponse().
+_EXTRACT_TOKEN_JS = """
+() => {
+    const textarea = document.querySelector('#g-recaptcha-response')
+        || document.querySelector('[name="g-recaptcha-response"]');
+    if (textarea && textarea.value && textarea.value.length > 20) {
+        return textarea.value;
+    }
+    const gr = window.grecaptcha?.enterprise || window.grecaptcha;
+    if (gr && typeof gr.getResponse === 'function') {
+        const resp = gr.getResponse();
+        if (resp && resp.length > 20) return resp;
+    }
+    return null;
+}
+"""
+class RecaptchaV2Solver:
+    """Solves reCAPTCHA v2 tasks via headless Chromium with checkbox clicking.
+    Falls back to the audio challenge path when Google presents a visual
+    challenge to the headless browser.
+    """
+    def __init__(self, config: Config, browser: Browser | None = None) -> None:
+        """Store *config* and, optionally, an externally managed *browser*."""
+        self._config = config
+        self._playwright: Playwright | None = None
+        self._browser: Browser | None = browser
+        # True when no browser was supplied; stop() only closes an owned browser.
+        self._owns_browser = browser is None
+    async def start(self) -> None:
+        """Launch Chromium unless a browser is already available.
+
+        If the browser launch fails, the Playwright driver that was just
+        started is stopped again before the error propagates.
+        """
+        if self._browser is not None:
+            return
+        self._playwright = await async_playwright().start()
+        try:
+            self._browser = await self._playwright.chromium.launch(
+                headless=self._config.browser_headless,
+                args=[
+                    "--disable-blink-features=AutomationControlled",
+                    "--no-sandbox",
+                    "--disable-dev-shm-usage",
+                    "--disable-gpu",
+                ],
+            )
+        except Exception:
+            # Do not leave the Playwright driver running if the launch fails.
+            await self._playwright.stop()
+            self._playwright = None
+            raise
+        log.info("RecaptchaV2Solver browser started")
+    async def stop(self) -> None:
+        """Close the browser and Playwright driver when this solver owns them."""
+        if self._owns_browser:
+            browser, driver = self._browser, self._playwright
+            if browser:
+                await browser.close()
+            if driver:
+                await driver.stop()
+        log.info("RecaptchaV2Solver stopped")
+    async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Solve one reCAPTCHA v2 task, retrying up to ``captcha_retries`` times.
+
+        Args:
+            params: Task fields; ``websiteURL`` and ``websiteKey`` are
+                required, ``isInvisible`` is optional (default ``False``).
+
+        Returns:
+            ``{"gRecaptchaResponse": <token>}``.
+
+        Raises:
+            KeyError: If a required field is missing from *params*.
+            RuntimeError: If every attempt fails; the last error is the cause.
+        """
+        website_url = params["websiteURL"]
+        website_key = params["websiteKey"]
+        is_invisible = params.get("isInvisible", False)
+        retries = self._config.captcha_retries
+        last_error: Exception | None = None
+        for attempt in range(retries):
+            try:
+                token = await self._solve_once(website_url, website_key, is_invisible)
+                return {"gRecaptchaResponse": token}
+            except Exception as exc:
+                last_error = exc
+                log.warning(
+                    "reCAPTCHA v2 attempt %d/%d failed: %s",
+                    attempt + 1,
+                    retries,
+                    exc,
+                )
+                # Pause between attempts, but not after the final one.
+                if attempt < retries - 1:
+                    await asyncio.sleep(2)
+        raise RuntimeError(
+            f"reCAPTCHA v2 failed after {retries} attempts: {last_error}"
+        ) from last_error
+    async def _solve_once(
+        self, website_url: str, website_key: str, is_invisible: bool
+    ) -> str:
+        assert self._browser is not None
+        context = await self._browser.new_context(
+            user_agent=(
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/131.0.0.0 Safari/537.36"
+            ),
+            viewport={"width": 1920, "height": 1080},
+            locale="en-US",
+        )
+        page = await context.new_page()
+        await page.add_init_script(_STEALTH_JS)
+        try:
+            timeout_ms = self._config.browser_timeout * 1000
+            await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms)
+            await page.mouse.move(400, 300)
+            await asyncio.sleep(0.5)
+            if is_invisible:
+                token = await page.evaluate(
+                    """
+                    ([key]) => new Promise((resolve, reject) => {
+                        const gr = window.grecaptcha?.enterprise || window.grecaptcha;
+                        if (!gr) { reject(new Error('grecaptcha not found')); return; }
+                        gr.ready(() => {
+                            gr.execute(key).then(resolve).catch(reject);
+                        });
+                    })
+                    """,
+                    [website_key],
+                )
+            else:
+                token = await self._solve_checkbox(page)
+            if not isinstance(token, str) or len(token) < 20:
+                raise RuntimeError(f"Invalid reCAPTCHA v2 token: {token!r}")
+            log.info("Got reCAPTCHA v2 token (len=%d)", len(token))
+            return token
+        finally:
+            await context.close()
+    async def _solve_checkbox(self, page: Any) -> str | None:
+        """Click the reCAPTCHA checkbox. If a visual challenge appears, try audio path."""
+        # The checkbox iframe always has title="reCAPTCHA"
+        checkbox_frame = page.frame_locator('iframe[title="reCAPTCHA"]').first
+        checkbox = checkbox_frame.locator("#recaptcha-anchor")
+        await checkbox.click(timeout=10_000)
+        await asyncio.sleep(2)
+        # Check if token was issued immediately (low-risk sessions)
+        token = await page.evaluate(_EXTRACT_TOKEN_JS)
+        if isinstance(token, str) and len(token) > 20:
+            return token
+        # Challenge dialog appeared — try audio challenge path
+        log.info("reCAPTCHA challenge detected, attempting audio path")
+        try:
+            token = await self._solve_audio_challenge(page)
+        except Exception as exc:
+            log.warning("Audio challenge path failed: %s", exc)
+            token = None
+        return token
+    async def _solve_audio_challenge(self, page: Any) -> str | None:
+        """Click the audio button in the bframe and transcribe the audio."""
+        # The challenge bframe has title containing "recaptcha challenge"
+        bframe = page.frame_locator('iframe[title*="recaptcha challenge"]')
+        # Click the audio challenge button
+        audio_btn = bframe.locator("#recaptcha-audio-button")
+        await audio_btn.click(timeout=8_000)
+        # Wait for the audio challenge iframe to load its content
+        await asyncio.sleep(3)
+        # After clicking audio, a new bframe is rendered with the audio player
+        bframe = page.frame_locator('iframe[title*="recaptcha challenge"]')
+        # Get the audio source URL — try multiple selectors
+        audio_src = None
+        for selector in [
+            ".rc-audiochallenge-tdownload-link",
+            "a[href*='.mp3']",
+            "audio source",
+        ]:
+            try:
+                element = bframe.locator(selector).first
+                audio_src = await element.get_attribute("href", timeout=5_000) or await element.get_attribute("src", timeout=1_000)
+                if audio_src:
+                    break
+            except Exception:
+                continue
+        if not audio_src:
+            raise RuntimeError("Could not find audio challenge download link")
+        # Download the audio file
+        async with httpx.AsyncClient(timeout=30) as client:
+            resp = await client.get(audio_src)
+            resp.raise_for_status()
+            audio_bytes = resp.content
+        # Transcribe via the vision/language model (base64 audio → text)
+        transcript = await self._transcribe_audio(audio_bytes)
+        log.info("Audio transcribed: %r", transcript[:40] if transcript else None)
+        if not transcript:
+            raise RuntimeError("Audio transcription returned empty result")
+        # Submit the transcript
+        audio_input = bframe.locator("#audio-response")
+        await audio_input.fill(transcript.strip().lower())
+        verify_btn = bframe.locator("#recaptcha-verify-button")
+        await verify_btn.click(timeout=8_000)
+        await asyncio.sleep(2)
+        return await page.evaluate(_EXTRACT_TOKEN_JS)
+    async def _transcribe_audio(self, audio_bytes: bytes) -> str | None:
+        """Send audio bytes to the OpenAI-compatible audio transcription endpoint."""
+        import base64
+        audio_b64 = base64.b64encode(audio_bytes).decode()
+        payload = {
+            "model": self._config.captcha_model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": (
+                                "This is a reCAPTCHA audio challenge. "
+                                "The audio contains spoken digits or words. "
+                                "Transcribe exactly what is spoken, digits only, "
+                                "separated by spaces. Reply with only the transcription."
+                            ),
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": f"data:audio/mp3;base64,{audio_b64}"},
+                        },
+                    ],
+                }
+            ],
+            "max_tokens": 50,
+            "temperature": 0,
+        }
+        async with httpx.AsyncClient(timeout=30) as client:
+            resp = await client.post(
+                f"{self._config.captcha_base_url}/chat/completions",
+                headers={"Authorization": f"Bearer {self._config.captcha_api_key}"},
+                json=payload,
+            )
+            if resp.status_code != 200:
+                raise RuntimeError(f"Transcription API error {resp.status_code}: {resp.text[:200]}")
+            data = resp.json()
+            return data["choices"][0]["message"]["content"].strip()

src/services/recaptcha_v3.py ADDED Viewed

	@@ -0,0 +1,160 @@

+"""reCAPTCHA v3 solver using Playwright browser automation."""
+from __future__ import annotations
+import asyncio
+import logging
+from typing import Any
+from playwright.async_api import Browser, Playwright, async_playwright
+from ..core.config import Config
+log = logging.getLogger(__name__)
+# JavaScript evaluated in the page to obtain a reCAPTCHA v3 token.
+# It receives [siteKey, action] as one array argument and resolves with the
+# value produced by grecaptcha.execute(). window.grecaptcha.enterprise is
+# preferred when present, otherwise window.grecaptcha is used. When neither
+# exposes execute(), the standard api.js script is injected with
+# render=<siteKey> and execute() is called once it has loaded.
+_EXECUTE_JS = """
+([key, action]) => new Promise((resolve, reject) => {
+    const gr = window.grecaptcha?.enterprise || window.grecaptcha;
+    if (gr && typeof gr.execute === 'function') {
+        gr.ready(() => {
+            gr.execute(key, {action}).then(resolve).catch(reject);
+        });
+        return;
+    }
+    // grecaptcha not loaded yet — inject the script ourselves
+    const script = document.createElement('script');
+    script.src = 'https://www.google.com/recaptcha/api.js?render=' + key;
+    script.onerror = () => reject(new Error('Failed to load reCAPTCHA script'));
+    script.onload = () => {
+        const g = window.grecaptcha;
+        if (!g) { reject(new Error('grecaptcha still undefined after script load')); return; }
+        g.ready(() => {
+            g.execute(key, {action}).then(resolve).catch(reject);
+        });
+    };
+    document.head.appendChild(script);
+})
+"""
+# Basic anti-detection init script, registered on each page through
+# page.add_init_script(). It overrides navigator.webdriver (undefined),
+# navigator.languages and navigator.plugins with fixed values and defines a
+# stub window.chrome object.
+_STEALTH_JS = """
+Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
+Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
+Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
+window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
+"""
+class RecaptchaV3Solver:
+    """Solves RecaptchaV3TaskProxyless tasks via headless Chromium."""
+    def __init__(self, config: Config) -> None:
+        self._config = config
+        self._playwright: Playwright | None = None
+        self._browser: Browser | None = None
+    async def start(self) -> None:
+        self._playwright = await async_playwright().start()
+        self._browser = await self._playwright.chromium.launch(
+            headless=self._config.browser_headless,
+            args=[
+                "--disable-blink-features=AutomationControlled",
+                "--no-sandbox",
+                "--disable-dev-shm-usage",
+                "--disable-gpu",
+            ],
+        )
+        log.info(
+            "Playwright browser started (headless=%s)", self._config.browser_headless
+        )
+    async def stop(self) -> None:
+        if self._browser:
+            await self._browser.close()
+        if self._playwright:
+            await self._playwright.stop()
+        log.info("Playwright browser stopped")
+    async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
+        website_url = params["websiteURL"]
+        website_key = params["websiteKey"]
+        page_action = params.get("pageAction", "verify")
+        last_error: Exception | None = None
+        for attempt in range(self._config.captcha_retries):
+            try:
+                token = await self._solve_once(
+                    website_url, website_key, page_action
+                )
+                return {"gRecaptchaResponse": token}
+            except Exception as exc:
+                last_error = exc
+                log.warning(
+                    "Attempt %d/%d failed for %s: %s",
+                    attempt + 1,
+                    self._config.captcha_retries,
+                    website_url,
+                    exc,
+                )
+                if attempt < self._config.captcha_retries - 1:
+                    await asyncio.sleep(2)
+        raise RuntimeError(
+            f"Failed after {self._config.captcha_retries} attempts: {last_error}"
+        )
+    async def _solve_once(
+        self, website_url: str, website_key: str, page_action: str
+    ) -> str:
+        assert self._browser is not None
+        context = await self._browser.new_context(
+            user_agent=(
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/131.0.0.0 Safari/537.36"
+            ),
+            viewport={"width": 1920, "height": 1080},
+            locale="en-US",
+        )
+        page = await context.new_page()
+        await page.add_init_script(_STEALTH_JS)
+        try:
+            timeout_ms = self._config.browser_timeout * 1000
+            await page.goto(
+                website_url, wait_until="networkidle", timeout=timeout_ms
+            )
+            # Simulate minimal human-like behaviour to improve score
+            await page.mouse.move(400, 300)
+            await asyncio.sleep(1)
+            await page.mouse.move(600, 400)
+            await asyncio.sleep(0.5)
+            # Wait for reCAPTCHA to become available (may already be on page)
+            try:
+                await page.wait_for_function(
+                    "(typeof grecaptcha !== 'undefined' && typeof grecaptcha.execute === 'function') "
+                    "|| (typeof grecaptcha !== 'undefined' && typeof grecaptcha?.enterprise?.execute === 'function')",
+                    timeout=10_000,
+                )
+            except Exception:
+                log.info(
+                    "grecaptcha not detected on page, will attempt script injection"
+                )
+            token = await page.evaluate(_EXECUTE_JS, [website_key, page_action])
+            if not isinstance(token, str) or len(token) < 20:
+                raise RuntimeError(f"Invalid token received: {token!r}")
+            log.info(
+                "Got reCAPTCHA token for %s (len=%d)", website_url, len(token)
+            )
+            return token
+        finally:
+            await context.close()

src/services/recognition.py ADDED Viewed

	@@ -0,0 +1,171 @@

+"""Image-based captcha recognition using OpenAI-compatible vision models.
+Inspired by Argus (https://github.com/AmethystDev-Labs/Argus).
+Sends captcha images to a multimodal LLM for analysis.
+Images are resized to 1440x900 for consistent coordinate space.
+"""
+from __future__ import annotations
+import base64
+import io
+import json
+import logging
+import re
+from typing import Any
+from openai import AsyncOpenAI
+from PIL import Image
+from ..core.config import Config
+log = logging.getLogger(__name__)
+# System prompt sent with every recognition request. It fixes the coordinate
+# space to 1440x900 (the size images are resized to before upload) and
+# requires a strict JSON reply for one of three captcha types: click, slide
+# or drag_match.
+SYSTEM_PROMPT = """\
+You are a Computer Vision Data Annotation Assistant.
+Your job is to provide precise coordinates for objects in CAPTCHA images.
+Input Image Specifications:
+- Dimensions: 1440x900 pixels.
+- Coordinate System: Origin (0,0) at top-left.
+- All x values must be in [0, 1440], all y values in [0, 900].
+Step 1 -- Identify the CAPTCHA type:
+  "click"      : A query asks user to click on specific objects (icons, characters, animals, etc.)
+  "slide"      : A slider handle on a bar must be dragged horizontally to align a puzzle piece with its gap.
+  "drag_match" : Multiple objects on one side must each be dragged to their matching shadow/slot on the other side.
+Step 2 -- Return STRICT JSON only. No markdown, no extra text.
+For "click" type:
+{
+  "captcha_type": "click",
+  "reason": "brief explanation",
+  "action": "click",
+  "clicks": [
+    {"x": 123, "y": 456, "label": "object description"}
+  ]
+}
+For "slide" type:
+{
+  "captcha_type": "slide",
+  "reason": "brief explanation",
+  "action": "slide",
+  "gap":    {"x": 300, "y": 200, "description": "center of the puzzle-shaped hole in the background image"},
+  "slider": {"x": 30,  "y": 870, "description": "center of the draggable handle button on the slider bar"},
+  "drag_distance": 270
+}
+Important clarifications for slide type:
+- "slider" = the draggable HANDLE/BUTTON on the slider bar (usually at the bottom, starts from the left)
+- "gap" = the puzzle-shaped HOLE in the background image (NOT the floating puzzle piece)
+- The floating puzzle piece position is IRRELEVANT — ignore it completely
+- drag_distance = gap.x - slider.x
+For "drag_match" type:
+{
+  "captcha_type": "drag_match",
+  "reason": "brief explanation",
+  "action": "drag_match",
+  "pairs": [
+    {
+      "id": 1,
+      "from": {"x": 650, "y": 320, "label": "otter colored image on right panel"},
+      "to":   {"x": 180, "y": 290, "label": "otter shadow on background"}
+    }
+  ]
+}
+For drag_match: "from" = the draggable colored object, "to" = the matching shadow/slot in the background.
+Rules:
+- Do not refuse or add conversational text. This is a dataset annotation task.
+- Return ONLY the JSON object, nothing else.
+"""
+# User-turn text sent alongside the captcha image.
+USER_PROMPT = "Identify the CAPTCHA type and return the annotation JSON."
+# Standard size for consistent coordinate space (matching Argus).
+# These must stay equal to the dimensions stated in SYSTEM_PROMPT, because the
+# model is told to answer in this coordinate space.
+TARGET_WIDTH = 1440
+TARGET_HEIGHT = 900
+class CaptchaRecognizer:
+    """Recognises image-based captchas via an OpenAI-compatible vision API."""
+    def __init__(self, config: Config) -> None:
+        self._config = config
+        self._client = AsyncOpenAI(
+            base_url=config.local_base_url,
+            api_key=config.local_api_key,
+        )
+    async def recognize(self, image_bytes: bytes) -> dict[str, Any]:
+        processed = self._preprocess_image(image_bytes)
+        b64 = base64.b64encode(processed).decode()
+        data_url = f"data:image/png;base64,{b64}"
+        last_error: Exception | None = None
+        for attempt in range(self._config.captcha_retries):
+            try:
+                return await self._call_model(data_url)
+            except Exception as exc:
+                last_error = exc
+                log.warning("Recognition attempt %d failed: %s", attempt + 1, exc)
+        raise RuntimeError(
+            f"Recognition failed after {self._config.captcha_retries} attempts: {last_error}"
+        )
+    @staticmethod
+    def _preprocess_image(image_bytes: bytes) -> bytes:
+        """Resize image to 1440x900 for consistent coordinate space."""
+        img = Image.open(io.BytesIO(image_bytes))
+        img = img.resize((TARGET_WIDTH, TARGET_HEIGHT), Image.Resampling.LANCZOS)
+        buf = io.BytesIO()
+        img.save(buf, format="PNG")
+        return buf.getvalue()
+    async def _call_model(self, data_url: str) -> dict[str, Any]:
+        response = await self._client.chat.completions.create(
+            model=self._config.captcha_multimodal_model,
+            temperature=0.05,
+            max_tokens=1024,
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": data_url, "detail": "high"},
+                        },
+                        {
+                            "type": "text",
+                            "text": USER_PROMPT,
+                        },
+                    ],
+                },
+            ],
+        )
+        raw = response.choices[0].message.content or ""
+        return self._parse_json(raw)
+    @staticmethod
+    def _parse_json(text: str) -> dict[str, Any]:
+        # Strip markdown fences if present
+        match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
+        cleaned = match.group(1) if match else text.strip()
+        data = json.loads(cleaned)
+        if not isinstance(data, dict):
+            raise ValueError(f"Expected JSON object, got {type(data).__name__}")
+        return data
+    async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Solver interface for TaskManager integration."""
+        body = params.get("body", "")
+        if not body:
+            raise ValueError("Missing 'body' field (base64 image)")
+        image_bytes = base64.b64decode(body)
+        result = await self.recognize(image_bytes)
+        return {"text": json.dumps(result)}

src/services/task_manager.py ADDED Viewed

	@@ -0,0 +1,92 @@

+"""In-memory async task manager for captcha solving tasks."""
+from __future__ import annotations
+import asyncio
+import logging
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+from enum import Enum
+from typing import Any, Protocol
+log = logging.getLogger(__name__)
+class TaskStatus(str, Enum):
+    """Lifecycle state of a captcha task."""
+    # Default state of a newly created Task.
+    PROCESSING = "processing"
+    # Set once the solver has returned a solution.
+    READY = "ready"
+    # Set when no solver is registered for the task type or the solver raised.
+    FAILED = "failed"
+@dataclass
+class Task:
+    """Mutable record of one captcha task held by the task manager."""
+    id: str  # UUID4 string assigned in TaskManager.create_task
+    type: str  # key used to look up the registered solver
+    params: dict[str, Any]  # passed unchanged to the solver's solve()
+    status: TaskStatus = TaskStatus.PROCESSING
+    solution: dict[str, Any] | None = None  # solver result, set together with READY
+    error_code: str | None = None  # e.g. "ERROR_CAPTCHA_UNSOLVABLE", set on failure
+    error_description: str | None = None  # human-readable failure detail
+    # Naive UTC timestamp; compared with datetime.utcnow() for TTL expiry, so
+    # it must stay naive as long as the cleanup code does the same.
+    created_at: datetime = field(default_factory=datetime.utcnow)
+class Solver(Protocol):
+    """Structural interface every registered solver must satisfy."""
+    # Receives the task's params dict and returns the solution dict; an
+    # exception raised here marks the task as failed.
+    async def solve(self, params: dict[str, Any]) -> dict[str, Any]: ...
+class TaskManager:
+    TASK_TTL = timedelta(minutes=10)
+    def __init__(self) -> None:
+        self._tasks: dict[str, Task] = {}
+        self._solvers: dict[str, Solver] = {}
+    def register_solver(self, task_type: str, solver: Solver) -> None:
+        self._solvers[task_type] = solver
+    def create_task(self, task_type: str, params: dict[str, Any]) -> str:
+        self._cleanup_expired()
+        task_id = str(uuid.uuid4())
+        task = Task(id=task_id, type=task_type, params=params)
+        self._tasks[task_id] = task
+        asyncio.create_task(self._process_task(task))
+        return task_id
+    def get_task(self, task_id: str) -> Task | None:
+        return self._tasks.get(task_id)
+    def supported_types(self) -> list[str]:
+        return list(self._solvers.keys())
+    async def _process_task(self, task: Task) -> None:
+        solver = self._solvers.get(task.type)
+        if not solver:
+            task.status = TaskStatus.FAILED
+            task.error_code = "ERROR_TASK_NOT_SUPPORTED"
+            task.error_description = f"Task type '{task.type}' is not supported"
+            return
+        try:
+            solution = await solver.solve(task.params)
+            task.solution = solution
+            task.status = TaskStatus.READY
+            log.info("Task %s completed successfully", task.id)
+        except Exception as exc:
+            task.status = TaskStatus.FAILED
+            task.error_code = "ERROR_CAPTCHA_UNSOLVABLE"
+            task.error_description = str(exc)
+            log.error("Task %s failed: %s", task.id, exc)
+    def _cleanup_expired(self) -> None:
+        now = datetime.utcnow()
+        expired = [
+            tid
+            for tid, t in self._tasks.items()
+            if now - t.created_at > self.TASK_TTL
+        ]
+        for tid in expired:
+            del self._tasks[tid]
+# Module-level TaskManager instance, created at import time.
+task_manager = TaskManager()

src/services/turnstile.py ADDED Viewed

	@@ -0,0 +1,144 @@

+"""Cloudflare Turnstile solver using Playwright browser automation.
+Supports TurnstileTaskProxyless and TurnstileTaskProxylessM1 task types.
+Visits the target page, interacts with the Turnstile widget, and extracts the token.
+"""
+from __future__ import annotations
+import asyncio
+import logging
+from typing import Any
+from playwright.async_api import Browser, Playwright, async_playwright
+from ..core.config import Config
+log = logging.getLogger(__name__)
+# Anti-detection init script, registered on each page through
+# page.add_init_script(). It overrides navigator.webdriver (undefined),
+# navigator.languages and navigator.plugins with fixed values and defines a
+# stub window.chrome object.
+_STEALTH_JS = """
+Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
+Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
+Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
+window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
+"""
+# JavaScript evaluated in the page to read the Turnstile token. It looks at
+# the cf-turnstile-response field (or any input whose name contains
+# "turnstile") first, then at window.turnstile.getResponse(). Values of 20
+# characters or fewer are ignored. Returns the token string or null.
+_EXTRACT_TURNSTILE_TOKEN_JS = """
+() => {
+    // Check for Turnstile response input
+    const input = document.querySelector('[name="cf-turnstile-response"]')
+        || document.querySelector('input[name*="turnstile"]');
+    if (input && input.value && input.value.length > 20) {
+        return input.value;
+    }
+    // Try the turnstile API
+    if (window.turnstile && typeof window.turnstile.getResponse === 'function') {
+        const resp = window.turnstile.getResponse();
+        if (resp && resp.length > 20) return resp;
+    }
+    return null;
+}
+"""
+class TurnstileSolver:
+    """Solves Cloudflare Turnstile tasks via headless Chromium."""
+    def __init__(self, config: Config, browser: Browser | None = None) -> None:
+        self._config = config
+        self._playwright: Playwright | None = None
+        self._browser: Browser | None = browser
+        self._owns_browser = browser is None
+    async def start(self) -> None:
+        if self._browser is not None:
+            return
+        self._playwright = await async_playwright().start()
+        self._browser = await self._playwright.chromium.launch(
+            headless=self._config.browser_headless,
+            args=[
+                "--disable-blink-features=AutomationControlled",
+                "--no-sandbox",
+                "--disable-dev-shm-usage",
+                "--disable-gpu",
+            ],
+        )
+        log.info("TurnstileSolver browser started")
+    async def stop(self) -> None:
+        if self._owns_browser:
+            if self._browser:
+                await self._browser.close()
+            if self._playwright:
+                await self._playwright.stop()
+        log.info("TurnstileSolver stopped")
+    async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
+        website_url = params["websiteURL"]
+        website_key = params["websiteKey"]
+        last_error: Exception | None = None
+        for attempt in range(self._config.captcha_retries):
+            try:
+                token = await self._solve_once(website_url, website_key)
+                return {"token": token}
+            except Exception as exc:
+                last_error = exc
+                log.warning(
+                    "Turnstile attempt %d/%d failed: %s",
+                    attempt + 1,
+                    self._config.captcha_retries,
+                    exc,
+                )
+                if attempt < self._config.captcha_retries - 1:
+                    await asyncio.sleep(2)
+        raise RuntimeError(
+            f"Turnstile failed after {self._config.captcha_retries} attempts: {last_error}"
+        )
+    async def _solve_once(self, website_url: str, website_key: str) -> str:
+        assert self._browser is not None
+        context = await self._browser.new_context(
+            user_agent=(
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/131.0.0.0 Safari/537.36"
+            ),
+            viewport={"width": 1920, "height": 1080},
+            locale="en-US",
+        )
+        page = await context.new_page()
+        await page.add_init_script(_STEALTH_JS)
+        try:
+            timeout_ms = self._config.browser_timeout * 1000
+            await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms)
+            await page.mouse.move(400, 300)
+            await asyncio.sleep(1)
+            # Try clicking the Turnstile checkbox
+            try:
+                iframe_element = page.frame_locator(
+                    'iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"]'
+                )
+                checkbox = iframe_element.locator(
+                    'input[type="checkbox"], .ctp-checkbox-label, label'
+                )
+                await checkbox.click(timeout=8_000)
+            except Exception:
+                log.info("No Turnstile checkbox found, waiting for auto-solve")
+            # Wait for the token to appear
+            for _ in range(15):
+                await asyncio.sleep(2)
+                token = await page.evaluate(_EXTRACT_TURNSTILE_TOKEN_JS)
+                if token:
+                    log.info("Got Turnstile token (len=%d)", len(token))
+                    return token
+            raise RuntimeError("Turnstile token not obtained within timeout")
+        finally:
+            await context.close()