dragg2 commited on
Commit
3a04f21
·
verified ·
1 Parent(s): d47a09c

Upload 18 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt .
6
+ RUN pip install --no-cache-dir --root-user-action=ignore -r requirements.txt
7
+ RUN playwright install --with-deps chromium
8
+
9
+ COPY . .
10
+
11
+ EXPOSE 8000
12
+
13
+ CMD ["python", "main.py"]
main.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Service entrypoint compatible with Render-style deployment."""
2
+
3
+ import os
4
+
5
+ import uvicorn
6
+
7
+ from src.main import app
8
+
9
+
10
+ if __name__ == "__main__":
11
+ from src.core.config import config
12
+
13
+ port = int(os.environ.get("PORT", config.server_port))
14
+ uvicorn.run(
15
+ "src.main:app",
16
+ host=config.server_host,
17
+ port=port,
18
+ reload=False,
19
+ )
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Application package."""
src/api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """API package."""
src/api/routes.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """YesCaptcha / AntiCaptcha compatible HTTP routes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ from fastapi import APIRouter
8
+
9
+ from ..core.config import config
10
+ from ..models.task import (
11
+ CreateTaskRequest,
12
+ CreateTaskResponse,
13
+ GetBalanceRequest,
14
+ GetBalanceResponse,
15
+ GetTaskResultRequest,
16
+ GetTaskResultResponse,
17
+ SolutionObject,
18
+ )
19
+ from ..services.task_manager import TaskStatus, task_manager
20
+
21
+ log = logging.getLogger(__name__)
22
+
23
+ router = APIRouter()
24
+
25
+ _BROWSER_TASK_TYPES = {
26
+ "RecaptchaV3TaskProxyless",
27
+ "RecaptchaV3TaskProxylessM1",
28
+ "RecaptchaV3TaskProxylessM1S7",
29
+ "RecaptchaV3TaskProxylessM1S9",
30
+ "RecaptchaV3EnterpriseTask",
31
+ "RecaptchaV3EnterpriseTaskM1",
32
+ "NoCaptchaTaskProxyless",
33
+ "RecaptchaV2TaskProxyless",
34
+ "RecaptchaV2EnterpriseTaskProxyless",
35
+ "HCaptchaTaskProxyless",
36
+ "TurnstileTaskProxyless",
37
+ "TurnstileTaskProxylessM1",
38
+ }
39
+
40
+ _IMAGE_TASK_TYPES = {
41
+ "ImageToTextTask",
42
+ "ImageToTextTaskMuggle",
43
+ "ImageToTextTaskM1",
44
+ }
45
+
46
+ _CLASSIFICATION_TASK_TYPES = {
47
+ "HCaptchaClassification",
48
+ "ReCaptchaV2Classification",
49
+ "FunCaptchaClassification",
50
+ "AwsClassification",
51
+ }
52
+
53
+
54
+ def _check_client_key(client_key: str) -> CreateTaskResponse | None:
55
+ """Return an error response if the client key is invalid, else None."""
56
+ if config.client_key and client_key != config.client_key:
57
+ return CreateTaskResponse(
58
+ errorId=1,
59
+ errorCode="ERROR_KEY_DOES_NOT_EXIST",
60
+ errorDescription="Invalid clientKey",
61
+ )
62
+ return None
63
+
64
+
65
+ @router.post("/createTask", response_model=CreateTaskResponse)
66
+ async def create_task(request: CreateTaskRequest) -> CreateTaskResponse:
67
+ err = _check_client_key(request.clientKey)
68
+ if err:
69
+ return err
70
+
71
+ supported = task_manager.supported_types()
72
+ if request.task.type not in supported:
73
+ return CreateTaskResponse(
74
+ errorId=1,
75
+ errorCode="ERROR_TASK_NOT_SUPPORTED",
76
+ errorDescription=f"Task type '{request.task.type}' is not supported. "
77
+ f"Supported: {supported}",
78
+ )
79
+
80
+ # Validate required fields for browser-based tasks
81
+ if request.task.type in _BROWSER_TASK_TYPES:
82
+ if not request.task.websiteURL or not request.task.websiteKey:
83
+ return CreateTaskResponse(
84
+ errorId=1,
85
+ errorCode="ERROR_TASK_PROPERTY_EMPTY",
86
+ errorDescription="websiteURL and websiteKey are required",
87
+ )
88
+
89
+ # Validate required fields for ImageToText tasks
90
+ if request.task.type in _IMAGE_TASK_TYPES:
91
+ if not request.task.body:
92
+ return CreateTaskResponse(
93
+ errorId=1,
94
+ errorCode="ERROR_TASK_PROPERTY_EMPTY",
95
+ errorDescription="body (base64 image) is required",
96
+ )
97
+
98
+ # Validate required fields for classification tasks
99
+ if request.task.type in _CLASSIFICATION_TASK_TYPES:
100
+ has_image = (
101
+ request.task.image
102
+ or request.task.images
103
+ or request.task.body
104
+ or request.task.queries
105
+ )
106
+ if not has_image:
107
+ return CreateTaskResponse(
108
+ errorId=1,
109
+ errorCode="ERROR_TASK_PROPERTY_EMPTY",
110
+ errorDescription="image data is required for classification tasks",
111
+ )
112
+
113
+ params = request.task.model_dump(exclude_none=True)
114
+ task_id = task_manager.create_task(request.task.type, params)
115
+
116
+ log.info("Created task %s (type=%s)", task_id, request.task.type)
117
+ return CreateTaskResponse(errorId=0, taskId=task_id)
118
+
119
+
120
+ @router.post("/getTaskResult", response_model=GetTaskResultResponse)
121
+ async def get_task_result(
122
+ request: GetTaskResultRequest,
123
+ ) -> GetTaskResultResponse:
124
+ if config.client_key and request.clientKey != config.client_key:
125
+ return GetTaskResultResponse(
126
+ errorId=1,
127
+ errorCode="ERROR_KEY_DOES_NOT_EXIST",
128
+ errorDescription="Invalid clientKey",
129
+ )
130
+
131
+ task = task_manager.get_task(request.taskId)
132
+ if task is None:
133
+ return GetTaskResultResponse(
134
+ errorId=1,
135
+ errorCode="ERROR_NO_SUCH_CAPCHA_ID",
136
+ errorDescription="Task not found",
137
+ )
138
+
139
+ if task.status == TaskStatus.PROCESSING:
140
+ return GetTaskResultResponse(errorId=0, status="processing")
141
+
142
+ if task.status == TaskStatus.READY:
143
+ return GetTaskResultResponse(
144
+ errorId=0,
145
+ status="ready",
146
+ solution=SolutionObject(**(task.solution or {})),
147
+ )
148
+
149
+ return GetTaskResultResponse(
150
+ errorId=1,
151
+ errorCode=task.error_code or "ERROR_CAPTCHA_UNSOLVABLE",
152
+ errorDescription=task.error_description,
153
+ )
154
+
155
+
156
+ @router.post("/getBalance", response_model=GetBalanceResponse)
157
+ async def get_balance(request: GetBalanceRequest) -> GetBalanceResponse:
158
+ if config.client_key and request.clientKey != config.client_key:
159
+ return GetBalanceResponse(errorId=1, balance=0)
160
+ return GetBalanceResponse(errorId=0, balance=99999.0)
161
+
162
+
163
+ @router.get("/api/v1/health")
164
+ async def health() -> dict[str, object]:
165
+ return {
166
+ "status": "ok",
167
+ "supported_task_types": task_manager.supported_types(),
168
+ "browser_headless": config.browser_headless,
169
+ "cloud_model": config.cloud_model,
170
+ "local_model": config.local_model,
171
+ }
src/core/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Core application helpers."""
src/core/config.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Environment-driven application configuration.
2
+
3
+ Two model backends are supported:
4
+
5
+ Cloud model — a remote OpenAI-compatible API (e.g. gpt-5.4 via a hosted
6
+ endpoint). Used as the powerful multimodal backbone for
7
+ tasks like audio transcription.
8
+
9
+ Local model — a self-hosted model served via SGLang, vLLM, or any
10
+ OpenAI-compatible server (e.g. Qwen3.5-2B on localhost).
11
+ Used for high-throughput image recognition / classification.
12
+
13
+ Both backends expose ``/v1/chat/completions``; the only difference is the
14
+ base URL, API key, and model name.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import os
20
+ from dataclasses import dataclass
21
+
22
+
23
+ @dataclass(frozen=True)
24
+ class Config:
25
+ server_host: str
26
+ server_port: int
27
+
28
+ # Auth: YesCaptcha clientKey
29
+ client_key: str | None
30
+
31
+ # ── Cloud model (remote API) ──
32
+ cloud_base_url: str
33
+ cloud_api_key: str
34
+ cloud_model: str
35
+
36
+ # ── Local model (self-hosted via SGLang / vLLM) ──
37
+ local_base_url: str
38
+ local_api_key: str
39
+ local_model: str
40
+
41
+ captcha_retries: int
42
+ captcha_timeout: int
43
+
44
+ # Playwright browser
45
+ browser_headless: bool
46
+ browser_timeout: int # seconds
47
+
48
+ # ── Convenience aliases (backward-compat) ──
49
+
50
+ @property
51
+ def captcha_base_url(self) -> str:
52
+ return self.cloud_base_url
53
+
54
+ @property
55
+ def captcha_api_key(self) -> str:
56
+ return self.cloud_api_key
57
+
58
+ @property
59
+ def captcha_model(self) -> str:
60
+ return self.cloud_model
61
+
62
+ @property
63
+ def captcha_multimodal_model(self) -> str:
64
+ return self.local_model
65
+
66
+
67
+ def load_config() -> Config:
68
+ return Config(
69
+ server_host=os.environ.get("SERVER_HOST", "0.0.0.0"),
70
+ server_port=int(os.environ.get("SERVER_PORT", "8000")),
71
+ client_key=os.environ.get("CLIENT_KEY", "").strip() or None,
72
+ # Cloud model
73
+ cloud_base_url=os.environ.get(
74
+ "CLOUD_BASE_URL",
75
+ os.environ.get("CAPTCHA_BASE_URL", "https://your-openai-compatible-endpoint/v1"),
76
+ ),
77
+ cloud_api_key=os.environ.get(
78
+ "CLOUD_API_KEY",
79
+ os.environ.get("CAPTCHA_API_KEY", ""),
80
+ ),
81
+ cloud_model=os.environ.get(
82
+ "CLOUD_MODEL",
83
+ os.environ.get("CAPTCHA_MODEL", "gpt-5.4"),
84
+ ),
85
+ # Local model
86
+ local_base_url=os.environ.get(
87
+ "LOCAL_BASE_URL",
88
+ os.environ.get("CAPTCHA_BASE_URL", "http://localhost:30000/v1"),
89
+ ),
90
+ local_api_key=os.environ.get(
91
+ "LOCAL_API_KEY",
92
+ os.environ.get("CAPTCHA_API_KEY", "EMPTY"),
93
+ ),
94
+ local_model=os.environ.get(
95
+ "LOCAL_MODEL",
96
+ os.environ.get("CAPTCHA_MULTIMODAL_MODEL", "Qwen/Qwen3.5-2B"),
97
+ ),
98
+ captcha_retries=int(os.environ.get("CAPTCHA_RETRIES", "3")),
99
+ captcha_timeout=int(os.environ.get("CAPTCHA_TIMEOUT", "30")),
100
+ browser_headless=os.environ.get("BROWSER_HEADLESS", "true").strip().lower()
101
+ in {"1", "true", "yes"},
102
+ browser_timeout=int(os.environ.get("BROWSER_TIMEOUT", "30")),
103
+ )
104
+
105
+
106
+ config = load_config()
src/main.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI application with Playwright lifecycle management."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from contextlib import asynccontextmanager
7
+ from typing import AsyncIterator
8
+
9
+ from fastapi import FastAPI
10
+
11
+ from .api.routes import router
12
+ from .core.config import config
13
+ from .services.classification import ClassificationSolver
14
+ from .services.hcaptcha import HCaptchaSolver
15
+ from .services.recognition import CaptchaRecognizer
16
+ from .services.recaptcha_v2 import RecaptchaV2Solver
17
+ from .services.recaptcha_v3 import RecaptchaV3Solver
18
+ from .services.task_manager import task_manager
19
+ from .services.turnstile import TurnstileSolver
20
+
21
+ logging.basicConfig(
22
+ level=logging.INFO,
23
+ format="%(asctime)s %(levelname)s %(name)s: %(message)s",
24
+ )
25
+ log = logging.getLogger(__name__)
26
+
27
+ _RECAPTCHA_V3_TYPES = [
28
+ "RecaptchaV3TaskProxyless",
29
+ "RecaptchaV3TaskProxylessM1",
30
+ "RecaptchaV3TaskProxylessM1S7",
31
+ "RecaptchaV3TaskProxylessM1S9",
32
+ "RecaptchaV3EnterpriseTask",
33
+ "RecaptchaV3EnterpriseTaskM1",
34
+ ]
35
+
36
+ _RECAPTCHA_V2_TYPES = [
37
+ "NoCaptchaTaskProxyless",
38
+ "RecaptchaV2TaskProxyless",
39
+ "RecaptchaV2EnterpriseTaskProxyless",
40
+ ]
41
+
42
+ _HCAPTCHA_TYPES = [
43
+ "HCaptchaTaskProxyless",
44
+ ]
45
+
46
+ _TURNSTILE_TYPES = [
47
+ "TurnstileTaskProxyless",
48
+ "TurnstileTaskProxylessM1",
49
+ ]
50
+
51
+ _CLASSIFICATION_TYPES = [
52
+ "HCaptchaClassification",
53
+ "ReCaptchaV2Classification",
54
+ "FunCaptchaClassification",
55
+ "AwsClassification",
56
+ ]
57
+
58
+ _IMAGE_TEXT_TYPES = [
59
+ "ImageToTextTask",
60
+ "ImageToTextTaskMuggle",
61
+ "ImageToTextTaskM1",
62
+ ]
63
+
64
+
65
+ @asynccontextmanager
66
+ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
67
+ # ── startup ──
68
+ v3_solver = RecaptchaV3Solver(config)
69
+ await v3_solver.start()
70
+ for task_type in _RECAPTCHA_V3_TYPES:
71
+ task_manager.register_solver(task_type, v3_solver)
72
+ log.info("Registered reCAPTCHA v3 solver for types: %s", _RECAPTCHA_V3_TYPES)
73
+
74
+ v2_solver = RecaptchaV2Solver(config)
75
+ await v2_solver.start()
76
+ for task_type in _RECAPTCHA_V2_TYPES:
77
+ task_manager.register_solver(task_type, v2_solver)
78
+ log.info("Registered reCAPTCHA v2 solver for types: %s", _RECAPTCHA_V2_TYPES)
79
+
80
+ hcaptcha_solver = HCaptchaSolver(config)
81
+ await hcaptcha_solver.start()
82
+ for task_type in _HCAPTCHA_TYPES:
83
+ task_manager.register_solver(task_type, hcaptcha_solver)
84
+ log.info("Registered hCaptcha solver for types: %s", _HCAPTCHA_TYPES)
85
+
86
+ turnstile_solver = TurnstileSolver(config)
87
+ await turnstile_solver.start()
88
+ for task_type in _TURNSTILE_TYPES:
89
+ task_manager.register_solver(task_type, turnstile_solver)
90
+ log.info("Registered Turnstile solver for types: %s", _TURNSTILE_TYPES)
91
+
92
+ recognizer = CaptchaRecognizer(config)
93
+ for task_type in _IMAGE_TEXT_TYPES:
94
+ task_manager.register_solver(task_type, recognizer)
95
+ log.info("Registered image captcha recognizer for types: %s", _IMAGE_TEXT_TYPES)
96
+
97
+ classifier = ClassificationSolver(config)
98
+ for task_type in _CLASSIFICATION_TYPES:
99
+ task_manager.register_solver(task_type, classifier)
100
+ log.info("Registered classification solver for types: %s", _CLASSIFICATION_TYPES)
101
+
102
+ yield
103
+ # ── shutdown ──
104
+ await v3_solver.stop()
105
+ await v2_solver.stop()
106
+ await hcaptcha_solver.stop()
107
+ await turnstile_solver.stop()
108
+
109
+
110
+ app = FastAPI(
111
+ title="Captcha Solver Service",
112
+ version="3.0.0",
113
+ description="YesCaptcha-compatible captcha solving service for flow2api.",
114
+ lifespan=lifespan,
115
+ )
116
+
117
+ app.include_router(router)
118
+
119
+
120
+ @app.get("/")
121
+ async def root() -> dict[str, object]:
122
+ return {
123
+ "service": "captcha-solver",
124
+ "version": "3.0.0",
125
+ "endpoints": {
126
+ "createTask": "/createTask",
127
+ "getTaskResult": "/getTaskResult",
128
+ "getBalance": "/getBalance",
129
+ "health": "/api/v1/health",
130
+ },
131
+ "supported_task_types": task_manager.supported_types(),
132
+ }
src/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Pydantic API models."""
src/models/task.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """YesCaptcha / AntiCaptcha compatible API models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
+ # ── createTask ──────────────────────────────────────────────
9
+
10
+ class TaskObject(BaseModel):
11
+ type: str
12
+ websiteURL: str | None = None
13
+ websiteKey: str | None = None
14
+ pageAction: str | None = None
15
+ minScore: float | None = None
16
+ isInvisible: bool | None = None
17
+ # Image captcha / classification fields
18
+ body: str | None = None
19
+ image: str | None = None
20
+ images: list[str] | None = None
21
+ question: str | None = None
22
+ queries: list[str] | str | None = None
23
+ project_name: str | None = None
24
+
25
+
26
+ class CreateTaskRequest(BaseModel):
27
+ clientKey: str
28
+ task: TaskObject
29
+
30
+
31
+ class CreateTaskResponse(BaseModel):
32
+ errorId: int = 0
33
+ taskId: str | None = None
34
+ errorCode: str | None = None
35
+ errorDescription: str | None = None
36
+
37
+
38
+ # ── getTaskResult ───────────────────────────────────────────
39
+
40
+ class GetTaskResultRequest(BaseModel):
41
+ clientKey: str
42
+ taskId: str
43
+
44
+
45
+ class SolutionObject(BaseModel):
46
+ gRecaptchaResponse: str | None = None
47
+ text: str | None = None
48
+ token: str | None = None
49
+ objects: list[int] | None = None
50
+ answer: bool | list[int] | None = None
51
+ userAgent: str | None = None
52
+
53
+
54
+ class GetTaskResultResponse(BaseModel):
55
+ errorId: int = 0
56
+ status: str | None = None
57
+ solution: SolutionObject | None = None
58
+ errorCode: str | None = None
59
+ errorDescription: str | None = None
60
+
61
+
62
+ # ── getBalance ──────────────────────────────────────────────
63
+
64
+ class GetBalanceRequest(BaseModel):
65
+ clientKey: str
66
+
67
+
68
+ class GetBalanceResponse(BaseModel):
69
+ errorId: int = 0
70
+ balance: float = 99999.0
src/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Service layer package."""
src/services/classification.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Image classification solvers for various captcha types.
2
+
3
+ Supports HCaptchaClassification, ReCaptchaV2Classification,
4
+ FunCaptchaClassification, and AwsClassification task types.
5
+
6
+ All classification tasks send images + question text to an OpenAI-compatible
7
+ vision model for analysis and return structured coordinate/index results.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import base64
13
+ import io
14
+ import json
15
+ import logging
16
+ import re
17
+ from typing import Any
18
+
19
+ from openai import AsyncOpenAI
20
+ from PIL import Image
21
+
22
+ from ..core.config import Config
23
+
24
+ log = logging.getLogger(__name__)
25
+
26
+ HCAPTCHA_SYSTEM_PROMPT = """\
27
+ You are an image classification assistant for HCaptcha challenges.
28
+ Given a question and one or more base64-encoded images, determine which images match the question.
29
+
30
+ Return STRICT JSON only. No markdown, no extra text.
31
+
32
+ For single-image questions (is this image X?):
33
+ {"answer": true} or {"answer": false}
34
+
35
+ For multi-image grid questions (select all images containing X):
36
+ {"answer": [0, 2, 5]}
37
+ where numbers are 0-indexed positions of matching images.
38
+
39
+ Rules:
40
+ - Return ONLY the JSON object, nothing else.
41
+ - Be precise with your classification.
42
+ """
43
+
44
+ RECAPTCHA_V2_SYSTEM_PROMPT = """\
45
+ You are an image classification assistant for reCAPTCHA v2 challenges.
46
+ Given a question and a grid image (3x3 or 4x4), identify which cells match the question.
47
+
48
+ The image cells are numbered 0-8 (3x3) or 0-15 (4x4), left-to-right, top-to-bottom.
49
+
50
+ Return STRICT JSON only:
51
+ {"objects": [0, 3, 6]}
52
+ where numbers are 0-indexed positions of matching cells.
53
+
54
+ Rules:
55
+ - Return ONLY the JSON object, nothing else.
56
+ - If no cells match, return {"objects": []}.
57
+ """
58
+
59
+ FUNCAPTCHA_SYSTEM_PROMPT = """\
60
+ You are an image classification assistant for FunCaptcha challenges.
61
+ Given a question and a grid image (typically 2x3 = 6 cells), identify which cell
62
+ is the correct answer.
63
+
64
+ Cells are numbered 0-5, left-to-right, top-to-bottom.
65
+
66
+ Return STRICT JSON only:
67
+ {"objects": [3]}
68
+ where the number is the 0-indexed position of the correct cell.
69
+
70
+ Rules:
71
+ - Return ONLY the JSON object, nothing else.
72
+ - Usually only one cell is correct.
73
+ """
74
+
75
+ AWS_SYSTEM_PROMPT = """\
76
+ You are an image classification assistant for AWS CAPTCHA challenges.
77
+ Given a question and one or more images, identify the correct answer.
78
+
79
+ Return STRICT JSON only:
80
+ {"objects": [1]}
81
+ where the number is the 0-indexed position of the matching image.
82
+
83
+ Rules:
84
+ - Return ONLY the JSON object, nothing else.
85
+ """
86
+
87
+
88
+ class ClassificationSolver:
89
+ """Solves image classification captchas using a vision model."""
90
+
91
+ def __init__(self, config: Config) -> None:
92
+ self._config = config
93
+ self._client = AsyncOpenAI(
94
+ base_url=config.local_base_url,
95
+ api_key=config.local_api_key,
96
+ )
97
+
98
+ async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
99
+ task_type = params.get("type", "")
100
+ system_prompt = self._get_system_prompt(task_type)
101
+ question = params.get("question", "") or params.get("queries", "")
102
+
103
+ # Handle different image field names across task types
104
+ images = self._extract_images(params)
105
+ if not images:
106
+ raise ValueError("No image data provided")
107
+
108
+ result = await self._classify(system_prompt, question, images)
109
+ return result
110
+
111
+ @staticmethod
112
+ def _get_system_prompt(task_type: str) -> str:
113
+ prompts = {
114
+ "HCaptchaClassification": HCAPTCHA_SYSTEM_PROMPT,
115
+ "ReCaptchaV2Classification": RECAPTCHA_V2_SYSTEM_PROMPT,
116
+ "FunCaptchaClassification": FUNCAPTCHA_SYSTEM_PROMPT,
117
+ "AwsClassification": AWS_SYSTEM_PROMPT,
118
+ }
119
+ return prompts.get(task_type, RECAPTCHA_V2_SYSTEM_PROMPT)
120
+
121
+ @staticmethod
122
+ def _extract_images(params: dict[str, Any]) -> list[str]:
123
+ """Extract base64 image(s) from various param formats."""
124
+ images: list[str] = []
125
+
126
+ if "image" in params:
127
+ images.append(params["image"])
128
+
129
+ if "images" in params:
130
+ imgs = params["images"]
131
+ if isinstance(imgs, list):
132
+ images.extend(imgs)
133
+ elif isinstance(imgs, str):
134
+ images.append(imgs)
135
+
136
+ if "body" in params and not images:
137
+ images.append(params["body"])
138
+
139
+ # HCaptcha queries format: list of base64 strings
140
+ if "queries" in params and isinstance(params["queries"], list):
141
+ images.extend(params["queries"])
142
+
143
+ return images
144
+
145
+ @staticmethod
146
+ def _prepare_image(b64_data: str) -> str:
147
+ """Ensure image is properly formatted as a data URL."""
148
+ if b64_data.startswith("data:image"):
149
+ return b64_data
150
+ try:
151
+ img_bytes = base64.b64decode(b64_data)
152
+ img = Image.open(io.BytesIO(img_bytes))
153
+ fmt = img.format or "PNG"
154
+ mime = f"image/{fmt.lower()}"
155
+ return f"data:{mime};base64,{b64_data}"
156
+ except Exception:
157
+ return f"data:image/png;base64,{b64_data}"
158
+
159
+ async def _classify(
160
+ self, system_prompt: str, question: str, images: list[str]
161
+ ) -> dict[str, Any]:
162
+ content: list[dict[str, Any]] = []
163
+
164
+ for img_b64 in images:
165
+ data_url = self._prepare_image(img_b64)
166
+ content.append({
167
+ "type": "image_url",
168
+ "image_url": {"url": data_url, "detail": "high"},
169
+ })
170
+
171
+ user_text = question if question else "Classify this captcha image."
172
+ content.append({"type": "text", "text": user_text})
173
+
174
+ last_error: Exception | None = None
175
+ for attempt in range(self._config.captcha_retries):
176
+ try:
177
+ response = await self._client.chat.completions.create(
178
+ model=self._config.captcha_multimodal_model,
179
+ temperature=0.05,
180
+ max_tokens=512,
181
+ messages=[
182
+ {"role": "system", "content": system_prompt},
183
+ {"role": "user", "content": content},
184
+ ],
185
+ )
186
+ raw = response.choices[0].message.content or ""
187
+ return self._parse_json(raw)
188
+ except Exception as exc:
189
+ last_error = exc
190
+ log.warning("Classification attempt %d failed: %s", attempt + 1, exc)
191
+
192
+ raise RuntimeError(
193
+ f"Classification failed after {self._config.captcha_retries} attempts: {last_error}"
194
+ )
195
+
196
+ @staticmethod
197
+ def _parse_json(text: str) -> dict[str, Any]:
198
+ match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
199
+ cleaned = match.group(1) if match else text.strip()
200
+ data = json.loads(cleaned)
201
+ if not isinstance(data, dict):
202
+ raise ValueError(f"Expected JSON object, got {type(data).__name__}")
203
+ return data
src/services/hcaptcha.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """HCaptcha solver using Playwright browser automation.
2
+
3
+ Supports HCaptchaTaskProxyless task type.
4
+ Visits the target page, interacts with the hCaptcha widget, and extracts the response token.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import logging
11
+ from typing import Any
12
+
13
+ from playwright.async_api import Browser, Playwright, async_playwright
14
+
15
+ from ..core.config import Config
16
+
17
+ log = logging.getLogger(__name__)
18
+
19
+ _STEALTH_JS = """
20
+ Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
21
+ Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
22
+ Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
23
+ window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
24
+ """
25
+
26
+ _EXTRACT_HCAPTCHA_TOKEN_JS = """
27
+ () => {
28
+ const textarea = document.querySelector('[name="h-captcha-response"]')
29
+ || document.querySelector('[name="g-recaptcha-response"]');
30
+ if (textarea && textarea.value && textarea.value.length > 20) {
31
+ return textarea.value;
32
+ }
33
+ if (window.hcaptcha && typeof window.hcaptcha.getResponse === 'function') {
34
+ const resp = window.hcaptcha.getResponse();
35
+ if (resp && resp.length > 20) return resp;
36
+ }
37
+ return null;
38
+ }
39
+ """
40
+
41
+
42
+ class HCaptchaSolver:
43
+ """Solves HCaptchaTaskProxyless tasks via headless Chromium."""
44
+
45
+ def __init__(self, config: Config, browser: Browser | None = None) -> None:
46
+ self._config = config
47
+ self._playwright: Playwright | None = None
48
+ self._browser: Browser | None = browser
49
+ self._owns_browser = browser is None
50
+
51
+ async def start(self) -> None:
52
+ if self._browser is not None:
53
+ return
54
+ self._playwright = await async_playwright().start()
55
+ self._browser = await self._playwright.chromium.launch(
56
+ headless=self._config.browser_headless,
57
+ args=[
58
+ "--disable-blink-features=AutomationControlled",
59
+ "--no-sandbox",
60
+ "--disable-dev-shm-usage",
61
+ "--disable-gpu",
62
+ ],
63
+ )
64
+ log.info("HCaptchaSolver browser started")
65
+
66
+ async def stop(self) -> None:
67
+ if self._owns_browser:
68
+ if self._browser:
69
+ await self._browser.close()
70
+ if self._playwright:
71
+ await self._playwright.stop()
72
+ log.info("HCaptchaSolver stopped")
73
+
74
+ async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
75
+ website_url = params["websiteURL"]
76
+ website_key = params["websiteKey"]
77
+
78
+ last_error: Exception | None = None
79
+ for attempt in range(self._config.captcha_retries):
80
+ try:
81
+ token = await self._solve_once(website_url, website_key)
82
+ return {"gRecaptchaResponse": token}
83
+ except Exception as exc:
84
+ last_error = exc
85
+ log.warning(
86
+ "HCaptcha attempt %d/%d failed: %s",
87
+ attempt + 1,
88
+ self._config.captcha_retries,
89
+ exc,
90
+ )
91
+ if attempt < self._config.captcha_retries - 1:
92
+ await asyncio.sleep(2)
93
+
94
+ raise RuntimeError(
95
+ f"HCaptcha failed after {self._config.captcha_retries} attempts: {last_error}"
96
+ )
97
+
98
+ async def _solve_once(self, website_url: str, website_key: str) -> str:
99
+ assert self._browser is not None
100
+
101
+ context = await self._browser.new_context(
102
+ user_agent=(
103
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
104
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
105
+ "Chrome/131.0.0.0 Safari/537.36"
106
+ ),
107
+ viewport={"width": 1920, "height": 1080},
108
+ locale="en-US",
109
+ )
110
+ page = await context.new_page()
111
+ await page.add_init_script(_STEALTH_JS)
112
+
113
+ try:
114
+ timeout_ms = self._config.browser_timeout * 1000
115
+ await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms)
116
+
117
+ await page.mouse.move(400, 300)
118
+ await asyncio.sleep(1)
119
+
120
+ # Click only the checkbox iframe — match by specific title to avoid the challenge iframe
121
+ iframe_element = page.frame_locator(
122
+ 'iframe[title="Widget containing checkbox for hCaptcha security challenge"]'
123
+ )
124
+ checkbox = iframe_element.locator("#checkbox")
125
+ await checkbox.click(timeout=10_000)
126
+
127
+ # Wait for token — may require challenge completion; poll up to 30s
128
+ for _ in range(6):
129
+ await asyncio.sleep(5)
130
+ token = await page.evaluate(_EXTRACT_HCAPTCHA_TOKEN_JS)
131
+ if isinstance(token, str) and len(token) > 20:
132
+ break
133
+ else:
134
+ token = None
135
+
136
+ if not isinstance(token, str) or len(token) < 20:
137
+ raise RuntimeError(f"Invalid hCaptcha token: {token!r}")
138
+
139
+ log.info("Got hCaptcha token (len=%d)", len(token))
140
+ return token
141
+ finally:
142
+ await context.close()
src/services/recaptcha_v2.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """reCAPTCHA v2 solver using Playwright browser automation.
2
+
3
+ Supports NoCaptchaTaskProxyless, RecaptchaV2TaskProxyless,
4
+ and RecaptchaV2EnterpriseTaskProxyless task types.
5
+
6
+ Strategy:
7
+ 1. Visit the target page with a realistic browser context.
8
+ 2. Click the reCAPTCHA checkbox.
9
+ 3. If the challenge dialog appears (bot detected), switch to the audio
10
+ challenge, download the audio file, transcribe it via the configured
11
+ speech-to-text model, and submit the text.
12
+ 4. Extract the gRecaptchaResponse token.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import logging
19
+ from typing import Any
20
+
21
+ import httpx
22
+ from playwright.async_api import Browser, Playwright, async_playwright
23
+
24
+ from ..core.config import Config
25
+
26
+ log = logging.getLogger(__name__)
27
+
28
+ _STEALTH_JS = """
29
+ Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
30
+ Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
31
+ Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
32
+ window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
33
+ """
34
+
35
+ _EXTRACT_TOKEN_JS = """
36
+ () => {
37
+ const textarea = document.querySelector('#g-recaptcha-response')
38
+ || document.querySelector('[name="g-recaptcha-response"]');
39
+ if (textarea && textarea.value && textarea.value.length > 20) {
40
+ return textarea.value;
41
+ }
42
+ const gr = window.grecaptcha?.enterprise || window.grecaptcha;
43
+ if (gr && typeof gr.getResponse === 'function') {
44
+ const resp = gr.getResponse();
45
+ if (resp && resp.length > 20) return resp;
46
+ }
47
+ return null;
48
+ }
49
+ """
50
+
51
+
52
+ class RecaptchaV2Solver:
53
+ """Solves reCAPTCHA v2 tasks via headless Chromium with checkbox clicking.
54
+
55
+ Falls back to the audio challenge path when Google presents a visual
56
+ challenge to the headless browser.
57
+ """
58
+
59
+ def __init__(self, config: Config, browser: Browser | None = None) -> None:
60
+ self._config = config
61
+ self._playwright: Playwright | None = None
62
+ self._browser: Browser | None = browser
63
+ self._owns_browser = browser is None
64
+
65
+ async def start(self) -> None:
66
+ if self._browser is not None:
67
+ return
68
+ self._playwright = await async_playwright().start()
69
+ self._browser = await self._playwright.chromium.launch(
70
+ headless=self._config.browser_headless,
71
+ args=[
72
+ "--disable-blink-features=AutomationControlled",
73
+ "--no-sandbox",
74
+ "--disable-dev-shm-usage",
75
+ "--disable-gpu",
76
+ ],
77
+ )
78
+ log.info("RecaptchaV2Solver browser started")
79
+
80
+ async def stop(self) -> None:
81
+ if self._owns_browser:
82
+ if self._browser:
83
+ await self._browser.close()
84
+ if self._playwright:
85
+ await self._playwright.stop()
86
+ log.info("RecaptchaV2Solver stopped")
87
+
88
+ async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
89
+ website_url = params["websiteURL"]
90
+ website_key = params["websiteKey"]
91
+ is_invisible = params.get("isInvisible", False)
92
+
93
+ last_error: Exception | None = None
94
+ for attempt in range(self._config.captcha_retries):
95
+ try:
96
+ token = await self._solve_once(website_url, website_key, is_invisible)
97
+ return {"gRecaptchaResponse": token}
98
+ except Exception as exc:
99
+ last_error = exc
100
+ log.warning(
101
+ "reCAPTCHA v2 attempt %d/%d failed: %s",
102
+ attempt + 1,
103
+ self._config.captcha_retries,
104
+ exc,
105
+ )
106
+ if attempt < self._config.captcha_retries - 1:
107
+ await asyncio.sleep(2)
108
+
109
+ raise RuntimeError(
110
+ f"reCAPTCHA v2 failed after {self._config.captcha_retries} attempts: {last_error}"
111
+ )
112
+
113
+ async def _solve_once(
114
+ self, website_url: str, website_key: str, is_invisible: bool
115
+ ) -> str:
116
+ assert self._browser is not None
117
+
118
+ context = await self._browser.new_context(
119
+ user_agent=(
120
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
121
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
122
+ "Chrome/131.0.0.0 Safari/537.36"
123
+ ),
124
+ viewport={"width": 1920, "height": 1080},
125
+ locale="en-US",
126
+ )
127
+ page = await context.new_page()
128
+ await page.add_init_script(_STEALTH_JS)
129
+
130
+ try:
131
+ timeout_ms = self._config.browser_timeout * 1000
132
+ await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms)
133
+ await page.mouse.move(400, 300)
134
+ await asyncio.sleep(0.5)
135
+
136
+ if is_invisible:
137
+ token = await page.evaluate(
138
+ """
139
+ ([key]) => new Promise((resolve, reject) => {
140
+ const gr = window.grecaptcha?.enterprise || window.grecaptcha;
141
+ if (!gr) { reject(new Error('grecaptcha not found')); return; }
142
+ gr.ready(() => {
143
+ gr.execute(key).then(resolve).catch(reject);
144
+ });
145
+ })
146
+ """,
147
+ [website_key],
148
+ )
149
+ else:
150
+ token = await self._solve_checkbox(page)
151
+
152
+ if not isinstance(token, str) or len(token) < 20:
153
+ raise RuntimeError(f"Invalid reCAPTCHA v2 token: {token!r}")
154
+
155
+ log.info("Got reCAPTCHA v2 token (len=%d)", len(token))
156
+ return token
157
+ finally:
158
+ await context.close()
159
+
160
+ async def _solve_checkbox(self, page: Any) -> str | None:
161
+ """Click the reCAPTCHA checkbox. If a visual challenge appears, try audio path."""
162
+ # The checkbox iframe always has title="reCAPTCHA"
163
+ checkbox_frame = page.frame_locator('iframe[title="reCAPTCHA"]').first
164
+ checkbox = checkbox_frame.locator("#recaptcha-anchor")
165
+ await checkbox.click(timeout=10_000)
166
+ await asyncio.sleep(2)
167
+
168
+ # Check if token was issued immediately (low-risk sessions)
169
+ token = await page.evaluate(_EXTRACT_TOKEN_JS)
170
+ if isinstance(token, str) and len(token) > 20:
171
+ return token
172
+
173
+ # Challenge dialog appeared — try audio challenge path
174
+ log.info("reCAPTCHA challenge detected, attempting audio path")
175
+ try:
176
+ token = await self._solve_audio_challenge(page)
177
+ except Exception as exc:
178
+ log.warning("Audio challenge path failed: %s", exc)
179
+ token = None
180
+
181
+ return token
182
+
183
+ async def _solve_audio_challenge(self, page: Any) -> str | None:
184
+ """Click the audio button in the bframe and transcribe the audio."""
185
+ # The challenge bframe has title containing "recaptcha challenge"
186
+ bframe = page.frame_locator('iframe[title*="recaptcha challenge"]')
187
+
188
+ # Click the audio challenge button
189
+ audio_btn = bframe.locator("#recaptcha-audio-button")
190
+ await audio_btn.click(timeout=8_000)
191
+
192
+ # Wait for the audio challenge iframe to load its content
193
+ await asyncio.sleep(3)
194
+
195
+ # After clicking audio, a new bframe is rendered with the audio player
196
+ bframe = page.frame_locator('iframe[title*="recaptcha challenge"]')
197
+
198
+ # Get the audio source URL — try multiple selectors
199
+ audio_src = None
200
+ for selector in [
201
+ ".rc-audiochallenge-tdownload-link",
202
+ "a[href*='.mp3']",
203
+ "audio source",
204
+ ]:
205
+ try:
206
+ element = bframe.locator(selector).first
207
+ audio_src = await element.get_attribute("href", timeout=5_000) or await element.get_attribute("src", timeout=1_000)
208
+ if audio_src:
209
+ break
210
+ except Exception:
211
+ continue
212
+
213
+ if not audio_src:
214
+ raise RuntimeError("Could not find audio challenge download link")
215
+
216
+ # Download the audio file
217
+ async with httpx.AsyncClient(timeout=30) as client:
218
+ resp = await client.get(audio_src)
219
+ resp.raise_for_status()
220
+ audio_bytes = resp.content
221
+
222
+ # Transcribe via the vision/language model (base64 audio → text)
223
+ transcript = await self._transcribe_audio(audio_bytes)
224
+ log.info("Audio transcribed: %r", transcript[:40] if transcript else None)
225
+
226
+ if not transcript:
227
+ raise RuntimeError("Audio transcription returned empty result")
228
+
229
+ # Submit the transcript
230
+ audio_input = bframe.locator("#audio-response")
231
+ await audio_input.fill(transcript.strip().lower())
232
+ verify_btn = bframe.locator("#recaptcha-verify-button")
233
+ await verify_btn.click(timeout=8_000)
234
+ await asyncio.sleep(2)
235
+
236
+ return await page.evaluate(_EXTRACT_TOKEN_JS)
237
+
238
+ async def _transcribe_audio(self, audio_bytes: bytes) -> str | None:
239
+ """Send audio bytes to the OpenAI-compatible audio transcription endpoint."""
240
+ import base64
241
+
242
+ audio_b64 = base64.b64encode(audio_bytes).decode()
243
+ payload = {
244
+ "model": self._config.captcha_model,
245
+ "messages": [
246
+ {
247
+ "role": "user",
248
+ "content": [
249
+ {
250
+ "type": "text",
251
+ "text": (
252
+ "This is a reCAPTCHA audio challenge. "
253
+ "The audio contains spoken digits or words. "
254
+ "Transcribe exactly what is spoken, digits only, "
255
+ "separated by spaces. Reply with only the transcription."
256
+ ),
257
+ },
258
+ {
259
+ "type": "image_url",
260
+ "image_url": {"url": f"data:audio/mp3;base64,{audio_b64}"},
261
+ },
262
+ ],
263
+ }
264
+ ],
265
+ "max_tokens": 50,
266
+ "temperature": 0,
267
+ }
268
+
269
+ async with httpx.AsyncClient(timeout=30) as client:
270
+ resp = await client.post(
271
+ f"{self._config.captcha_base_url}/chat/completions",
272
+ headers={"Authorization": f"Bearer {self._config.captcha_api_key}"},
273
+ json=payload,
274
+ )
275
+ if resp.status_code != 200:
276
+ raise RuntimeError(f"Transcription API error {resp.status_code}: {resp.text[:200]}")
277
+ data = resp.json()
278
+ return data["choices"][0]["message"]["content"].strip()
src/services/recaptcha_v3.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """reCAPTCHA v3 solver using Playwright browser automation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from typing import Any
8
+
9
+ from playwright.async_api import Browser, Playwright, async_playwright
10
+
11
+ from ..core.config import Config
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+ # JS executed inside the browser to obtain a reCAPTCHA v3 token.
16
+ # Handles both standard and enterprise reCAPTCHA libraries.
17
+ _EXECUTE_JS = """
18
+ ([key, action]) => new Promise((resolve, reject) => {
19
+ const gr = window.grecaptcha?.enterprise || window.grecaptcha;
20
+ if (gr && typeof gr.execute === 'function') {
21
+ gr.ready(() => {
22
+ gr.execute(key, {action}).then(resolve).catch(reject);
23
+ });
24
+ return;
25
+ }
26
+ // grecaptcha not loaded yet — inject the script ourselves
27
+ const script = document.createElement('script');
28
+ script.src = 'https://www.google.com/recaptcha/api.js?render=' + key;
29
+ script.onerror = () => reject(new Error('Failed to load reCAPTCHA script'));
30
+ script.onload = () => {
31
+ const g = window.grecaptcha;
32
+ if (!g) { reject(new Error('grecaptcha still undefined after script load')); return; }
33
+ g.ready(() => {
34
+ g.execute(key, {action}).then(resolve).catch(reject);
35
+ });
36
+ };
37
+ document.head.appendChild(script);
38
+ })
39
+ """
40
+
41
+ # Basic anti-detection init script
42
+ _STEALTH_JS = """
43
+ Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
44
+ Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
45
+ Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
46
+ window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
47
+ """
48
+
49
+
50
+ class RecaptchaV3Solver:
51
+ """Solves RecaptchaV3TaskProxyless tasks via headless Chromium."""
52
+
53
+ def __init__(self, config: Config) -> None:
54
+ self._config = config
55
+ self._playwright: Playwright | None = None
56
+ self._browser: Browser | None = None
57
+
58
+ async def start(self) -> None:
59
+ self._playwright = await async_playwright().start()
60
+ self._browser = await self._playwright.chromium.launch(
61
+ headless=self._config.browser_headless,
62
+ args=[
63
+ "--disable-blink-features=AutomationControlled",
64
+ "--no-sandbox",
65
+ "--disable-dev-shm-usage",
66
+ "--disable-gpu",
67
+ ],
68
+ )
69
+ log.info(
70
+ "Playwright browser started (headless=%s)", self._config.browser_headless
71
+ )
72
+
73
+ async def stop(self) -> None:
74
+ if self._browser:
75
+ await self._browser.close()
76
+ if self._playwright:
77
+ await self._playwright.stop()
78
+ log.info("Playwright browser stopped")
79
+
80
+ async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
81
+ website_url = params["websiteURL"]
82
+ website_key = params["websiteKey"]
83
+ page_action = params.get("pageAction", "verify")
84
+
85
+ last_error: Exception | None = None
86
+ for attempt in range(self._config.captcha_retries):
87
+ try:
88
+ token = await self._solve_once(
89
+ website_url, website_key, page_action
90
+ )
91
+ return {"gRecaptchaResponse": token}
92
+ except Exception as exc:
93
+ last_error = exc
94
+ log.warning(
95
+ "Attempt %d/%d failed for %s: %s",
96
+ attempt + 1,
97
+ self._config.captcha_retries,
98
+ website_url,
99
+ exc,
100
+ )
101
+ if attempt < self._config.captcha_retries - 1:
102
+ await asyncio.sleep(2)
103
+
104
+ raise RuntimeError(
105
+ f"Failed after {self._config.captcha_retries} attempts: {last_error}"
106
+ )
107
+
108
+ async def _solve_once(
109
+ self, website_url: str, website_key: str, page_action: str
110
+ ) -> str:
111
+ assert self._browser is not None
112
+
113
+ context = await self._browser.new_context(
114
+ user_agent=(
115
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
116
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
117
+ "Chrome/131.0.0.0 Safari/537.36"
118
+ ),
119
+ viewport={"width": 1920, "height": 1080},
120
+ locale="en-US",
121
+ )
122
+
123
+ page = await context.new_page()
124
+ await page.add_init_script(_STEALTH_JS)
125
+
126
+ try:
127
+ timeout_ms = self._config.browser_timeout * 1000
128
+ await page.goto(
129
+ website_url, wait_until="networkidle", timeout=timeout_ms
130
+ )
131
+
132
+ # Simulate minimal human-like behaviour to improve score
133
+ await page.mouse.move(400, 300)
134
+ await asyncio.sleep(1)
135
+ await page.mouse.move(600, 400)
136
+ await asyncio.sleep(0.5)
137
+
138
+ # Wait for reCAPTCHA to become available (may already be on page)
139
+ try:
140
+ await page.wait_for_function(
141
+ "(typeof grecaptcha !== 'undefined' && typeof grecaptcha.execute === 'function') "
142
+ "|| (typeof grecaptcha !== 'undefined' && typeof grecaptcha?.enterprise?.execute === 'function')",
143
+ timeout=10_000,
144
+ )
145
+ except Exception:
146
+ log.info(
147
+ "grecaptcha not detected on page, will attempt script injection"
148
+ )
149
+
150
+ token = await page.evaluate(_EXECUTE_JS, [website_key, page_action])
151
+
152
+ if not isinstance(token, str) or len(token) < 20:
153
+ raise RuntimeError(f"Invalid token received: {token!r}")
154
+
155
+ log.info(
156
+ "Got reCAPTCHA token for %s (len=%d)", website_url, len(token)
157
+ )
158
+ return token
159
+ finally:
160
+ await context.close()
src/services/recognition.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Image-based captcha recognition using OpenAI-compatible vision models.
2
+
3
+ Inspired by Argus (https://github.com/AmethystDev-Labs/Argus).
4
+ Sends captcha images to a multimodal LLM for analysis.
5
+ Images are resized to 1440x900 for consistent coordinate space.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import base64
11
+ import io
12
+ import json
13
+ import logging
14
+ import re
15
+ from typing import Any
16
+
17
+ from openai import AsyncOpenAI
18
+ from PIL import Image
19
+
20
+ from ..core.config import Config
21
+
22
+ log = logging.getLogger(__name__)
23
+
24
+ SYSTEM_PROMPT = """\
25
+ You are a Computer Vision Data Annotation Assistant.
26
+ Your job is to provide precise coordinates for objects in CAPTCHA images.
27
+
28
+ Input Image Specifications:
29
+ - Dimensions: 1440x900 pixels.
30
+ - Coordinate System: Origin (0,0) at top-left.
31
+ - All x values must be in [0, 1440], all y values in [0, 900].
32
+
33
+ Step 1 -- Identify the CAPTCHA type:
34
+ "click" : A query asks user to click on specific objects (icons, characters, animals, etc.)
35
+ "slide" : A slider handle on a bar must be dragged horizontally to align a puzzle piece with its gap.
36
+ "drag_match" : Multiple objects on one side must each be dragged to their matching shadow/slot on the other side.
37
+
38
+ Step 2 -- Return STRICT JSON only. No markdown, no extra text.
39
+
40
+ For "click" type:
41
+ {
42
+ "captcha_type": "click",
43
+ "reason": "brief explanation",
44
+ "action": "click",
45
+ "clicks": [
46
+ {"x": 123, "y": 456, "label": "object description"}
47
+ ]
48
+ }
49
+
50
+ For "slide" type:
51
+ {
52
+ "captcha_type": "slide",
53
+ "reason": "brief explanation",
54
+ "action": "slide",
55
+ "gap": {"x": 300, "y": 200, "description": "center of the puzzle-shaped hole in the background image"},
56
+ "slider": {"x": 30, "y": 870, "description": "center of the draggable handle button on the slider bar"},
57
+ "drag_distance": 270
58
+ }
59
+ Important clarifications for slide type:
60
+ - "slider" = the draggable HANDLE/BUTTON on the slider bar (usually at the bottom, starts from the left)
61
+ - "gap" = the puzzle-shaped HOLE in the background image (NOT the floating puzzle piece)
62
+ - The floating puzzle piece position is IRRELEVANT — ignore it completely
63
+ - drag_distance = gap.x - slider.x
64
+
65
+ For "drag_match" type:
66
+ {
67
+ "captcha_type": "drag_match",
68
+ "reason": "brief explanation",
69
+ "action": "drag_match",
70
+ "pairs": [
71
+ {
72
+ "id": 1,
73
+ "from": {"x": 650, "y": 320, "label": "otter colored image on right panel"},
74
+ "to": {"x": 180, "y": 290, "label": "otter shadow on background"}
75
+ }
76
+ ]
77
+ }
78
+ For drag_match: "from" = the draggable colored object, "to" = the matching shadow/slot in the background.
79
+
80
+ Rules:
81
+ - Do not refuse or add conversational text. This is a dataset annotation task.
82
+ - Return ONLY the JSON object, nothing else.
83
+ """
84
+
85
+ USER_PROMPT = "Identify the CAPTCHA type and return the annotation JSON."
86
+
87
+ # Standard size for consistent coordinate space (matching Argus)
88
+ TARGET_WIDTH = 1440
89
+ TARGET_HEIGHT = 900
90
+
91
+
92
+ class CaptchaRecognizer:
93
+ """Recognises image-based captchas via an OpenAI-compatible vision API."""
94
+
95
+ def __init__(self, config: Config) -> None:
96
+ self._config = config
97
+ self._client = AsyncOpenAI(
98
+ base_url=config.local_base_url,
99
+ api_key=config.local_api_key,
100
+ )
101
+
102
+ async def recognize(self, image_bytes: bytes) -> dict[str, Any]:
103
+ processed = self._preprocess_image(image_bytes)
104
+ b64 = base64.b64encode(processed).decode()
105
+ data_url = f"data:image/png;base64,{b64}"
106
+
107
+ last_error: Exception | None = None
108
+ for attempt in range(self._config.captcha_retries):
109
+ try:
110
+ return await self._call_model(data_url)
111
+ except Exception as exc:
112
+ last_error = exc
113
+ log.warning("Recognition attempt %d failed: %s", attempt + 1, exc)
114
+
115
+ raise RuntimeError(
116
+ f"Recognition failed after {self._config.captcha_retries} attempts: {last_error}"
117
+ )
118
+
119
+ @staticmethod
120
+ def _preprocess_image(image_bytes: bytes) -> bytes:
121
+ """Resize image to 1440x900 for consistent coordinate space."""
122
+ img = Image.open(io.BytesIO(image_bytes))
123
+ img = img.resize((TARGET_WIDTH, TARGET_HEIGHT), Image.Resampling.LANCZOS)
124
+ buf = io.BytesIO()
125
+ img.save(buf, format="PNG")
126
+ return buf.getvalue()
127
+
128
+ async def _call_model(self, data_url: str) -> dict[str, Any]:
129
+ response = await self._client.chat.completions.create(
130
+ model=self._config.captcha_multimodal_model,
131
+ temperature=0.05,
132
+ max_tokens=1024,
133
+ messages=[
134
+ {"role": "system", "content": SYSTEM_PROMPT},
135
+ {
136
+ "role": "user",
137
+ "content": [
138
+ {
139
+ "type": "image_url",
140
+ "image_url": {"url": data_url, "detail": "high"},
141
+ },
142
+ {
143
+ "type": "text",
144
+ "text": USER_PROMPT,
145
+ },
146
+ ],
147
+ },
148
+ ],
149
+ )
150
+
151
+ raw = response.choices[0].message.content or ""
152
+ return self._parse_json(raw)
153
+
154
+ @staticmethod
155
+ def _parse_json(text: str) -> dict[str, Any]:
156
+ # Strip markdown fences if present
157
+ match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
158
+ cleaned = match.group(1) if match else text.strip()
159
+ data = json.loads(cleaned)
160
+ if not isinstance(data, dict):
161
+ raise ValueError(f"Expected JSON object, got {type(data).__name__}")
162
+ return data
163
+
164
+ async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
165
+ """Solver interface for TaskManager integration."""
166
+ body = params.get("body", "")
167
+ if not body:
168
+ raise ValueError("Missing 'body' field (base64 image)")
169
+ image_bytes = base64.b64decode(body)
170
+ result = await self.recognize(image_bytes)
171
+ return {"text": json.dumps(result)}
src/services/task_manager.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """In-memory async task manager for captcha solving tasks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import uuid
8
+ from dataclasses import dataclass, field
9
+ from datetime import datetime, timedelta
10
+ from enum import Enum
11
+ from typing import Any, Protocol
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+
16
+ class TaskStatus(str, Enum):
17
+ PROCESSING = "processing"
18
+ READY = "ready"
19
+ FAILED = "failed"
20
+
21
+
22
+ @dataclass
23
+ class Task:
24
+ id: str
25
+ type: str
26
+ params: dict[str, Any]
27
+ status: TaskStatus = TaskStatus.PROCESSING
28
+ solution: dict[str, Any] | None = None
29
+ error_code: str | None = None
30
+ error_description: str | None = None
31
+ created_at: datetime = field(default_factory=datetime.utcnow)
32
+
33
+
34
+ class Solver(Protocol):
35
+ async def solve(self, params: dict[str, Any]) -> dict[str, Any]: ...
36
+
37
+
38
+ class TaskManager:
39
+ TASK_TTL = timedelta(minutes=10)
40
+
41
+ def __init__(self) -> None:
42
+ self._tasks: dict[str, Task] = {}
43
+ self._solvers: dict[str, Solver] = {}
44
+
45
+ def register_solver(self, task_type: str, solver: Solver) -> None:
46
+ self._solvers[task_type] = solver
47
+
48
+ def create_task(self, task_type: str, params: dict[str, Any]) -> str:
49
+ self._cleanup_expired()
50
+ task_id = str(uuid.uuid4())
51
+ task = Task(id=task_id, type=task_type, params=params)
52
+ self._tasks[task_id] = task
53
+ asyncio.create_task(self._process_task(task))
54
+ return task_id
55
+
56
+ def get_task(self, task_id: str) -> Task | None:
57
+ return self._tasks.get(task_id)
58
+
59
+ def supported_types(self) -> list[str]:
60
+ return list(self._solvers.keys())
61
+
62
+ async def _process_task(self, task: Task) -> None:
63
+ solver = self._solvers.get(task.type)
64
+ if not solver:
65
+ task.status = TaskStatus.FAILED
66
+ task.error_code = "ERROR_TASK_NOT_SUPPORTED"
67
+ task.error_description = f"Task type '{task.type}' is not supported"
68
+ return
69
+
70
+ try:
71
+ solution = await solver.solve(task.params)
72
+ task.solution = solution
73
+ task.status = TaskStatus.READY
74
+ log.info("Task %s completed successfully", task.id)
75
+ except Exception as exc:
76
+ task.status = TaskStatus.FAILED
77
+ task.error_code = "ERROR_CAPTCHA_UNSOLVABLE"
78
+ task.error_description = str(exc)
79
+ log.error("Task %s failed: %s", task.id, exc)
80
+
81
+ def _cleanup_expired(self) -> None:
82
+ now = datetime.utcnow()
83
+ expired = [
84
+ tid
85
+ for tid, t in self._tasks.items()
86
+ if now - t.created_at > self.TASK_TTL
87
+ ]
88
+ for tid in expired:
89
+ del self._tasks[tid]
90
+
91
+
92
+ task_manager = TaskManager()
src/services/turnstile.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cloudflare Turnstile solver using Playwright browser automation.
2
+
3
+ Supports TurnstileTaskProxyless and TurnstileTaskProxylessM1 task types.
4
+ Visits the target page, interacts with the Turnstile widget, and extracts the token.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import logging
11
+ from typing import Any
12
+
13
+ from playwright.async_api import Browser, Playwright, async_playwright
14
+
15
+ from ..core.config import Config
16
+
17
+ log = logging.getLogger(__name__)
18
+
19
+ _STEALTH_JS = """
20
+ Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
21
+ Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
22
+ Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
23
+ window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
24
+ """
25
+
26
+ _EXTRACT_TURNSTILE_TOKEN_JS = """
27
+ () => {
28
+ // Check for Turnstile response input
29
+ const input = document.querySelector('[name="cf-turnstile-response"]')
30
+ || document.querySelector('input[name*="turnstile"]');
31
+ if (input && input.value && input.value.length > 20) {
32
+ return input.value;
33
+ }
34
+ // Try the turnstile API
35
+ if (window.turnstile && typeof window.turnstile.getResponse === 'function') {
36
+ const resp = window.turnstile.getResponse();
37
+ if (resp && resp.length > 20) return resp;
38
+ }
39
+ return null;
40
+ }
41
+ """
42
+
43
+
44
+ class TurnstileSolver:
45
+ """Solves Cloudflare Turnstile tasks via headless Chromium."""
46
+
47
+ def __init__(self, config: Config, browser: Browser | None = None) -> None:
48
+ self._config = config
49
+ self._playwright: Playwright | None = None
50
+ self._browser: Browser | None = browser
51
+ self._owns_browser = browser is None
52
+
53
+ async def start(self) -> None:
54
+ if self._browser is not None:
55
+ return
56
+ self._playwright = await async_playwright().start()
57
+ self._browser = await self._playwright.chromium.launch(
58
+ headless=self._config.browser_headless,
59
+ args=[
60
+ "--disable-blink-features=AutomationControlled",
61
+ "--no-sandbox",
62
+ "--disable-dev-shm-usage",
63
+ "--disable-gpu",
64
+ ],
65
+ )
66
+ log.info("TurnstileSolver browser started")
67
+
68
+ async def stop(self) -> None:
69
+ if self._owns_browser:
70
+ if self._browser:
71
+ await self._browser.close()
72
+ if self._playwright:
73
+ await self._playwright.stop()
74
+ log.info("TurnstileSolver stopped")
75
+
76
+ async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
77
+ website_url = params["websiteURL"]
78
+ website_key = params["websiteKey"]
79
+
80
+ last_error: Exception | None = None
81
+ for attempt in range(self._config.captcha_retries):
82
+ try:
83
+ token = await self._solve_once(website_url, website_key)
84
+ return {"token": token}
85
+ except Exception as exc:
86
+ last_error = exc
87
+ log.warning(
88
+ "Turnstile attempt %d/%d failed: %s",
89
+ attempt + 1,
90
+ self._config.captcha_retries,
91
+ exc,
92
+ )
93
+ if attempt < self._config.captcha_retries - 1:
94
+ await asyncio.sleep(2)
95
+
96
+ raise RuntimeError(
97
+ f"Turnstile failed after {self._config.captcha_retries} attempts: {last_error}"
98
+ )
99
+
100
+ async def _solve_once(self, website_url: str, website_key: str) -> str:
101
+ assert self._browser is not None
102
+
103
+ context = await self._browser.new_context(
104
+ user_agent=(
105
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
106
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
107
+ "Chrome/131.0.0.0 Safari/537.36"
108
+ ),
109
+ viewport={"width": 1920, "height": 1080},
110
+ locale="en-US",
111
+ )
112
+ page = await context.new_page()
113
+ await page.add_init_script(_STEALTH_JS)
114
+
115
+ try:
116
+ timeout_ms = self._config.browser_timeout * 1000
117
+ await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms)
118
+
119
+ await page.mouse.move(400, 300)
120
+ await asyncio.sleep(1)
121
+
122
+ # Try clicking the Turnstile checkbox
123
+ try:
124
+ iframe_element = page.frame_locator(
125
+ 'iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"]'
126
+ )
127
+ checkbox = iframe_element.locator(
128
+ 'input[type="checkbox"], .ctp-checkbox-label, label'
129
+ )
130
+ await checkbox.click(timeout=8_000)
131
+ except Exception:
132
+ log.info("No Turnstile checkbox found, waiting for auto-solve")
133
+
134
+ # Wait for the token to appear
135
+ for _ in range(15):
136
+ await asyncio.sleep(2)
137
+ token = await page.evaluate(_EXTRACT_TURNSTILE_TOKEN_JS)
138
+ if token:
139
+ log.info("Got Turnstile token (len=%d)", len(token))
140
+ return token
141
+
142
+ raise RuntimeError("Turnstile token not obtained within timeout")
143
+ finally:
144
+ await context.close()