diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..2b0971d1ea239e025c312a8f6cd653d0d225cf65 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +docs/assets/ohmycaptcha-diagram.png filter=lfs diff=lfs merge=lfs -text +docs/assets/ohmycaptcha-hero.png filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000000000000000000000000000000000000..726b5cbfb2fead0ac617a0176b6309a7dea48379 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,51 @@ +name: Deploy Docs + +on: + push: + branches: [main] + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install docs dependencies + run: | + python -m pip install --upgrade pip + pip install mkdocs mkdocs-material mkdocs-redirects mkdocs-static-i18n pymdown-extensions + + - name: Build docs + run: mkdocs build --strict + + - name: Upload Pages artifact + uses: actions/upload-pages-artifact@v3 + with: + path: site + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..16444913ec4dd428b5358246bd9b67d372cab19c --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.venv/ +__pycache__/ +.pytest_cache/ +site/ +*.pyc +cache/ +CLAUDE.md diff --git 
a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..d29a2bb44d852c6977129aaedc91425fb29f1fc4 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,67 @@ +# Contributing + +Thanks for contributing to OhMyCaptcha. + +## Development setup + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +playwright install --with-deps chromium +``` + +## Run the project locally + +```bash +python main.py +``` + +## Validate changes + +Run tests: + +```bash +pytest tests/ +``` + +Run type checks: + +```bash +npx pyright +``` + +Build docs: + +```bash +mkdocs build --strict +``` + +## Contribution guidelines + +- Keep changes aligned with the implemented task types and documented behavior. +- Do not add secret values, personal endpoints, or account-specific configuration to the repository. +- Prefer small, reviewable pull requests. +- Update docs when behavior changes. +- Keep examples copy-pasteable and placeholder-based. +- Avoid overstating compatibility or production guarantees. + +## Pull requests + +A good pull request usually includes: + +- a concise summary of the change +- why the change is needed +- tests or validation notes +- documentation updates if relevant + +## Documentation style + +This repository aims for documentation that is: + +- clear +- practical +- implementation-aware +- safe for public distribution + +If you add deployment examples, use placeholders instead of real secrets or private URLs. 
diff --git a/DISCLAIMER.md b/DISCLAIMER.md new file mode 100644 index 0000000000000000000000000000000000000000..c68c5da7644d3ca78939d4fbdc358bc90e60fd60 --- /dev/null +++ b/DISCLAIMER.md @@ -0,0 +1,31 @@ +# Disclaimer + +**OhMyCaptcha is intended for legitimate research, security testing, and educational purposes only.** + +## Intended use + +This software is designed to help developers: + +- Test and validate CAPTCHA integration in their own applications +- Research automated browser behavior and bot detection mechanisms +- Learn about multimodal AI applied to image recognition tasks +- Build compatible tooling for services that use YesCaptcha-style APIs under their own authorization + +## Prohibited use + +You **must not** use OhMyCaptcha to: + +- Bypass CAPTCHAs on websites or services **without explicit written permission** from the site or service owner +- Conduct unauthorized automated access to third-party systems +- Circumvent security controls intended to prevent abuse or fraud +- Violate any website's Terms of Service + +## Legal responsibility + +- Unauthorized automated access to third-party services may violate their Terms of Service and could be illegal under applicable laws, including but not limited to the Computer Fraud and Abuse Act (CFAA), the EU General Data Protection Regulation (GDPR), or equivalent legislation in your jurisdiction. +- **The authors and contributors of OhMyCaptcha accept no liability** for any misuse, damages, legal consequences, or losses arising from the use or misuse of this software. +- By deploying or using this software, **you accept full responsibility** for ensuring that your usage complies with all applicable laws, regulations, and third-party terms of service. + +## No warranty + +This software is provided **"as is"**, without warranty of any kind. See [LICENSE](LICENSE) for the full MIT license terms. 
diff --git a/DISCLAIMER.zh-CN.md b/DISCLAIMER.zh-CN.md new file mode 100644 index 0000000000000000000000000000000000000000..1b4d22523f4aeeaaadd83a0d59146f7aaa10e913 --- /dev/null +++ b/DISCLAIMER.zh-CN.md @@ -0,0 +1,31 @@ +# 免责声明 + +**OhMyCaptcha 仅供合法的安全研究、渗透测试和技术学习使用。** + +## 合法用途 + +本软件旨在帮助开发者: + +- 在自己的应用中测试和验证 CAPTCHA 集成 +- 研究自动化浏览器行为和机器人检测机制 +- 学习多模态 AI 在图像识别任务中的应用 +- 在获得授权的前提下,为使用 YesCaptcha 风格 API 的服务构建兼容工具 + +## 禁止用途 + +你**不得**将 OhMyCaptcha 用于: + +- 在**未获得网站或服务所有者明确书面授权**的情况下,绕过其 CAPTCHA +- 对第三方系统进行未经授权的自动化访问 +- 规避旨在防止滥用或欺诈的安全控制措施 +- 违反任何网站的服务条款 + +## 法律责任 + +- 未经授权地对第三方服务进行自动化访问,可能违反其服务条款,并可能在相关法律管辖区(包括但不限于《计算机欺诈与滥用法》、欧盟《通用数据保护条例》(GDPR)或当地等效法规)下构成违法行为。 +- **OhMyCaptcha 的作者和贡献者不承担任何因使用或滥用本软件而导致的损失、法律后果或责任。** +- 通过部署或使用本软件,**你完全同意并承担**确保其使用方式符合所有适用法律法规及第三方服务条款的责任。 + +## 无担保声明 + +本软件按**"原样"**提供,不附带任何形式的担保。完整 MIT 许可证条款请参阅 [LICENSE](LICENSE)。 diff --git a/Dockerfile.render b/Dockerfile.render new file mode 100644 index 0000000000000000000000000000000000000000..97a7ab8ce46b98bd44a0dc749a4a496cd12760eb --- /dev/null +++ b/Dockerfile.render @@ -0,0 +1,13 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir --root-user-action=ignore -r requirements.txt +RUN playwright install --with-deps chromium + +COPY . . 
+ +EXPOSE 8000 + +CMD ["python", "main.py"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..2e7d37617453bbf37faf5a1ee0496c3383989041 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 shenhao-stu + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 66d44aa13dc05cc46d27f7f66637d9d00456738b..38b3f5e8b396db6e8385491c40e8f472cd0a20da 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,304 @@ +

+ OhMyCaptcha +
+ Version + License + Task Types + Runtime + Deploy + Docs +

+ +

🧩 OhMyCaptcha

+ +

+ Self-hostable YesCaptcha-style captcha solver for flow2api and similar integrations +
+ 19 task types · reCAPTCHA v2/v3 · hCaptcha · Cloudflare Turnstile · Image Classification +

+ +

+ Quick Start • + Architecture • + Task Types • + Deployment • + Development +

+ +

+ 中文说明 • + Documentation • + Render Guide • + Hugging Face Guide +

+ +

+ OhMyCaptcha +

+ +--- + +## ✨ What Is This? + +**OhMyCaptcha** is a self-hosted captcha-solving service exposing a **YesCaptcha-style async API** with **19 supported task types**. Designed as a third-party captcha solver for **flow2api** and systems that expect `createTask` / `getTaskResult` semantics. + +| Capability | Details | +|-----------|---------| +| **Browser automation** | Playwright + Chromium for reCAPTCHA v2/v3, hCaptcha, Cloudflare Turnstile | +| **Image recognition** | Local multimodal model (Qwen3.5-2B via SGLang) for image captcha analysis | +| **Image classification** | Local vision model for HCaptcha, reCAPTCHA v2, FunCaptcha, AWS grid classification | +| **API compatibility** | Full YesCaptcha `createTask`/`getTaskResult`/`getBalance` protocol | +| **Deployment** | Local, Render, Hugging Face Spaces with Docker support | + --- -title: Flow -emoji: 🦀 -colorFrom: gray -colorTo: gray -sdk: docker -pinned: false -license: mit + +## 📦 Quick Start + +```bash +python -m venv .venv && source .venv/bin/activate +pip install -r requirements.txt +playwright install --with-deps chromium + +# Local model (self-hosted via SGLang) +export LOCAL_BASE_URL="http://localhost:30000/v1" +export LOCAL_MODEL="Qwen/Qwen3.5-2B" + +# Cloud model (remote API) +export CLOUD_BASE_URL="https://your-openai-compatible-endpoint/v1" +export CLOUD_API_KEY="your-api-key" +export CLOUD_MODEL="gpt-5.4" + +export CLIENT_KEY="your-client-key" +python main.py +``` + +Verify with: + +```bash +curl http://localhost:8000/api/v1/health +``` + --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +## 🏗 Architecture + +

+ OhMyCaptcha architecture +

+ +**Core components:** + +- **FastAPI** — HTTP API with YesCaptcha protocol +- **TaskManager** — async in-memory task queue with 10-min TTL +- **RecaptchaV3Solver** — Playwright-based reCAPTCHA v3/Enterprise token generation +- **RecaptchaV2Solver** — Playwright-based reCAPTCHA v2 checkbox solving +- **HCaptchaSolver** — Playwright-based hCaptcha solving +- **TurnstileSolver** — Playwright-based Cloudflare Turnstile solving +- **CaptchaRecognizer** — Argus-inspired multimodal image analysis +- **ClassificationSolver** — Vision model-based image classification + +--- + +## 🧠 Task Types + +### Browser-based solving (12 types) + +| Category | Task Types | Solution Field | +|----------|-----------|----------------| +| reCAPTCHA v3 | `RecaptchaV3TaskProxyless`, `RecaptchaV3TaskProxylessM1`, `RecaptchaV3TaskProxylessM1S7`, `RecaptchaV3TaskProxylessM1S9` | `gRecaptchaResponse` | +| reCAPTCHA v3 Enterprise | `RecaptchaV3EnterpriseTask`, `RecaptchaV3EnterpriseTaskM1` | `gRecaptchaResponse` | +| reCAPTCHA v2 | `NoCaptchaTaskProxyless`, `RecaptchaV2TaskProxyless`, `RecaptchaV2EnterpriseTaskProxyless` | `gRecaptchaResponse` | +| hCaptcha | `HCaptchaTaskProxyless` | `gRecaptchaResponse` | +| Cloudflare Turnstile | `TurnstileTaskProxyless`, `TurnstileTaskProxylessM1` | `token` | + +### Image recognition (3 types) + +| Task Type | Solution Field | +|-----------|----------------| +| `ImageToTextTask` | `text` (structured JSON) | +| `ImageToTextTaskMuggle` | `text` | +| `ImageToTextTaskM1` | `text` | + +### Image classification (4 types) + +| Task Type | Solution Field | +|-----------|----------------| +| `HCaptchaClassification` | `objects` / `answer` | +| `ReCaptchaV2Classification` | `objects` | +| `FunCaptchaClassification` | `objects` | +| `AwsClassification` | `objects` | + +--- + +## 🔌 API Surface + +| Endpoint | Purpose | +|----------|---------| +| `POST /createTask` | Create an async captcha task | +| `POST /getTaskResult` | Poll task execution result | +| `POST 
/getBalance` | Return compatibility balance | +| `GET /api/v1/health` | Health and service status | + +### Example: reCAPTCHA v3 + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": "6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } + }' +``` + +### Example: hCaptcha + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "HCaptchaTaskProxyless", + "websiteURL": "https://example.com", + "websiteKey": "hcaptcha-site-key" + } + }' +``` + +### Example: Cloudflare Turnstile + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "TurnstileTaskProxyless", + "websiteURL": "https://example.com", + "websiteKey": "turnstile-site-key" + } + }' +``` + +### Example: Image classification + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "ReCaptchaV2Classification", + "image": "base64-encoded-image", + "question": "Select all images with traffic lights" + } + }' +``` + +### Poll result + +```bash +curl -X POST http://localhost:8000/getTaskResult \ + -H "Content-Type: application/json" \ + -d '{"clientKey": "your-client-key", "taskId": "uuid-from-createTask"}' +``` + +--- + +## ⚙️ Configuration + +### Model backends + +OhMyCaptcha uses two model backends — a **local model** for image tasks and a **cloud model** for complex reasoning: + +| Variable | Description | Default | +|----------|-------------|---------| +| `LOCAL_BASE_URL` | Local inference server (SGLang/vLLM) | `http://localhost:30000/v1` | +| `LOCAL_API_KEY` | Local server API key | 
`EMPTY` | +| `LOCAL_MODEL` | Local model name | `Qwen/Qwen3.5-2B` | +| `CLOUD_BASE_URL` | Cloud API base URL | External endpoint | +| `CLOUD_API_KEY` | Cloud API key | unset | +| `CLOUD_MODEL` | Cloud model name | `gpt-5.4` | + +### General + +| Variable | Description | Default | +|----------|-------------|---------| +| `CLIENT_KEY` | Client authentication key | unset | +| `CAPTCHA_RETRIES` | Retry count | `3` | +| `CAPTCHA_TIMEOUT` | Model timeout (seconds) | `30` | +| `BROWSER_HEADLESS` | Headless Chromium | `true` | +| `BROWSER_TIMEOUT` | Page load timeout (seconds) | `30` | +| `SERVER_HOST` | Bind host | `0.0.0.0` | +| `SERVER_PORT` | Bind port | `8000` | + +> Legacy vars (`CAPTCHA_BASE_URL`, `CAPTCHA_API_KEY`, `CAPTCHA_MODEL`, `CAPTCHA_MULTIMODAL_MODEL`) are supported as fallbacks. + +--- + +## 🚀 Deployment + +- [Local model (SGLang)](https://shenhao-stu.github.io/ohmycaptcha/deployment/local-model/) — deploy Qwen3.5-2B locally +- [Render deployment](https://shenhao-stu.github.io/ohmycaptcha/deployment/render/) +- [Hugging Face Spaces deployment](https://shenhao-stu.github.io/ohmycaptcha/deployment/huggingface/) +- [Full documentation](https://shenhao-stu.github.io/ohmycaptcha/) + +--- + +## ✅ Test Target + +This service is validated against the public reCAPTCHA v3 score detector: + +- URL: `https://antcpt.com/score_detector/` +- Site key: `6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf` + +--- + +## ⚠️ Limitations + +- Tasks are stored **in memory** with a 10-minute TTL +- `minScore` is accepted for compatibility but not enforced +- Browser-based solving depends on environment, IP reputation, and target-site behavior +- Image classification quality depends on the vision model used +- Not all commercial captcha-service features are replicated + +--- + +## 📢 Disclaimer + +> **This project is intended for legitimate research, security testing, and educational purposes only.** + +- OhMyCaptcha is a self-hostable tool. 
You are solely responsible for how you deploy and use it. +- CAPTCHA systems exist to protect services from abuse. **Do not use this tool to bypass CAPTCHAs on websites or services without explicit permission from the site owner.** +- Unauthorized automated access to third-party services may violate their Terms of Service, and may be illegal under applicable laws (e.g., the Computer Fraud and Abuse Act, GDPR, or equivalent legislation in your jurisdiction). +- The authors and contributors of this project **accept no liability** for any misuse, legal consequences, or damages arising from the use of this software. +- By using this software, you agree that you are solely responsible for ensuring your usage complies with all relevant laws and terms of service. + +--- + +## 🔧 Development + +```bash +pytest tests/ +npx pyright +python -m mkdocs build --strict +``` + +--- + +## Star History + +[![Star History Chart](https://api.star-history.com/svg?repos=shenhao-stu/ohmycaptcha&type=Date)](https://www.star-history.com/#shenhao-stu/ohmycaptcha&Date) + +--- + +## 📄 License + +[MIT](LICENSE) — use freely, modify openly, deploy carefully. + +See [DISCLAIMER.md](DISCLAIMER.md) for full terms of use and liability limitations. diff --git a/README.zh-CN.md b/README.zh-CN.md new file mode 100644 index 0000000000000000000000000000000000000000..aaa7c03e08b4fe9f08583fcade6bda6619d4907e --- /dev/null +++ b/README.zh-CN.md @@ -0,0 +1,304 @@ +

+ OhMyCaptcha +
+ Version + License + Task Types + Runtime + Deploy + Docs +

+ +

🧩 OhMyCaptcha

+ +

+ 面向 flow2api 与类似集成场景的可自托管 YesCaptcha 风格验证码服务 +
+ 19 种任务类型 · reCAPTCHA v2/v3 · hCaptcha · Cloudflare Turnstile · 图像分类 +

+ +

+ 快速开始 • + 架构 • + 任务类型 • + 部署 • + 开发 +

+ +

+ English README • + 在线文档 • + Render 部署指南 • + Hugging Face Spaces 指南 +

+ +

+ OhMyCaptcha +

+ +--- + +## ✨ 这是什么? + +**OhMyCaptcha** 是一个可直接部署的自托管验证码解决服务,提供 **YesCaptcha 风格异步 API**,支持 **19 种任务类型**。作为第三方打码工具,专为 **flow2api** 及依赖 `createTask` / `getTaskResult` 语义的系统设计。 + +| 能力 | 详情 | +|------|------| +| **浏览器自动化** | Playwright + Chromium 实现 reCAPTCHA v2/v3、hCaptcha、Cloudflare Turnstile 自动求解 | +| **图片识别** | 本地多模态模型(通过 SGLang 部署 Qwen3.5-2B)进行图片验证码分析 | +| **图像分类** | 本地视觉模型进行 HCaptcha、reCAPTCHA v2、FunCaptcha、AWS 网格分类 | +| **API 兼容** | 完整的 YesCaptcha `createTask`/`getTaskResult`/`getBalance` 协议 | +| **部署方式** | 支持本地、Render、Hugging Face Spaces 的 Docker 部署 | + +--- + +## 📦 快速开始 + +```bash +python -m venv .venv && source .venv/bin/activate +pip install -r requirements.txt +playwright install --with-deps chromium + +# 本地模型(通过 SGLang 自托管部署) +export LOCAL_BASE_URL="http://localhost:30000/v1" +export LOCAL_MODEL="Qwen/Qwen3.5-2B" + +# 云端模型(远程 API) +export CLOUD_BASE_URL="https://your-openai-compatible-endpoint/v1" +export CLOUD_API_KEY="your-api-key" +export CLOUD_MODEL="gpt-5.4" + +export CLIENT_KEY="your-client-key" +python main.py +``` + +验证服务: + +```bash +curl http://localhost:8000/api/v1/health +``` + +--- + +## 🏗 架构 + +

+ OhMyCaptcha 架构图 +

+ +**核心组件:** + +- **FastAPI** — 实现 YesCaptcha 协议的 HTTP API +- **TaskManager** — 异步内存任务队列,10 分钟 TTL +- **RecaptchaV3Solver** — 基于 Playwright 的 reCAPTCHA v3/Enterprise 令牌生成 +- **RecaptchaV2Solver** — 基于 Playwright 的 reCAPTCHA v2 复选框求解 +- **HCaptchaSolver** — 基于 Playwright 的 hCaptcha 求解 +- **TurnstileSolver** — 基于 Playwright 的 Cloudflare Turnstile 求解 +- **CaptchaRecognizer** — 受 Argus 启发的多模态图片分析 +- **ClassificationSolver** — 基于视觉模型的图像分类 + +--- + +## 🧠 任务类型 + +### 浏览器自动化求解(12 种) + +| 分类 | 任务类型 | 返回字段 | +|------|---------|---------| +| reCAPTCHA v3 | `RecaptchaV3TaskProxyless`, `RecaptchaV3TaskProxylessM1`, `RecaptchaV3TaskProxylessM1S7`, `RecaptchaV3TaskProxylessM1S9` | `gRecaptchaResponse` | +| reCAPTCHA v3 企业版 | `RecaptchaV3EnterpriseTask`, `RecaptchaV3EnterpriseTaskM1` | `gRecaptchaResponse` | +| reCAPTCHA v2 | `NoCaptchaTaskProxyless`, `RecaptchaV2TaskProxyless`, `RecaptchaV2EnterpriseTaskProxyless` | `gRecaptchaResponse` | +| hCaptcha | `HCaptchaTaskProxyless` | `gRecaptchaResponse` | +| Cloudflare Turnstile | `TurnstileTaskProxyless`, `TurnstileTaskProxylessM1` | `token` | + +### 图片识别(3 种) + +| 任务类型 | 返回字段 | +|---------|---------| +| `ImageToTextTask` | `text`(结构化 JSON) | +| `ImageToTextTaskMuggle` | `text` | +| `ImageToTextTaskM1` | `text` | + +### 图像分类(4 种) + +| 任务类型 | 返回字段 | +|---------|---------| +| `HCaptchaClassification` | `objects` / `answer` | +| `ReCaptchaV2Classification` | `objects` | +| `FunCaptchaClassification` | `objects` | +| `AwsClassification` | `objects` | + +--- + +## 🔌 API 接口 + +| 接口 | 作用 | +|------|------| +| `POST /createTask` | 创建异步验证码任务 | +| `POST /getTaskResult` | 轮询任务执行结果 | +| `POST /getBalance` | 返回兼容性余额 | +| `GET /api/v1/health` | 健康状态检查 | + +### 示例:reCAPTCHA v3 + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": 
"6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } + }' +``` + +### 示例:hCaptcha + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "HCaptchaTaskProxyless", + "websiteURL": "https://example.com", + "websiteKey": "hcaptcha-site-key" + } + }' +``` + +### 示例:Cloudflare Turnstile + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "TurnstileTaskProxyless", + "websiteURL": "https://example.com", + "websiteKey": "turnstile-site-key" + } + }' +``` + +### 示例:图像分类 + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "ReCaptchaV2Classification", + "image": "", + "question": "Select all images with traffic lights" + } + }' +``` + +### 轮询结果 + +```bash +curl -X POST http://localhost:8000/getTaskResult \ + -H "Content-Type: application/json" \ + -d '{"clientKey": "your-client-key", "taskId": "uuid-from-createTask"}' +``` + +--- + +## ⚙️ 配置项 + +### 模型后端 + +OhMyCaptcha 使用两种模型后端 —— **本地模型**处理图像任务,**云端模型**处理复杂推理: + +| 变量 | 说明 | 默认值 | +|------|------|--------| +| `LOCAL_BASE_URL` | 本地推理服务地址(SGLang/vLLM) | `http://localhost:30000/v1` | +| `LOCAL_API_KEY` | 本地服务密钥 | `EMPTY` | +| `LOCAL_MODEL` | 本地模型名称 | `Qwen/Qwen3.5-2B` | +| `CLOUD_BASE_URL` | 云端 API 基地址 | 外部端点 | +| `CLOUD_API_KEY` | 云端 API 密钥 | 未设置 | +| `CLOUD_MODEL` | 云端模型名称 | `gpt-5.4` | + +### 通用 + +| 变量 | 说明 | 默认值 | +|------|------|--------| +| `CLIENT_KEY` | 客户端认证密钥 | 未设置 | +| `CAPTCHA_RETRIES` | 重试次数 | `3` | +| `CAPTCHA_TIMEOUT` | 模型请求超时(秒) | `30` | +| `BROWSER_HEADLESS` | 无头浏览器 | `true` | +| `BROWSER_TIMEOUT` | 页面加载超时(秒) | `30` | +| `SERVER_HOST` | 监听地址 | `0.0.0.0` | +| `SERVER_PORT` | 监听端口 | `8000` | + +> 
旧版变量(`CAPTCHA_BASE_URL`、`CAPTCHA_API_KEY`、`CAPTCHA_MODEL`、`CAPTCHA_MULTIMODAL_MODEL`)仍支持作为回退。 + +--- + +## 🚀 部署 + +- [本地模型 (SGLang)](https://shenhao-stu.github.io/ohmycaptcha/zh/deployment/local-model/) — 本地部署 Qwen3.5-2B +- [Render 部署](https://shenhao-stu.github.io/ohmycaptcha/zh/deployment/render/) +- [Hugging Face Spaces 部署](https://shenhao-stu.github.io/ohmycaptcha/zh/deployment/huggingface/) +- [完整文档](https://shenhao-stu.github.io/ohmycaptcha/) + +--- + +## ✅ 测试目标 + +本服务针对以下公开 reCAPTCHA v3 检测目标完成验证: + +- URL:`https://antcpt.com/score_detector/` +- Site key:`6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf` + +--- + +## ⚠️ 限制说明 + +- 任务状态保存在**内存中**,TTL 为 10 分钟 +- `minScore` 为兼容性字段,当前不做分数控制 +- 浏览器自动化的稳定性取决于运行环境、IP 信誉和目标站行为 +- 图像分类质量取决于所使用的视觉模型 +- 并非所有商业打码平台功能均已复现 + +--- + +## 📢 免责声明 + +> **本项目仅供合法的安全研究、渗透测试和技术学习使用。** + +- OhMyCaptcha 是一个可自托管的工具。你对自己的部署方式和使用行为负完全责任。 +- CAPTCHA 系统的存在是为了保护服务免受滥用。**未经目标网站或服务所有者明确授权,请勿使用本工具绕过 CAPTCHA。** +- 未经授权地对第三方服务进行自动化访问,可能违反其服务条款,并可能在相关法律管辖区(如《计算机欺诈与滥用法》、GDPR 或当地等效法规)下构成违法行为。 +- 本项目的作者和贡献者**不承担任何因使用本软件而导致的滥用行为、法律后果或损失的责任**。 +- 使用本软件即表示你同意自行确保其使用方式符合所有相关法律法规及服务条款。 + +--- + +## 🔧 开发 + +```bash +pytest tests/ +npx pyright +python -m mkdocs build --strict +``` + +--- + +## Star History + +[![Star History Chart](https://api.star-history.com/svg?repos=shenhao-stu/ohmycaptcha&type=Date)](https://www.star-history.com/#shenhao-stu/ohmycaptcha&Date) + +--- + +## 📄 License + +[MIT](LICENSE) —— 自由使用,开放修改,谨慎部署。 + +完整使用条款与免责声明请参阅 [DISCLAIMER.zh-CN.md](DISCLAIMER.zh-CN.md)。 diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000000000000000000000000000000000..298170ecaf50ec060fb912c94c594179eb00161c --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,45 @@ +# Security Policy + +## Supported Versions + +OhMyCaptcha is currently maintained from the `main` branch. Security fixes will be applied there first. + +## Reporting a Vulnerability + +Please do **not** open public GitHub issues for sensitive security reports. + +Instead: + +1. 
Prepare a minimal reproduction or impact description. +2. Include the affected version, deployment mode, and whether the issue requires authentication. +3. Send the report privately through GitHub Security Advisories if available for the repository, or contact the maintainer through a private channel. + +## What to include + +Please include as much of the following as possible: + +- affected endpoint or component +- reproduction steps +- expected vs actual behavior +- logs or screenshots with secrets removed +- whether the issue is exploitable remotely or only in a local/self-hosted setup + +## Secret handling + +This repository is designed for public use. Do not include any of the following in issues, pull requests, screenshots, or sample files: + +- API keys +- access tokens +- cookies +- private model endpoints +- customer URLs +- personally identifying data + +## Operational guidance + +If you deploy OhMyCaptcha publicly: + +- store secrets in environment variables or your hosting platform's secret manager +- avoid committing `.env` files +- rotate keys if they were ever exposed in logs or history +- consider placing the service behind your own authentication, rate limiting, and monitoring layers diff --git a/docs/acceptance.md b/docs/acceptance.md new file mode 100644 index 0000000000000000000000000000000000000000..e8826979f680dc741513009443d25d2c391e3abd --- /dev/null +++ b/docs/acceptance.md @@ -0,0 +1,151 @@ +# Acceptance + +This page documents acceptance targets for each supported captcha type, including the test URLs, site keys, and observed outcomes during local validation runs. 
+ +## Summary + +| Captcha type | Target | Status | +|-------------|--------|--------| +| reCAPTCHA v3 | `https://antcpt.com/score_detector/` | ✅ Token returned | +| Cloudflare Turnstile | `https://react-turnstile.vercel.app/basic` | ✅ Dummy token returned | +| reCAPTCHA v2 | `https://www.google.com/recaptcha/api2/demo` | ⚠️ Requires audio challenge (see notes) | +| hCaptcha | `https://accounts.hcaptcha.com/demo` | ⚠️ Challenge-dependent | +| Image-to-Text | Local base64 image | ✅ Text returned via vision model | +| Classification | Local base64 grid | ✅ Object indices returned via vision model | + +--- + +## reCAPTCHA v3 — Primary acceptance target + +**URL:** `https://antcpt.com/score_detector/` +**Site key:** `6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf` + +### Acceptance checklist + +1. Install dependencies and Playwright Chromium. +2. Start the service: `python main.py` +3. Confirm `GET /api/v1/health` returns all 19 supported types. +4. Create a `RecaptchaV3TaskProxyless` task. +5. Poll `POST /getTaskResult` until `status=ready`. +6. Confirm a non-empty `solution.gRecaptchaResponse`. 
+ +### Curl example + +```bash +TASK=$(curl -s -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": "6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } + }' | python -c "import sys,json; print(json.load(sys.stdin)['taskId'])") + +curl -s -X POST http://localhost:8000/getTaskResult \ + -H "Content-Type: application/json" \ + -d '{"clientKey":"your-key","taskId":"'"$TASK"'"}' +``` + +### Verified outcome + +- Service startup: ✅ +- Health endpoint: ✅ (19 types registered) +- Task creation: ✅ +- Token returned: ✅ (non-empty `gRecaptchaResponse`, length ~1060 chars) + +--- + +## Cloudflare Turnstile + +**URL:** `https://react-turnstile.vercel.app/basic` +**Site key:** `1x00000000000000000000AA` (Cloudflare official test key — always passes) + +### Curl example + +```bash +curl -s -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-key", + "task": { + "type": "TurnstileTaskProxyless", + "websiteURL": "https://react-turnstile.vercel.app/basic", + "websiteKey": "1x00000000000000000000AA" + } + }' +``` + +### Verified outcome + +- Token returned: ✅ `XXXX.DUMMY.TOKEN.XXXX` (expected for Cloudflare test sitekeys) + +--- + +## reCAPTCHA v2 + +**URL:** `https://www.google.com/recaptcha/api2/demo` +**Site key:** `6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-` + +### Behavior with headless Chromium + +Headless browsers are detected by Google's risk analysis engine. The checkbox click succeeds, but a visual image challenge is presented rather than issuing a token immediately. + +**Implemented mitigation:** The solver falls back to the **audio challenge path** — clicking the audio button in the challenge dialog, downloading the MP3, transcribing via the configured model, and submitting the transcript. + +!!! 
note "Audio challenge transcription" + The audio challenge requires a language model capable of processing audio or base64-encoded audio data. Accuracy depends on the model endpoint configured via `CLOUD_MODEL` (or the legacy `CAPTCHA_MODEL` fallback). + +### Curl example + +```bash +curl -s -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-key", + "task": { + "type": "NoCaptchaTaskProxyless", + "websiteURL": "https://www.google.com/recaptcha/api2/demo", + "websiteKey": "6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-" + } + }' +``` + +### Status + +⚠️ Functionally implemented with audio challenge fallback. Success rate depends on model audio capability and Google's current challenge difficulty. + +--- + +## hCaptcha + +**URL:** `https://accounts.hcaptcha.com/demo` +**Site key:** `10000000-ffff-ffff-ffff-000000000001` (hCaptcha official test key) + +### Behavior + +The hCaptcha test key (`10000000-ffff-ffff-ffff-000000000001`) is designed to always pass — but headless browsers detected as bots still receive an image challenge. The solver clicks the checkbox iframe and polls for a token for up to 30 seconds. + +### Status + +⚠️ Checkbox click succeeds. Token issuance depends on hCaptcha's bot detection score. For test environments, using the [HCaptchaClassification](usage/classification.md) task type (direct image classification) is the recommended integration path. + +--- + +## Image-to-Text + +Any base64-encoded image can be sent to `ImageToTextTask`. The vision model returns a structured description suitable for click/slide/drag_match captcha automation. + +### Status + +✅ Works with any OpenAI-compatible vision model endpoint. Accuracy depends on model capability. + +--- + +## What these results mean + +- ✅ **reCAPTCHA v3** and **Turnstile** are fully functional and pass in every local test run. +- ⚠️ **reCAPTCHA v2** and **hCaptcha** browser-based solving is limited by headless browser detection. 
These captcha types are primarily intended to be integrated with `HCaptchaClassification` / `ReCaptchaV2Classification` task types for image grid solving, or via audio challenge transcription. +- The service is designed as a **backend solver for flow2api** — in practice, real-world integrations extract the image challenge frames and send them to the classification endpoint, rather than relying on full browser automation to pass the widget. diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 0000000000000000000000000000000000000000..135c8fc0edb8626c97350bc9f7c2548c3aa2b7b5 --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1,246 @@ +# API Reference + +## Endpoints + +- `POST /createTask` +- `POST /getTaskResult` +- `POST /getBalance` +- `GET /api/v1/health` +- `GET /` + +All task endpoints are JSON-based and follow a YesCaptcha-style async task pattern. + +## `POST /createTask` + +### Request shape + +```json +{ + "clientKey": "your-client-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": "6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } +} +``` + +### Supported task types (19 total) + +#### reCAPTCHA v3 (browser-based) + +- `RecaptchaV3TaskProxyless` +- `RecaptchaV3TaskProxylessM1` +- `RecaptchaV3TaskProxylessM1S7` +- `RecaptchaV3TaskProxylessM1S9` +- `RecaptchaV3EnterpriseTask` +- `RecaptchaV3EnterpriseTaskM1` + +Required fields: `websiteURL`, `websiteKey`. Optional: `pageAction`, `minScore`. + +#### reCAPTCHA v2 (browser-based) + +- `NoCaptchaTaskProxyless` +- `RecaptchaV2TaskProxyless` +- `RecaptchaV2EnterpriseTaskProxyless` + +Required fields: `websiteURL`, `websiteKey`. Optional: `isInvisible`. + +#### hCaptcha (browser-based) + +- `HCaptchaTaskProxyless` + +Required fields: `websiteURL`, `websiteKey`. 
+ +#### Cloudflare Turnstile (browser-based) + +- `TurnstileTaskProxyless` +- `TurnstileTaskProxylessM1` + +Required fields: `websiteURL`, `websiteKey`. + +#### Image recognition + +- `ImageToTextTask` +- `ImageToTextTaskMuggle` +- `ImageToTextTaskM1` + +Required fields: `body` (base64-encoded image). + +#### Image classification + +- `HCaptchaClassification` +- `ReCaptchaV2Classification` +- `FunCaptchaClassification` +- `AwsClassification` + +Required fields: `image` or `images` or `queries` (base64-encoded). Optional: `question`. + +### Compatibility note on `minScore` + +The request model accepts `minScore` for compatibility. The current solver implementation does **not** enforce score targeting based on this field. + +### Success response + +```json +{ + "errorId": 0, + "taskId": "uuid-string" +} +``` + +### Common error responses + +```json +{ + "errorId": 1, + "errorCode": "ERROR_TASK_NOT_SUPPORTED", + "errorDescription": "Task type 'X' is not supported." +} +``` + +```json +{ + "errorId": 1, + "errorCode": "ERROR_TASK_PROPERTY_EMPTY", + "errorDescription": "websiteURL and websiteKey are required" +} +``` + +## `POST /getTaskResult` + +### Request + +```json +{ + "clientKey": "your-client-key", + "taskId": "uuid-from-createTask" +} +``` + +### Processing response + +```json +{ + "errorId": 0, + "status": "processing" +} +``` + +### Ready response for reCAPTCHA v2/v3 + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "gRecaptchaResponse": "token..." + } +} +``` + +### Ready response for Cloudflare Turnstile + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "token": "cf-turnstile-token..." 
+ } +} +``` + +### Ready response for `ImageToTextTask` + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "text": "{\"captcha_type\":\"click\", ...}" + } +} +``` + +### Ready response for classification tasks + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "objects": [0, 3, 6] + } +} +``` + +### Not found response + +```json +{ + "errorId": 1, + "errorCode": "ERROR_NO_SUCH_CAPCHA_ID", + "errorDescription": "Task not found" +} +``` + +## `POST /getBalance` + +### Request + +```json +{ + "clientKey": "your-client-key" +} +``` + +### Response + +```json +{ + "errorId": 0, + "balance": 99999.0 +} +``` + +This balance is currently a static compatibility response. + +## `GET /api/v1/health` + +Example response: + +```json +{ + "status": "ok", + "supported_task_types": [ + "RecaptchaV3TaskProxyless", + "RecaptchaV3TaskProxylessM1", + "RecaptchaV3TaskProxylessM1S7", + "RecaptchaV3TaskProxylessM1S9", + "RecaptchaV3EnterpriseTask", + "RecaptchaV3EnterpriseTaskM1", + "NoCaptchaTaskProxyless", + "RecaptchaV2TaskProxyless", + "RecaptchaV2EnterpriseTaskProxyless", + "HCaptchaTaskProxyless", + "TurnstileTaskProxyless", + "TurnstileTaskProxylessM1", + "ImageToTextTask", + "ImageToTextTaskMuggle", + "ImageToTextTaskM1", + "HCaptchaClassification", + "ReCaptchaV2Classification", + "FunCaptchaClassification", + "AwsClassification" + ], + "browser_headless": true, + "captcha_model": "gpt-5.4", + "captcha_multimodal_model": "qwen3.5-2b" +} +``` + +## `GET /` + +The root endpoint returns a compact service description and the registered task types at runtime. 
diff --git a/docs/assets/.gitkeep b/docs/assets/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/docs/assets/ohmycaptcha-diagram.png b/docs/assets/ohmycaptcha-diagram.png new file mode 100644 index 0000000000000000000000000000000000000000..4c8cbf7054ede5916c3889df8f1acba1f93a59d1 --- /dev/null +++ b/docs/assets/ohmycaptcha-diagram.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd1381d65e93395c91e08419f81d9e9744ffb91eefa07c9d29a6ae2fde63170 +size 505888 diff --git a/docs/assets/ohmycaptcha-hero.png b/docs/assets/ohmycaptcha-hero.png new file mode 100644 index 0000000000000000000000000000000000000000..3149ec6ee728d59f15892d731952e8b6829b7605 --- /dev/null +++ b/docs/assets/ohmycaptcha-hero.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f1365eae0fdbc27d20813549e864ea3b8b7d22733a644b3fa8672f73e190c55 +size 741379 diff --git a/docs/deployment/huggingface.md b/docs/deployment/huggingface.md new file mode 100644 index 0000000000000000000000000000000000000000..f88d4dfd0d67deda2f5f5b55389d394390f1776a --- /dev/null +++ b/docs/deployment/huggingface.md @@ -0,0 +1,116 @@ +# Hugging Face Spaces Deployment + +This guide shows how to deploy OhMyCaptcha on **Hugging Face Spaces** using a Docker-based Space. + +## When to choose Hugging Face Spaces + +Use Hugging Face Spaces when you want: + +- a simple public or private demo deployment +- a UI-driven hosting workflow +- easy secret management inside the Space settings +- a Docker-based environment without managing a VPS yourself + +## 1. Prepare the repository + +Make sure your repository includes: + +- `Dockerfile.render` +- `main.py` +- `requirements.txt` +- the `src/` application package + +For Hugging Face Spaces, the current Dockerfile can be reused as a starting point because it already installs Python dependencies and Playwright Chromium. + +## 2. 
Create a Docker Space + +In Hugging Face: + +1. Create a new **Space**. +2. Choose **Docker** as the SDK. +3. Select visibility according to your needs. +4. Connect the Space to this repository or upload the project files. + +## 3. Configure secrets and variables + +In the Space settings, add the following secrets: + +- `CLIENT_KEY` +- `CAPTCHA_API_KEY` + +Add or override variables as needed: + +- `CAPTCHA_BASE_URL` +- `CAPTCHA_MODEL` +- `CAPTCHA_MULTIMODAL_MODEL` +- `BROWSER_HEADLESS=true` +- `BROWSER_TIMEOUT=30` +- `SERVER_PORT=7860` + +Hugging Face Spaces typically expose applications on port `7860`, so set `SERVER_PORT=7860`. + +## 4. Confirm the startup command + +The container should start the app with: + +```bash +python main.py +``` + +The entrypoint already respects environment-based port configuration. + +## 5. Wait for the build to finish + +After the Space starts building: + +- watch the build logs +- confirm dependency installation finishes successfully +- confirm Playwright Chromium installs successfully +- wait for the app to enter the running state + +## 6. Validate the deployment + +Once the Space is live, verify: + +### Root endpoint + +```bash +curl https://your-space.hf.space/ +``` + +### Health endpoint + +```bash +curl https://your-space.hf.space/api/v1/health +``` + +### Create a detector task + +```bash +curl -X POST https://your-space.hf.space/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": "6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } + }' +``` + +## Operational notes + +- Hugging Face Spaces are convenient, but cold starts and resource limits can affect Playwright-based workloads. +- Browser automation can be more sensitive to shared-hosting environments than standard API-only apps. +- If you need stricter runtime control, use Render or your own infrastructure.
+ +## Recommended usage + +Hugging Face Spaces is best suited for: + +- evaluation +- demos +- low-volume internal usage +- fast public documentation-linked deployment diff --git a/docs/deployment/local-model.md b/docs/deployment/local-model.md new file mode 100644 index 0000000000000000000000000000000000000000..84a66a6e9d3543777a6a82911e2a9f36b1508376 --- /dev/null +++ b/docs/deployment/local-model.md @@ -0,0 +1,155 @@ +# Local Model Deployment + +OhMyCaptcha supports running image recognition and classification tasks on a **locally hosted model** served via [SGLang](https://github.com/sgl-project/sglang), [vLLM](https://github.com/vllm-project/vllm), or any OpenAI-compatible inference server. + +This guide covers deploying [Qwen3.5-2B](https://modelscope.cn/models/Qwen/Qwen3.5-2B) locally with SGLang. + +## Architecture: Local vs Cloud + +OhMyCaptcha uses two model backends: + +| Backend | Role | Env vars | Default | +|---------|------|----------|---------| +| **Local model** | Image recognition & classification (high-throughput, self-hosted) | `LOCAL_BASE_URL`, `LOCAL_API_KEY`, `LOCAL_MODEL` | `http://localhost:30000/v1`, `EMPTY`, `Qwen/Qwen3.5-2B` | +| **Cloud model** | Audio transcription & complex reasoning (powerful remote API) | `CLOUD_BASE_URL`, `CLOUD_API_KEY`, `CLOUD_MODEL` | External endpoint, your key, `gpt-5.4` | + +``` +┌────────────────────────────────────────────────────────────┐ +│ OhMyCaptcha │ +│ │ +│ Browser tasks ──► Playwright (reCAPTCHA, Turnstile) │ +│ │ +│ Image tasks ───► Local Model (SGLang / vLLM) │ +│ └─ Qwen3.5-2B on localhost:30000 │ +│ │ +│ Audio tasks ───► Cloud Model (remote API) │ +│ └─ gpt-5.4 via external endpoint │ +└────────────────────────────────────────────────────────────┘ +``` + +## Prerequisites + +- Python 3.10+ +- NVIDIA GPU with CUDA support (recommended: 8GB+ VRAM for Qwen3.5-2B) +- `pip` package manager + +## Step 1: Install SGLang + +```bash +pip install "sglang[all]>=0.4.6.post1" +``` + +## Step 2: Launch the 
model server + +### From Hugging Face + +```bash +python -m sglang.launch_server \ + --model-path Qwen/Qwen3.5-2B \ + --host 0.0.0.0 \ + --port 30000 +``` + +### From ModelScope (recommended in China) + +```bash +export SGLANG_USE_MODELSCOPE=true +python -m sglang.launch_server \ + --model-path Qwen/Qwen3.5-2B \ + --host 0.0.0.0 \ + --port 30000 +``` + +### With multiple GPUs + +```bash +python -m sglang.launch_server \ + --model-path Qwen/Qwen3.5-2B \ + --host 0.0.0.0 \ + --port 30000 \ + --tensor-parallel-size 2 +``` + +Once started, the server exposes an OpenAI-compatible API at `http://localhost:30000/v1`. + +## Step 3: Verify the model server + +```bash +curl http://localhost:30000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Qwen/Qwen3.5-2B", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 32 + }' +``` + +You should receive a valid JSON response with model output. + +## Step 4: Configure OhMyCaptcha + +Set the local model env vars to point at your SGLang server: + +```bash +# Local model (self-hosted via SGLang) +export LOCAL_BASE_URL="http://localhost:30000/v1" +export LOCAL_API_KEY="EMPTY" +export LOCAL_MODEL="Qwen/Qwen3.5-2B" + +# Cloud model (remote API for audio transcription etc.) 
+export CLOUD_BASE_URL="https://your-api-endpoint/v1" +export CLOUD_API_KEY="sk-your-key" +export CLOUD_MODEL="gpt-5.4" + +# Other config +export CLIENT_KEY="your-client-key" +export BROWSER_HEADLESS=true +``` + +## Step 5: Start OhMyCaptcha + +```bash +python main.py +``` + +The health endpoint shows both model backends: + +```bash +curl http://localhost:8000/api/v1/health +``` + +```json +{ + "status": "ok", + "supported_task_types": ["RecaptchaV3TaskProxyless", "..."], + "browser_headless": true, + "cloud_model": "gpt-5.4", + "local_model": "Qwen/Qwen3.5-2B" +} +``` + +## Alternative: vLLM + +vLLM can serve the same model with an identical API: + +```bash +pip install vllm +python -m vllm.entrypoints.openai.api_server \ + --model Qwen/Qwen3.5-2B \ + --host 0.0.0.0 \ + --port 30000 +``` + +No changes to the OhMyCaptcha configuration are needed — both SGLang and vLLM expose `/v1/chat/completions`. + +## Backward compatibility + +The legacy environment variables (`CAPTCHA_BASE_URL`, `CAPTCHA_API_KEY`, `CAPTCHA_MODEL`, `CAPTCHA_MULTIMODAL_MODEL`) are still supported as fallbacks. If you set `CAPTCHA_BASE_URL` without setting `CLOUD_BASE_URL`, the old value will be used. The new `LOCAL_*` and `CLOUD_*` variables take precedence when set. + +## Recommended models + +| Model | Size | Use case | VRAM | +|-------|------|----------|------| +| `Qwen/Qwen3.5-2B` | 2B | Image recognition & classification | ~5 GB | +| `Qwen/Qwen3.5-7B` | 7B | Higher accuracy classification | ~15 GB | +| `Qwen/Qwen3.5-2B-FP8` | 2B (quantized) | Lower VRAM requirement | ~3 GB | diff --git a/docs/deployment/render.md b/docs/deployment/render.md new file mode 100644 index 0000000000000000000000000000000000000000..c304befa8a589b91c1aea7bc6069c208b0058415 --- /dev/null +++ b/docs/deployment/render.md @@ -0,0 +1,112 @@ +# Render Deployment + +This guide walks through a clean **Render** deployment for OhMyCaptcha using the Docker files already included in this repository. 
+ +## When to choose Render + +Render is a good fit when you want: + +- managed deployment with a stable public URL +- easy secret management +- a simple Docker-based workflow +- fewer runtime constraints than demo-oriented hosting platforms + +## 1. Prepare the repository + +This repository already includes the files Render needs: + +- `Dockerfile.render` +- `render.yaml` +- `main.py` +- `requirements.txt` +- `src/` + +The application listens on port `8000` and also respects the `PORT` environment variable injected by Render. + +## 2. Create the Render service + +In Render: + +1. Create a new **Web Service**. +2. Connect your GitHub repository. +3. Choose **Docker** as the runtime. +4. Point Render at: + - Dockerfile: `Dockerfile.render` + - Context: repository root + +You can also import the included `render.yaml` blueprint. + +## 3. Configure environment variables + +### Required secrets + +Set these as protected environment variables in the Render dashboard: + +- `CLIENT_KEY` +- `CAPTCHA_API_KEY` + +### Recommended variables + +- `CAPTCHA_BASE_URL=https://your-openai-compatible-endpoint/v1` +- `CAPTCHA_MODEL=gpt-5.4` +- `CAPTCHA_MULTIMODAL_MODEL=qwen3.5-2b` +- `CAPTCHA_RETRIES=3` +- `CAPTCHA_TIMEOUT=30` +- `BROWSER_HEADLESS=true` +- `BROWSER_TIMEOUT=30` + +## 4. Trigger the first deploy + +After saving the configuration: + +- wait for the image build +- confirm Python dependencies install successfully +- confirm Playwright Chromium installation completes successfully +- wait until the service status becomes healthy + +## 5. 
Validate the deployment + +Once the Render URL is available, check: + +### Root endpoint + +```bash +curl https://your-service.onrender.com/ +``` + +### Health endpoint + +```bash +curl https://your-service.onrender.com/api/v1/health +``` + +### Create a detector task + +```bash +curl -X POST https://your-service.onrender.com/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": "6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } + }' +``` + +## Operational notes + +- Render is generally a better fit than lightweight demo platforms for browser automation. +- Browser-based solving can still be sensitive to cold starts, IP quality, and container resource limits. +- If you need stronger control over runtime behavior, move to your own infrastructure. + +## Recommended usage + +Render is a strong default choice for: + +- persistent public deployments +- flow2api integration testing +- low-to-medium production traffic +- quick managed rollout without maintaining your own host diff --git a/docs/faq.md b/docs/faq.md new file mode 100644 index 0000000000000000000000000000000000000000..b6c36ad659abae130a1392a83ba6ad35581bf9d5 --- /dev/null +++ b/docs/faq.md @@ -0,0 +1,25 @@ +# FAQ + +## Does this fully replace YesCaptcha? + +No. It implements a YesCaptcha-style API surface for the task types available in this repository. It should not be described as full vendor parity. + +## Does `minScore` guarantee a target reCAPTCHA score? + +No. The request model accepts `minScore` for compatibility, but the current solver does not enforce score targeting. + +## Can I use local or self-hosted multimodal models? + +Yes, if they expose an OpenAI-compatible API with image-capable chat completion behavior. + +## Does `ImageToTextTask` return plain OCR text? + +Not necessarily.
The current implementation returns structured recognition output serialized into `solution.text`. + +## Is task state persistent? + +No. Task state is stored in memory and cleaned up after the configured TTL window. + +## What affects reCAPTCHA v3 results? + +Common factors include IP quality, browser fingerprint, target site behavior, page action correctness, and runtime environment. diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000000000000000000000000000000000000..895ddf83cb00d88fbf614accf2b4c71e829e5e50 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,67 @@ +# Getting Started + +## Requirements + +- Python 3.10+ +- Chromium available through Playwright +- Network access to: + - target sites you want to solve against + - your configured OpenAI-compatible model endpoint + +## Installation + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +playwright install --with-deps chromium +``` + +## Environment variables + +| Variable | Description | Default | +| --- | --- | --- | +| `CLIENT_KEY` | Client auth key used as `clientKey` | unset | +| `CAPTCHA_BASE_URL` | OpenAI-compatible API base URL | `https://your-openai-compatible-endpoint/v1` | +| `CAPTCHA_API_KEY` | API key for your model provider | unset | +| `CAPTCHA_MODEL` | Strong text model | `gpt-5.4` | +| `CAPTCHA_MULTIMODAL_MODEL` | Multimodal model | `qwen3.5-2b` | +| `CAPTCHA_RETRIES` | Retry count | `3` | +| `CAPTCHA_TIMEOUT` | Model timeout in seconds | `30` | +| `BROWSER_HEADLESS` | Run Chromium headless | `true` | +| `BROWSER_TIMEOUT` | Browser timeout in seconds | `30` | +| `SERVER_HOST` | Bind host | `0.0.0.0` | +| `SERVER_PORT` | Bind port | `8000` | + +## Start the service + +```bash +export CLIENT_KEY="your-client-key" +export CAPTCHA_BASE_URL="https://your-openai-compatible-endpoint/v1" +export CAPTCHA_API_KEY="your-api-key" +export CAPTCHA_MODEL="gpt-5.4" +export CAPTCHA_MULTIMODAL_MODEL="qwen3.5-2b" 
+python main.py +``` + +## Verify startup + +### Root endpoint + +```bash +curl http://localhost:8000/ +``` + +### Health endpoint + +```bash +curl http://localhost:8000/api/v1/health +``` + +The health response should include the registered task types and current runtime model settings. + +## Local and self-hosted model support + +The image recognition path is built around **OpenAI-compatible APIs**. In practice, this means you can point `CAPTCHA_BASE_URL` at a hosted provider or a self-hosted/local multimodal gateway, as long as it exposes compatible chat-completions semantics and supports image input. + +The project intentionally documents this in generic compatibility terms rather than claiming full validation for every provider stack. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000000000000000000000000000000000000..8742e3dcc641aba4d58f277186790f459530b4ff --- /dev/null +++ b/docs/index.md @@ -0,0 +1,126 @@ +# OhMyCaptcha + +
+ +
+ OhMyCaptcha — self-hostable captcha solving service +
+ +
+ +## ⚡ Self-hostable captcha solving with a clean YesCaptcha-style API + +OhMyCaptcha combines **FastAPI**, **Playwright**, and **OpenAI-compatible multimodal models** into a focused service for **flow2api** and similar integrations. Supports **19 task types** across reCAPTCHA, hCaptcha, Cloudflare Turnstile, and image classification. + +
+ +[Get started](getting-started.md){ .md-button .md-button--primary } +[API reference](api-reference.md){ .md-button } +[GitHub](https://github.com/shenhao-stu/ohmycaptcha){ .md-button } + +
+ +
+ +
+ +## ✨ Highlights + +
+ +- :material-api: **YesCaptcha-style API** + + --- + + Familiar async `createTask` / `getTaskResult` semantics covering reCAPTCHA v2/v3, hCaptcha, Turnstile, and image classification. + +- :material-google-chrome: **Browser-based solving** + + --- + + Playwright + Chromium automate token generation for reCAPTCHA v2/v3, hCaptcha, and Cloudflare Turnstile. + +- :material-image-search: **Multimodal image recognition** + + --- + + Route image captcha analysis through OpenAI-compatible vision models for HCaptcha, reCAPTCHA, FunCaptcha, and AWS classification. + +- :material-cloud-outline: **Self-hosted deployment** + + --- + + Run locally or follow the included Render and Hugging Face Spaces deployment guides. + +
+ +## 🧠 Supported task types + +### Browser-based solving (12 types) + +| Category | Task Types | +|----------|-----------| +| **reCAPTCHA v3** | `RecaptchaV3TaskProxyless`, `RecaptchaV3TaskProxylessM1`, `RecaptchaV3TaskProxylessM1S7`, `RecaptchaV3TaskProxylessM1S9` | +| **reCAPTCHA v3 Enterprise** | `RecaptchaV3EnterpriseTask`, `RecaptchaV3EnterpriseTaskM1` | +| **reCAPTCHA v2** | `NoCaptchaTaskProxyless`, `RecaptchaV2TaskProxyless`, `RecaptchaV2EnterpriseTaskProxyless` | +| **hCaptcha** | `HCaptchaTaskProxyless` | +| **Cloudflare Turnstile** | `TurnstileTaskProxyless`, `TurnstileTaskProxylessM1` | + +### Image recognition (3 types) + +| Task Type | Description | +|-----------|-------------| +| `ImageToTextTask` | Argus-inspired multimodal recognition for click, slide, and drag captchas | +| `ImageToTextTaskMuggle` | Text/alphanumeric image recognition | +| `ImageToTextTaskM1` | Async image text recognition | + +### Image classification (4 types) + +| Task Type | Description | +|-----------|-------------| +| `HCaptchaClassification` | hCaptcha grid image classification | +| `ReCaptchaV2Classification` | reCAPTCHA v2 grid cell selection | +| `FunCaptchaClassification` | FunCaptcha image selection | +| `AwsClassification` | AWS CAPTCHA image classification | + +## 🚀 Quick paths + +
+ +- :material-rocket-launch-outline: **Quick start** + + --- + + Install dependencies, configure environment variables, and launch the service locally. + + [Open quick start](getting-started.md) + +- :material-file-document-outline: **API reference** + + --- + + Review all 19 supported task types, endpoints, and request formats. + + [Open API reference](api-reference.md) + +- :material-play-box-outline: **Acceptance** + + --- + + Validate detector-target behavior and confirm token generation flow. + + [Open acceptance guide](acceptance.md) + +- :material-server-outline: **Deployment** + + --- + + Follow the Render or Hugging Face Spaces guides for a production-facing instance. + + [Open deployment guide](deployment/render.md) + +
+ +## 📌 Scope note + +OhMyCaptcha implements a **YesCaptcha-style API surface covering 19 task types** across reCAPTCHA v2/v3, hCaptcha, Cloudflare Turnstile, and image classification. Browser-based tasks rely on Playwright automation and may require tuning for specific target sites. Image classification leverages multimodal vision models and accuracy depends on model quality. diff --git a/docs/positioning.md b/docs/positioning.md new file mode 100644 index 0000000000000000000000000000000000000000..72931fc8ce5ac07719bf680d29039f1faec75a40 --- /dev/null +++ b/docs/positioning.md @@ -0,0 +1,46 @@ +# Positioning + +## What OhMyCaptcha is + +OhMyCaptcha is a self-hostable captcha solving service with a YesCaptcha-style API for the task types implemented in this repository. + +It is designed for users who want: + +- a service they can run themselves +- compatibility with `createTask` / `getTaskResult` style workflows +- control over browser automation and model backends +- support for OpenAI-compatible multimodal providers, including local or self-hosted gateways + +## Comparison with managed services such as YesCaptcha + +Managed services such as YesCaptcha typically provide: + +- a hosted platform +- a broad task catalog +- commercial SLAs and vendor-managed infrastructure + +OhMyCaptcha instead focuses on: + +- self-hosting +- transparent implementation +- prompt and browser customization +- backend flexibility for multimodal models + +## Scope boundary + +This repository should not be described as a full commercial-vendor replacement for every captcha family or task type. + +A more accurate description is: + +> a self-hostable service that provides a YesCaptcha-style API for the implemented task types and can be integrated into systems such as flow2api. + +## Local and self-hosted model support + +The project uses OpenAI-compatible APIs for multimodal recognition. 
This makes it possible to connect: + +- hosted providers +- internal gateways +- self-hosted multimodal services +- local model-serving stacks that expose compatible semantics + +The documentation intentionally keeps this phrasing generic. Compatibility depends on whether the backend supports image input and sufficiently consistent chat-completions behavior. diff --git a/docs/skill.md b/docs/skill.md new file mode 100644 index 0000000000000000000000000000000000000000..4d0ea0cb42f8c2c0a73031492b8201be54a6fbe9 --- /dev/null +++ b/docs/skill.md @@ -0,0 +1,58 @@ +# Agent Skill + +OhMyCaptcha ships with reusable skills under `skills/`. + +## Available skills + +- `skills/ohmycaptcha/` — operate, deploy, validate, and integrate the service +- `skills/ohmycaptcha-image/` — create public-safe visuals for README, docs, and launch assets + +## For humans + +If your tool can read a local skill folder directly, copy one or both of these directories into your local skills directory: + +```text +skills/ohmycaptcha/ +skills/ohmycaptcha-image/ +``` + +Then restart your tool if it caches skill metadata. + +## Let an LLM do it + +Paste this into any capable agent environment: + +```text +Install the OhMyCaptcha skills from this repository and make them available in my local skills directory. Then show me how to use the operational skill for deployment and the image skill for generating README or docs visuals. 
+``` + +## What the operational skill does + +The `ohmycaptcha` skill helps with: + +- local startup +- environment setup +- YesCaptcha-style API usage +- flow2api integration +- Render deployment +- Hugging Face Spaces deployment +- task validation and troubleshooting + +## What the image skill does + +The `ohmycaptcha-image` skill helps with: + +- README hero image prompts +- docs illustrations +- public-safe technical visuals +- architecture-themed artwork +- reusable image-generation prompts for agent workflows + +## Design guarantees + +These skills are designed to: + +- use placeholder credentials only +- stay aligned with the implemented task types +- keep current limitations explicit +- avoid embedding secrets, private endpoints, or customer data diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000000000000000000000000000000000000..8652ca2e4532234793a485ea078dc4749eed840c --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,204 @@ +:root { + --ohmycaptcha-radius: 18px; + --ohmycaptcha-shadow: 0 18px 44px rgba(37, 99, 235, 0.08); +} + +[data-md-color-scheme="slate"] { + --ohmycaptcha-shadow: 0 18px 44px rgba(0, 0, 0, 0.3); +} + +.md-header__option { + order: 20; +} + +.md-header__option:last-of-type { + order: 99; + margin-inline-start: auto; +} + +.md-typeset .caption { + text-align: center; + color: var(--md-default-fg-color--light); + font-size: 0.8rem; +} + +/* ── Hero section ── */ + +.md-typeset .hero { + position: relative; + margin: 0 0 2rem; + border-radius: 24px; + overflow: hidden; + border: 1px solid rgba(37, 99, 235, 0.12); + box-shadow: var(--ohmycaptcha-shadow); + background: #0f172a; +} + +[data-md-color-scheme="slate"] .md-typeset .hero { + border-color: rgba(99, 102, 241, 0.2); +} + +/* Full-width hero image strip */ +.md-typeset .hero__visual { + display: block; + line-height: 0; +} + +.md-typeset .hero__visual img { + width: 100%; + height: auto; + max-height: 340px; + object-fit: 
cover; + object-position: center top; + display: block; + border-radius: 0; + border: none; + background: transparent; +} + +/* Copy panel sits below the image */ +.md-typeset .hero__copy { + padding: 1.6rem 2rem 1.8rem; + background: linear-gradient(135deg, #ffffff 0%, #f7faff 60%, #eef2ff 100%); +} + +[data-md-color-scheme="slate"] .md-typeset .hero__copy { + background: linear-gradient(135deg, #0f172a 0%, #1e1b4b 60%, #1e293b 100%); +} + +.md-typeset .hero--light h2 { + margin: 0 0 0.7rem; + color: #0f172a; + font-size: clamp(1.3rem, 3vw, 1.9rem); + line-height: 1.2; + letter-spacing: -0.025em; + font-weight: 700; +} + +[data-md-color-scheme="slate"] .md-typeset .hero--light h2 { + color: #e2e8f0; +} + +.md-typeset .hero--light p { + color: #475569; + margin: 0 0 0.5rem; + font-size: 0.95rem; + max-width: 68ch; +} + +[data-md-color-scheme="slate"] .md-typeset .hero--light p { + color: #94a3b8; +} + +.md-typeset .hero__copy > p { + margin: 0.6rem 0 0; + font-size: 0.95rem; + max-width: 68ch; +} + +.md-typeset .hero__actions { + display: flex; + flex-wrap: wrap; + gap: 0.65rem; + margin-top: 1.1rem; +} + +.md-typeset .hero__actions .md-button { + margin: 0; + border-radius: 999px; + font-size: 0.83rem; + padding: 0.4em 1.1em; +} + +.md-typeset .hero__actions .md-button:not(.md-button--primary) { + color: #1d4ed8; + border-color: rgba(37, 99, 235, 0.28); +} + +[data-md-color-scheme="slate"] .md-typeset .hero__actions .md-button:not(.md-button--primary) { + color: #818cf8; + border-color: rgba(129, 140, 248, 0.3); +} + +/* ── Feature cards ── */ + +.md-typeset .feature-cards > ul { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(14rem, 1fr)); + gap: 0.9rem; + padding: 0; +} + +.md-typeset .feature-cards > ul > li { + list-style: none; + padding: 1.1rem; + border: 1px solid rgba(37, 99, 235, 0.12); + border-radius: var(--ohmycaptcha-radius); + background: #ffffff; + box-shadow: 0 10px 26px rgba(15, 23, 42, 0.04); + transition: transform 0.2s ease, 
box-shadow 0.2s ease; +} + +.md-typeset .feature-cards > ul > li:hover { + transform: translateY(-2px); + box-shadow: 0 14px 32px rgba(15, 23, 42, 0.08); +} + +[data-md-color-scheme="slate"] .md-typeset .feature-cards > ul > li { + background: rgba(30, 30, 50, 0.6); + border-color: rgba(99, 102, 241, 0.15); + box-shadow: 0 10px 26px rgba(0, 0, 0, 0.2); +} + +.md-typeset .feature-cards strong { + color: #0f172a; +} + +[data-md-color-scheme="slate"] .md-typeset .feature-cards strong { + color: #e2e8f0; +} + +.md-typeset .feature-cards p { + color: #475569; +} + +[data-md-color-scheme="slate"] .md-typeset .feature-cards p { + color: #94a3b8; +} + +.md-typeset .grid.cards > ul { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(14rem, 1fr)); + gap: 0.8rem; + padding: 0; +} + +.md-typeset .grid.cards > ul > li { + list-style: none; + border-radius: var(--ohmycaptcha-radius); +} + +/* ── Task type tables ── */ + +.md-typeset table code { + font-size: 0.78rem; + padding: 0.1em 0.35em; +} + +/* ── Responsive ── */ + +@media screen and (max-width: 76.1875em) { + .md-typeset .hero__copy { + padding: 1.2rem 1.2rem 1.4rem; + } + + .md-typeset .hero__visual img { + max-height: 240px; + } +} + +@media screen and (max-width: 44.9375em) { + .md-typeset .hero__visual img { + max-height: 180px; + } +} diff --git a/docs/usage/classification.md b/docs/usage/classification.md new file mode 100644 index 0000000000000000000000000000000000000000..08dccfa779370edcc437fb5db3d82fdbaed79f8a --- /dev/null +++ b/docs/usage/classification.md @@ -0,0 +1,163 @@ +# Image Classification Usage + +Image classification tasks send one or more captcha images to an OpenAI-compatible vision model and return the indices of matching cells or a boolean answer. No browser automation is involved — these are pure vision model API calls. 
+ +## Supported task types + +| Task type | Description | +|-----------|-------------| +| `HCaptchaClassification` | hCaptcha 3x3 grid — returns matching cell indices | +| `ReCaptchaV2Classification` | reCAPTCHA v2 3x3 / 4x4 grid — returns matching cell indices | +| `FunCaptchaClassification` | FunCaptcha 2x3 grid — returns the correct cell index | +| `AwsClassification` | AWS CAPTCHA image selection | + +## Solution fields + +| Task type | Solution field | Example | +|-----------|---------------|---------| +| `HCaptchaClassification` | `objects` or `answer` | `[0, 2, 5]` or `true` | +| `ReCaptchaV2Classification` | `objects` | `[0, 3, 6]` | +| `FunCaptchaClassification` | `objects` | `[4]` | +| `AwsClassification` | `objects` | `[1]` | + +## HCaptchaClassification + +### Request shape + +```json +{ + "clientKey": "your-client-key", + "task": { + "type": "HCaptchaClassification", + "queries": ["base64-image-1", "base64-image-2", "base64-image-3"], + "question": "Please click each image containing a bicycle" + } +} +``` + +The `queries` field accepts a list of base64-encoded images (one per grid cell). The `question` field is the challenge prompt displayed to the user. + +### Response + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "objects": [1, 4] + } +} +``` + +## ReCaptchaV2Classification + +### Request shape + +```json +{ + "clientKey": "your-client-key", + "task": { + "type": "ReCaptchaV2Classification", + "image": "base64-encoded-grid-image", + "question": "Select all images with traffic lights" + } +} +``` + +The `image` field is a single base64-encoded image of the full reCAPTCHA grid (3×3 = 9 cells or 4×4 = 16 cells). Cells are numbered 0–8 (or 0–15), left-to-right, top-to-bottom.
+ +### Response + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "objects": [0, 3, 6] + } +} +``` + +## FunCaptchaClassification + +### Request shape + +```json +{ + "clientKey": "your-client-key", + "task": { + "type": "FunCaptchaClassification", + "image": "", + "question": "Pick the image that shows a boat facing left" + } +} +``` + +The grid is typically 2×3 (6 cells). Usually one answer is expected. + +### Response + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "objects": [3] + } +} +``` + +## AwsClassification + +### Request shape + +```json +{ + "clientKey": "your-client-key", + "task": { + "type": "AwsClassification", + "image": "", + "question": "Select the image that matches" + } +} +``` + +### Response + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "objects": [1] + } +} +``` + +## Create and poll (generic example) + +```bash +# Step 1: create task +TASK_ID=$(curl -s -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "ReCaptchaV2Classification", + "image": "'$(base64 -w0 captcha.png)'", + "question": "Select all images with traffic lights" + } + }' | python -c "import sys,json; print(json.load(sys.stdin)['taskId'])") + +# Step 2: poll result +curl -s -X POST http://localhost:8000/getTaskResult \ + -H "Content-Type: application/json" \ + -d "{\"clientKey\":\"your-client-key\",\"taskId\":\"$TASK_ID\"}" +``` + +## Operational notes + +- All classification tasks are **asynchronous from the client's perspective** — `createTask` responds immediately because the work is scheduled with `asyncio.create_task`; the actual model call happens in the background and its result is retrieved by polling `getTaskResult`. +- Model accuracy depends entirely on the vision model configured via `CAPTCHA_MULTIMODAL_MODEL` (default: `qwen3.5-2b`). +- For best results with classification, the stronger default `CAPTCHA_MODEL` (`gpt-5.4`) can be used for vision tasks as well by setting `CAPTCHA_MULTIMODAL_MODEL=gpt-5.4`. 
+- Images should not be pre-resized — the solver handles normalization internally. diff --git a/docs/usage/hcaptcha.md b/docs/usage/hcaptcha.md new file mode 100644 index 0000000000000000000000000000000000000000..7582f28342c347f8398e38ccb2b1475458049f9a --- /dev/null +++ b/docs/usage/hcaptcha.md @@ -0,0 +1,98 @@ +# hCaptcha Usage + +hCaptcha presents a CAPTCHA challenge via an iframe widget. The solver visits the target page with a Playwright-controlled Chromium browser, clicks the hCaptcha checkbox, waits for the challenge to resolve, and extracts the `h-captcha-response` token. + +## Supported task type + +| Task type | Description | +|-----------|-------------| +| `HCaptchaTaskProxyless` | Browser-based hCaptcha solving | + +## Required fields + +| Field | Type | Description | +|-------|------|-------------| +| `websiteURL` | string | Full URL of the page containing the captcha | +| `websiteKey` | string | The `data-sitekey` value from the page's HTML | + +## Test targets + +hCaptcha provides official test keys that produce predictable results: + +| URL | Site key | Behavior | +|-----|----------|----------| +| `https://accounts.hcaptcha.com/demo` | `10000000-ffff-ffff-ffff-000000000001` | Always passes (test key) | +| `https://accounts.hcaptcha.com/demo` | `20000000-ffff-ffff-ffff-000000000002` | Enterprise safe-user test | +| `https://demo.hcaptcha.com/` | `10000000-ffff-ffff-ffff-000000000001` | Always passes (test key) | + +## Create a task + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "HCaptchaTaskProxyless", + "websiteURL": "https://accounts.hcaptcha.com/demo", + "websiteKey": "10000000-ffff-ffff-ffff-000000000001" + } + }' +``` + +Response: + +```json +{ + "errorId": 0, + "taskId": "uuid-string" +} +``` + +## Poll for result + +```bash +curl -X POST http://localhost:8000/getTaskResult \ + -H "Content-Type: application/json" \ + -d '{ + 
"clientKey": "your-client-key", + "taskId": "uuid-from-createTask" + }' +``` + +When ready: + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "gRecaptchaResponse": "P1_eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9..." + } +} +``` + +!!! note "Response field name" + The token is returned in `solution.gRecaptchaResponse` for YesCaptcha API compatibility, even though hCaptcha natively uses the `h-captcha-response` field name. + +## Acceptance status + +| Target | Site key | Status | Notes | +|--------|----------|--------|-------| +| `https://accounts.hcaptcha.com/demo` | `10000000-ffff-ffff-ffff-000000000001` | ⚠️ Challenge-dependent | Headless browsers may still receive image challenges | + +### Headless browser note + +Even with the test site key (`10000000-ffff-ffff-ffff-000000000001`), hCaptcha may present an image challenge when the widget detects a headless browser. The solver clicks the checkbox and polls for a token for up to 30 seconds. + +For headless environments, the recommended approach is to use the `HCaptchaClassification` task type to solve the image grid challenge, then inject the token. See [Image Classification](classification.md) for details. + +## Image classification (HCaptchaClassification) + +For programmatic grid classification without browser automation, see [Image Classification](classification.md). + +## Operational notes + +- hCaptcha challenges may require more time than reCAPTCHA v2 — the solver waits up to 5 seconds after clicking. +- Real-world sites with aggressive bot detection may require additional fingerprinting improvements. +- Test keys (`10000000-ffff-ffff-ffff-000000000001`) always pass and are useful for flow validation. 
diff --git a/docs/usage/image-captcha.md b/docs/usage/image-captcha.md new file mode 100644 index 0000000000000000000000000000000000000000..1b6e3c580f24c4bf1816fca1abb44df4858772f0 --- /dev/null +++ b/docs/usage/image-captcha.md @@ -0,0 +1,55 @@ +# Image CAPTCHA Usage + +## Task type + +- `ImageToTextTask` + +## Request + +```json +{ + "clientKey": "your-client-key", + "task": { + "type": "ImageToTextTask", + "body": "" + } +} +``` + +## Implementation notes + +The image solver is implemented in `src/services/recognition.py` and is inspired by Argus-style structured multimodal annotation. + +Current behavior: + +- image input is resized to **1440×900** +- the model is prompted to classify the captcha into structured action types +- the normalized coordinate space starts at `(0, 0)` in the top-left corner + +Supported response styles in the prompt: + +- `click` +- `slide` +- `drag_match` + +## Result shape + +The current API returns the structured model output serialized as a string in `solution.text`. + +Example: + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "text": "{\"captcha_type\":\"slide\",\"drag_distance\":270}" + } +} +``` + +## Backend compatibility + +The multimodal path is designed for **OpenAI-compatible** APIs. This makes it suitable for hosted or self-hosted backends as long as they expose compatible image-capable chat completion behavior. + +Accuracy depends heavily on the selected model and provider implementation. diff --git a/docs/usage/recaptcha-v2.md b/docs/usage/recaptcha-v2.md new file mode 100644 index 0000000000000000000000000000000000000000..44b790115cd3126cf3a836221dd6dc1fd62815b4 --- /dev/null +++ b/docs/usage/recaptcha-v2.md @@ -0,0 +1,126 @@ +# reCAPTCHA v2 Usage + +reCAPTCHA v2 presents users with an "I'm not a robot" checkbox. The solver visits the target page with a real Chromium browser, clicks the checkbox, and extracts the resulting `gRecaptchaResponse` token. 
+ +## Supported task types + +| Task type | Description | +|-----------|-------------| +| `NoCaptchaTaskProxyless` | Standard reCAPTCHA v2 checkbox | +| `RecaptchaV2TaskProxyless` | Same as above, alternate naming | +| `RecaptchaV2EnterpriseTaskProxyless` | reCAPTCHA v2 Enterprise variant | + +## Required fields + +| Field | Type | Description | +|-------|------|-------------| +| `websiteURL` | string | Full URL of the page containing the captcha | +| `websiteKey` | string | The `data-sitekey` value from the page's HTML | +| `isInvisible` | bool | Optional. Set `true` for invisible reCAPTCHA | + +## Test target + +The official Google demo page is suitable for acceptance validation: + +- **URL:** `https://www.google.com/recaptcha/api2/demo` +- **Site key:** `6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-` + +## Create a task + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "NoCaptchaTaskProxyless", + "websiteURL": "https://www.google.com/recaptcha/api2/demo", + "websiteKey": "6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-" + } + }' +``` + +Response: + +```json +{ + "errorId": 0, + "taskId": "uuid-string" +} +``` + +## Poll for result + +```bash +curl -X POST http://localhost:8000/getTaskResult \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "taskId": "uuid-from-createTask" + }' +``` + +When ready, you receive: + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "gRecaptchaResponse": "03AGdBq24..." + } +} +``` + +## Invisible reCAPTCHA + +For pages using invisible reCAPTCHA (no visible checkbox), add `"isInvisible": true`. 
The solver will call `grecaptcha.execute()` directly instead of clicking the checkbox: + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "NoCaptchaTaskProxyless", + "websiteURL": "https://www.google.com/recaptcha/api2/demo", + "websiteKey": "6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-", + "isInvisible": true + } + }' +``` + +## Acceptance status + +| Target | Status | Notes | +|--------|--------|-------| +| `https://www.google.com/recaptcha/api2/demo` | ⚠️ Audio challenge path | Google detects headless browsers | + +### Headless browser detection + +Google's risk analysis engine reliably detects headless Chromium and presents a visual image challenge rather than issuing a token directly. The solver implements an audio challenge fallback: + +1. Click the checkbox — Google presents a challenge dialog. +2. Click the audio button in the challenge dialog. +3. Download the MP3 audio file. +4. Transcribe via the configured `CAPTCHA_MODEL` endpoint. +5. Submit the transcript to receive the token. + +!!! warning "Audio transcription requirement" + The audio challenge path requires a model endpoint capable of processing audio. The standard `CAPTCHA_MODEL` must support audio/speech input. Accuracy and availability depend on the configured endpoint. + +### Recommended integration path + +For reliable reCAPTCHA v2 solving in production, consider using the **classification task** approach: + +1. Extract the challenge image grid from the page using Playwright. +2. Send the grid image to `ReCaptchaV2Classification` with the challenge question. +3. Use the returned cell indices to programmatically click the matching tiles. + +See [Image Classification](classification.md) for details. + +## Operational notes + +- Token validity is approximately 120 seconds; submit promptly. +- The `RecaptchaV2EnterpriseTaskProxyless` type uses the same browser path. 
+- On less aggressive sites (not Google's own demo), the checkbox click may succeed without triggering a challenge. diff --git a/docs/usage/recaptcha-v3.md b/docs/usage/recaptcha-v3.md new file mode 100644 index 0000000000000000000000000000000000000000..7ea177e4a296708d74f0e22a34d5180b77347ed4 --- /dev/null +++ b/docs/usage/recaptcha-v3.md @@ -0,0 +1,53 @@ +# reCAPTCHA v3 Usage + +## Target used for acceptance + +This repository was validated against: + +- URL: `https://antcpt.com/score_detector/` +- site key: `6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf` + +## Create a task + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": "6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } + }' +``` + +## Poll for result + +```bash +curl -X POST http://localhost:8000/getTaskResult \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "taskId": "uuid-from-createTask" + }' +``` + +When the task is ready, you should receive `solution.gRecaptchaResponse`. + +## Acceptance result for this codebase + +A local acceptance run against the public detector target successfully: + +- started the service +- created a task +- reached `ready` +- returned a non-empty token + +## Operational caveats + +- A returned token does not imply guaranteed score targeting. +- Site behavior may vary over time. +- IP quality and browser environment can affect outcomes. +- The repository currently uses the same internal solver path for all registered reCAPTCHA v3 task variants. 
diff --git a/docs/usage/turnstile.md b/docs/usage/turnstile.md new file mode 100644 index 0000000000000000000000000000000000000000..5b7006f34a8eac740a5d8a859bca2a36205522b0 --- /dev/null +++ b/docs/usage/turnstile.md @@ -0,0 +1,108 @@ +# Cloudflare Turnstile Usage + +Cloudflare Turnstile is an invisible or widget-based CAPTCHA alternative. The solver visits the target page with Chromium, interacts with the Turnstile widget, and extracts the resulting token from a hidden `cf-turnstile-response` input field. + +## Supported task types + +| Task type | Description | +|-----------|-------------| +| `TurnstileTaskProxyless` | Standard Turnstile solving | +| `TurnstileTaskProxylessM1` | Same path, alternate tier naming | + +## Required fields + +| Field | Type | Description | +|-------|------|-------------| +| `websiteURL` | string | Full URL of the page containing the Turnstile widget | +| `websiteKey` | string | The Turnstile `data-sitekey` value | + +## Solution field + +Unlike reCAPTCHA tasks, the result is returned in `solution.token` (not `solution.gRecaptchaResponse`): + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "token": "0.ufq5RgSV..." 
+ } +} +``` + +## Test targets + +Cloudflare provides official dummy site keys for testing: + +| Site key | Behavior | URL | +|----------|----------|-----| +| `1x00000000000000000000AA` | Always passes | Any domain | +| `2x00000000000000000000AB` | Always fails | Any domain | +| `3x00000000000000000000FF` | Forces interactive challenge | Any domain | + +The React Turnstile demo is a good live test target: + +- **URL:** `https://react-turnstile.vercel.app/basic` +- **Site key:** `1x00000000000000000000AA` (test key, always passes) + +## Create a task + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "TurnstileTaskProxyless", + "websiteURL": "https://react-turnstile.vercel.app/basic", + "websiteKey": "1x00000000000000000000AA" + } + }' +``` + +Response: + +```json +{ + "errorId": 0, + "taskId": "uuid-string" +} +``` + +## Poll for result + +```bash +curl -X POST http://localhost:8000/getTaskResult \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "taskId": "uuid-from-createTask" + }' +``` + +When ready: + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "token": "XXXX.DUMMY.TOKEN.XXXX" + } +} +``` + +!!! info "Dummy token" + Cloudflare test keys (`1x00000000000000000000AA`) return the dummy token `XXXX.DUMMY.TOKEN.XXXX`. This is the expected and correct behavior for test sitekeys — the token is accepted by Cloudflare's test infrastructure. + +## Acceptance status + +| Target | Site key | Status | +|--------|----------|--------| +| `https://react-turnstile.vercel.app/basic` | `1x00000000000000000000AA` | ✅ Dummy token returned | + +## Operational notes + +- Turnstile auto-solves most of the time without user interaction; the solver polls for the token after page load. +- Real production sitekeys will return a real token (not the dummy token). 
+- The `TurnstileTaskProxylessM1` type uses the same implementation path. diff --git a/docs/zh/acceptance.md b/docs/zh/acceptance.md new file mode 100644 index 0000000000000000000000000000000000000000..220649565fe158fb90b423fb3231f7546834ae63 --- /dev/null +++ b/docs/zh/acceptance.md @@ -0,0 +1,84 @@ +# 验收测试 + +本页记录各支持验证码类型的验收目标、测试 URL、site key,以及本地验证运行的观察结果。 + +## 总览 + +| 验证码类型 | 目标 | 状态 | +|----------|------|------| +| reCAPTCHA v3 | `https://antcpt.com/score_detector/` | ✅ 已返回令牌 | +| Cloudflare Turnstile | `https://react-turnstile.vercel.app/basic` | ✅ 已返回 Dummy 令牌 | +| reCAPTCHA v2 | `https://www.google.com/recaptcha/api2/demo` | ⚠️ 需要音频挑战(见说明) | +| hCaptcha | `https://accounts.hcaptcha.com/demo` | ⚠️ 依赖挑战类型 | +| Image-to-Text | 本地 base64 图片 | ✅ 视觉模型返回文本 | +| 分类任务 | 本地 base64 网格 | ✅ 视觉模型返回对象索引 | + +--- + +## reCAPTCHA v3 — 主要验收目标 + +**URL:** `https://antcpt.com/score_detector/` +**Site key:** `6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf` + +### 验收步骤 + +1. 安装依赖和 Playwright Chromium。 +2. 启动服务:`python main.py` +3. 确认 `GET /api/v1/health` 返回全部 19 种任务类型。 +4. 创建 `RecaptchaV3TaskProxyless` 任务。 +5. 轮询 `POST /getTaskResult` 直至 `status=ready`。 +6. 
确认返回非空的 `solution.gRecaptchaResponse`。 + +### 已验证结果 + +- 服务启动:✅ +- 健康检查端点:✅(19 种类型已注册) +- 任务创建:✅ +- 令牌返回:✅(非空 `gRecaptchaResponse`,长度约 1060 字符) + +--- + +## Cloudflare Turnstile + +**URL:** `https://react-turnstile.vercel.app/basic` +**Site key:** `1x00000000000000000000AA`(Cloudflare 官方测试密钥——始终通过) + +### 已验证结果 + +- 令牌返回:✅ `XXXX.DUMMY.TOKEN.XXXX`(Cloudflare 测试密钥的预期行为) + +--- + +## reCAPTCHA v2 + +**URL:** `https://www.google.com/recaptcha/api2/demo` +**Site key:** `6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-` + +### 无头浏览器行为 + +Google 风险分析引擎会检测无头浏览器。复选框点击成功,但会弹出图像挑战,而不是直接签发令牌。 + +**已实现的缓解措施:** 求解器回退到**音频挑战路径**——在挑战对话框中点击音频按钮,下载 MP3,通过配置的模型转录,并提交转录文本。 + +### 状态 + +⚠️ 已实现音频挑战回退。成功率取决于模型的音频处理能力和 Google 当前的挑战难度。 + +--- + +## hCaptcha + +**URL:** `https://accounts.hcaptcha.com/demo` +**Site key:** `10000000-ffff-ffff-ffff-000000000001`(hCaptcha 官方测试密钥) + +### 状态 + +⚠️ 复选框点击成功。令牌签发取决于 hCaptcha 的机器人检测评分。对于测试环境,推荐使用 [HCaptchaClassification](../usage/classification.md) 任务类型(直接图像分类)作为集成方案。 + +--- + +## 总结 + +- ✅ **reCAPTCHA v3** 和 **Turnstile** 完全可用,每次本地测试均通过。 +- ⚠️ **reCAPTCHA v2** 和 **hCaptcha** 浏览器自动化求解受无头浏览器检测限制。这些类型主要通过 `HCaptchaClassification` / `ReCaptchaV2Classification` 分类任务进行图像网格求解集成。 +- 本服务设计为 **flow2api 的后端打码工具**——实际集成中,通常提取图像挑战帧并发送到分类端点,而非完全依赖浏览器自动化通过组件。 diff --git a/docs/zh/api-reference.md b/docs/zh/api-reference.md new file mode 100644 index 0000000000000000000000000000000000000000..f2a094ab2936e5e85e6c3257802f41326a8edc27 --- /dev/null +++ b/docs/zh/api-reference.md @@ -0,0 +1,180 @@ +# API 参考 + +## 接口列表 + +- `POST /createTask` +- `POST /getTaskResult` +- `POST /getBalance` +- `GET /api/v1/health` +- `GET /` + +所有任务接口都基于 JSON,并遵循 YesCaptcha 风格的异步任务模式。 + +## `POST /createTask` + +### 请求结构 + +```json +{ + "clientKey": "your-client-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": "6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } +} +``` + +### 支持的任务类型 + +#### 
reCAPTCHA v3 + +- `RecaptchaV3TaskProxyless` +- `RecaptchaV3TaskProxylessM1` +- `RecaptchaV3TaskProxylessM1S7` +- `RecaptchaV3TaskProxylessM1S9` + +必填字段: + +- `websiteURL` +- `websiteKey` +- 推荐传入 `pageAction`,该字段会透传给 `grecaptcha.execute()` + +#### 图片识别 + +- `ImageToTextTask` + +必填字段: + +- `body` — base64 编码后的图片 + +### `minScore` 兼容性说明 + +请求模型接受 `minScore` 字段用于兼容,但当前 solver **不会**根据该字段做分数控制。 + +### 成功响应 + +```json +{ + "errorId": 0, + "taskId": "uuid-string" +} +``` + +### 常见错误响应 + +```json +{ + "errorId": 1, + "errorCode": "ERROR_TASK_NOT_SUPPORTED", + "errorDescription": "Task type 'X' is not supported." +} +``` + +```json +{ + "errorId": 1, + "errorCode": "ERROR_TASK_PROPERTY_EMPTY", + "errorDescription": "websiteURL and websiteKey are required" +} +``` + +## `POST /getTaskResult` + +### 请求 + +```json +{ + "clientKey": "your-client-key", + "taskId": "uuid-from-createTask" +} +``` + +### 处理中响应 + +```json +{ + "errorId": 0, + "status": "processing" +} +``` + +### reCAPTCHA v3 完成响应 + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "gRecaptchaResponse": "token..." 
+ } +} +``` + +### `ImageToTextTask` 完成响应 + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "text": "{\"captcha_type\":\"click\", ...}" + } +} +``` + +### 未找到任务响应 + +```json +{ + "errorId": 1, + "errorCode": "ERROR_NO_SUCH_CAPCHA_ID", + "errorDescription": "Task not found" +} +``` + +## `POST /getBalance` + +### 请求 + +```json +{ + "clientKey": "your-client-key" +} +``` + +### 响应 + +```json +{ + "errorId": 0, + "balance": 99999.0 +} +``` + +当前余额为静态兼容性响应。 + +## `GET /api/v1/health` + +示例响应: + +```json +{ + "status": "ok", + "supported_task_types": [ + "RecaptchaV3TaskProxyless", + "RecaptchaV3TaskProxylessM1", + "RecaptchaV3TaskProxylessM1S7", + "RecaptchaV3TaskProxylessM1S9", + "ImageToTextTask" + ], + "browser_headless": true, + "captcha_model": "gpt-5.4", + "captcha_multimodal_model": "qwen3.5-2b" +} +``` + +## `GET /` + +根接口会返回服务简述以及运行时已注册的任务类型。 diff --git a/docs/zh/deployment/huggingface.md b/docs/zh/deployment/huggingface.md new file mode 100644 index 0000000000000000000000000000000000000000..763004ff8548076b74e37d4bab5fa1aac8b277db --- /dev/null +++ b/docs/zh/deployment/huggingface.md @@ -0,0 +1,116 @@ +# Hugging Face Spaces 部署 + +本指南说明如何使用 **Hugging Face Spaces** 的 Docker Space 来部署 OhMyCaptcha。 + +## 什么时候适合用 Hugging Face Spaces + +当你有以下需求时,Hugging Face Spaces 会比较合适: + +- 希望快速部署一个公开或私有演示环境 +- 希望使用图形界面的托管流程 +- 希望在 Space 设置里直接管理密钥 +- 不想自己维护 VPS,但又需要 Docker 部署环境 + +## 1. 准备仓库 + +请确认仓库中已经包含: + +- `Dockerfile.render` +- `main.py` +- `requirements.txt` +- `src/` 应用目录 + +对于 Hugging Face Spaces,当前 Dockerfile 可以直接作为起点,因为它已经包含 Python 依赖安装和 Playwright Chromium 安装步骤。 + +## 2. 创建 Docker Space + +在 Hugging Face 中: + +1. 创建新的 **Space**。 +2. SDK 选择 **Docker**。 +3. 根据需要选择公开或私有。 +4. 将 Space 连接到本仓库,或上传项目文件。 + +## 3. 
配置密钥和变量 + +在 Space 设置中添加以下 secrets: + +- `CLIENT_KEY` +- `CAPTCHA_API_KEY` + +按需添加或覆盖变量: + +- `CAPTCHA_BASE_URL` +- `CAPTCHA_MODEL` +- `CAPTCHA_MULTIMODAL_MODEL` +- `BROWSER_HEADLESS=true` +- `BROWSER_TIMEOUT=30` +- `SERVER_PORT=7860` + +Hugging Face Spaces 通常对外暴露 `7860` 端口,因此建议设置 `SERVER_PORT=7860`。 + +## 4. 确认启动命令 + +容器应通过以下命令启动应用: + +```bash +python main.py +``` + +当前入口已经支持通过环境变量读取端口。 + +## 5. 等待构建完成 + +当 Space 开始构建后: + +- 观察构建日志 +- 确认依赖安装成功 +- 确认 Playwright Chromium 安装成功 +- 等待应用进入运行状态 + +## 6. 验证部署结果 + +当 Space 可访问后,先验证: + +### 根接口 + +```bash +curl https://<your-space>.hf.space/ +``` + +### 健康检查 + +```bash +curl https://<your-space>.hf.space/api/v1/health +``` + +### 创建 detector 任务 + +```bash +curl -X POST https://<your-space>.hf.space/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": "6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } + }' +``` + +## 运行说明 + +- Hugging Face Spaces 部署方便,但冷启动和资源限制会影响 Playwright 这类浏览器自动化负载。 +- 相比纯 API 服务,浏览器自动化对共享托管环境更敏感。 +- 如果你需要更强的运行时控制,建议使用 Render 或自有基础设施。 + +## 推荐用途 + +Hugging Face Spaces 更适合: + +- 验证 +- 演示 +- 低流量内部使用 +- 作为文档中可直接访问的公开部署示例 diff --git a/docs/zh/deployment/local-model.md b/docs/zh/deployment/local-model.md new file mode 100644 index 0000000000000000000000000000000000000000..9b0d70da7f9d039af278be9fffce088908448a50 --- /dev/null +++ b/docs/zh/deployment/local-model.md @@ -0,0 +1,106 @@ +# 本地模型部署 + +OhMyCaptcha 支持使用 [SGLang](https://github.com/sgl-project/sglang)、[vLLM](https://github.com/vllm-project/vllm) 或任何 OpenAI 兼容推理服务在本地部署图像识别和分类模型。 + +本指南以 [Qwen3.5-2B](https://modelscope.cn/models/Qwen/Qwen3.5-2B) + SGLang 为例。 + +## 架构:本地模型 vs 云端模型 + +OhMyCaptcha 使用两种模型后端: + +| 后端 | 角色 | 环境变量 | 默认值 | +|------|------|---------|-------| +| **本地模型** | 图像识别与分类(高吞吐,自托管) | `LOCAL_BASE_URL`, `LOCAL_API_KEY`, `LOCAL_MODEL` | `http://localhost:30000/v1`, `EMPTY`, `Qwen/Qwen3.5-2B` | 
+| **云端模型** | 音频转录与复杂推理(强大远程 API) | `CLOUD_BASE_URL`, `CLOUD_API_KEY`, `CLOUD_MODEL` | 外部端点, 你的密钥, `gpt-5.4` | + +## 前置要求 + +- Python 3.10+ +- NVIDIA GPU + CUDA(推荐 8GB+ 显存用于 Qwen3.5-2B) + +## 第一步:安装 SGLang + +```bash +pip install "sglang[all]>=0.4.6.post1" +``` + +## 第二步:启动模型服务 + +### 从 ModelScope 下载(国内推荐) + +```bash +export SGLANG_USE_MODELSCOPE=true +python -m sglang.launch_server \ + --model-path Qwen/Qwen3.5-2B \ + --host 0.0.0.0 \ + --port 30000 +``` + +### 从 Hugging Face 下载 + +```bash +python -m sglang.launch_server \ + --model-path Qwen/Qwen3.5-2B \ + --host 0.0.0.0 \ + --port 30000 +``` + +### 多 GPU 部署 + +```bash +python -m sglang.launch_server \ + --model-path Qwen/Qwen3.5-2B \ + --host 0.0.0.0 \ + --port 30000 \ + --tensor-parallel-size 2 +``` + +启动后,服务在 `http://localhost:30000/v1` 提供 OpenAI 兼容 API。 + +## 第三步:验证模型服务 + +```bash +curl http://localhost:30000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Qwen/Qwen3.5-2B", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 32 + }' +``` + +## 第四步:配置 OhMyCaptcha + +```bash +# 本地模型(SGLang 自托管) +export LOCAL_BASE_URL="http://localhost:30000/v1" +export LOCAL_API_KEY="EMPTY" +export LOCAL_MODEL="Qwen/Qwen3.5-2B" + +# 云端模型(远程 API,用于音频转录等) +export CLOUD_BASE_URL="https://your-api-endpoint/v1" +export CLOUD_API_KEY="sk-your-key" +export CLOUD_MODEL="gpt-5.4" + +# 其他配置 +export CLIENT_KEY="your-client-key" +export BROWSER_HEADLESS=true +``` + +## 第五步:启动 OhMyCaptcha + +```bash +python main.py +``` + +## 向后兼容 + +旧版环境变量(`CAPTCHA_BASE_URL`、`CAPTCHA_API_KEY`、`CAPTCHA_MODEL`、`CAPTCHA_MULTIMODAL_MODEL`)仍然支持。新的 `LOCAL_*` 和 `CLOUD_*` 变量优先生效。 + +## 推荐模型 + +| 模型 | 大小 | 用途 | 显存 | +|------|------|------|------| +| `Qwen/Qwen3.5-2B` | 2B | 图像识别与分类 | ~5 GB | +| `Qwen/Qwen3.5-7B` | 7B | 更高精度分类 | ~15 GB | +| `Qwen/Qwen3.5-2B-FP8` | 2B(量化) | 低显存需求 | ~3 GB | diff --git a/docs/zh/deployment/render.md b/docs/zh/deployment/render.md new file mode 100644 index 
0000000000000000000000000000000000000000..fef1d7837ef4c8d0a2468a3c0ce2fdec1018f0a7 --- /dev/null +++ b/docs/zh/deployment/render.md @@ -0,0 +1,112 @@ +# Render 部署 + +本指南说明如何使用仓库中已经提供的 Docker 文件,把 OhMyCaptcha 部署到 **Render**。 + +## 什么时候适合用 Render + +当你有以下需求时,Render 是一个很合适的选择: + +- 希望获得稳定的公网地址 +- 希望方便地管理密钥和环境变量 +- 希望使用简单的 Docker 部署流程 +- 希望比演示型托管平台拥有更稳定的运行环境 + +## 1. 准备仓库 + +当前仓库已经包含 Render 所需文件: + +- `Dockerfile.render` +- `render.yaml` +- `main.py` +- `requirements.txt` +- `src/` + +应用默认监听 `8000` 端口,同时也兼容 Render 注入的 `PORT` 环境变量。 + +## 2. 创建 Render 服务 + +在 Render 中: + +1. 创建新的 **Web Service**。 +2. 连接你的 GitHub 仓库。 +3. Runtime 选择 **Docker**。 +4. 指向以下配置: + - Dockerfile:`Dockerfile.render` + - Context:仓库根目录 + +你也可以直接导入仓库中的 `render.yaml` blueprint。 + +## 3. 配置环境变量 + +### 必需密钥 + +请在 Render 控制台中配置以下受保护变量: + +- `CLIENT_KEY` +- `CAPTCHA_API_KEY` + +### 建议变量 + +- `CAPTCHA_BASE_URL=https://your-openai-compatible-endpoint/v1` +- `CAPTCHA_MODEL=gpt-5.4` +- `CAPTCHA_MULTIMODAL_MODEL=qwen3.5-2b` +- `CAPTCHA_RETRIES=3` +- `CAPTCHA_TIMEOUT=30` +- `BROWSER_HEADLESS=true` +- `BROWSER_TIMEOUT=30` + +## 4. 触发首次部署 + +保存配置后: + +- 等待镜像构建完成 +- 确认 Python 依赖安装成功 +- 确认 Playwright Chromium 安装成功 +- 等待服务进入 healthy 状态 + +## 5. 
验证部署结果 + +当 Render 提供 URL 后,先检查: + +### 根接口 + +```bash +curl https://<your-service>.onrender.com/ +``` + +### 健康检查 + +```bash +curl https://<your-service>.onrender.com/api/v1/health +``` + +### 创建 detector 任务 + +```bash +curl -X POST https://<your-service>.onrender.com/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": "6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } + }' +``` + +## 运行说明 + +- 相比轻量演示型托管平台,Render 更适合浏览器自动化类服务。 +- 但浏览器求解依然会受到冷启动、IP 质量和容器资源限制的影响。 +- 如果你需要更强的运行时控制,建议迁移到自有基础设施。 + +## 推荐用途 + +Render 很适合作为以下场景的默认部署方案: + +- 持续在线的公网服务 +- flow2api 联调 +- 低到中等流量的生产环境 +- 不想自己维护服务器时的快速上线 diff --git a/docs/zh/faq.md b/docs/zh/faq.md new file mode 100644 index 0000000000000000000000000000000000000000..1459b3d464ee435ad1343e6f38482246cb5b4deb --- /dev/null +++ b/docs/zh/faq.md @@ -0,0 +1,25 @@ +# 常见问题 + +## 这能完全替代 YesCaptcha 吗? + +不能。它实现的是本仓库所支持任务类型对应的 YesCaptcha 风格 API,而不是对所有商业平台功能的完整覆盖。 + +## `minScore` 能保证目标 reCAPTCHA 分数吗? + +不能。请求模型里保留了 `minScore` 字段用于兼容,但当前 solver 不会根据它做分数控制。 + +## 可以使用本地或自托管多模态模型吗? + +可以,前提是它们提供支持图像输入的 OpenAI-compatible API。 + +## `ImageToTextTask` 返回的是纯 OCR 文本吗? + +不一定。当前实现会把结构化识别结果序列化后放入 `solution.text`。 + +## 任务状态会持久化吗? + +不会。任务状态保存在内存中,并会在 TTL 到期后清理。 + +## 哪些因素会影响 reCAPTCHA v3 结果? 
+ +常见因素包括 IP 质量、浏览器指纹、目标站行为、`pageAction` 是否正确,以及运行环境本身。 diff --git a/docs/zh/getting-started.md b/docs/zh/getting-started.md new file mode 100644 index 0000000000000000000000000000000000000000..6223477bbe66915aa7be83d79f08a2d5dc157ce0 --- /dev/null +++ b/docs/zh/getting-started.md @@ -0,0 +1,67 @@ +# 快速开始 + +## 环境要求 + +- Python 3.10+ +- 通过 Playwright 安装 Chromium +- 具备访问以下资源的网络能力: + - 目标网站 + - 你配置的 OpenAI-compatible 模型接口 + +## 安装 + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +playwright install --with-deps chromium +``` + +## 环境变量 + +| 变量 | 说明 | 默认值 | +| --- | --- | --- | +| `CLIENT_KEY` | 客户端鉴权密钥 | 未设置 | +| `CAPTCHA_BASE_URL` | OpenAI-compatible API 地址 | `https://your-openai-compatible-endpoint/v1` | +| `CAPTCHA_API_KEY` | 模型接口密钥 | 未设置 | +| `CAPTCHA_MODEL` | 强文本模型 | `gpt-5.4` | +| `CAPTCHA_MULTIMODAL_MODEL` | 多模态模型 | `qwen3.5-2b` | +| `CAPTCHA_RETRIES` | 重试次数 | `3` | +| `CAPTCHA_TIMEOUT` | 模型超时(秒) | `30` | +| `BROWSER_HEADLESS` | 是否无头运行 Chromium | `true` | +| `BROWSER_TIMEOUT` | 浏览器超时(秒) | `30` | +| `SERVER_HOST` | 监听地址 | `0.0.0.0` | +| `SERVER_PORT` | 监听端口 | `8000` | + +## 启动服务 + +```bash +export CLIENT_KEY="your-client-key" +export CAPTCHA_BASE_URL="https://your-openai-compatible-endpoint/v1" +export CAPTCHA_API_KEY="your-api-key" +export CAPTCHA_MODEL="gpt-5.4" +export CAPTCHA_MULTIMODAL_MODEL="qwen3.5-2b" +python main.py +``` + +## 验证启动 + +### 根接口 + +```bash +curl http://localhost:8000/ +``` + +### 健康检查 + +```bash +curl http://localhost:8000/api/v1/health +``` + +健康检查响应中应包含已注册任务类型以及当前运行时模型配置。 + +## 本地 / 自托管模型支持 + +图片识别路径基于 **OpenAI-compatible API** 设计。因此,只要你的后端具备兼容的 chat-completions 语义并支持图像输入,就可以把 `CAPTCHA_BASE_URL` 指向托管服务、内部网关或本地/自托管多模态网关。 + +文档采用通用兼容性表述,而不是对每一种模型服务栈做完整验证承诺。 diff --git a/docs/zh/index.md b/docs/zh/index.md new file mode 100644 index 0000000000000000000000000000000000000000..61d40c3b1a9bacbe7e8dfa19f7167afd1fff8283 --- /dev/null +++ b/docs/zh/index.md @@ -0,0 +1,126 @@ +# OhMyCaptcha + +
+ +
+ OhMyCaptcha — 自托管验证码求解服务 +
+ +
+ +## ⚡ 面向自托管场景的 YesCaptcha 风格验证码服务 + +OhMyCaptcha 将 **FastAPI**、**Playwright** 与 **OpenAI-compatible 多模态模型** 组合为一个全面的验证码求解服务,支持 **19 种任务类型**,适用于 **flow2api** 与类似集成场景。 + +
+ +[快速开始](getting-started.md){ .md-button .md-button--primary } +[API 参考](api-reference.md){ .md-button } +[GitHub](https://github.com/shenhao-stu/ohmycaptcha){ .md-button } + +
+ +
+ +
+ +## ✨ 项目亮点 + +
+ +- :material-api: **YesCaptcha 风格 API** + + --- + + 覆盖 reCAPTCHA v2/v3、hCaptcha、Turnstile 和图像分类的异步 `createTask` / `getTaskResult` 语义。 + +- :material-google-chrome: **浏览器自动化求解** + + --- + + Playwright + Chromium 为 reCAPTCHA v2/v3、hCaptcha 和 Cloudflare Turnstile 生成令牌。 + +- :material-image-search: **多模态图片识别** + + --- + + 通过 OpenAI-compatible 视觉模型进行 HCaptcha、reCAPTCHA、FunCaptcha、AWS 图像分类。 + +- :material-cloud-outline: **自托管部署** + + --- + + 支持本地运行,配合 Render 和 Hugging Face Spaces 指南完成生产部署。 + +
+ +## 🧠 支持的任务类型 + +### 浏览器自动化求解(12 种) + +| 分类 | 任务类型 | +|------|---------| +| **reCAPTCHA v3** | `RecaptchaV3TaskProxyless`, `RecaptchaV3TaskProxylessM1`, `RecaptchaV3TaskProxylessM1S7`, `RecaptchaV3TaskProxylessM1S9` | +| **reCAPTCHA v3 企业版** | `RecaptchaV3EnterpriseTask`, `RecaptchaV3EnterpriseTaskM1` | +| **reCAPTCHA v2** | `NoCaptchaTaskProxyless`, `RecaptchaV2TaskProxyless`, `RecaptchaV2EnterpriseTaskProxyless` | +| **hCaptcha** | `HCaptchaTaskProxyless` | +| **Cloudflare Turnstile** | `TurnstileTaskProxyless`, `TurnstileTaskProxylessM1` | + +### 图片识别(3 种) + +| 任务类型 | 说明 | +|---------|------| +| `ImageToTextTask` | 受 Argus 启发的多模态识别 | +| `ImageToTextTaskMuggle` | 文本/字母数字识别 | +| `ImageToTextTaskM1` | 异步图片文本识别 | + +### 图像分类(4 种) + +| 任务类型 | 说明 | +|---------|------| +| `HCaptchaClassification` | hCaptcha 网格图像分类 | +| `ReCaptchaV2Classification` | reCAPTCHA v2 网格选择 | +| `FunCaptchaClassification` | FunCaptcha 图像选择 | +| `AwsClassification` | AWS 验证码图像分类 | + +## 🚀 快速入口 + +
+ +- :material-rocket-launch-outline: **快速开始** + + --- + + 安装依赖、配置环境变量,并在本地启动服务。 + + [打开快速开始](getting-started.md) + +- :material-file-document-outline: **API 参考** + + --- + + 查看全部 19 种任务类型、接口和请求格式。 + + [打开 API 参考](api-reference.md) + +- :material-play-box-outline: **验收指南** + + --- + + 针对 reCAPTCHA v3 score detector 测试目标验证完整流程,并确认 token 返回行为。 + + [打开验收指南](acceptance.md) + +- :material-server-outline: **部署指南** + + --- + + 按 Render 或 Hugging Face Spaces 路径部署你的服务实例。 + + [打开部署指南](deployment/render.md) + +
+ +## 📌 范围说明 + +OhMyCaptcha 实现了**覆盖 19 种任务类型的 YesCaptcha 风格 API**,涵盖 reCAPTCHA v2/v3、hCaptcha、Cloudflare Turnstile 和图像分类。浏览器自动化任务依赖 Playwright,可能需要针对特定目标站点调优。图像分类利用多模态视觉模型,准确性取决于模型质量。 diff --git a/docs/zh/positioning.md b/docs/zh/positioning.md new file mode 100644 index 0000000000000000000000000000000000000000..73703387ee3e278a2771fc6805647d80db8118db --- /dev/null +++ b/docs/zh/positioning.md @@ -0,0 +1,46 @@ +# 项目定位 + +## OhMyCaptcha 是什么 + +OhMyCaptcha 是一个可自托管的验证码解决服务,为本仓库已经实现的任务类型提供 YesCaptcha 风格的 API。 + +它适合以下场景: + +- 希望自托管验证码服务 +- 希望兼容 `createTask` / `getTaskResult` 风格工作流 +- 希望可控地管理浏览器自动化和模型后端 +- 希望支持 OpenAI-compatible 多模态提供方,包括本地或自托管网关 + +## 与 YesCaptcha 等托管服务的对比 + +YesCaptcha 等托管服务通常提供: + +- 托管平台 +- 更广泛的任务类型目录 +- 商业 SLA 与供应商托管基础设施 + +OhMyCaptcha 更关注: + +- 自托管 +- 实现透明 +- 提示词和浏览器逻辑可自定义 +- 多模态模型后端灵活可替换 + +## 范围边界 + +本仓库不应被描述为对商业平台所有验证码家族与任务类型的完整替代。 + +更准确的表述是: + +> 一个针对已实现任务类型提供 YesCaptcha 风格 API 的自托管服务,可集成到 flow2api 等系统中。 + +## 本地 / 自托管模型支持 + +项目在多模态识别路径上采用 OpenAI-compatible API,因此可以连接: + +- 托管服务 +- 内部网关 +- 自托管多模态服务 +- 语义兼容的本地模型服务栈 + +文档刻意保持通用兼容性表述。实际兼容性仍取决于后端是否支持图像输入,以及 chat-completions 行为是否足够兼容。 diff --git a/docs/zh/skill.md b/docs/zh/skill.md new file mode 100644 index 0000000000000000000000000000000000000000..ae227f6e54483f6c3e0053ff26b60951bcde171c --- /dev/null +++ b/docs/zh/skill.md @@ -0,0 +1,58 @@ +# Agent Skill + +OhMyCaptcha 在 `skills/` 目录下附带了可复用的 skills。 + +## 可用 skills + +- `skills/ohmycaptcha/` — 用于部署、验证、集成和运维服务 +- `skills/ohmycaptcha-image/` — 用于生成 README、文档和发布素材所需的公开安全图片 + +## For humans + +如果你的工具支持直接读取本地 skill 目录,可以把下面这些目录复制到你的本地 skills 目录中: + +```text +skills/ohmycaptcha/ +skills/ohmycaptcha-image/ +``` + +如果你的工具会缓存 skill 元信息,请复制后重启。 + +## Let an LLM do it + +你也可以把下面这段话直接贴给支持工具调用的 LLM agent: + +```text +Install the OhMyCaptcha skills from this repository and make them available in my local skills directory. Then show me how to use the operational skill for deployment and the image skill for generating README or docs visuals. 
+``` + +## 运维 skill 的作用 + +`ohmycaptcha` skill 主要覆盖: + +- 本地启动 +- 环境变量配置 +- YesCaptcha 风格 API 使用 +- flow2api 集成 +- Render 部署 +- Hugging Face Spaces 部署 +- 任务验收与排障 + +## 图片 skill 的作用 + +`ohmycaptcha-image` skill 主要覆盖: + +- README Hero 图 prompt +- 文档插图 +- 面向公开仓库的安全技术视觉素材 +- 架构风格图片 +- 面向 agent 工作流的可复用图像生成 prompt + +## 设计保证 + +这些 skill 的设计目标包括: + +- 只使用占位符密钥 +- 与当前已实现任务类型保持一致 +- 明确说明当前限制 +- 避免嵌入 secrets、私有接口地址或客户数据 diff --git a/docs/zh/usage/classification.md b/docs/zh/usage/classification.md new file mode 100644 index 0000000000000000000000000000000000000000..5d5f82b86b3ee84af6193f1a4e86e9a1b2586df9 --- /dev/null +++ b/docs/zh/usage/classification.md @@ -0,0 +1,70 @@ +# 图像分类使用指南 + +图像分类任务将一张或多张验证码图片发送给 OpenAI-compatible 视觉模型,返回匹配格子的索引或布尔值答案。无需浏览器自动化,这些均为纯视觉模型 API 调用。 + +## 支持的任务类型 + +| 任务类型 | 说明 | +|---------|------| +| `HCaptchaClassification` | hCaptcha 3×3 网格——返回匹配格子索引 | +| `ReCaptchaV2Classification` | reCAPTCHA v2 3×3/4×4 网格——返回匹配格子索引 | +| `FunCaptchaClassification` | FunCaptcha 2×3 网格——返回正确格子索引 | +| `AwsClassification` | AWS 验证码图像选择 | + +## 返回字段 + +| 任务类型 | 返回字段 | 示例 | +|---------|---------|------| +| `HCaptchaClassification` | `objects` 或 `answer` | `[0, 2, 5]` 或 `true` | +| `ReCaptchaV2Classification` | `objects` | `[0, 3, 6]` | +| `FunCaptchaClassification` | `objects` | `[4]` | +| `AwsClassification` | `objects` | `[1]` | + +## HCaptchaClassification 示例 + +```json +{ + "clientKey": "your-client-key", + "task": { + "type": "HCaptchaClassification", + "queries": ["base64-image-1", "base64-image-2", "base64-image-3"], + "question": "Please click each image containing a bicycle" + } +} +``` + +`queries` 字段接受 base64 编码的图片列表(每个格子一张)。 + +## ReCaptchaV2Classification 示例 + +```json +{ + "clientKey": "your-client-key", + "task": { + "type": "ReCaptchaV2Classification", + "image": "base64-encoded-grid-image", + "question": "Select all images with traffic lights" + } +} +``` + +`image` 字段为完整的 reCAPTCHA 网格图片(3×3 = 9 格,或 4×4 = 16 格)。格子编号从 0 开始,从左到右、从上到下。 + +## FunCaptchaClassification 示例 + +```json +{ + "clientKey": "your-client-key", + 
"task": { + "type": "FunCaptchaClassification", + "image": "base64-encoded-image", + "question": "Pick the image that shows a boat facing left" + } +} +``` + +## 注意事项 + +- 所有分类任务均通过 `CAPTCHA_MULTIMODAL_MODEL`(默认:`qwen3.5-2b`)指定的视觉模型处理。 +- 模型准确性取决于所配置的视觉模型质量。 +- 图片无需预先缩放,求解器内部会处理归一化。 diff --git a/docs/zh/usage/hcaptcha.md b/docs/zh/usage/hcaptcha.md new file mode 100644 index 0000000000000000000000000000000000000000..2fb6375ed6e83e6fadc93c574fa12127b37617d1 --- /dev/null +++ b/docs/zh/usage/hcaptcha.md @@ -0,0 +1,74 @@ +# hCaptcha 使用指南 + +hCaptcha 通过 iframe 组件展示验证码挑战。求解器使用 Playwright 控制的 Chromium 访问目标页面,点击 hCaptcha 复选框,等待挑战完成,提取响应令牌。 + +## 支持的任务类型 + +| 任务类型 | 说明 | +|---------|------| +| `HCaptchaTaskProxyless` | 基于浏览器的 hCaptcha 求解 | + +## 必填字段 + +| 字段 | 类型 | 说明 | +|------|------|------| +| `websiteURL` | string | 包含验证码的页面完整 URL | +| `websiteKey` | string | 页面 HTML 中的 `data-sitekey` 值 | + +## 测试目标 + +hCaptcha 提供官方测试密钥: + +| URL | Site key | 行为 | +|-----|----------|------| +| `https://accounts.hcaptcha.com/demo` | `10000000-ffff-ffff-ffff-000000000001` | 始终通过(测试密钥) | +| `https://demo.hcaptcha.com/` | `10000000-ffff-ffff-ffff-000000000001` | 始终通过(测试密钥) | + +## 创建任务 + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "HCaptchaTaskProxyless", + "websiteURL": "https://accounts.hcaptcha.com/demo", + "websiteKey": "10000000-ffff-ffff-ffff-000000000001" + } + }' +``` + +## 轮询结果 + +就绪时返回: + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "gRecaptchaResponse": "P1_eyJ0eXAiOiJKV1QiLC..." + } +} +``` + +!!! 
note "字段命名" + 令牌以 `solution.gRecaptchaResponse` 返回,保持与 YesCaptcha API 的兼容性。 + +## 验收状态 + +| 目标 | Site key | 状态 | 说明 | +|------|----------|------|------| +| `https://accounts.hcaptcha.com/demo` | `10000000-ffff-ffff-ffff-000000000001` | ⚠️ 依赖挑战类型 | 无头浏览器可能仍会收到图像挑战 | + +### 无头浏览器说明 + +即使使用测试密钥(`10000000-ffff-ffff-ffff-000000000001`),hCaptcha 在检测到无头浏览器时仍可能弹出图像挑战。求解器在点击复选框后最多等待 30 秒以获取令牌。 + +对于无头环境,推荐使用 `HCaptchaClassification` 任务类型求解图像网格挑战,然后注入令牌。 + +## 注意事项 + +- hCaptcha 挑战通常比 reCAPTCHA v2 需要更多时间,求解器点击后最多等待 30 秒。 +- 测试密钥(`10000000-ffff-ffff-ffff-000000000001`)在低风险环境下会立即通过。 diff --git a/docs/zh/usage/image-captcha.md b/docs/zh/usage/image-captcha.md new file mode 100644 index 0000000000000000000000000000000000000000..ff0a5a5e5511d85b67a4131f4bda955d107cc801 --- /dev/null +++ b/docs/zh/usage/image-captcha.md @@ -0,0 +1,55 @@ +# 图片验证码使用指南 + +## 任务类型 + +- `ImageToTextTask` + +## 请求示例 + +```json +{ + "clientKey": "your-client-key", + "task": { + "type": "ImageToTextTask", + "body": "base64-encoded-image" + } +} +``` + +## 实现说明 + +图片 solver 位于 `src/services/recognition.py`,采用受 Argus 启发的结构化多模态标注思路。 + +当前行为: + +- 输入图片会被缩放到 **1440×900** +- 模型会被提示识别验证码类型并输出结构化结果 +- 归一化坐标空间以左上角 `(0, 0)` 为原点 + +提示词当前支持的结构化类型包括: + +- `click` +- `slide` +- `drag_match` + +## 返回结构 + +当前 API 会把模型输出的结构化 JSON 序列化后放在 `solution.text` 中返回。 + +示例: + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "text": "{\"captcha_type\":\"slide\",\"drag_distance\":270}" + } +} +``` + +## 后端兼容性 + +多模态路径面向 **OpenAI-compatible** 接口设计,因此只要后端支持图像输入并具备兼容的 chat completion 行为,就可以接托管或自托管服务。 + +实际准确率会强烈依赖所选模型与供应商实现质量。 diff --git a/docs/zh/usage/recaptcha-v2.md b/docs/zh/usage/recaptcha-v2.md new file mode 100644 index 0000000000000000000000000000000000000000..330b27d2e4c6c0ed8b23397bc0754b5550525260 --- /dev/null +++ b/docs/zh/usage/recaptcha-v2.md @@ -0,0 +1,94 @@ +# reCAPTCHA v2 使用指南 + +reCAPTCHA v2 向用户展示"我不是机器人"复选框。求解器使用真实 Chromium 浏览器访问目标页面,点击复选框,并提取生成的 `gRecaptchaResponse` 令牌。 + +## 支持的任务类型 + +| 任务类型 | 说明 | 
+|---------|------| +| `NoCaptchaTaskProxyless` | 标准 reCAPTCHA v2 复选框 | +| `RecaptchaV2TaskProxyless` | 同上,备用命名 | +| `RecaptchaV2EnterpriseTaskProxyless` | reCAPTCHA v2 企业版 | + +## 必填字段 + +| 字段 | 类型 | 说明 | +|------|------|------| +| `websiteURL` | string | 包含验证码的页面完整 URL | +| `websiteKey` | string | 页面 HTML 中的 `data-sitekey` 值 | +| `isInvisible` | bool | 可选。不可见 reCAPTCHA 设为 `true` | + +## 测试目标 + +Google 官方 Demo 页面适合验收测试: + +- **URL:** `https://www.google.com/recaptcha/api2/demo` +- **Site key:** `6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-` + +## 创建任务 + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "NoCaptchaTaskProxyless", + "websiteURL": "https://www.google.com/recaptcha/api2/demo", + "websiteKey": "6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-" + } + }' +``` + +## 轮询结果 + +```bash +curl -X POST http://localhost:8000/getTaskResult \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "taskId": "uuid-from-createTask" + }' +``` + +就绪时返回: + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "gRecaptchaResponse": "03AGdBq24..." + } +} +``` + +## 验收状态 + +| 目标 | 状态 | 说明 | +|------|------|------| +| `https://www.google.com/recaptcha/api2/demo` | ⚠️ 音频挑战路径 | Google 检测到无头浏览器 | + +### 无头浏览器检测 + +Google 的风险分析引擎可靠地检测到无头 Chromium,会弹出图像挑战而非直接签发令牌。求解器实现了音频挑战回退方案: + +1. 点击复选框——Google 弹出挑战对话框。 +2. 在挑战对话框中点击音频按钮。 +3. 下载 MP3 音频文件。 +4. 通过配置的 `CAPTCHA_MODEL` 端点进行转录。 +5. 提交转录文本以获得令牌。 + +### 推荐的集成方案 + +对于生产环境中可靠的 reCAPTCHA v2 求解,建议使用**分类任务**方案: + +1. 使用 Playwright 从页面提取挑战图像网格。 +2. 将网格图片发送给 `ReCaptchaV2Classification`,附上挑战问题。 +3. 
使用返回的格子索引以编程方式点击匹配的格子。 + +## 注意事项 + +- 令牌有效期约 120 秒,请尽快提交。 +- `RecaptchaV2EnterpriseTaskProxyless` 类型使用相同的浏览器路径。 +- 在侵入性较低的网站(非 Google 自己的 Demo),复选框点击可能直接成功而无需挑战。 diff --git a/docs/zh/usage/recaptcha-v3.md b/docs/zh/usage/recaptcha-v3.md new file mode 100644 index 0000000000000000000000000000000000000000..f02830c7122163e704dd2c5c3bbc3919fedec828 --- /dev/null +++ b/docs/zh/usage/recaptcha-v3.md @@ -0,0 +1,53 @@ +# reCAPTCHA v3 使用指南 + +## 验收目标 + +本仓库使用以下目标完成了验证: + +- URL:`https://antcpt.com/score_detector/` +- site key:`6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf` + +## 创建任务 + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "RecaptchaV3TaskProxyless", + "websiteURL": "https://antcpt.com/score_detector/", + "websiteKey": "6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf", + "pageAction": "homepage" + } + }' +``` + +## 轮询结果 + +```bash +curl -X POST http://localhost:8000/getTaskResult \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "taskId": "uuid-from-createTask" + }' +``` + +当任务完成时,你会收到 `solution.gRecaptchaResponse`。 + +## 当前代码库的验收结果 + +一次本地验收已经成功完成以下流程: + +- 启动服务 +- 创建任务 +- 轮询到 `ready` +- 返回非空 token + +## 运行注意事项 + +- 返回 token 不代表可以保证指定 score。 +- 目标站行为可能随时间变化。 +- IP 质量与浏览器环境会影响结果。 +- 当前仓库中,所有已注册的 reCAPTCHA v3 变体共享同一套内部 solver 路径。 diff --git a/docs/zh/usage/turnstile.md b/docs/zh/usage/turnstile.md new file mode 100644 index 0000000000000000000000000000000000000000..89740ec26aaed591abf21108045b24d9279b42f9 --- /dev/null +++ b/docs/zh/usage/turnstile.md @@ -0,0 +1,75 @@ +# Cloudflare Turnstile 使用指南 + +Cloudflare Turnstile 是一种无感或组件式验证码替代方案。求解器使用 Chromium 访问目标页面,与 Turnstile 组件交互,并从隐藏的 `cf-turnstile-response` 输入字段中提取令牌。 + +## 支持的任务类型 + +| 任务类型 | 说明 | +|---------|------| +| `TurnstileTaskProxyless` | 标准 Turnstile 求解 | +| `TurnstileTaskProxylessM1` | 同上,备用命名 | + +## 必填字段 + +| 字段 | 类型 | 说明 | +|------|------|------| +| `websiteURL` | 
string | 包含 Turnstile 组件的页面完整 URL | +| `websiteKey` | string | Turnstile `data-sitekey` 值 | + +## 返回字段 + +结果在 `solution.token`(而非 `solution.gRecaptchaResponse`)中返回: + +```json +{ + "errorId": 0, + "status": "ready", + "solution": { + "token": "XXXX.DUMMY.TOKEN.XXXX" + } +} +``` + +## 测试目标 + +Cloudflare 提供官方测试密钥: + +| Site key | 行为 | +|----------|------| +| `1x00000000000000000000AA` | 始终通过(可见组件) | +| `2x00000000000000000000AB` | 始终失败 | +| `3x00000000000000000000FF` | 强制交互式挑战 | + +推荐测试页面: + +- **URL:** `https://react-turnstile.vercel.app/basic` +- **Site key:** `1x00000000000000000000AA`(测试密钥,始终通过) + +## 创建任务 + +```bash +curl -X POST http://localhost:8000/createTask \ + -H "Content-Type: application/json" \ + -d '{ + "clientKey": "your-client-key", + "task": { + "type": "TurnstileTaskProxyless", + "websiteURL": "https://react-turnstile.vercel.app/basic", + "websiteKey": "1x00000000000000000000AA" + } + }' +``` + +## 验收状态 + +| 目标 | Site key | 状态 | +|------|----------|------| +| `https://react-turnstile.vercel.app/basic` | `1x00000000000000000000AA` | ✅ 已返回令牌 | + +!!! 
info "Dummy token 说明" + Cloudflare 测试密钥返回 `XXXX.DUMMY.TOKEN.XXXX`,这是预期行为,表明组件已正常识别。 + +## 注意事项 + +- Turnstile 大多数情况下自动完成,无需用户交互。 +- 生产环境的真实密钥将返回真实令牌(非 dummy)。 diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..eff997445d2b7cbc7b83d354d1789bcc75e3cfa7 --- /dev/null +++ b/main.py @@ -0,0 +1,19 @@ +"""Service entrypoint compatible with Render-style deployment.""" + +import os + +import uvicorn + +from src.main import app + + +if __name__ == "__main__": + from src.core.config import config + + port = int(os.environ.get("PORT", config.server_port)) + uvicorn.run( + "src.main:app", + host=config.server_host, + port=port, + reload=False, + ) diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000000000000000000000000000000000000..786f49f238c6e40d212dce7b6448f734e7ed1c2b --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,134 @@ +site_name: OhMyCaptcha +site_description: Self-hostable YesCaptcha-compatible captcha solver built with FastAPI, Playwright, and OpenAI-compatible multimodal models. 
+site_url: https://shenhao-stu.github.io/ohmycaptcha/ +repo_url: https://github.com/shenhao-stu/ohmycaptcha +repo_name: shenhao-stu/ohmycaptcha +edit_uri: edit/main/docs/ + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/shenhao-stu/ohmycaptcha + +extra_css: + - stylesheets/extra.css + +theme: + name: material + language: en + icon: + repo: fontawesome/brands/github + palette: + - media: "(prefers-color-scheme: light)" + scheme: default + primary: white + accent: indigo + toggle: + icon: material/weather-night + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/weather-sunny + name: Switch to light mode + features: + - navigation.sections + - navigation.expand + - navigation.top + - toc.follow + - content.code.copy + - content.action.edit + - search.suggest + - search.highlight + +plugins: + - search + - i18n: + docs_structure: folder + fallback_to_default: true + reconfigure_material: true + reconfigure_search: true + languages: + - locale: en + name: English + default: true + build: true + nav_translations: + Home: Home + Getting Started: Getting Started + API Reference: API Reference + Usage: Usage + reCAPTCHA v3: reCAPTCHA v3 + reCAPTCHA v2: reCAPTCHA v2 + hCaptcha: hCaptcha + Cloudflare Turnstile: Cloudflare Turnstile + Image Classification: Image Classification + Image CAPTCHA: Image CAPTCHA + Positioning: Positioning + Deployment: Deployment + Local Model (SGLang): Local Model (SGLang) + Render: Render + Hugging Face Spaces: Hugging Face Spaces + Agent Skill: Agent Skill + Acceptance: Acceptance + FAQ: FAQ + - locale: zh + name: 简体中文 + build: true + site_name: OhMyCaptcha + nav_translations: + Home: 首页 + Getting Started: 快速开始 + API Reference: API 参考 + Usage: 使用指南 + reCAPTCHA v3: reCAPTCHA v3 + reCAPTCHA v2: reCAPTCHA v2 + hCaptcha: hCaptcha + Cloudflare Turnstile: Cloudflare Turnstile + Image Classification: 图像分类 + Image CAPTCHA: 
图片验证码 + Positioning: 项目定位 + Deployment: 部署 + Local Model (SGLang): 本地模型 (SGLang) + Render: Render + Hugging Face Spaces: Hugging Face Spaces + Agent Skill: Agent Skill + Acceptance: 验收 + FAQ: 常见问题 + - redirects + +markdown_extensions: + - admonition + - attr_list + - md_in_html + - tables + - toc: + permalink: true + - pymdownx.highlight + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.details + - pymdownx.tabbed: + alternate_style: true + +nav: + - Home: index.md + - Getting Started: getting-started.md + - API Reference: api-reference.md + - Usage: + - reCAPTCHA v3: usage/recaptcha-v3.md + - reCAPTCHA v2: usage/recaptcha-v2.md + - hCaptcha: usage/hcaptcha.md + - Cloudflare Turnstile: usage/turnstile.md + - Image Classification: usage/classification.md + - Image CAPTCHA: usage/image-captcha.md + - Positioning: positioning.md + - Deployment: + - Local Model (SGLang): deployment/local-model.md + - Render: deployment/render.md + - Hugging Face Spaces: deployment/huggingface.md + - Agent Skill: skill.md + - Acceptance: acceptance.md + - FAQ: faq.md diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 0000000000000000000000000000000000000000..7c8dc12e455e8f73e793d480517121754434da01 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,15 @@ +{ + "include": [ + "main.py", + "src", + "tests" + ], + "extraPaths": [ + ".", + "/home/shenhao/recaptcha" + ], + "stubPath": "typings", + "venvPath": ".", + "venv": ".venv", + "typeCheckingMode": "standard" +} diff --git a/render.yaml b/render.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c38df7e6900286b88526da3e81b7c77e0093c2e1 --- /dev/null +++ b/render.yaml @@ -0,0 +1,27 @@ +services: + - type: web + name: captcha-solver + runtime: docker + dockerfilePath: ./Dockerfile.render + dockerContext: . 
+ envVars: + - key: PYTHONUNBUFFERED + value: "1" + - key: CLIENT_KEY + sync: false + - key: CAPTCHA_BASE_URL + value: "https://your-openai-compatible-endpoint/v1" + - key: CAPTCHA_API_KEY + sync: false + - key: CAPTCHA_MODEL + value: "gpt-5.4" + - key: CAPTCHA_MULTIMODAL_MODEL + value: "qwen3.5-2b" + - key: CAPTCHA_RETRIES + value: "3" + - key: CAPTCHA_TIMEOUT + value: "30" + - key: BROWSER_HEADLESS + value: "true" + - key: BROWSER_TIMEOUT + value: "30" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..b522437f7aa7bceb3b0fe23c0677754ef7619e9e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +fastapi==0.119.0 +uvicorn[standard]==0.32.1 +httpx==0.28.1 +pydantic==2.10.4 +pytest==8.3.5 +playwright==1.49.1 +openai==1.82.0 +Pillow==11.1.0 +mkdocs==1.6.1 +mkdocs-material==9.6.7 +mkdocs-redirects==1.2.2 +mkdocs-static-i18n==1.3.0 +pymdown-extensions==10.14.3 diff --git a/skills/README.md b/skills/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b7b506c66fb8f0ec54978a9dd6c973e8b688f03 --- /dev/null +++ b/skills/README.md @@ -0,0 +1,32 @@ +# OhMyCaptcha Skills + +Reusable skills for Claude Code, Cursor, OpenCode, and similar agent environments. 
+ +## Included skills + +| Skill | Purpose | +|-------|---------| +| `skills/ohmycaptcha/` | Deploy, validate, and integrate OhMyCaptcha (local model setup, API usage, deployment) | +| `skills/ohmycaptcha-image/` | Generate public-safe visuals for README and documentation | + +## Installation + +Copy the skill folder(s) into your skills directory: + +```bash +# For Cursor +cp -r skills/ohmycaptcha ~/.cursor/skills/ + +# For project-scoped usage +cp -r skills/ohmycaptcha .cursor/skills/ +``` + +## Key concepts + +The operational skill (`ohmycaptcha`) covers: +- **Local model deployment** — SGLang/vLLM serving Qwen3.5-2B for image tasks +- **Cloud model configuration** — remote API (gpt-5.4) for audio transcription +- **19 task types** — reCAPTCHA v2/v3, hCaptcha, Turnstile, image classification +- **Deployment** — local, Render, Hugging Face Spaces + +The image skill (`ohmycaptcha-image`) provides prompting guidance for generating repository art and documentation visuals. diff --git a/skills/ohmycaptcha-image/SKILL.md b/skills/ohmycaptcha-image/SKILL.md new file mode 100644 index 0000000000000000000000000000000000000000..eece24fbd8dcda47c5a67d2a5786b3121074457e --- /dev/null +++ b/skills/ohmycaptcha-image/SKILL.md @@ -0,0 +1,42 @@ +--- +name: ohmycaptcha-image +description: Generate product visuals, README banners, documentation illustrations, and architecture diagrams for OhMyCaptcha. Use when the user asks for repository art, hero images, deployment diagrams, or marketing visuals for captcha infrastructure projects. +--- + +# OhMyCaptcha Image Skill + +Generate clean, public-safe visuals for the OhMyCaptcha repository and documentation. + +## Use cases + +- README hero images +- Documentation illustrations +- Architecture diagrams +- Deployment flow visuals + +## Principles + +1. Keep visuals product-oriented and privacy-safe. +2. Never embed real tokens, private URLs, or customer data. +3. 
Prefer abstract infrastructure motifs: browser automation, API flows, tokens, multimodal vision, cloud deployment. +4. Style: polished, modern, minimal, open-source-friendly. +5. For README visuals, use 16:9 landscape ratio. + +## Prompting template + +Include in every image prompt: +- **Subject**: OhMyCaptcha as a self-hostable captcha-solving service +- **Motifs**: browser automation, API requests (`/createTask`, `/getTaskResult`), reCAPTCHA/hCaptcha/Turnstile widgets, multimodal vision model, SGLang local deployment +- **Style**: polished dark UI aesthetic with blue/indigo accents +- **Constraints**: no secrets, no vendor logos, no text resembling real keys + +## Example prompt + +> Create a polished open-source hero illustration for OhMyCaptcha, showing a modern self-hosted captcha-solving pipeline with browser automation, API flow cards, a local SGLang model server, and cloud deployment badges. Dark UI with blue/indigo accents. No secrets or vendor logos. + +## Output recommendations + +- 16:9 or wide landscape composition +- Dark or neutral background +- Cyan / blue / indigo accent palette +- Enough negative space for cropping diff --git a/skills/ohmycaptcha-image/evals/evals.json b/skills/ohmycaptcha-image/evals/evals.json new file mode 100644 index 0000000000000000000000000000000000000000..72131f8a894fd991ff62db8a34120a6922ab15bb --- /dev/null +++ b/skills/ohmycaptcha-image/evals/evals.json @@ -0,0 +1,23 @@ +{ + "skill_name": "ohmycaptcha-image", + "evals": [ + { + "id": 1, + "prompt": "Create a README hero image concept for OhMyCaptcha showing its dual-model architecture (local SGLang + cloud API).", + "expected_output": "A public-safe hero image prompt emphasizing the local/cloud model split, API flows, browser automation, and multimodal vision without exposing secrets.", + "files": [] + }, + { + "id": 2, + "prompt": "Generate a documentation illustration for the local model deployment guide using SGLang and Qwen3.5-2B.", + "expected_output": "A clear 
visual concept showing SGLang serving a local model with an OpenAI-compatible API endpoint feeding into OhMyCaptcha's image classification pipeline.", + "files": [] + }, + { + "id": 3, + "prompt": "I need an architecture diagram showing how OhMyCaptcha routes browser tasks to Playwright and image tasks to the local model.", + "expected_output": "A technical diagram prompt showing task routing: browser tasks → Playwright, image tasks → local model (SGLang), audio tasks → cloud model.", + "files": [] + } + ] +} diff --git a/skills/ohmycaptcha/SKILL.md b/skills/ohmycaptcha/SKILL.md new file mode 100644 index 0000000000000000000000000000000000000000..d212aa333911534bd6a9461392c9700bc0e45ad8 --- /dev/null +++ b/skills/ohmycaptcha/SKILL.md @@ -0,0 +1,61 @@ +--- +name: ohmycaptcha +description: Deploy, configure, validate, and integrate the OhMyCaptcha captcha-solving service. Use when working with YesCaptcha-style APIs, flow2api integration, reCAPTCHA/hCaptcha/Turnstile task creation, image classification, SGLang local model deployment, Render/Hugging Face cloud deployment, or OpenAI-compatible multimodal model setup. Also use when the user asks how to self-host a captcha-solving service or wants request/response examples for OhMyCaptcha. +--- + +# OhMyCaptcha Skill + +Operational guidance for deploying and integrating the OhMyCaptcha service. + +## Model architecture + +OhMyCaptcha uses two model backends: + +- **Local model** — self-hosted via SGLang/vLLM (e.g. `Qwen/Qwen3.5-2B`). Handles image recognition and classification tasks. Configured via `LOCAL_BASE_URL`, `LOCAL_API_KEY`, `LOCAL_MODEL`. +- **Cloud model** — remote OpenAI-compatible API (e.g. `gpt-5.4`). Handles audio transcription and complex reasoning. Configured via `CLOUD_BASE_URL`, `CLOUD_API_KEY`, `CLOUD_MODEL`. 
+ +## Supported task types (19 total) + +### Browser-based (12) +`RecaptchaV3TaskProxyless`, `RecaptchaV3TaskProxylessM1`, `RecaptchaV3TaskProxylessM1S7`, `RecaptchaV3TaskProxylessM1S9`, `RecaptchaV3EnterpriseTask`, `RecaptchaV3EnterpriseTaskM1`, `NoCaptchaTaskProxyless`, `RecaptchaV2TaskProxyless`, `RecaptchaV2EnterpriseTaskProxyless`, `HCaptchaTaskProxyless`, `TurnstileTaskProxyless`, `TurnstileTaskProxylessM1` + +### Image recognition (3) +`ImageToTextTask`, `ImageToTextTaskMuggle`, `ImageToTextTaskM1` + +### Image classification (4) +`HCaptchaClassification`, `ReCaptchaV2Classification`, `FunCaptchaClassification`, `AwsClassification` + +## Local model setup (SGLang) + +```bash +pip install "sglang[all]>=0.4.6.post1" +# From ModelScope (China): +export SGLANG_USE_MODELSCOPE=true +python -m sglang.launch_server --model-path Qwen/Qwen3.5-2B --port 30000 +``` + +Then configure OhMyCaptcha: +```bash +export LOCAL_BASE_URL="http://localhost:30000/v1" +export LOCAL_MODEL="Qwen/Qwen3.5-2B" +``` + +## Startup checklist + +1. Install dependencies: `pip install -r requirements.txt && playwright install --with-deps chromium` +2. Start local model server (SGLang on port 30000) +3. Set env vars: `LOCAL_BASE_URL`, `CLOUD_BASE_URL`, `CLOUD_API_KEY`, `CLIENT_KEY` +4. Start service: `python main.py` +5. Verify: `curl http://localhost:8000/api/v1/health` +6. Test: create a reCAPTCHA v3 task against `https://antcpt.com/score_detector/` with key `6LcR_okUAAAAAPYrPe-HK_0RULO1aZM15ENyM-Mf` + +## Response rules + +1. Prefer the repository's documented behavior over assumptions. +2. Use placeholder credentials only. Never expose real secrets. +3. Be explicit about limitations: + - `minScore` is compatibility-only + - Task storage is in-memory with 10-min TTL + - reCAPTCHA v2 and hCaptcha may require image classification fallback in headless environments +4. For deployment help, reference `docs/deployment/local-model.md`, `docs/deployment/render.md`, `docs/deployment/huggingface.md`. 
+5. For API usage, reference `docs/api-reference.md` and the usage guides under `docs/usage/`. diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..18b665ed7daf5f6c10390a2177cdbd3a70145ee4 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1 @@ +"""Application package.""" diff --git a/src/api/__init__.py b/src/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dff53e5af3f99340bcfd48c954f9ba7d6c2f12fa --- /dev/null +++ b/src/api/__init__.py @@ -0,0 +1 @@ +"""API package.""" diff --git a/src/api/routes.py b/src/api/routes.py new file mode 100644 index 0000000000000000000000000000000000000000..48c100ef1dc6e376198e8192eb3f2eeecdd52c34 --- /dev/null +++ b/src/api/routes.py @@ -0,0 +1,171 @@ +"""YesCaptcha / AntiCaptcha compatible HTTP routes.""" + +from __future__ import annotations + +import logging + +from fastapi import APIRouter + +from ..core.config import config +from ..models.task import ( + CreateTaskRequest, + CreateTaskResponse, + GetBalanceRequest, + GetBalanceResponse, + GetTaskResultRequest, + GetTaskResultResponse, + SolutionObject, +) +from ..services.task_manager import TaskStatus, task_manager + +log = logging.getLogger(__name__) + +router = APIRouter() + +_BROWSER_TASK_TYPES = { + "RecaptchaV3TaskProxyless", + "RecaptchaV3TaskProxylessM1", + "RecaptchaV3TaskProxylessM1S7", + "RecaptchaV3TaskProxylessM1S9", + "RecaptchaV3EnterpriseTask", + "RecaptchaV3EnterpriseTaskM1", + "NoCaptchaTaskProxyless", + "RecaptchaV2TaskProxyless", + "RecaptchaV2EnterpriseTaskProxyless", + "HCaptchaTaskProxyless", + "TurnstileTaskProxyless", + "TurnstileTaskProxylessM1", +} + +_IMAGE_TASK_TYPES = { + "ImageToTextTask", + "ImageToTextTaskMuggle", + "ImageToTextTaskM1", +} + +_CLASSIFICATION_TASK_TYPES = { + "HCaptchaClassification", + "ReCaptchaV2Classification", + "FunCaptchaClassification", + "AwsClassification", +} + + +def _check_client_key(client_key: str) -> CreateTaskResponse | 
None: + """Return an error response if the client key is invalid, else None.""" + if config.client_key and client_key != config.client_key: + return CreateTaskResponse( + errorId=1, + errorCode="ERROR_KEY_DOES_NOT_EXIST", + errorDescription="Invalid clientKey", + ) + return None + + +@router.post("/createTask", response_model=CreateTaskResponse) +async def create_task(request: CreateTaskRequest) -> CreateTaskResponse: + err = _check_client_key(request.clientKey) + if err: + return err + + supported = task_manager.supported_types() + if request.task.type not in supported: + return CreateTaskResponse( + errorId=1, + errorCode="ERROR_TASK_NOT_SUPPORTED", + errorDescription=f"Task type '{request.task.type}' is not supported. " + f"Supported: {supported}", + ) + + # Validate required fields for browser-based tasks + if request.task.type in _BROWSER_TASK_TYPES: + if not request.task.websiteURL or not request.task.websiteKey: + return CreateTaskResponse( + errorId=1, + errorCode="ERROR_TASK_PROPERTY_EMPTY", + errorDescription="websiteURL and websiteKey are required", + ) + + # Validate required fields for ImageToText tasks + if request.task.type in _IMAGE_TASK_TYPES: + if not request.task.body: + return CreateTaskResponse( + errorId=1, + errorCode="ERROR_TASK_PROPERTY_EMPTY", + errorDescription="body (base64 image) is required", + ) + + # Validate required fields for classification tasks + if request.task.type in _CLASSIFICATION_TASK_TYPES: + has_image = ( + request.task.image + or request.task.images + or request.task.body + or request.task.queries + ) + if not has_image: + return CreateTaskResponse( + errorId=1, + errorCode="ERROR_TASK_PROPERTY_EMPTY", + errorDescription="image data is required for classification tasks", + ) + + params = request.task.model_dump(exclude_none=True) + task_id = task_manager.create_task(request.task.type, params) + + log.info("Created task %s (type=%s)", task_id, request.task.type) + return CreateTaskResponse(errorId=0, taskId=task_id) + + 
+@router.post("/getTaskResult", response_model=GetTaskResultResponse) +async def get_task_result( + request: GetTaskResultRequest, +) -> GetTaskResultResponse: + if config.client_key and request.clientKey != config.client_key: + return GetTaskResultResponse( + errorId=1, + errorCode="ERROR_KEY_DOES_NOT_EXIST", + errorDescription="Invalid clientKey", + ) + + task = task_manager.get_task(request.taskId) + if task is None: + return GetTaskResultResponse( + errorId=1, + errorCode="ERROR_NO_SUCH_CAPCHA_ID", + errorDescription="Task not found", + ) + + if task.status == TaskStatus.PROCESSING: + return GetTaskResultResponse(errorId=0, status="processing") + + if task.status == TaskStatus.READY: + return GetTaskResultResponse( + errorId=0, + status="ready", + solution=SolutionObject(**(task.solution or {})), + ) + + return GetTaskResultResponse( + errorId=1, + errorCode=task.error_code or "ERROR_CAPTCHA_UNSOLVABLE", + errorDescription=task.error_description, + ) + + +@router.post("/getBalance", response_model=GetBalanceResponse) +async def get_balance(request: GetBalanceRequest) -> GetBalanceResponse: + if config.client_key and request.clientKey != config.client_key: + return GetBalanceResponse(errorId=1, balance=0) + return GetBalanceResponse(errorId=0, balance=99999.0) + + +@router.get("/api/v1/health") +async def health() -> dict[str, object]: + return { + "status": "ok", + "supported_task_types": task_manager.supported_types(), + "browser_headless": config.browser_headless, + "cloud_model": config.cloud_model, + "local_model": config.local_model, + } diff --git a/src/core/__init__.py b/src/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3220b9a333ea39717f902cd0803032ebbeb7e6b1 --- /dev/null +++ b/src/core/__init__.py @@ -0,0 +1 @@ +"""Core application helpers.""" diff --git a/src/core/config.py b/src/core/config.py new file mode 100644 index 0000000000000000000000000000000000000000..a7519b1df53522f5fd3c05e0d5368de0b9cbff41 --- 
/dev/null +++ b/src/core/config.py @@ -0,0 +1,106 @@ +"""Environment-driven application configuration. + +Two model backends are supported: + + Cloud model — a remote OpenAI-compatible API (e.g. gpt-5.4 via a hosted + endpoint). Used as the powerful multimodal backbone for + tasks like audio transcription. + + Local model — a self-hosted model served via SGLang, vLLM, or any + OpenAI-compatible server (e.g. Qwen3.5-2B on localhost). + Used for high-throughput image recognition / classification. + +Both backends expose ``/v1/chat/completions``; the only difference is the +base URL, API key, and model name. +""" + +from __future__ import annotations + +import os +from dataclasses import dataclass + + +@dataclass(frozen=True) +class Config: + server_host: str + server_port: int + + # Auth: YesCaptcha clientKey + client_key: str | None + + # ── Cloud model (remote API) ── + cloud_base_url: str + cloud_api_key: str + cloud_model: str + + # ── Local model (self-hosted via SGLang / vLLM) ── + local_base_url: str + local_api_key: str + local_model: str + + captcha_retries: int + captcha_timeout: int + + # Playwright browser + browser_headless: bool + browser_timeout: int # seconds + + # ── Convenience aliases (backward-compat) ── + + @property + def captcha_base_url(self) -> str: + return self.cloud_base_url + + @property + def captcha_api_key(self) -> str: + return self.cloud_api_key + + @property + def captcha_model(self) -> str: + return self.cloud_model + + @property + def captcha_multimodal_model(self) -> str: + return self.local_model + + +def load_config() -> Config: + return Config( + server_host=os.environ.get("SERVER_HOST", "0.0.0.0"), + server_port=int(os.environ.get("SERVER_PORT", "8000")), + client_key=os.environ.get("CLIENT_KEY", "").strip() or None, + # Cloud model + cloud_base_url=os.environ.get( + "CLOUD_BASE_URL", + os.environ.get("CAPTCHA_BASE_URL", "https://your-openai-compatible-endpoint/v1"), + ), + cloud_api_key=os.environ.get( + "CLOUD_API_KEY", + 
os.environ.get("CAPTCHA_API_KEY", ""), + ), + cloud_model=os.environ.get( + "CLOUD_MODEL", + os.environ.get("CAPTCHA_MODEL", "gpt-5.4"), + ), + # Local model + local_base_url=os.environ.get( + "LOCAL_BASE_URL", + os.environ.get("CAPTCHA_BASE_URL", "http://localhost:30000/v1"), + ), + local_api_key=os.environ.get( + "LOCAL_API_KEY", + os.environ.get("CAPTCHA_API_KEY", "EMPTY"), + ), + local_model=os.environ.get( + "LOCAL_MODEL", + os.environ.get("CAPTCHA_MULTIMODAL_MODEL", "Qwen/Qwen3.5-2B"), + ), + captcha_retries=int(os.environ.get("CAPTCHA_RETRIES", "3")), + captcha_timeout=int(os.environ.get("CAPTCHA_TIMEOUT", "30")), + browser_headless=os.environ.get("BROWSER_HEADLESS", "true").strip().lower() + in {"1", "true", "yes"}, + browser_timeout=int(os.environ.get("BROWSER_TIMEOUT", "30")), + ) + + +config = load_config() diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000000000000000000000000000000000000..20a4fabdf83165924ecebed45312bc86418ec288 --- /dev/null +++ b/src/main.py @@ -0,0 +1,132 @@ +"""FastAPI application with Playwright lifecycle management.""" + +from __future__ import annotations + +import logging +from contextlib import asynccontextmanager +from typing import AsyncIterator + +from fastapi import FastAPI + +from .api.routes import router +from .core.config import config +from .services.classification import ClassificationSolver +from .services.hcaptcha import HCaptchaSolver +from .services.recognition import CaptchaRecognizer +from .services.recaptcha_v2 import RecaptchaV2Solver +from .services.recaptcha_v3 import RecaptchaV3Solver +from .services.task_manager import task_manager +from .services.turnstile import TurnstileSolver + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", +) +log = logging.getLogger(__name__) + +_RECAPTCHA_V3_TYPES = [ + "RecaptchaV3TaskProxyless", + "RecaptchaV3TaskProxylessM1", + "RecaptchaV3TaskProxylessM1S7", + "RecaptchaV3TaskProxylessM1S9", + 
"RecaptchaV3EnterpriseTask", + "RecaptchaV3EnterpriseTaskM1", +] + +_RECAPTCHA_V2_TYPES = [ + "NoCaptchaTaskProxyless", + "RecaptchaV2TaskProxyless", + "RecaptchaV2EnterpriseTaskProxyless", +] + +_HCAPTCHA_TYPES = [ + "HCaptchaTaskProxyless", +] + +_TURNSTILE_TYPES = [ + "TurnstileTaskProxyless", + "TurnstileTaskProxylessM1", +] + +_CLASSIFICATION_TYPES = [ + "HCaptchaClassification", + "ReCaptchaV2Classification", + "FunCaptchaClassification", + "AwsClassification", +] + +_IMAGE_TEXT_TYPES = [ + "ImageToTextTask", + "ImageToTextTaskMuggle", + "ImageToTextTaskM1", +] + + +@asynccontextmanager +async def lifespan(app: FastAPI) -> AsyncIterator[None]: + # ── startup ── + v3_solver = RecaptchaV3Solver(config) + await v3_solver.start() + for task_type in _RECAPTCHA_V3_TYPES: + task_manager.register_solver(task_type, v3_solver) + log.info("Registered reCAPTCHA v3 solver for types: %s", _RECAPTCHA_V3_TYPES) + + v2_solver = RecaptchaV2Solver(config) + await v2_solver.start() + for task_type in _RECAPTCHA_V2_TYPES: + task_manager.register_solver(task_type, v2_solver) + log.info("Registered reCAPTCHA v2 solver for types: %s", _RECAPTCHA_V2_TYPES) + + hcaptcha_solver = HCaptchaSolver(config) + await hcaptcha_solver.start() + for task_type in _HCAPTCHA_TYPES: + task_manager.register_solver(task_type, hcaptcha_solver) + log.info("Registered hCaptcha solver for types: %s", _HCAPTCHA_TYPES) + + turnstile_solver = TurnstileSolver(config) + await turnstile_solver.start() + for task_type in _TURNSTILE_TYPES: + task_manager.register_solver(task_type, turnstile_solver) + log.info("Registered Turnstile solver for types: %s", _TURNSTILE_TYPES) + + recognizer = CaptchaRecognizer(config) + for task_type in _IMAGE_TEXT_TYPES: + task_manager.register_solver(task_type, recognizer) + log.info("Registered image captcha recognizer for types: %s", _IMAGE_TEXT_TYPES) + + classifier = ClassificationSolver(config) + for task_type in _CLASSIFICATION_TYPES: + task_manager.register_solver(task_type, 
classifier) + log.info("Registered classification solver for types: %s", _CLASSIFICATION_TYPES) + + yield + # ── shutdown ── + await v3_solver.stop() + await v2_solver.stop() + await hcaptcha_solver.stop() + await turnstile_solver.stop() + + +app = FastAPI( + title="Captcha Solver Service", + version="3.0.0", + description="YesCaptcha-compatible captcha solving service for flow2api.", + lifespan=lifespan, +) + +app.include_router(router) + + +@app.get("/") +async def root() -> dict[str, object]: + return { + "service": "captcha-solver", + "version": "3.0.0", + "endpoints": { + "createTask": "/createTask", + "getTaskResult": "/getTaskResult", + "getBalance": "/getBalance", + "health": "/api/v1/health", + }, + "supported_task_types": task_manager.supported_types(), + } diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..208fedbbcf843521e4b76b238d899131fc7f8241 --- /dev/null +++ b/src/models/__init__.py @@ -0,0 +1 @@ +"""Pydantic API models.""" diff --git a/src/models/task.py b/src/models/task.py new file mode 100644 index 0000000000000000000000000000000000000000..6ae849293a78664587cf9d5bea66747ef8eb1b80 --- /dev/null +++ b/src/models/task.py @@ -0,0 +1,70 @@ +"""YesCaptcha / AntiCaptcha compatible API models.""" + +from __future__ import annotations + +from pydantic import BaseModel, Field + + +# ── createTask ────────────────────────────────────────────── + +class TaskObject(BaseModel): + type: str + websiteURL: str | None = None + websiteKey: str | None = None + pageAction: str | None = None + minScore: float | None = None + isInvisible: bool | None = None + # Image captcha / classification fields + body: str | None = None + image: str | None = None + images: list[str] | None = None + question: str | None = None + queries: list[str] | str | None = None + project_name: str | None = None + + +class CreateTaskRequest(BaseModel): + clientKey: str + task: TaskObject + + +class 
CreateTaskResponse(BaseModel): + errorId: int = 0 + taskId: str | None = None + errorCode: str | None = None + errorDescription: str | None = None + + +# ── getTaskResult ─────────────────────────────────────────── + +class GetTaskResultRequest(BaseModel): + clientKey: str + taskId: str + + +class SolutionObject(BaseModel): + gRecaptchaResponse: str | None = None + text: str | None = None + token: str | None = None + objects: list[int] | None = None + answer: bool | list[int] | None = None + userAgent: str | None = None + + +class GetTaskResultResponse(BaseModel): + errorId: int = 0 + status: str | None = None + solution: SolutionObject | None = None + errorCode: str | None = None + errorDescription: str | None = None + + +# ── getBalance ────────────────────────────────────────────── + +class GetBalanceRequest(BaseModel): + clientKey: str + + +class GetBalanceResponse(BaseModel): + errorId: int = 0 + balance: float = 99999.0 diff --git a/src/services/__init__.py b/src/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f4f478c36fb96d972bbbbe56690e64775c8f4fcf --- /dev/null +++ b/src/services/__init__.py @@ -0,0 +1 @@ +"""Service layer package.""" diff --git a/src/services/classification.py b/src/services/classification.py new file mode 100644 index 0000000000000000000000000000000000000000..bdeccef62cd7359e921391f6a1f42c01224c9553 --- /dev/null +++ b/src/services/classification.py @@ -0,0 +1,203 @@ +"""Image classification solvers for various captcha types. + +Supports HCaptchaClassification, ReCaptchaV2Classification, +FunCaptchaClassification, and AwsClassification task types. + +All classification tasks send images + question text to an OpenAI-compatible +vision model for analysis and return structured coordinate/index results. 
+""" + +from __future__ import annotations + +import base64 +import io +import json +import logging +import re +from typing import Any + +from openai import AsyncOpenAI +from PIL import Image + +from ..core.config import Config + +log = logging.getLogger(__name__) + +HCAPTCHA_SYSTEM_PROMPT = """\ +You are an image classification assistant for HCaptcha challenges. +Given a question and one or more base64-encoded images, determine which images match the question. + +Return STRICT JSON only. No markdown, no extra text. + +For single-image questions (is this image X?): +{"answer": true} or {"answer": false} + +For multi-image grid questions (select all images containing X): +{"answer": [0, 2, 5]} +where numbers are 0-indexed positions of matching images. + +Rules: +- Return ONLY the JSON object, nothing else. +- Be precise with your classification. +""" + +RECAPTCHA_V2_SYSTEM_PROMPT = """\ +You are an image classification assistant for reCAPTCHA v2 challenges. +Given a question and a grid image (3x3 or 4x4), identify which cells match the question. + +The image cells are numbered 0-8 (3x3) or 0-15 (4x4), left-to-right, top-to-bottom. + +Return STRICT JSON only: +{"objects": [0, 3, 6]} +where numbers are 0-indexed positions of matching cells. + +Rules: +- Return ONLY the JSON object, nothing else. +- If no cells match, return {"objects": []}. +""" + +FUNCAPTCHA_SYSTEM_PROMPT = """\ +You are an image classification assistant for FunCaptcha challenges. +Given a question and a grid image (typically 2x3 = 6 cells), identify which cell +is the correct answer. + +Cells are numbered 0-5, left-to-right, top-to-bottom. + +Return STRICT JSON only: +{"objects": [3]} +where the number is the 0-indexed position of the correct cell. + +Rules: +- Return ONLY the JSON object, nothing else. +- Usually only one cell is correct. +""" + +AWS_SYSTEM_PROMPT = """\ +You are an image classification assistant for AWS CAPTCHA challenges. 
+Given a question and one or more images, identify the correct answer. + +Return STRICT JSON only: +{"objects": [1]} +where the number is the 0-indexed position of the matching image. + +Rules: +- Return ONLY the JSON object, nothing else. +""" + + +class ClassificationSolver: + """Solves image classification captchas using a vision model.""" + + def __init__(self, config: Config) -> None: + self._config = config + self._client = AsyncOpenAI( + base_url=config.local_base_url, + api_key=config.local_api_key, + ) + + async def solve(self, params: dict[str, Any]) -> dict[str, Any]: + task_type = params.get("type", "") + system_prompt = self._get_system_prompt(task_type) + question = params.get("question", "") or params.get("queries", "") + + # Handle different image field names across task types + images = self._extract_images(params) + if not images: + raise ValueError("No image data provided") + + result = await self._classify(system_prompt, question, images) + return result + + @staticmethod + def _get_system_prompt(task_type: str) -> str: + prompts = { + "HCaptchaClassification": HCAPTCHA_SYSTEM_PROMPT, + "ReCaptchaV2Classification": RECAPTCHA_V2_SYSTEM_PROMPT, + "FunCaptchaClassification": FUNCAPTCHA_SYSTEM_PROMPT, + "AwsClassification": AWS_SYSTEM_PROMPT, + } + return prompts.get(task_type, RECAPTCHA_V2_SYSTEM_PROMPT) + + @staticmethod + def _extract_images(params: dict[str, Any]) -> list[str]: + """Extract base64 image(s) from various param formats.""" + images: list[str] = [] + + if "image" in params: + images.append(params["image"]) + + if "images" in params: + imgs = params["images"] + if isinstance(imgs, list): + images.extend(imgs) + elif isinstance(imgs, str): + images.append(imgs) + + if "body" in params and not images: + images.append(params["body"]) + + # HCaptcha queries format: list of base64 strings + if "queries" in params and isinstance(params["queries"], list): + images.extend(params["queries"]) + + return images + + @staticmethod + def 
_prepare_image(b64_data: str) -> str: + """Ensure image is properly formatted as a data URL.""" + if b64_data.startswith("data:image"): + return b64_data + try: + img_bytes = base64.b64decode(b64_data) + img = Image.open(io.BytesIO(img_bytes)) + fmt = img.format or "PNG" + mime = f"image/{fmt.lower()}" + return f"data:{mime};base64,{b64_data}" + except Exception: + return f"data:image/png;base64,{b64_data}" + + async def _classify( + self, system_prompt: str, question: str, images: list[str] + ) -> dict[str, Any]: + content: list[dict[str, Any]] = [] + + for img_b64 in images: + data_url = self._prepare_image(img_b64) + content.append({ + "type": "image_url", + "image_url": {"url": data_url, "detail": "high"}, + }) + + user_text = question if question else "Classify this captcha image." + content.append({"type": "text", "text": user_text}) + + last_error: Exception | None = None + for attempt in range(self._config.captcha_retries): + try: + response = await self._client.chat.completions.create( + model=self._config.captcha_multimodal_model, + temperature=0.05, + max_tokens=512, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": content}, + ], + ) + raw = response.choices[0].message.content or "" + return self._parse_json(raw) + except Exception as exc: + last_error = exc + log.warning("Classification attempt %d failed: %s", attempt + 1, exc) + + raise RuntimeError( + f"Classification failed after {self._config.captcha_retries} attempts: {last_error}" + ) + + @staticmethod + def _parse_json(text: str) -> dict[str, Any]: + match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL) + cleaned = match.group(1) if match else text.strip() + data = json.loads(cleaned) + if not isinstance(data, dict): + raise ValueError(f"Expected JSON object, got {type(data).__name__}") + return data diff --git a/src/services/hcaptcha.py b/src/services/hcaptcha.py new file mode 100644 index 
0000000000000000000000000000000000000000..e427ec03a1b7d9f536143b49763d6182bd56f5ab --- /dev/null +++ b/src/services/hcaptcha.py @@ -0,0 +1,142 @@ +"""HCaptcha solver using Playwright browser automation. + +Supports HCaptchaTaskProxyless task type. +Visits the target page, interacts with the hCaptcha widget, and extracts the response token. +""" + +from __future__ import annotations + +import asyncio +import logging +from typing import Any + +from playwright.async_api import Browser, Playwright, async_playwright + +from ..core.config import Config + +log = logging.getLogger(__name__) + +_STEALTH_JS = """ +Object.defineProperty(navigator, 'webdriver', {get: () => undefined}); +Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']}); +Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]}); +window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}}; +""" + +_EXTRACT_HCAPTCHA_TOKEN_JS = """ +() => { + const textarea = document.querySelector('[name="h-captcha-response"]') + || document.querySelector('[name="g-recaptcha-response"]'); + if (textarea && textarea.value && textarea.value.length > 20) { + return textarea.value; + } + if (window.hcaptcha && typeof window.hcaptcha.getResponse === 'function') { + const resp = window.hcaptcha.getResponse(); + if (resp && resp.length > 20) return resp; + } + return null; +} +""" + + +class HCaptchaSolver: + """Solves HCaptchaTaskProxyless tasks via headless Chromium.""" + + def __init__(self, config: Config, browser: Browser | None = None) -> None: + self._config = config + self._playwright: Playwright | None = None + self._browser: Browser | None = browser + self._owns_browser = browser is None + + async def start(self) -> None: + if self._browser is not None: + return + self._playwright = await async_playwright().start() + self._browser = await self._playwright.chromium.launch( + headless=self._config.browser_headless, + args=[ + "--disable-blink-features=AutomationControlled", + 
"--no-sandbox", + "--disable-dev-shm-usage", + "--disable-gpu", + ], + ) + log.info("HCaptchaSolver browser started") + + async def stop(self) -> None: + if self._owns_browser: + if self._browser: + await self._browser.close() + if self._playwright: + await self._playwright.stop() + log.info("HCaptchaSolver stopped") + + async def solve(self, params: dict[str, Any]) -> dict[str, Any]: + website_url = params["websiteURL"] + website_key = params["websiteKey"] + + last_error: Exception | None = None + for attempt in range(self._config.captcha_retries): + try: + token = await self._solve_once(website_url, website_key) + return {"gRecaptchaResponse": token} + except Exception as exc: + last_error = exc + log.warning( + "HCaptcha attempt %d/%d failed: %s", + attempt + 1, + self._config.captcha_retries, + exc, + ) + if attempt < self._config.captcha_retries - 1: + await asyncio.sleep(2) + + raise RuntimeError( + f"HCaptcha failed after {self._config.captcha_retries} attempts: {last_error}" + ) + + async def _solve_once(self, website_url: str, website_key: str) -> str: + assert self._browser is not None + + context = await self._browser.new_context( + user_agent=( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/131.0.0.0 Safari/537.36" + ), + viewport={"width": 1920, "height": 1080}, + locale="en-US", + ) + page = await context.new_page() + await page.add_init_script(_STEALTH_JS) + + try: + timeout_ms = self._config.browser_timeout * 1000 + await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms) + + await page.mouse.move(400, 300) + await asyncio.sleep(1) + + # Click only the checkbox iframe — match by specific title to avoid the challenge iframe + iframe_element = page.frame_locator( + 'iframe[title="Widget containing checkbox for hCaptcha security challenge"]' + ) + checkbox = iframe_element.locator("#checkbox") + await checkbox.click(timeout=10_000) + + # Wait for token — may require challenge 
completion; poll up to 30s + for _ in range(6): + await asyncio.sleep(5) + token = await page.evaluate(_EXTRACT_HCAPTCHA_TOKEN_JS) + if isinstance(token, str) and len(token) > 20: + break + else: + token = None + + if not isinstance(token, str) or len(token) < 20: + raise RuntimeError(f"Invalid hCaptcha token: {token!r}") + + log.info("Got hCaptcha token (len=%d)", len(token)) + return token + finally: + await context.close() diff --git a/src/services/recaptcha_v2.py b/src/services/recaptcha_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..cd72a0ff694626e8a421f78deeb785a2317f95f2 --- /dev/null +++ b/src/services/recaptcha_v2.py @@ -0,0 +1,278 @@ +"""reCAPTCHA v2 solver using Playwright browser automation. + +Supports NoCaptchaTaskProxyless, RecaptchaV2TaskProxyless, +and RecaptchaV2EnterpriseTaskProxyless task types. + +Strategy: + 1. Visit the target page with a realistic browser context. + 2. Click the reCAPTCHA checkbox. + 3. If the challenge dialog appears (bot detected), switch to the audio + challenge, download the audio file, transcribe it via the configured + speech-to-text model, and submit the text. + 4. Extract the gRecaptchaResponse token. 
+""" + +from __future__ import annotations + +import asyncio +import logging +from typing import Any + +import httpx +from playwright.async_api import Browser, Playwright, async_playwright + +from ..core.config import Config + +log = logging.getLogger(__name__) + +_STEALTH_JS = """ +Object.defineProperty(navigator, 'webdriver', {get: () => undefined}); +Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']}); +Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]}); +window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}}; +""" + +_EXTRACT_TOKEN_JS = """ +() => { + const textarea = document.querySelector('#g-recaptcha-response') + || document.querySelector('[name="g-recaptcha-response"]'); + if (textarea && textarea.value && textarea.value.length > 20) { + return textarea.value; + } + const gr = window.grecaptcha?.enterprise || window.grecaptcha; + if (gr && typeof gr.getResponse === 'function') { + const resp = gr.getResponse(); + if (resp && resp.length > 20) return resp; + } + return null; +} +""" + + +class RecaptchaV2Solver: + """Solves reCAPTCHA v2 tasks via headless Chromium with checkbox clicking. + + Falls back to the audio challenge path when Google presents a visual + challenge to the headless browser. 
+ """ + + def __init__(self, config: Config, browser: Browser | None = None) -> None: + self._config = config + self._playwright: Playwright | None = None + self._browser: Browser | None = browser + self._owns_browser = browser is None + + async def start(self) -> None: + if self._browser is not None: + return + self._playwright = await async_playwright().start() + self._browser = await self._playwright.chromium.launch( + headless=self._config.browser_headless, + args=[ + "--disable-blink-features=AutomationControlled", + "--no-sandbox", + "--disable-dev-shm-usage", + "--disable-gpu", + ], + ) + log.info("RecaptchaV2Solver browser started") + + async def stop(self) -> None: + if self._owns_browser: + if self._browser: + await self._browser.close() + if self._playwright: + await self._playwright.stop() + log.info("RecaptchaV2Solver stopped") + + async def solve(self, params: dict[str, Any]) -> dict[str, Any]: + website_url = params["websiteURL"] + website_key = params["websiteKey"] + is_invisible = params.get("isInvisible", False) + + last_error: Exception | None = None + for attempt in range(self._config.captcha_retries): + try: + token = await self._solve_once(website_url, website_key, is_invisible) + return {"gRecaptchaResponse": token} + except Exception as exc: + last_error = exc + log.warning( + "reCAPTCHA v2 attempt %d/%d failed: %s", + attempt + 1, + self._config.captcha_retries, + exc, + ) + if attempt < self._config.captcha_retries - 1: + await asyncio.sleep(2) + + raise RuntimeError( + f"reCAPTCHA v2 failed after {self._config.captcha_retries} attempts: {last_error}" + ) + + async def _solve_once( + self, website_url: str, website_key: str, is_invisible: bool + ) -> str: + assert self._browser is not None + + context = await self._browser.new_context( + user_agent=( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/131.0.0.0 Safari/537.36" + ), + viewport={"width": 1920, "height": 1080}, + locale="en-US", 
+ ) + page = await context.new_page() + await page.add_init_script(_STEALTH_JS) + + try: + timeout_ms = self._config.browser_timeout * 1000 + await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms) + await page.mouse.move(400, 300) + await asyncio.sleep(0.5) + + if is_invisible: + token = await page.evaluate( + """ + ([key]) => new Promise((resolve, reject) => { + const gr = window.grecaptcha?.enterprise || window.grecaptcha; + if (!gr) { reject(new Error('grecaptcha not found')); return; } + gr.ready(() => { + gr.execute(key).then(resolve).catch(reject); + }); + }) + """, + [website_key], + ) + else: + token = await self._solve_checkbox(page) + + if not isinstance(token, str) or len(token) < 20: + raise RuntimeError(f"Invalid reCAPTCHA v2 token: {token!r}") + + log.info("Got reCAPTCHA v2 token (len=%d)", len(token)) + return token + finally: + await context.close() + + async def _solve_checkbox(self, page: Any) -> str | None: + """Click the reCAPTCHA checkbox. If a visual challenge appears, try audio path.""" + # The checkbox iframe always has title="reCAPTCHA" + checkbox_frame = page.frame_locator('iframe[title="reCAPTCHA"]').first + checkbox = checkbox_frame.locator("#recaptcha-anchor") + await checkbox.click(timeout=10_000) + await asyncio.sleep(2) + + # Check if token was issued immediately (low-risk sessions) + token = await page.evaluate(_EXTRACT_TOKEN_JS) + if isinstance(token, str) and len(token) > 20: + return token + + # Challenge dialog appeared — try audio challenge path + log.info("reCAPTCHA challenge detected, attempting audio path") + try: + token = await self._solve_audio_challenge(page) + except Exception as exc: + log.warning("Audio challenge path failed: %s", exc) + token = None + + return token + + async def _solve_audio_challenge(self, page: Any) -> str | None: + """Click the audio button in the bframe and transcribe the audio.""" + # The challenge bframe has title containing "recaptcha challenge" + bframe = 
page.frame_locator('iframe[title*="recaptcha challenge"]') + + # Click the audio challenge button + audio_btn = bframe.locator("#recaptcha-audio-button") + await audio_btn.click(timeout=8_000) + + # Wait for the audio challenge iframe to load its content + await asyncio.sleep(3) + + # After clicking audio, a new bframe is rendered with the audio player + bframe = page.frame_locator('iframe[title*="recaptcha challenge"]') + + # Get the audio source URL — try multiple selectors + audio_src = None + for selector in [ + ".rc-audiochallenge-tdownload-link", + "a[href*='.mp3']", + "audio source", + ]: + try: + element = bframe.locator(selector).first + audio_src = await element.get_attribute("href", timeout=5_000) or await element.get_attribute("src", timeout=1_000) + if audio_src: + break + except Exception: + continue + + if not audio_src: + raise RuntimeError("Could not find audio challenge download link") + + # Download the audio file + async with httpx.AsyncClient(timeout=30) as client: + resp = await client.get(audio_src) + resp.raise_for_status() + audio_bytes = resp.content + + # Transcribe via the vision/language model (base64 audio → text) + transcript = await self._transcribe_audio(audio_bytes) + log.info("Audio transcribed: %r", transcript[:40] if transcript else None) + + if not transcript: + raise RuntimeError("Audio transcription returned empty result") + + # Submit the transcript + audio_input = bframe.locator("#audio-response") + await audio_input.fill(transcript.strip().lower()) + verify_btn = bframe.locator("#recaptcha-verify-button") + await verify_btn.click(timeout=8_000) + await asyncio.sleep(2) + + return await page.evaluate(_EXTRACT_TOKEN_JS) + + async def _transcribe_audio(self, audio_bytes: bytes) -> str | None: + """Send audio bytes to the OpenAI-compatible audio transcription endpoint.""" + import base64 + + audio_b64 = base64.b64encode(audio_bytes).decode() + payload = { + "model": self._config.captcha_model, + "messages": [ + { + "role": 
"user", + "content": [ + { + "type": "text", + "text": ( + "This is a reCAPTCHA audio challenge. " + "The audio contains spoken digits or words. " + "Transcribe exactly what is spoken, digits only, " + "separated by spaces. Reply with only the transcription." + ), + }, + { + "type": "image_url", + "image_url": {"url": f"data:audio/mp3;base64,{audio_b64}"}, + }, + ], + } + ], + "max_tokens": 50, + "temperature": 0, + } + + async with httpx.AsyncClient(timeout=30) as client: + resp = await client.post( + f"{self._config.captcha_base_url}/chat/completions", + headers={"Authorization": f"Bearer {self._config.captcha_api_key}"}, + json=payload, + ) + if resp.status_code != 200: + raise RuntimeError(f"Transcription API error {resp.status_code}: {resp.text[:200]}") + data = resp.json() + return data["choices"][0]["message"]["content"].strip() diff --git a/src/services/recaptcha_v3.py b/src/services/recaptcha_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..9c6b57c92fca8cae3927fd14441853128916df6f --- /dev/null +++ b/src/services/recaptcha_v3.py @@ -0,0 +1,160 @@ +"""reCAPTCHA v3 solver using Playwright browser automation.""" + +from __future__ import annotations + +import asyncio +import logging +from typing import Any + +from playwright.async_api import Browser, Playwright, async_playwright + +from ..core.config import Config + +log = logging.getLogger(__name__) + +# JS executed inside the browser to obtain a reCAPTCHA v3 token. +# Handles both standard and enterprise reCAPTCHA libraries. 
_EXECUTE_JS = """
([key, action]) => new Promise((resolve, reject) => {
  const gr = window.grecaptcha?.enterprise || window.grecaptcha;
  if (gr && typeof gr.execute === 'function') {
    gr.ready(() => {
      gr.execute(key, {action}).then(resolve).catch(reject);
    });
    return;
  }
  // grecaptcha not loaded yet — inject the script ourselves
  const script = document.createElement('script');
  script.src = 'https://www.google.com/recaptcha/api.js?render=' + key;
  script.onerror = () => reject(new Error('Failed to load reCAPTCHA script'));
  script.onload = () => {
    const g = window.grecaptcha;
    if (!g) { reject(new Error('grecaptcha still undefined after script load')); return; }
    g.ready(() => {
      g.execute(key, {action}).then(resolve).catch(reject);
    });
  };
  document.head.appendChild(script);
})
"""

# Basic anti-detection init script
_STEALTH_JS = """
Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
"""


class RecaptchaV3Solver:
    """Solves RecaptchaV3TaskProxyless tasks via headless Chromium.

    Call :meth:`start` once before :meth:`solve`, and :meth:`stop` on shutdown.
    Every solve attempt runs in its own browser context, which is always
    closed again, so attempts do not share cookies or storage.
    """

    def __init__(self, config: Config) -> None:
        self._config = config
        self._playwright: Playwright | None = None
        self._browser: Browser | None = None

    async def start(self) -> None:
        """Start the Playwright driver and launch the Chromium instance."""
        self._playwright = await async_playwright().start()
        self._browser = await self._playwright.chromium.launch(
            headless=self._config.browser_headless,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
            ],
        )
        log.info(
            "Playwright browser started (headless=%s)", self._config.browser_headless
        )

    async def stop(self) -> None:
        """Close the browser and stop the Playwright driver.

        The handles are cleared up front, so a second call is a no-op and a
        later :meth:`solve` fails with a clear error instead of talking to a
        closed browser. The driver is stopped even when closing the browser
        raises.
        """
        browser, self._browser = self._browser, None
        playwright, self._playwright = self._playwright, None
        try:
            if browser:
                await browser.close()
        finally:
            if playwright:
                await playwright.stop()
        log.info("Playwright browser stopped")

    async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
        """Obtain a reCAPTCHA v3 token, retrying on failure.

        Args:
            params: Task parameters. ``websiteURL`` and ``websiteKey`` are
                required; ``pageAction`` is optional and defaults to
                ``"verify"``.

        Returns:
            ``{"gRecaptchaResponse": <token>}``.

        Raises:
            KeyError: A required parameter is missing.
            RuntimeError: Every attempt failed; the last failure is attached
                as ``__cause__``.
        """
        website_url = params["websiteURL"]
        website_key = params["websiteKey"]
        page_action = params.get("pageAction", "verify")
        retries = self._config.captcha_retries

        last_error: Exception | None = None
        for attempt in range(retries):
            try:
                token = await self._solve_once(
                    website_url, website_key, page_action
                )
                return {"gRecaptchaResponse": token}
            except Exception as exc:
                last_error = exc
                log.warning(
                    "Attempt %d/%d failed for %s: %s",
                    attempt + 1,
                    retries,
                    website_url,
                    exc,
                )
                # Back off between attempts, but not after the final one.
                if attempt < retries - 1:
                    await asyncio.sleep(2)

        raise RuntimeError(
            f"Failed after {retries} attempts: {last_error}"
        ) from last_error

    async def _solve_once(
        self, website_url: str, website_key: str, page_action: str
    ) -> str:
        """Run a single attempt in a fresh browser context and return the token.

        Raises:
            RuntimeError: The browser was not started, or the page returned
                something that does not look like a token.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if self._browser is None:
            raise RuntimeError("Browser not started; call start() first")

        context = await self._browser.new_context(
            user_agent=(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/131.0.0.0 Safari/537.36"
            ),
            viewport={"width": 1920, "height": 1080},
            locale="en-US",
        )

        try:
            # Page setup lives inside the ``try`` so the context is closed
            # even when creating or preparing the page fails.
            page = await context.new_page()
            await page.add_init_script(_STEALTH_JS)

            timeout_ms = self._config.browser_timeout * 1000
            await page.goto(
                website_url, wait_until="networkidle", timeout=timeout_ms
            )

            # Simulate minimal human-like behaviour to improve score
            await page.mouse.move(400, 300)
            await asyncio.sleep(1)
            await page.mouse.move(600, 400)
            await asyncio.sleep(0.5)

            # Wait for reCAPTCHA to become available (may already be on page)
            try:
                await page.wait_for_function(
                    "(typeof grecaptcha !== 'undefined' && typeof grecaptcha.execute === 'function') "
                    "|| (typeof grecaptcha !== 'undefined' && typeof grecaptcha?.enterprise?.execute === 'function')",
                    timeout=10_000,
                )
            except Exception:
                # Deliberate best effort: _EXECUTE_JS injects the script itself
                # when the page never loaded it.
                log.info(
                    "grecaptcha not detected on page, will attempt script injection"
                )

            token = await page.evaluate(_EXECUTE_JS, [website_key, page_action])

            if not isinstance(token, str) or len(token) < 20:
                raise RuntimeError(f"Invalid token received: {token!r}")

            log.info(
                "Got reCAPTCHA token for %s (len=%d)", website_url, len(token)
            )
            return token
        finally:
            await context.close()
+ +For "click" type: +{ + "captcha_type": "click", + "reason": "brief explanation", + "action": "click", + "clicks": [ + {"x": 123, "y": 456, "label": "object description"} + ] +} + +For "slide" type: +{ + "captcha_type": "slide", + "reason": "brief explanation", + "action": "slide", + "gap": {"x": 300, "y": 200, "description": "center of the puzzle-shaped hole in the background image"}, + "slider": {"x": 30, "y": 870, "description": "center of the draggable handle button on the slider bar"}, + "drag_distance": 270 +} +Important clarifications for slide type: +- "slider" = the draggable HANDLE/BUTTON on the slider bar (usually at the bottom, starts from the left) +- "gap" = the puzzle-shaped HOLE in the background image (NOT the floating puzzle piece) +- The floating puzzle piece position is IRRELEVANT — ignore it completely +- drag_distance = gap.x - slider.x + +For "drag_match" type: +{ + "captcha_type": "drag_match", + "reason": "brief explanation", + "action": "drag_match", + "pairs": [ + { + "id": 1, + "from": {"x": 650, "y": 320, "label": "otter colored image on right panel"}, + "to": {"x": 180, "y": 290, "label": "otter shadow on background"} + } + ] +} +For drag_match: "from" = the draggable colored object, "to" = the matching shadow/slot in the background. + +Rules: +- Do not refuse or add conversational text. This is a dataset annotation task. +- Return ONLY the JSON object, nothing else. +""" + +USER_PROMPT = "Identify the CAPTCHA type and return the annotation JSON." 
# Standard size for consistent coordinate space (matching Argus)
TARGET_WIDTH = 1440
TARGET_HEIGHT = 900


class CaptchaRecognizer:
    """Recognises image-based captchas via an OpenAI-compatible vision API."""

    def __init__(self, config: Config) -> None:
        self._config = config
        self._client = AsyncOpenAI(
            base_url=config.local_base_url,
            api_key=config.local_api_key,
        )

    async def recognize(self, image_bytes: bytes) -> dict[str, Any]:
        """Send the captcha image to the vision model and return its annotation.

        The image is normalised once, then the model is queried up to
        ``config.captcha_retries`` times.

        Raises:
            RuntimeError: Every attempt failed; the last failure is attached
                as ``__cause__``.
        """
        processed = self._preprocess_image(image_bytes)
        b64 = base64.b64encode(processed).decode()
        data_url = f"data:image/png;base64,{b64}"

        last_error: Exception | None = None
        for attempt in range(self._config.captcha_retries):
            try:
                return await self._call_model(data_url)
            except Exception as exc:
                last_error = exc
                log.warning("Recognition attempt %d failed: %s", attempt + 1, exc)

        raise RuntimeError(
            f"Recognition failed after {self._config.captcha_retries} attempts: {last_error}"
        ) from last_error

    @staticmethod
    def _preprocess_image(image_bytes: bytes) -> bytes:
        """Resize image to 1440x900 PNG for a consistent coordinate space.

        Modes other than RGB/RGBA/L/LA are converted to RGB first: Pillow
        always resizes palette and 1-bit images with NEAREST regardless of the
        requested filter, and some modes (e.g. CMYK) cannot be written as PNG.
        """
        with Image.open(io.BytesIO(image_bytes)) as source:
            if source.mode in ("RGB", "RGBA", "L", "LA"):
                frame = source
            else:
                frame = source.convert("RGB")
            resized = frame.resize(
                (TARGET_WIDTH, TARGET_HEIGHT), Image.Resampling.LANCZOS
            )
        buf = io.BytesIO()
        resized.save(buf, format="PNG")
        return buf.getvalue()

    async def _call_model(self, data_url: str) -> dict[str, Any]:
        """Query the multimodal model once and parse its reply as a JSON object."""
        response = await self._client.chat.completions.create(
            model=self._config.captcha_multimodal_model,
            temperature=0.05,
            max_tokens=1024,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": data_url, "detail": "high"},
                        },
                        {
                            "type": "text",
                            "text": USER_PROMPT,
                        },
                    ],
                },
            ],
        )

        raw = response.choices[0].message.content or ""
        return self._parse_json(raw)

    @staticmethod
    def _parse_json(text: str) -> dict[str, Any]:
        """Extract the JSON object from a model reply.

        Accepts bare JSON, JSON inside a markdown fence, and a JSON object
        surrounded by stray prose.

        Raises:
            ValueError: The reply holds no parseable JSON
                (``json.JSONDecodeError``) or the JSON is not an object.
        """
        # Strip markdown fences if present
        fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
        cleaned = fenced.group(1) if fenced else text.strip()
        try:
            data = json.loads(cleaned)
        except json.JSONDecodeError:
            # Models sometimes add prose despite the prompt; retry on the
            # outermost {...} span before giving up.
            start, end = cleaned.find("{"), cleaned.rfind("}")
            if start == -1 or end <= start:
                raise
            data = json.loads(cleaned[start : end + 1])
        if not isinstance(data, dict):
            raise ValueError(f"Expected JSON object, got {type(data).__name__}")
        return data

    async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
        """Solver interface for TaskManager integration.

        Args:
            params: Task parameters; ``body`` holds the image as base64,
                optionally as a ``data:`` URL.

        Returns:
            ``{"text": <annotation serialised as JSON>}``.

        Raises:
            ValueError: ``body`` is missing or empty.
        """
        body = params.get("body", "")
        if not body:
            raise ValueError("Missing 'body' field (base64 image)")
        # A data URL must lose its header first: b64decode silently drops the
        # non-alphabet characters and would decode the header text as payload.
        if isinstance(body, str) and body.startswith("data:"):
            _, separator, payload = body.partition(",")
            if separator:
                body = payload
        image_bytes = base64.b64decode(body)
        result = await self.recognize(image_bytes)
        return {"text": json.dumps(result)}
class TaskManager:
    """In-memory registry that runs captcha tasks in the background.

    Solvers are registered per task type. ``create_task`` stores a ``Task``
    and schedules its solver on the running event loop; callers poll the
    outcome with ``get_task``. Tasks older than ``TASK_TTL`` are dropped the
    next time a task is created.
    """

    TASK_TTL = timedelta(minutes=10)

    def __init__(self) -> None:
        self._tasks: dict[str, Task] = {}
        self._solvers: dict[str, Solver] = {}
        # Strong references to in-flight asyncio tasks. The event loop only
        # keeps weak references, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._background_tasks: set[asyncio.Task[None]] = set()

    def register_solver(self, task_type: str, solver: Solver) -> None:
        """Register (or replace) the solver that handles ``task_type``."""
        self._solvers[task_type] = solver

    def create_task(self, task_type: str, params: dict[str, Any]) -> str:
        """Store a new task, start solving it in the background, return its id.

        Must be called while an event loop is running, because the work is
        scheduled with ``asyncio.create_task``.
        """
        self._cleanup_expired()
        task_id = str(uuid.uuid4())
        task = Task(id=task_id, type=task_type, params=params)
        self._tasks[task_id] = task
        runner = asyncio.create_task(self._process_task(task))
        self._background_tasks.add(runner)
        # Drop the reference once the work is done so the set cannot grow.
        runner.add_done_callback(self._background_tasks.discard)
        return task_id

    def get_task(self, task_id: str) -> Task | None:
        """Return the task with ``task_id``, or ``None`` if unknown or expired."""
        return self._tasks.get(task_id)

    def supported_types(self) -> list[str]:
        """Return the task types that currently have a registered solver."""
        return list(self._solvers)

    async def _process_task(self, task: Task) -> None:
        """Run the solver for ``task`` and record the outcome on the task.

        Never raises: solver errors are stored as ``ERROR_CAPTCHA_UNSOLVABLE``
        and a missing solver as ``ERROR_TASK_NOT_SUPPORTED``.
        """
        solver = self._solvers.get(task.type)
        if not solver:
            task.status = TaskStatus.FAILED
            task.error_code = "ERROR_TASK_NOT_SUPPORTED"
            task.error_description = f"Task type '{task.type}' is not supported"
            return

        try:
            solution = await solver.solve(task.params)
            task.solution = solution
            task.status = TaskStatus.READY
            log.info("Task %s completed successfully", task.id)
        except Exception as exc:
            # Boundary of the background job: record the failure on the task
            # instead of letting it vanish as an unretrieved task exception.
            task.status = TaskStatus.FAILED
            task.error_code = "ERROR_CAPTCHA_UNSOLVABLE"
            task.error_description = str(exc)
            log.error("Task %s failed: %s", task.id, exc)

    def _cleanup_expired(self) -> None:
        """Remove every task whose age exceeds ``TASK_TTL``."""
        from datetime import timezone

        # Naive UTC, to stay comparable with the naive ``created_at`` stamps;
        # avoids the deprecated ``datetime.utcnow()``.
        now = datetime.now(timezone.utc).replace(tzinfo=None)
        expired = [
            tid
            for tid, t in self._tasks.items()
            if now - t.created_at > self.TASK_TTL
        ]
        for tid in expired:
            del self._tasks[tid]


task_manager = TaskManager()
_STEALTH_JS = """
Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
Object.defineProperty(navigator, 'languages', {get: () => ['en-US', 'en']});
Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
window.chrome = {runtime: {}, loadTimes: () => {}, csi: () => {}};
"""

_EXTRACT_TURNSTILE_TOKEN_JS = """
() => {
  // Check for Turnstile response input
  const input = document.querySelector('[name="cf-turnstile-response"]')
    || document.querySelector('input[name*="turnstile"]');
  if (input && input.value && input.value.length > 20) {
    return input.value;
  }
  // Try the turnstile API
  if (window.turnstile && typeof window.turnstile.getResponse === 'function') {
    const resp = window.turnstile.getResponse();
    if (resp && resp.length > 20) return resp;
  }
  return null;
}
"""


class TurnstileSolver:
    """Solves Cloudflare Turnstile tasks via headless Chromium.

    A browser may be injected through ``browser``. In that case the solver
    borrows it and :meth:`stop` leaves it open. Otherwise :meth:`start`
    launches a browser that the solver owns and :meth:`stop` closes.
    """

    def __init__(self, config: Config, browser: Browser | None = None) -> None:
        self._config = config
        self._playwright: Playwright | None = None
        self._browser: Browser | None = browser
        self._owns_browser = browser is None

    async def start(self) -> None:
        """Launch Chromium unless a browser is already available."""
        if self._browser is not None:
            return
        self._playwright = await async_playwright().start()
        self._browser = await self._playwright.chromium.launch(
            headless=self._config.browser_headless,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
            ],
        )
        log.info("TurnstileSolver browser started")

    async def stop(self) -> None:
        """Release the browser and Playwright driver if this solver owns them.

        Owned handles are cleared up front so a repeated call is a no-op and
        :meth:`start` can relaunch later. The driver is stopped even when
        closing the browser raises. A borrowed browser is left untouched.
        """
        if self._owns_browser:
            browser, self._browser = self._browser, None
            playwright, self._playwright = self._playwright, None
            try:
                if browser:
                    await browser.close()
            finally:
                if playwright:
                    await playwright.stop()
        log.info("TurnstileSolver stopped")

    async def solve(self, params: dict[str, Any]) -> dict[str, Any]:
        """Obtain a Turnstile token, retrying on failure.

        Args:
            params: Task parameters; ``websiteURL`` and ``websiteKey`` are
                required.

        Returns:
            ``{"token": <token>}``.

        Raises:
            KeyError: A required parameter is missing.
            RuntimeError: Every attempt failed; the last failure is attached
                as ``__cause__``.
        """
        website_url = params["websiteURL"]
        website_key = params["websiteKey"]
        retries = self._config.captcha_retries

        last_error: Exception | None = None
        for attempt in range(retries):
            try:
                token = await self._solve_once(website_url, website_key)
                return {"token": token}
            except Exception as exc:
                last_error = exc
                log.warning(
                    "Turnstile attempt %d/%d failed: %s",
                    attempt + 1,
                    retries,
                    exc,
                )
                # Back off between attempts, but not after the final one.
                if attempt < retries - 1:
                    await asyncio.sleep(2)

        raise RuntimeError(
            f"Turnstile failed after {retries} attempts: {last_error}"
        ) from last_error

    async def _solve_once(self, website_url: str, website_key: str) -> str:
        """Run a single attempt in a fresh browser context and return the token.

        ``website_key`` is accepted for interface parity with :meth:`solve`
        but is currently unused.

        Raises:
            RuntimeError: The browser was not started, or no token appeared
                within the polling window (15 polls, 2 s apart).
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if self._browser is None:
            raise RuntimeError(
                "TurnstileSolver browser not started; call start() first"
            )

        context = await self._browser.new_context(
            user_agent=(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/131.0.0.0 Safari/537.36"
            ),
            viewport={"width": 1920, "height": 1080},
            locale="en-US",
        )

        try:
            # Page setup lives inside the ``try`` so the context is closed
            # even when creating or preparing the page fails.
            page = await context.new_page()
            await page.add_init_script(_STEALTH_JS)

            timeout_ms = self._config.browser_timeout * 1000
            await page.goto(website_url, wait_until="networkidle", timeout=timeout_ms)

            await page.mouse.move(400, 300)
            await asyncio.sleep(1)

            # Try clicking the Turnstile checkbox
            try:
                # Locators are strict: an action on a locator that matches
                # several elements raises. Both selectors are alternatives
                # that can match more than once, so pin the first match.
                widget_frame = page.frame_locator(
                    'iframe[src*="challenges.cloudflare.com"], iframe[src*="turnstile"]'
                ).first
                checkbox = widget_frame.locator(
                    'input[type="checkbox"], .ctp-checkbox-label, label'
                ).first
                await checkbox.click(timeout=8_000)
            except Exception:
                # Deliberate best effort: the widget may solve without a click.
                log.info("No Turnstile checkbox found, waiting for auto-solve")

            # Wait for the token to appear
            for _ in range(15):
                await asyncio.sleep(2)
                token = await page.evaluate(_EXTRACT_TURNSTILE_TOKEN_JS)
                if token:
                    log.info("Got Turnstile token (len=%d)", len(token))
                    return token

            raise RuntimeError("Turnstile token not obtained within timeout")
        finally:
            await context.close()
def _create_task_with_solver(
    task: dict[str, str], solver: AsyncMock
) -> dict[str, object]:
    """POST ``task`` to /createTask while ``solver`` handles its type.

    The app is loaded first so the solver lands on the freshly reloaded
    manager; the registration is removed again before returning.
    """
    client = _load_app()
    manager = getattr(
        importlib.import_module("src.services.task_manager"), "task_manager"
    )
    task_type = task["type"]
    manager.register_solver(task_type, solver)
    try:
        payload = {"clientKey": "any", "task": task}
        return client.post("/createTask", json=payload).json()
    finally:
        manager._solvers.pop(task_type, None)


def _assert_missing_fields(task_type: str) -> None:
    """A task carrying only its type must be rejected as incomplete."""
    body = _create_task_with_solver({"type": task_type}, AsyncMock())
    assert body["errorId"] == 1
    assert body["errorCode"] == "ERROR_TASK_PROPERTY_EMPTY"


def _assert_task_accepted(task: dict[str, str], solution: dict[str, object]) -> None:
    """A complete task must be accepted and answered with a task id."""
    solver = AsyncMock(return_value=solution)
    solver.solve = solver
    body = _create_task_with_solver(task, solver)
    assert body["errorId"] == 0
    assert body["taskId"] is not None


def test_health_endpoint() -> None:
    reply = _load_app().get("/api/v1/health")
    assert reply.status_code == 200
    payload = reply.json()
    assert payload["status"] == "ok"
    for field in ("cloud_model", "local_model"):
        assert field in payload


def test_root_endpoint() -> None:
    reply = _load_app().get("/")
    assert reply.status_code == 200
    payload = reply.json()
    assert payload["service"] == "captcha-solver"
    assert payload["version"] == "3.0.0"
    assert "createTask" in payload["endpoints"]
    assert isinstance(payload["supported_task_types"], list)


def test_root_endpoint_reports_all_supported_types() -> None:
    client = _load_app()
    manager = getattr(
        importlib.import_module("src.services.task_manager"), "task_manager"
    )
    for name in ALL_TASK_TYPES:
        manager.register_solver(name, AsyncMock())
    payload = client.get("/").json()
    assert set(payload["supported_task_types"]) == set(ALL_TASK_TYPES)


def test_get_balance() -> None:
    reply = _load_app().post("/getBalance", json={"clientKey": "any"})
    assert reply.status_code == 200
    payload = reply.json()
    assert payload["errorId"] == 0
    assert payload["balance"] > 0


def test_get_balance_requires_client_key() -> None:
    client = _load_app(client_key="secret")
    rejected = client.post("/getBalance", json={"clientKey": "wrong"})
    accepted = client.post("/getBalance", json={"clientKey": "secret"})
    assert rejected.json()["errorId"] == 1
    assert accepted.json()["errorId"] == 0


def test_create_task_unsupported_type() -> None:
    request = {
        "clientKey": "any",
        "task": {"type": "UnsupportedType", "websiteURL": "https://example.com"},
    }
    payload = _load_app().post("/createTask", json=request).json()
    assert payload["errorId"] == 1
    assert payload["errorCode"] == "ERROR_TASK_NOT_SUPPORTED"


def test_create_task_missing_fields_recaptcha_v3() -> None:
    _assert_missing_fields("RecaptchaV3TaskProxyless")


def test_create_task_missing_fields_recaptcha_v2() -> None:
    _assert_missing_fields("NoCaptchaTaskProxyless")


def test_create_task_missing_fields_hcaptcha() -> None:
    _assert_missing_fields("HCaptchaTaskProxyless")


def test_create_task_missing_fields_turnstile() -> None:
    _assert_missing_fields("TurnstileTaskProxyless")


def test_create_task_missing_fields_image() -> None:
    _assert_missing_fields("ImageToTextTask")


def test_create_task_missing_fields_classification() -> None:
    _assert_missing_fields("HCaptchaClassification")


def test_create_task_invalid_client_key() -> None:
    request = {
        "clientKey": "wrong-key",
        "task": {
            "type": "RecaptchaV3TaskProxyless",
            "websiteURL": "https://example.com",
            "websiteKey": "key123",
        },
    }
    payload = _load_app(client_key="correct-key").post(
        "/createTask", json=request
    ).json()
    assert payload["errorId"] == 1
    assert payload["errorCode"] == "ERROR_KEY_DOES_NOT_EXIST"


def test_get_task_result_not_found() -> None:
    request = {"clientKey": "any", "taskId": "nonexistent-id"}
    payload = _load_app().post("/getTaskResult", json=request).json()
    assert payload["errorId"] == 1
    assert payload["errorCode"] == "ERROR_NO_SUCH_CAPCHA_ID"


def test_create_recaptcha_v3_task_accepted() -> None:
    _assert_task_accepted(
        {
            "type": "RecaptchaV3TaskProxyless",
            "websiteURL": "https://example.com",
            "websiteKey": "test-key",
        },
        {"gRecaptchaResponse": "tok"},
    )


def test_create_turnstile_task_accepted() -> None:
    _assert_task_accepted(
        {
            "type": "TurnstileTaskProxyless",
            "websiteURL": "https://example.com",
            "websiteKey": "1x000",
        },
        {"token": "cf-tok"},
    )


def test_create_classification_task_accepted() -> None:
    _assert_task_accepted(
        {
            "type": "ReCaptchaV2Classification",
            "image": "aGVsbG8=",
            "question": "Select traffic lights",
        },
        {"objects": [0, 3]},
    )
# --- typings/fastapi/testclient.pyi ---
# Minimal hand-written stub for fastapi.testclient. It declares only a
# status code, a JSON accessor and GET/POST helpers.
from typing import Any


class Response:
    # HTTP status code of the response.
    status_code: int
    # Decoded JSON body; typed as Any because the stub does not model payloads.
    def json(self) -> Any: ...


class TestClient:
    # ``app`` is typed as Any, so the stub accepts any application object.
    def __init__(self, app: Any) -> None: ...
    # Extra keyword arguments (e.g. ``json=``) are passed through untyped.
    def get(self, url: str, **kwargs: Any) -> Response: ...
    def post(self, url: str, **kwargs: Any) -> Response: ...


# --- typings/uvicorn/__init__.pyi ---
# Minimal stub for uvicorn: only ``run`` is declared, with the application
# given as a string. NOTE(review): presumably an "module:attr" import
# string — confirm against the caller.
from typing import Any


def run(app: str, *, host: str, port: int, reload: bool = ...) -> None: ...