Spaces:

doniramdani820
/

recaptcha-solver-3x3

Paused

App Files Files Community

doniramdani820 committed on Nov 7, 2025

Commit

a786ad9

verified ·

1 Parent(s): 67bcaa5

Upload 6 files

Browse files

Files changed (6) hide show

Dockerfile +31 -0
README.md +142 -10
app.py +398 -0
best.onnx +3 -0
data.yaml +25 -0
requirements.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,31 @@

# Dockerfile for reCAPTCHA 3x3 Detection Space
FROM python:3.9-slim
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
    libgl1 \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements first (for caching)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code and models
COPY app.py .
COPY best.onnx .
COPY data.yaml .
# Expose port
EXPOSE 7860
# Health check.
# Uses only the standard library: `requests` is not listed in requirements.txt,
# so importing it here would make every probe fail. urlopen() raises (non-zero
# exit status) on connection errors and on HTTP 4xx/5xx responses.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health', timeout=5)"
# Run the application
CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,10 +1,142 @@
----
-title: Recaptcha Solver 3x3
-emoji: 😻
-colorFrom: yellow
-colorTo: blue
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: reCAPTCHA Solver 3x3
+emoji: 🤖
+colorFrom: blue
+colorTo: green
+sdk: docker
+pinned: false
+license: mit
+---
+# 🤖 reCAPTCHA 3x3 Detection API
+Solves 3x3 grid reCAPTCHA challenges using YOLO object detection.
+## 🎯 Features
+- ⚡ Fast inference (2-3s per request)
+- 🎯 High accuracy with YOLO detection
+- 🔄 Automatically lowers the confidence threshold (0.20 → 0.15 → 0.10 → 0.05) until at least three tiles are found
+- 📊 Built-in metrics and monitoring
+- 🌐 CORS enabled for browser extensions
+## 📡 API Endpoints
+### POST /predict
+Predict which tiles to click in a 3x3 grid.
+**Request:**
+```json
+{
+  "image": "data:image/png;base64,iVBOR...",
+  "challenge_title": "crosswalks"
+}
+```
+**Response:**
+```json
+{
+  "success": true,
+  "tiles_to_click": [2, 5, 8],
+  "num_detections": 12,
+  "confidence_used": 0.20,
+  "latency_s": 2.341,
+  "challenge_title": "crosswalks"
+}
+```
+### GET /health
+Check API health and statistics.
+**Response:**
+```json
+{
+  "status": "healthy",
+  "model_loaded": true,
+  "model_load_time_s": 1.8,
+  "requests_total": 42,
+  "requests_successful": 40,
+  "requests_failed": 2,
+  "avg_latency_s": 2.5
+}
+```
+## 🚀 Usage
+### From Browser Extension
+```javascript
+const response = await fetch('https://YOUR-SPACE.hf.space/predict', {
+    method: 'POST',
+    headers: {'Content-Type': 'application/json'},
+    body: JSON.stringify({
+        image: screenshotBase64,
+        challenge_title: 'crosswalks'
+    })
+});
+const result = await response.json();
+console.log('Tiles to click:', result.tiles_to_click);
+```
+### From Python
+```python
+import requests
+import base64
+with open('screenshot.png', 'rb') as f:
+    image_b64 = base64.b64encode(f.read()).decode()
+response = requests.post('https://YOUR-SPACE.hf.space/predict', json={
+    'image': f'data:image/png;base64,{image_b64}',
+    'challenge_title': 'crosswalks'
+})
+print(response.json())
+```
+## 🔧 Model Details
+- **Architecture:** YOLOv8 Detection
+- **Input:** 640x640 RGB image
+- **Output:** Bounding boxes with confidence scores
+- **Classes:** Multiple object types (vehicles, crosswalks, traffic lights, etc.)
+## 📊 Performance
+- **Cold start:** ~15 seconds (first request)
+- **Warm inference:** 2-3 seconds per request
+- **Memory usage:** ~1.5GB
+- **Concurrent requests:** 2-3 simultaneous
+## 🎯 Tile Mapping
+The API maps detected objects to a 3x3 grid:
+```
+[0] [1] [2]
+[3] [4] [5]
+[6] [7] [8]
+```
+Each detected object is assigned to the tile that contains the center point of its bounding box.
+## 🛡️ Rate Limits
+- **Free tier:** 100 requests per hour
+- **Timeout:** 30 seconds per request
+- **Max image size:** 10MB
+## 📝 License
+MIT License - See LICENSE file for details
+## 🔗 Related
+- [4x4 Segmentation Space](https://huggingface.co/spaces/YOUR-USERNAME/recaptcha-solver-4x4)
+- [Browser Extension](https://github.com/YOUR-REPO)
+---
+**Note:** This API is for educational and research purposes only. Use responsibly and respect website terms of service.

app.py ADDED Viewed

	@@ -0,0 +1,398 @@

+"""
+reCAPTCHA 3x3 Detection API - Hugging Face Space
+Lightweight API for 3x3 grid challenge solving using YOLO detection
+"""
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import cv2
+import numpy as np
+import onnxruntime as ort
+import yaml
+import base64
+import io
+from PIL import Image
+import time
+import os
+from functools import lru_cache
# Flask application object; CORS is enabled so a browser extension can call it.
app = Flask(__name__)
CORS(app)

# Model state, populated by load_model().
model_session = None
class_names = None
model_load_time = 0

# Request statistics, updated by /predict and reported by /health.
request_count = successful_count = failed_count = 0
total_latency = 0.0

# Configuration
MODEL_FOLDER = "."  # Model files live next to this script (no subfolder)
CONFIDENCE_THRESHOLD = 0.20
INPUT_SIZE = 640

# Startup banner
print("=" * 60)
print("🚀 reCAPTCHA 3x3 Detection API")
print("=" * 60)
@lru_cache(maxsize=1)
def load_model():
    """Load the ONNX model and class names into the module-level globals.

    Reads ``best.onnx`` and ``data.yaml`` from ``MODEL_FOLDER``. The globals
    ``model_session``, ``class_names`` and ``model_load_time`` are assigned
    only after *both* files have loaded, so a failure never leaves a session
    published without its class names.

    The result is memoised by ``lru_cache``, which also caches a failed
    attempt; call ``load_model.cache_clear()`` before retrying.

    Returns:
        bool: True when the model and class names were loaded, False otherwise
        (the error is printed, not raised).
    """
    global model_session, class_names, model_load_time
    start_time = time.time()
    print(f"📦 Loading model from {MODEL_FOLDER}/...")
    try:
        # Load ONNX model (CPU only)
        model_path = os.path.join(MODEL_FOLDER, "best.onnx")
        session = ort.InferenceSession(
            model_path,
            providers=['CPUExecutionProvider'],
        )
        print(f"   ✓ Model loaded: {model_path}")

        # Load class names; explicit encoding keeps this locale-independent
        data_yaml_path = os.path.join(MODEL_FOLDER, "data.yaml")
        with open(data_yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
        names = data['names']
        print(f"   ✓ Classes loaded: {len(names)} classes")
    except Exception as e:
        # Startup boundary: report the problem and let the caller decide.
        print(f"   ✗ Error loading model: {e}")
        return False

    # Publish the state only once everything succeeded.
    model_session = session
    class_names = names
    model_load_time = time.time() - start_time
    print(f"   ⏱️  Load time: {model_load_time:.2f}s")
    return True
def base64_to_image(base64_string):
    """Decode a base64 (optionally data-URL) string into an OpenCV BGR image.

    Args:
        base64_string: Raw base64 text, or a data URL such as
            ``data:image/png;base64,...``.

    Returns:
        A 3-channel BGR ``numpy`` array, or None when the input cannot be
        decoded (the error is printed, not raised).
    """
    try:
        # Remove data URL prefix if present (split once, keep the payload)
        if ',' in base64_string:
            base64_string = base64_string.split(',', 1)[1]
        image_data = base64.b64decode(base64_string)
        with Image.open(io.BytesIO(image_data)) as pil_image:
            # Force 3-channel RGB: grayscale and palette images would
            # otherwise yield a 2-D array that COLOR_RGB2BGR rejects, and
            # CMYK data would be misread as colour plus alpha.
            rgb_image = pil_image.convert('RGB')
        return cv2.cvtColor(np.array(rgb_image), cv2.COLOR_RGB2BGR)
    except Exception as e:
        # Deliberately best-effort: any decode failure means "invalid image".
        print(f"Error converting base64 to image: {e}")
        return None
def preprocess_image(img):
    """Turn a BGR image into the float32 NCHW batch fed to the YOLO model.

    Returns:
        tuple: ``(batch, orig_w, orig_h)`` where ``batch`` has shape
        ``(1, 3, INPUT_SIZE, INPUT_SIZE)`` with values scaled to [0, 1], and
        ``orig_w`` / ``orig_h`` are the dimensions of the input image.
    """
    orig_h, orig_w = img.shape[:2]
    # Plain resize to the square model input (aspect ratio is not preserved)
    resized = cv2.resize(img, (INPUT_SIZE, INPUT_SIZE))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    # Scale to [0, 1], then reorder HWC -> CHW
    chw = np.transpose(rgb.astype(np.float32) / 255.0, (2, 0, 1))
    # Leading batch axis
    return np.expand_dims(chw, axis=0), orig_w, orig_h
def run_inference(img_batch):
    """Run the loaded ONNX session on one preprocessed batch.

    Returns the list of all model outputs, as produced by ``session.run``.
    """
    # The model's first declared input receives the image batch.
    feed = {model_session.get_inputs()[0].name: img_batch}
    return model_session.run(None, feed)
def parse_detections(outputs, orig_w, orig_h, conf_threshold=0.20):
    """Convert raw YOLO output into a list of NMS-filtered detections.

    Args:
        outputs: Sequence of model outputs; only ``outputs[0]`` is used. It is
            expected to be ``[1, 4 + num_classes, N]`` or
            ``[1, N, 4 + num_classes]`` with rows of
            ``(x_center, y_center, width, height, class scores...)`` in
            model-input pixels.
        orig_w: Width of the original image, used to rescale boxes.
        orig_h: Height of the original image, used to rescale boxes.
        conf_threshold: Minimum best-class score for a detection to be kept.

    Returns:
        list[dict]: One dict per surviving detection with keys ``box``
        (``[x1, y1, x2, y2]`` ints), ``center`` (``[x, y]`` floats),
        ``confidence``, ``class_id`` and ``class_name``. Empty when nothing
        passes the threshold.
    """
    if outputs is None or len(outputs) == 0:
        return []

    preds = np.asarray(outputs[0])[0]  # drop the batch axis
    # Ensure rows are predictions: [num_predictions, 4 + num_classes]
    if preds.shape[0] < preds.shape[1]:
        preds = preds.T
    if preds.shape[1] <= 4:
        return []  # no class-score columns to evaluate

    # Vectorised confidence filter: only a handful of the candidate rows
    # normally survive, so the Python loop below stays short.
    class_scores = preds[:, 4:]
    best_ids = class_scores.argmax(axis=1)
    best_scores = class_scores[np.arange(preds.shape[0]), best_ids]
    keep = np.flatnonzero(~(best_scores < conf_threshold))
    if keep.size == 0:
        return []

    results = []
    nms_boxes = []
    for idx in keep:
        class_id = int(best_ids[idx])
        # VALIDATION: Skip if class_id is out of range
        if class_names and class_id >= len(class_names):
            print(f"   ⚠ Skipping detection with invalid class_id={class_id} (max={len(class_names)-1})")
            continue

        # Scale from model-input pixels to original-image pixels
        x_center = float(preds[idx, 0]) * orig_w / INPUT_SIZE
        y_center = float(preds[idx, 1]) * orig_h / INPUT_SIZE
        width = float(preds[idx, 2]) * orig_w / INPUT_SIZE
        height = float(preds[idx, 3]) * orig_h / INPUT_SIZE

        x1 = int(x_center - width / 2)
        y1 = int(y_center - height / 2)
        x2 = int(x_center + width / 2)
        y2 = int(y_center + height / 2)

        results.append({
            'box': [x1, y1, x2, y2],
            'center': [x_center, y_center],
            'confidence': float(best_scores[idx]),
            'class_id': class_id,
            'class_name': class_names[class_id] if class_names else str(class_id)
        })
        # cv2.dnn.NMSBoxes expects [x, y, width, height], not corner pairs.
        nms_boxes.append([x1, y1, x2 - x1, y2 - y1])

    if not results:
        return []

    # Apply NMS (class-agnostic, IoU threshold 0.45)
    scores = [r['confidence'] for r in results]
    indices = cv2.dnn.NMSBoxes(nms_boxes, scores, conf_threshold, 0.45)
    if len(indices) > 0:
        # Depending on the OpenCV version the result is 1-D or Nx1.
        results = [results[i] for i in np.asarray(indices).flatten()]
    return results
def normalize_text(text):
    """Normalize a challenge title to the label used for class matching.

    The text is lower-cased, whitespace is collapsed and the articles "a" and
    "the" are dropped as whole words. If the result contains one of the known
    object names, that object's canonical label is returned; otherwise the
    cleaned text itself is returned.

    Args:
        text: Challenge title, e.g. ``"a fire hydrant"`` or ``"Cars"``.

    Returns:
        str: The canonical label, or the cleaned text when nothing matches.
        Empty string for empty/None input.
    """
    if not text:
        return ''

    # Drop articles as whole words only; a substring replace would also eat
    # the end of words such as "zebra " or "bathe ".
    articles = ('a', 'the')
    text = ' '.join(word for word in text.lower().split() if word not in articles)

    # Singular/plural mapping
    mappings = {
        'bicycle': 'bicycles',
        'bus': 'buses',
        'car': 'cars',
        'fire hydrant': 'fire hydrant',  # Keep singular! class name is "a fire hydrant"
        'motorcycle': 'motorcycles',
        'traffic light': 'traffic lights',
        'crosswalk': 'crosswalks',
        'vehicle': 'vehicles',
        'bridge': 'bridges',
        'boat': 'boats',
        'taxi': 'taxis',
        'stair': 'stairs',
        'chimney': 'chimneys',
        'parking meter': 'parking meters',
    }
    # First key found in the text wins (dict order is the priority order).
    for singular, plural in mappings.items():
        if singular in text:
            return plural
    return text
def get_tiles_to_click(detections, challenge_title, img_width, img_height, max_tiles=3):
    """Map detections that match the challenge to tiles of a 3x3 grid.

    Args:
        detections: Dicts with ``class_name``, ``center`` (``[x, y]`` in image
            pixels) and ``confidence`` keys.
        challenge_title: Object the challenge asks for, e.g. ``"crosswalks"``.
        img_width: Width of the image the centers refer to.
        img_height: Height of the image the centers refer to.
        max_tiles: Maximum number of tiles to return.

    Returns:
        list[int]: Sorted tile ids (0-8, left to right, top to bottom) of the
        ``max_tiles`` highest-confidence matching tiles. Empty when there is
        nothing to match or the image size is not positive.
    """
    if not detections or not challenge_title:
        return []
    # A zero-sized image would make the tile size 0 and the division fail.
    if img_width <= 0 or img_height <= 0:
        return []

    normalized_title = normalize_text(challenge_title)
    # An empty needle is a substring of every class name; treat as "no match".
    if not normalized_title:
        return []

    tile_width = img_width / 3
    tile_height = img_height / 3
    articles = ('a', 'the')

    # Best confidence seen per tile id
    tile_scores = {}
    for det in detections:
        # Strip articles as whole words so class names line up with the title
        det_class = ' '.join(
            word for word in det['class_name'].lower().split() if word not in articles
        )
        if not det_class:
            continue
        # Match when either string contains the other (handles plural forms)
        if normalized_title not in det_class and det_class not in normalized_title:
            continue

        center_x, center_y = det['center']
        # Clamp so centers on or outside the image edge still land in a tile
        col = max(0, min(2, int(center_x // tile_width)))
        row = max(0, min(2, int(center_y // tile_height)))
        tile_id = row * 3 + col

        if tile_id not in tile_scores or det['confidence'] > tile_scores[tile_id]:
            tile_scores[tile_id] = det['confidence']

    # Sort by confidence and take top N, then report in grid order
    ranked = sorted(tile_scores.items(), key=lambda item: item[1], reverse=True)
    return sorted(tile_id for tile_id, _ in ranked[:max_tiles])
def auto_lower_confidence(img_batch, orig_w, orig_h, challenge_title, img_width, img_height):
    """Lower the confidence threshold step by step until 3 tiles are found.

    Inference runs once; only the parsing of its output is repeated at each
    threshold (0.20, 0.15, 0.10, 0.05).

    Returns:
        tuple: ``(tiles, num_detections, confidence_used)``. When no threshold
        yields three tiles, the result for the lowest threshold is returned.
    """
    conf_thresholds = [0.20, 0.15, 0.10, 0.05]
    # The model output does not depend on the threshold, so compute it once
    # instead of re-running the network on every iteration.
    outputs = run_inference(img_batch)

    tiles, detections = [], []
    for conf in conf_thresholds:
        detections = parse_detections(outputs, orig_w, orig_h, conf_threshold=conf)
        tiles = get_tiles_to_click(detections, challenge_title, img_width, img_height, max_tiles=3)
        if len(tiles) >= 3:
            print(f"   ✓ Got {len(tiles)} tiles at conf={conf}")
            return tiles, len(detections), conf

    # Return what we have
    print(f"   ⚠ Could only find {len(tiles)} tiles")
    return tiles, len(detections), conf_thresholds[-1]
@app.route('/health', methods=['GET'])
def health():
    """Report service status, model state and request statistics as JSON."""
    # Divide by at least 1 so the average is defined before any request.
    handled = max(request_count, 1)
    status = {
        'status': 'healthy',
        'model_loaded': model_session is not None,
        'model_load_time_s': model_load_time,
        'requests_total': request_count,
        'requests_successful': successful_count,
        'requests_failed': failed_count,
        'avg_latency_s': total_latency / handled
    }
    return jsonify(status)
@app.route('/predict', methods=['POST'])
def predict():
    """Main prediction endpoint.

    Expects a JSON object with ``image`` (base64 string or data URL) and an
    optional ``challenge_title`` string.

    Responses:
        200: ``success``, ``tiles_to_click``, ``num_detections``,
             ``confidence_used``, ``latency_s`` and ``challenge_title``.
        400: body is not a JSON object, ``image`` is missing or undecodable,
             or ``challenge_title`` is not a string.
        503: the model has not been loaded.
        500: unexpected failure during inference.
    """
    global request_count, successful_count, failed_count, total_latency
    start_time = time.time()
    request_count += 1
    try:
        # silent=True returns None for a missing or malformed JSON body
        # instead of raising an HTTPException, which the handler below would
        # otherwise report as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'image' not in data:
            failed_count += 1
            return jsonify({'error': 'Missing image data'}), 400

        # A JSON null title is treated the same as an absent one.
        challenge_title = data.get('challenge_title') or ''
        if not isinstance(challenge_title, str):
            failed_count += 1
            return jsonify({'error': 'challenge_title must be a string'}), 400

        # The model is only loaded by the __main__ entry point; fail clearly
        # when the app is served some other way without loading it.
        if model_session is None:
            failed_count += 1
            return jsonify({'error': 'Model not loaded'}), 503

        # Convert base64 to image
        img = base64_to_image(data['image'])
        if img is None:
            failed_count += 1
            return jsonify({'error': 'Invalid image data'}), 400
        img_height, img_width = img.shape[:2]

        # Preprocess
        img_batch, orig_w, orig_h = preprocess_image(img)

        # Predict with auto-lower confidence
        tiles, num_detections, used_conf = auto_lower_confidence(
            img_batch, orig_w, orig_h, challenge_title, img_width, img_height
        )

        # Calculate latency
        latency = time.time() - start_time
        total_latency += latency
        successful_count += 1
        return jsonify({
            'success': True,
            'tiles_to_click': tiles,
            'num_detections': num_detections,
            'confidence_used': used_conf,
            'latency_s': round(latency, 3),
            'challenge_title': challenge_title
        })
    except Exception as e:
        # Top-level boundary: log the traceback and answer with a 500.
        failed_count += 1
        latency = time.time() - start_time
        total_latency += latency
        print(f"Error in predict: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({
            'success': False,
            'error': str(e),
            'latency_s': round(latency, 3)
        }), 500
@app.route('/', methods=['GET'])
def index():
    """Describe the API (name, version, model, endpoints) as JSON."""
    endpoints = {
        'POST /predict': 'Predict tiles to click',
        'GET /health': 'Health check',
        'GET /': 'This page'
    }
    description = {
        'name': 'reCAPTCHA 3x3 Detection API',
        'version': '1.0.0',
        'model': '3X3 YOLO Detection',
        'endpoints': endpoints
    }
    return jsonify(description)
if __name__ == '__main__':
    print("\n🚀 Starting 3x3 Detection API...")
    # Load model on startup; without it the service cannot answer anything.
    if not load_model():
        print("❌ Failed to load model!\n")
        # SystemExit rather than exit(): the latter is injected by the `site`
        # module for interactive use and is not guaranteed to exist.
        raise SystemExit(1)
    print("✅ Model loaded successfully!\n")
    # Run Flask app on all interfaces
    app.run(host='0.0.0.0', port=7860, debug=False)

best.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6ab75da65bbc50b0b12f8fd0778fbcb076b28999fe5229a90532f3d16672f31
+size 44752029

data.yaml ADDED Viewed

	@@ -0,0 +1,25 @@

+names:
+- a fire hydrant
+- bicycles
+- bridge
+- bus
+- car
+- chimney
+- crosswalk
+- ladder
+- motorcycle
+- other
+- parking meters
+- tractor
+- traffic light
+- tree
+nc: 14
+roboflow:
+  license: CC BY 4.0
+  project: rere-6ebeg
+  url: https://universe.roboflow.com/rereeee/rere-6ebeg/dataset/6
+  version: 6
+  workspace: rereeee
+test: ../test/images
+train: ../train/images
+val: ../valid/images

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+flask==3.0.0
+flask-cors==4.0.0
+opencv-python-headless==4.8.1.78
+onnxruntime
+pyyaml==6.0.1
+Pillow==10.1.0
+numpy==1.24.3