File size: 3,110 Bytes
f04e83b
 
 
 
 
225f368
f04e83b
 
 
 
 
 
 
225f368
 
 
 
 
 
f04e83b
 
 
 
 
 
225f368
 
f04e83b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from flask import Flask, request, jsonify
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch
import io
import os
import requests

# ==========================================
# Initialize Flask App
# ==========================================
app = Flask(__name__)

# ==========================================
# Hugging Face cache location
# ==========================================
# Keep every Hugging Face download under one directory; this is the original
# author's workaround for a cache-directory permission issue.
# NOTE(review): HF_HOME is assigned after `transformers` has already been
# imported at the top of the file -- confirm the variable is still honored at
# this point. The explicit cache_dir passed below does not depend on it.
_CACHE_DIR = "/app/cache"
os.environ["HF_HOME"] = _CACHE_DIR
os.makedirs(_CACHE_DIR, exist_ok=True)

# ==========================================
# Load Model and Processor
# ==========================================
MODEL_NAME = "anuashok/ocr-captcha-v3"
print("🚀 Loading model...")

# Both artifacts come from the same checkpoint and share the same cache.
_pretrained_kwargs = {"cache_dir": _CACHE_DIR}
processor = TrOCRProcessor.from_pretrained(MODEL_NAME, **_pretrained_kwargs)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME, **_pretrained_kwargs)

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# The service only runs inference, so put the model in eval mode.
model.eval()
model.to(device)

print("✅ Model loaded successfully on", device)

# ==========================================
# API Endpoint (Root)
# ==========================================
@app.route("/", methods=["GET", "POST"])
def root():
    """Run captcha OCR on an image supplied by URL or by file upload.

    Routes:
        GET /             -> usage message (no ``url`` query parameter).
        GET /?url=<image> -> download the image at ``url`` and recognize it.
        POST /            -> recognize the multipart upload in field ``file``.

    Returns:
        A JSON response: ``{"text": ...}`` on success, ``{"message": ...}``
        for the usage hint, or ``{"error": ...}`` with status 400 when the
        image cannot be obtained or decoded and status 500 for any other
        failure (for example an error raised during inference).
    """
    try:
        if request.method == "POST":
            # ---------------------------
            # Handle POST with file upload
            # ---------------------------
            if "file" not in request.files:
                return jsonify({"error": "No file provided"}), 400

            upload = request.files["file"]
            try:
                image = Image.open(io.BytesIO(upload.read())).convert("RGB")
            except Exception as e:
                # An undecodable upload is the client's mistake, not a
                # server fault, so answer 400 instead of falling through
                # to the generic 500 handler below.
                return jsonify({"error": f"Invalid image file: {e}"}), 400
        else:
            # ---------------------------
            # Handle GET with ?url=
            # ---------------------------
            # Flask also routes HEAD to a view that accepts GET; handling
            # "anything that is not POST" here keeps such requests from
            # reaching the model code with `image` unassigned.
            image_url = request.args.get("url")
            if not image_url:
                return jsonify({
                    "message": "OCR Captcha API running. Use POST (file) or GET ?url=image_url."
                })

            # NOTE(security): the URL is caller-controlled, so this is a
            # server-side request to an arbitrary address (SSRF). Restrict
            # the allowed hosts if this service can reach internal networks.
            try:
                response = requests.get(image_url, timeout=10)
                # Surface 4xx/5xx replies as the real HTTP error instead of
                # handing an error page to the image decoder.
                response.raise_for_status()
                image = Image.open(io.BytesIO(response.content)).convert("RGB")
            except Exception as e:
                return jsonify({"error": f"Failed to fetch image from URL: {e}"}), 400

        # ---------------------------
        # Process Image with Model
        # ---------------------------
        pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
        # Inference only: no gradients are needed for generation.
        with torch.no_grad():
            generated_ids = model.generate(pixel_values)
        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return jsonify({"text": text})

    except Exception as e:
        # Last-resort boundary: report any unexpected failure as JSON.
        return jsonify({"error": str(e)}), 500


# ==========================================
# Run Server
# ==========================================
if __name__ == "__main__":
    # Listen on every interface (0.0.0.0) so the Hugging Face Space can
    # reach the server.
    bind_host, bind_port = "0.0.0.0", 7860
    app.run(host=bind_host, port=bind_port)