# OCR / app.py
# Inayatgaming's picture
# Update app.py
# 225f368 verified
import io
import os
from urllib.parse import urlparse

import requests
import torch
from flask import Flask, request, jsonify
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# ==========================================
# Initialize Flask App
# ==========================================
app = Flask(__name__)

# ------------------------------------------
# Hugging Face cache location
# ------------------------------------------
# Keep downloaded model files under /app/cache; the directory is created up
# front so the loaders below always have somewhere to write.
# NOTE(review): transformers is already imported by the time HF_HOME is set,
# so the variable may be read too late to change the library's default cache
# path -- confirm. The explicit cache_dir passed to from_pretrained below is
# what actually pins the download location here.
_CACHE_DIR = "/app/cache"
os.environ["HF_HOME"] = _CACHE_DIR
os.makedirs(_CACHE_DIR, exist_ok=True)

# ------------------------------------------
# Model + processor (loaded once, at import time)
# ------------------------------------------
MODEL_NAME = "anuashok/ocr-captcha-v3"
print("🚀 Loading model...")
_pretrained_kwargs = {"cache_dir": _CACHE_DIR}
processor = TrOCRProcessor.from_pretrained(MODEL_NAME, **_pretrained_kwargs)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME, **_pretrained_kwargs)

# Run on CUDA when torch reports it as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
model.eval()  # inference only: switch the module out of training mode
print(f"✅ Model loaded successfully on {device}")
# ==========================================
# API Endpoint (Root)
# ==========================================
def _decode_image(raw_bytes):
    """Decode raw image bytes into an RGB PIL image.

    The source image is opened in a ``with`` block so its underlying file
    handle is released; ``convert`` returns an independent image object.
    Any exception raised by PIL while opening/decoding propagates.
    """
    with Image.open(io.BytesIO(raw_bytes)) as source_image:
        return source_image.convert("RGB")


def _fetch_image(image_url):
    """Download ``image_url`` and decode the response body as an RGB image.

    Raises:
        ValueError: if the URL scheme is not http or https.
        requests.RequestException: on network failure, timeout (10 s) or a
            non-2xx HTTP status.
        Exception: whatever PIL raises when the body is not a valid image.
    """
    # NOTE(security): the URL comes straight from the query string, so this
    # is a server-side request on behalf of an untrusted caller (SSRF).
    # Only the scheme is checked here; if this service can reach internal
    # hosts, add an allow-list or private-address filtering.
    scheme = urlparse(image_url).scheme.lower()
    if scheme not in ("http", "https"):
        raise ValueError(f"unsupported URL scheme: {scheme!r}")

    response = requests.get(image_url, timeout=10)
    # Without this, a 404/500 error page would be handed to PIL and reported
    # as a confusing "cannot identify image file" error.
    response.raise_for_status()
    return _decode_image(response.content)


def _recognize(image):
    """Run the OCR model on a PIL image and return the decoded text."""
    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
    with torch.no_grad():  # inference only; no gradients needed
        generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


@app.route("/", methods=["GET", "POST"])
def root():
    """
    OCR endpoint.

    POST /              -> multipart upload with the image in field "file"
    GET /?url=image_url -> image is downloaded from ``image_url``
    GET / (no url)      -> informational message

    Returns:
        JSON ``{"text": ...}`` on success;
        JSON ``{"error": ...}`` with status 400 for a missing/invalid upload
        or a URL that cannot be fetched/decoded;
        JSON ``{"error": ...}`` with status 500 for any other failure.
    """
    try:
        if request.method == "POST":
            # ---------------------------
            # Uploaded file
            # ---------------------------
            upload = request.files.get("file")
            if upload is None:
                return jsonify({"error": "No file provided"}), 400
            try:
                image = _decode_image(upload.read())
            except Exception as e:
                # A payload that is not a readable image is a client error,
                # not a server fault.
                return jsonify({"error": f"Invalid image file: {e}"}), 400
        else:
            # ---------------------------
            # GET with ?url= (also HEAD, which Flask routes to GET views;
            # previously that path left `image` unbound and produced a 500)
            # ---------------------------
            image_url = request.args.get("url")
            if not image_url:
                return jsonify({
                    "message": "OCR Captcha API running. Use POST (file) or GET ?url=image_url."
                })
            try:
                image = _fetch_image(image_url)
            except Exception as e:
                return jsonify({"error": f"Failed to fetch image from URL: {str(e)}"}), 400

        # ---------------------------
        # Run the model
        # ---------------------------
        return jsonify({"text": _recognize(image)})
    except Exception as e:
        # Top-level boundary: keep the JSON error contract, but record the
        # traceback so failures are diagnosable from the server logs.
        app.logger.exception("Unhandled error while serving OCR request")
        return jsonify({"error": str(e)}), 500
# ==========================================
# Run Server
# ==========================================
if __name__ == "__main__":
    # Listen on every interface (0.0.0.0), port 7860, so the Hugging Face
    # Space can reach the server.
    bind_host = "0.0.0.0"
    bind_port = 7860
    app.run(host=bind_host, port=bind_port)