# Captcha_OCR / app.py
# Steven1310
# Initial Captcha OCR Space
# 24f7ea5
import torch
import onnx
import onnxruntime as rt
from torchvision import transforms as T
from pathlib import Path
from PIL import Image
from utils.tokenizer_base import Tokenizer
import gradio as gr
import io
import base64
import os
# =====================
# MODEL SETUP
# =====================
# Location of the exported ONNX model, resolved relative to this file so the
# lookup does not depend on the current working directory.
model_file = Path(__file__).parent / "models/model.onnx"
if not model_file.exists():
    # Fail at import time with an explicit message instead of letting the
    # ONNX Runtime session constructor report the missing file.
    raise RuntimeError(f"Model not found at {model_file}")

# Target size handed to T.Resize below (torchvision takes it as (height, width)).
img_size = (32, 128)

# Character set handed to the tokenizer.
# NOTE(review): this is a raw string, so the backslashes in `\"` and `\\` are
# kept literally and the backslash character appears three times in the
# vocabulary. Presumably the tokenizer's index mapping depends on the exact
# string -- confirm against the charset the model was exported with before
# "cleaning" it up.
vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
tokenizer = Tokenizer(vocab)

# Preprocessing applied to every input image: bicubic resize to img_size,
# conversion to a tensor, then normalisation with mean 0.5 and std 0.5.
transform = T.Compose([
    T.Resize(img_size, T.InterpolationMode.BICUBIC),
    T.ToTensor(),
    T.Normalize(0.5, 0.5),
])

# Single inference session created once at import time and reused for every
# prediction.
session = rt.InferenceSession(str(model_file))
def to_numpy(t: torch.Tensor):
    """Convert a torch tensor to a NumPy array.

    The tensor is first detached from the autograd graph and moved to the
    CPU, so tensors that require grad or live on another device can be
    converted as well.

    Args:
        t: Tensor to convert.

    Returns:
        A ``numpy.ndarray`` with the tensor's data.
    """
    return t.detach().cpu().numpy()
def infer(img: Image.Image):
    """Run the OCR model on a single image and return the decoded prediction.

    Args:
        img: PIL image to recognise. It is converted to RGB before the
            module-level ``transform`` is applied.

    Returns:
        The first entry of the predictions returned by ``tokenizer.decode``
        (the batch fed to the model contains exactly one image).
    """
    # Preprocess and add a leading batch dimension of size 1.
    batch = transform(img.convert("RGB")).unsqueeze(0)

    # Feed the batch to the model's first declared input and keep only the
    # first output of the session.
    input_name = session.get_inputs()[0].name
    logits = session.run(None, {input_name: to_numpy(batch)})[0]

    # Softmax over the last axis before handing the result to the tokenizer.
    probs = torch.tensor(logits).softmax(-1)

    # decode() returns a pair; only its first element (the predictions) is used.
    preds, _ = tokenizer.decode(probs)
    return preds[0]
# =====================
# GRADIO FUNCTIONS
# =====================
def predict_image(img):
    """Handler for the "Image Upload" tab: run OCR on the supplied image.

    Args:
        img: Image object supplied by the UI, or ``None`` (e.g. when the
            button is pressed while the image input is empty).

    Returns:
        Whatever ``infer`` returns for the image.

    Raises:
        ValueError: If ``img`` is ``None``.
    """
    # Without this guard a missing image surfaces as an opaque AttributeError
    # from inside infer(); fail early with a message the user can act on.
    if img is None:
        raise ValueError("No image provided. Please upload an image first.")
    return infer(img)
def predict_base64(b64: str):
    """Handler for the "Base64 API" tab: decode a base64 image and run OCR.

    Accepts either bare base64 text or a data URL of the form
    ``data:<mime>;base64,<payload>``; surrounding whitespace is ignored.

    Args:
        b64: Base64-encoded image data.

    Returns:
        Whatever ``infer`` returns for the decoded image.

    Raises:
        ValueError: If the input is empty, decodes to no data, or is not
            valid base64 (``binascii.Error`` is a ``ValueError`` subclass).
    """
    # Trim text input; bytes-like input is handed to b64decode unchanged.
    payload = b64.strip() if isinstance(b64, str) else b64
    if not payload:
        raise ValueError("No base64 image data provided.")

    # Drop a data-URL header. If left in place, its letters would be decoded
    # as if they were part of the payload and corrupt the image bytes.
    if isinstance(payload, str) and payload[:5].lower() == "data:":
        payload = payload.partition(",")[2]

    img_bytes = base64.b64decode(payload)
    if not img_bytes:
        raise ValueError("Base64 input did not decode to any image data.")

    img = Image.open(io.BytesIO(img_bytes))
    return infer(img)
# =====================
# GRADIO APP (REQUIRED)
# =====================
# Two-tab UI: one tab takes an uploaded image, the other takes base64 text.
# Both write the prediction into a textbox.
with gr.Blocks(title="Captcha OCR") as demo:
    gr.Markdown("# Captcha OCR")
    gr.Markdown("OCR for captcha images (letters & numbers)")

    with gr.Tab("Image Upload"):
        # type="pil" makes the handler receive a PIL image, which infer() expects.
        img = gr.Image(type="pil")
        out = gr.Textbox()
        gr.Button("Predict").click(predict_image, img, out)

    with gr.Tab("Base64 API"):
        b64 = gr.Textbox(label="Base64 Image")
        out2 = gr.Textbox()
        gr.Button("Predict").click(predict_base64, b64, out2)

demo.queue()

# Start the server only when this file is executed as a script, so importing
# the module (e.g. to reuse infer) does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()