Spaces:

Achilles13
/

Captcha_OCR

Running

Captcha_OCR / app.py

Steven1310

Initial Captcha OCR Space

24f7ea5 about 2 months ago

1.91 kB

	import torch
	import onnx
	import onnxruntime as rt
	from torchvision import transforms as T
	from pathlib import Path
	from PIL import Image
	from utils.tokenizer_base import Tokenizer
	import gradio as gr
	import io
	import base64
	import os

	# =====================
	# MODEL SETUP
	# =====================
	# Path of the ONNX model, looked up in the "models" folder next to this file.
	model_file = Path(__file__).parent.joinpath("models/model.onnx")
	if not model_file.exists():
	    # Abort the import with a clear message instead of failing later inside onnxruntime.
	    raise RuntimeError(f"Model not found at {model_file}")

	# Target size handed to T.Resize below; presumably (height, width) -- confirm
	# against the input shape the model expects.
	img_size = (32, 128)
	# NOTE(review): this is a raw string, so the backslashes in `\"`, `\\` and `\|`
	# are kept as literal characters and the backslash appears several times in the
	# vocabulary. Confirm this matches the charset the model was trained with.
	vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{\|}~"
	tokenizer = Tokenizer(vocab)

	# Preprocessing applied to every image: bicubic resize to img_size, conversion
	# to a tensor, then Normalize with mean 0.5 and std 0.5.
	_preprocessing_steps = [
	    T.Resize(img_size, interpolation=T.InterpolationMode.BICUBIC),
	    T.ToTensor(),
	    T.Normalize(mean=0.5, std=0.5),
	]
	transform = T.Compose(_preprocessing_steps)

	# One inference session, created at import time and shared by all requests.
	session = rt.InferenceSession(str(model_file))


	def to_numpy(t):
	    """Convert a torch tensor into a NumPy array.

	    The tensor is first detached from the autograd graph and moved to the
	    CPU, then converted with ``Tensor.numpy()``.

	    Args:
	        t: The tensor to convert.

	    Returns:
	        The NumPy array produced by ``Tensor.numpy()``.
	    """
	    cpu_tensor = t.detach().cpu()
	    return cpu_tensor.numpy()


	def infer(img: Image.Image):
	    """Run the ONNX model on one image and return its first decoded prediction.

	    The image is converted to RGB, passed through the module-level
	    ``transform`` and fed to ``session`` as a batch of one. The first model
	    output is soft-maxed over its last axis and handed to
	    ``tokenizer.decode``.

	    Args:
	        img: PIL image to run the model on.

	    Returns:
	        ``preds[0]`` from ``tokenizer.decode`` -- presumably the recognised
	        text for the image (TODO confirm against ``utils.tokenizer_base``).
	    """
	    # unsqueeze(0) adds the leading batch axis.
	    batch = transform(img.convert("RGB")).unsqueeze(0)

	    # The model is fed through its first declared input.
	    input_name = session.get_inputs()[0].name
	    outputs = session.run(None, {input_name: to_numpy(batch)})

	    probs = torch.tensor(outputs[0]).softmax(-1)
	    preds, _ = tokenizer.decode(probs)
	    return preds[0]


	# =====================
	# GRADIO FUNCTIONS
	# =====================
	def predict_image(img):
	    """Gradio handler for the "Image Upload" tab.

	    Args:
	        img: PIL image supplied by the ``gr.Image(type="pil")`` component,
	            or ``None`` when the user pressed Predict without an image.

	    Returns:
	        Whatever ``infer`` returns for the image.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    if img is None:
	        # Without this guard infer() dies with an opaque AttributeError on
	        # None.convert(...); gr.Error shows a readable message in the UI.
	        raise gr.Error("Please upload an image before pressing Predict.")
	    return infer(img)


	def predict_base64(b64: str):
	    """Gradio handler for the "Base64 API" tab.

	    Accepts either a bare base64 string or a data URL such as
	    ``data:image/png;base64,<payload>``.

	    Args:
	        b64: Base64-encoded image bytes, optionally with a data-URL header.

	    Returns:
	        Whatever ``infer`` returns for the decoded image.

	    Raises:
	        gr.Error: If the input is empty, is not valid base64, or cannot be
	            opened as an image.
	    """
	    payload = (b64 or "").strip()

	    # b64decode() silently drops characters outside the base64 alphabet, so a
	    # data-URL header would have its letters decoded as image data. Cut it off
	    # explicitly; the payload itself never contains a comma.
	    if payload.startswith("data:"):
	        _, _, payload = payload.partition(",")

	    if not payload:
	        raise gr.Error("No base64 image data provided.")

	    try:
	        img_bytes = base64.b64decode(payload)
	        img = Image.open(io.BytesIO(img_bytes))
	    except (ValueError, OSError) as err:
	        # binascii.Error is a ValueError; PIL's UnidentifiedImageError is an OSError.
	        raise gr.Error("Input is not a valid base64-encoded image.") from err

	    return infer(img)


	# =====================
	# GRADIO APP (REQUIRED)
	# =====================
	# Two-tab UI: one tab takes an uploaded image, the other a base64 string.
	# Both tabs write the prediction into their own textbox.
	with gr.Blocks(title="Captcha OCR") as demo:
	    gr.Markdown("# Captcha OCR")
	    gr.Markdown("OCR for captcha images (letters & numbers)")

	    # Tab 1: the image is delivered to predict_image as a PIL image.
	    with gr.Tab("Image Upload"):
	        img = gr.Image(type="pil")
	        out = gr.Textbox()
	        upload_button = gr.Button("Predict")
	        upload_button.click(fn=predict_image, inputs=img, outputs=out)

	    # Tab 2: the textbox content is delivered to predict_base64 as a string.
	    with gr.Tab("Base64 API"):
	        b64 = gr.Textbox(label="Base64 Image")
	        out2 = gr.Textbox()
	        base64_button = gr.Button("Predict")
	        base64_button.click(fn=predict_base64, inputs=b64, outputs=out2)

	# Enable the request queue, then start the server.
	demo.queue()
	demo.launch()