# captchabreaker / app.py
# golyew's picture
# Update app.py
# 23ea8c2 verified
import onnx
import onnxruntime as rt
from PIL import Image
import numpy as np
from tokenizer_base import Tokenizer
import gradio as gr
# Model parameters
# Path of the ONNX model file passed to initialize_model().
MODEL_FILE = "captcha.onnx"
# Target size forwarded to preprocess_image() as img_size.
# NOTE(review): presumably (height, width) of the model input — confirm
# against the ONNX model's input shape.
IMG_SIZE = (32, 128)
# Characters the tokenizer is built from.
# NOTE(review): this is a raw string, so the backslashes written before the
# double quote and inside the square brackets are kept literally; the charset
# therefore contains three backslash characters. Confirm this matches the
# charset the model was trained with.
CHARSET = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
# Tokenizer instance used by get_text() to decode the model output.
tokenizer_base = Tokenizer(CHARSET)
# Robust preprocessing
def preprocess_image(img, img_size):
    """Convert a PIL image into a normalized NCHW float32 batch of one.

    Args:
        img: PIL image to convert.
        img_size: Target size as ``(height, width)``.

    Returns:
        ``numpy.ndarray`` of dtype float32 and shape
        ``(1, 3, height, width)`` with values scaled to ``[-1, 1]``.
    """
    height, width = img_size
    img = img.convert("RGB")
    # PIL's resize() takes (width, height), the reverse of the
    # (height, width) order of img_size and of the array layout below.
    img = img.resize((width, height), Image.BICUBIC)
    arr = np.array(img).astype(np.float32) / 255.0  # HWC in [0, 1]
    arr = np.transpose(arr, (2, 0, 1))  # HWC -> CHW
    arr = (arr - 0.5) / 0.5  # [0, 1] -> [-1, 1]
    return np.expand_dims(arr, axis=0)  # add the batch dimension
# NumPy softmax
def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating so that
    large inputs do not overflow; the result is unchanged by that shift.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    totals = np.sum(exps, axis=axis, keepdims=True)
    return exps / totals
# Model initialization
def initialize_model(model_file):
    """Validate the ONNX model at ``model_file`` and open a runtime session.

    Args:
        model_file: Path of the ONNX model file.

    Returns:
        An ``onnxruntime.InferenceSession`` created from ``model_file``.

    Any exception raised while loading, checking, or creating the session
    propagates to the caller.
    """
    # Run the ONNX checker on the loaded model before creating the session.
    onnx.checker.check_model(onnx.load(model_file))
    return rt.InferenceSession(model_file)
# Text recognition
def get_text(img_org):
    """Run the captcha model on one image and return the decoded text.

    Args:
        img_org: PIL image supplied by the Gradio image input, or ``None``
            when no image was provided.

    Returns:
        The decoded string for the image, or an empty string when
        ``img_org`` is ``None``.
    """
    # Submitting the form without an image passes None; return an empty
    # result instead of failing inside preprocessing.
    if img_org is None:
        return ""

    batch = preprocess_image(img_org, IMG_SIZE)
    input_name = ort_session.get_inputs()[0].name
    # run(None, ...) returns every model output; only the first is used.
    logits = ort_session.run(None, {input_name: batch})[0]
    probs = softmax(logits, axis=-1)
    preds, _ = tokenizer_base.decode(probs)
    # The batch holds a single image, so take its only prediction.
    return preds[0]
# Load the model once at import time; get_text() reads this module-level
# session on every call.
ort_session = initialize_model(MODEL_FILE)
# Gradio interface
demo = gr.Interface(
    fn=get_text,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title="Text Captcha Reader",
    description="Распознавание текста на изображениях капчи.",
    # Each example is a one-element row: the file name for the image input.
    examples=[
        [file_name]
        for file_name in (
            "8000.png",
            "11JW29.png",
            "2a8486.jpg",
            "2nbcx.png",
            "000679.png",
            "000HU.png",
            "00Uga.png.jpg",
            "00bAQwhAZU.jpg",
            "00h57kYf.jpg",
            "0EoHdtVb.png",
            "0JS21.png",
            "0p98z.png",
            "10010.png",
        )
    ],
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()