File size: 2,164 Bytes
1603bbc
 
 
31b5da1
1603bbc
 
 
da8aa49
7528682
 
 
 
1603bbc
faca012
31b5da1
faca012
31b5da1
 
7528682
31b5da1
7528682
31b5da1
1603bbc
31b5da1
 
 
 
1603bbc
7528682
1603bbc
 
 
 
31b5da1
1603bbc
7528682
1603bbc
7528682
31b5da1
1603bbc
faca012
7528682
faca012
1603bbc
7528682
 
1603bbc
faca012
7528682
 
 
23ea8c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import onnx
import onnxruntime as rt
from PIL import Image
import numpy as np
from tokenizer_base import Tokenizer
import gradio as gr

# Model parameters
# Path of the ONNX model file handed to initialize_model().
MODEL_FILE = "captcha.onnx"
# Target image size forwarded to preprocess_image() by get_text().
# NOTE(review): presumably (height, width) for a wide captcha strip — TODO confirm.
IMG_SIZE = (32, 128)
# Character set handed to the tokenizer.
# NOTE(review): this is a raw string, so `\"` and `\\` keep their backslashes
# literally (the value contains three backslash characters) — confirm this
# matches the charset the model was trained with.
CHARSET = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
# Shared tokenizer instance; get_text() uses its decode() on the model output.
tokenizer_base = Tokenizer(CHARSET)

# Robust preprocessing
def preprocess_image(img, img_size):
    """Convert a PIL image into a normalized float32 batch of one.

    Args:
        img: PIL image (any mode; it is converted to RGB first).
        img_size: Target size as ``(height, width)``.

    Returns:
        A float32 ``numpy.ndarray`` of shape ``(1, 3, height, width)`` with
        values mapped from ``[0, 255]`` to ``[-1, 1]``.
    """
    height, width = img_size
    rgb = img.convert("RGB")
    # PIL's resize expects (width, height), the reverse of img_size; passing
    # img_size straight through would produce a transposed-size image.
    resized = rgb.resize((width, height), Image.BICUBIC)
    pixels = np.array(resized).astype(np.float32) / 255.0
    chw = np.transpose(pixels, (2, 0, 1))  # HWC -> CHW
    normalized = (chw - 0.5) / 0.5  # [0, 1] -> [-1, 1]
    return np.expand_dims(normalized, axis=0)  # add the batch dimension

# NumPy softmax
def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The per-slice maximum is subtracted before exponentiating so that large
    inputs do not overflow; this does not change the result.

    Args:
        x: Array-like of scores.
        axis: Axis along which the outputs sum to 1 (default: last axis).

    Returns:
        Array of the same shape as ``x`` holding the normalized exponentials.
    """
    peak = np.max(x, axis=axis, keepdims=True)
    exps = np.exp(x - peak)
    total = np.sum(exps, axis=axis, keepdims=True)
    return exps / total

# Model initialization
def initialize_model(model_file):
    """Validate an ONNX model file and open an inference session on it.

    Args:
        model_file: Path to the ``.onnx`` file.

    Returns:
        The ``onnxruntime.InferenceSession`` created from ``model_file``.
    """
    # Load and run the ONNX checker first so a malformed model is rejected
    # before a runtime session is created.
    onnx.checker.check_model(onnx.load(model_file))
    session = rt.InferenceSession(model_file)
    return session

# Text recognition
def get_text(img_org):
    """Run the captcha model on one image and return the decoded prediction.

    Args:
        img_org: PIL image to read, or ``None`` when no image was supplied.

    Returns:
        The first prediction produced by the tokenizer's ``decode`` for the
        image, or ``""`` when ``img_org`` is ``None``.
    """
    # The UI can submit without an image; return an empty result instead of
    # failing on ``None.convert`` inside preprocessing.
    if img_org is None:
        return ""

    batch = preprocess_image(img_org, IMG_SIZE)
    # Feed the batch to the model's first declared input.
    input_name = ort_session.get_inputs()[0].name
    logits = ort_session.run(None, {input_name: batch})[0]
    probs = softmax(logits, axis=-1)
    preds, _ = tokenizer_base.decode(probs)
    return preds[0]  # batch of one -> single prediction

# Load the model once; get_text() reads this module-level session on each call
ort_session = initialize_model(MODEL_FILE)

# Sample image files offered in the UI, one per example row
_EXAMPLE_FILES = (
    "8000.png",
    "11JW29.png",
    "2a8486.jpg",
    "2nbcx.png",
    "000679.png",
    "000HU.png",
    "00Uga.png.jpg",
    "00bAQwhAZU.jpg",
    "00h57kYf.jpg",
    "0EoHdtVb.png",
    "0JS21.png",
    "0p98z.png",
    "10010.png",
)

# Gradio interface: one PIL image in, recognized text out
demo = gr.Interface(
    fn=get_text,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title="Text Captcha Reader",
    description="Распознавание текста на изображениях капчи.",
    # Each example row is a one-element list holding the file name.
    examples=[[file_name] for file_name in _EXAMPLE_FILES],
)

if __name__ == "__main__":
    demo.launch()