"""Gradio UI/API that recognises text captchas with an ONNX model.

At import time the module loads ``captcha.onnx``, builds the image
preprocessing pipeline, wires ``get_text`` into a ``gr.Interface`` and
launches the Gradio server.
"""

import torch
import onnx
import onnxruntime as rt
from torchvision import transforms as T
from PIL import Image
import gradio as gr

from tokenizer_base import Tokenizer

model_file = "captcha.onnx"
img_size = (32, 128)
# NOTE(review): this is a *raw* string, so the backslashes in `\"` and `\\`
# are kept literally. The charset therefore holds three backslash characters
# (96 characters in total) rather than the 94 printable ASCII symbols.
# Left untouched on purpose: confirm which charset the model was trained with
# before changing it, because the tokenizer is built from this exact string.
charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
tokenizer_base = Tokenizer(charset)


def get_transform(img_size):
    """Build the preprocessing pipeline applied to each input image.

    Args:
        img_size: Target size handed to ``T.Resize``.

    Returns:
        A ``T.Compose`` that resizes with bicubic interpolation, converts the
        image to a tensor and normalises it with mean 0.5 and std 0.5.
    """
    return T.Compose([
        T.Resize(img_size, T.InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(0.5, 0.5),
    ])


def to_numpy(tensor):
    """Return *tensor* as a NumPy array located on the CPU."""
    # detach() is valid whether or not the tensor requires grad, so the
    # requires_grad branch is unnecessary.
    return tensor.detach().cpu().numpy()


def initialize_model(model_file):
    """Validate the ONNX model and create everything needed for inference.

    Args:
        model_file: Path to the ONNX model file.

    Returns:
        A ``(transform, ort_session)`` tuple: the preprocessing pipeline built
        from the module-level ``img_size`` and an ONNX Runtime inference
        session for ``model_file``.
    """
    transform = get_transform(img_size)
    # Run the ONNX checker first so a malformed model is reported before the
    # runtime session is created.
    onnx.checker.check_model(onnx.load(model_file))
    ort_session = rt.InferenceSession(model_file)
    return transform, ort_session


# Core OCR function
def get_text(img_org):
    """Recognise the text shown in a captcha image.

    Uses the module-level ``transform``, ``ort_session`` and
    ``tokenizer_base`` objects.

    Args:
        img_org: PIL image to read, or ``None`` when no image was supplied.

    Returns:
        The first decoded prediction, or an empty string when ``img_org`` is
        ``None``.
    """
    # Gradio passes None when the request carries no image; without this
    # guard the .convert() call below raises AttributeError.
    if img_org is None:
        return ""

    # Force three channels and add a leading batch dimension.
    x = transform(img_org.convert("RGB")).unsqueeze(0)
    input_name = ort_session.get_inputs()[0].name
    logits = ort_session.run(None, {input_name: to_numpy(x)})[0]
    probs = torch.tensor(logits).softmax(-1)
    preds, _ = tokenizer_base.decode(probs)
    return preds[0]


# Load model
transform, ort_session = initialize_model(model_file=model_file)

# Use gr.Interface (not Blocks) to support external API calls.
iface = gr.Interface(
    fn=get_text,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Text Captcha Solver",
    description="API & UI nhận diện Captcha sử dụng mô hình ONNX",
)

# Queueing is REQUIRED so the app can be called from WinForms.
# launch(enable_queue=True) is deprecated in Gradio 3.x and rejected with a
# TypeError by Gradio 4.x; queue() is the supported way to enable the queue.
iface.queue()
iface.launch()