vanh99 commited on
Commit
60e65f6
·
verified ·
1 Parent(s): e9a1406

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import onnx
3
+ import onnxruntime as rt
4
+ from torchvision import transforms as T
5
+ from pathlib import Path
6
+ from PIL import Image
7
+ from huggingface_hub import hf_hub_download
8
+
9
+ import os
10
+ import gradio as gr
11
+
12
+ from utils.tokenizer_base import Tokenizer
13
+
14
+
15
# Download the model from Hugging Face Hub
hf_token = os.environ.get("HF_TOKEN")  # token for private-repo access; may be None if unset
cwd = os.getcwd()  # NOTE(review): appears unused in the visible code — confirm before removing

# Download model.onnx from the private repo
# NOTE(review): `use_auth_token` is deprecated in recent huggingface_hub
# releases in favor of `token` — confirm against the pinned version.
model_path = hf_hub_download(
    repo_id="vanh99/GRU-model",
    filename="model.onnx",
    use_auth_token=hf_token
)

print("Model path:", model_path)

# Define the image size and vocabulary
img_size = (32, 128)  # assumed (height, width) expected by the ONNX model — TODO confirm
# Character set the tokenizer decodes into: digits, ASCII letters, punctuation.
vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"

# Initialize the tokenizer
tokenizer = Tokenizer(vocab)
34
+
35
+
36
def to_numpy(tensor):
    """Return *tensor* as a NumPy array on the CPU, detaching from autograd first if required."""
    if tensor.requires_grad:
        return tensor.detach().cpu().numpy()
    return tensor.cpu().numpy()
41
+
42
+
43
def get_transform(img_size):
    """Build the preprocessing pipeline: resize (bicubic) -> tensor -> normalize to [-1, 1]."""
    return T.Compose(
        [
            T.Resize(img_size, T.InterpolationMode.BICUBIC),
            T.ToTensor(),
            T.Normalize(0.5, 0.5),
        ]
    )
54
+
55
+
56
def load_model(model_file):
    """Validate the ONNX graph at *model_file* and open an inference session.

    Returns a ``(transform, session)`` pair: the image preprocessing
    pipeline and the onnxruntime ``InferenceSession``.
    """
    preprocess = get_transform(img_size)

    # Sanity-check the model graph before creating the runtime session.
    graph = onnx.load(model_file)
    onnx.checker.check_model(graph)

    session = rt.InferenceSession(model_file)
    return preprocess, session
65
+
66
+
67
# Load the model.
# Fix: the downloaded file path is bound to `model_path` above; the original
# `load_model(model_file=model_file)` referenced an undefined name and raised
# NameError as soon as the module was imported.
transform, s = load_model(model_file=model_path)
69
+
70
+
71
def process(img: Image.Image):
    """Run OCR on a PIL image and return the decoded text string."""
    # Preprocess into a single-image batch of shape (1, C, H, W).
    batch = transform(img.convert("RGB")).unsqueeze(0)

    # Run the ONNX session; first output holds the character logits.
    onnx_inputs = {s.get_inputs()[0].name: to_numpy(batch)}
    raw_logits = s.run(None, onnx_inputs)[0]

    # Softmax over the vocabulary axis, then greedy-decode via the tokenizer.
    token_probs = torch.tensor(raw_logits).softmax(-1)
    texts, _ = tokenizer.decode(token_probs)

    return texts[0]
81
+
82
+
83
# Gradio UI: a single image in, the predicted captcha text out.
iface = gr.Interface(
    fn=process,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs=gr.Textbox(label="Predicted Text"),
    title="OCR for CAPTCHA",
    description="Solve captchas from images including letters and numbers, success rate is about 80-90%.",
    examples=[
        f"examples/{name}"
        for name in ("1.png", "2.jpg", "3.jpg", "4.png", "5.png")
    ],
)

if __name__ == "__main__":
    iface.launch()