Chung-Fan committed on
Commit
65b330c
·
1 Parent(s): f62fc24

Add OCR app

Browse files
Files changed (2) hide show
  1. app.py +45 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from src.model import CRNN
3
+ from PIL import Image as PILImage
4
+ import torchvision.transforms as transforms
5
+ import gradio as gr
6
+
7
# Build the CRNN recogniser and restore its trained weights.
# Input geometry is 32x100 with a single channel; 37 output classes and
# 256 hidden RNN units (presumably matching how the checkpoint was trained).
model = CRNN(
    img_height=32,
    img_width=100,
    img_channel=1,
    num_class=37,
    rnn_hidden=256,
)
# map_location="cpu" lets the checkpoint load on a machine without a GPU.
# NOTE(review): torch.load unpickles the file -- only ship a trusted checkpoint.
model.load_state_dict(torch.load("crnn_gpu.pt", map_location="cpu"))
# Switch to evaluation mode; this app only runs inference.
model.eval()
11
+
12
# Characters the recogniser can emit. Class index 0 is treated as the CTC
# blank by ctc_decode, so class index i (i >= 1) maps to alphabet[i - 1].
alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'


def ctc_decode(preds):
    """Greedily decode CTC model output into a string.

    The best class per position is taken with ``argmax`` over dimension 2,
    consecutive repeats are collapsed, and blanks (class index 0) are dropped.

    Args:
        preds: 3-D tensor of class scores. Presumably laid out as
            (time, batch, class) -- TODO confirm against ``CRNN.forward``.
            The first two dimensions are swapped and then flattened, so with
            more than one sample all samples are decoded into a single
            concatenated string.

    Returns:
        The decoded text; an empty string if every position is blank.
    """
    # Convert to plain Python ints once, instead of iterating over 0-dim
    # tensors and indexing the alphabet with a tensor on every step.
    indices = preds.argmax(2).transpose(1, 0).contiguous().view(-1).tolist()

    decoded = []
    prev_idx = -1  # sentinel: no previous class yet
    for idx in indices:
        # Emit a character only when the class changes and is not the blank.
        if idx != prev_idx and idx != 0:
            decoded.append(alphabet[idx - 1])
        prev_idx = idx
    return ''.join(decoded)
23
+
24
# Preprocessing applied to every input image before it reaches the model:
# grayscale -> resize to (32, 100) -> tensor -> normalize with mean 0.5, std 0.5.
transform = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.Resize((32, 100)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)
30
+
31
def ocr(image: "PILImage.Image"):
    """Recognise the text in an image and return it as a string.

    Args:
        image: PIL image supplied by the Gradio input component, or ``None``
            when the user submits without providing an image.

    Returns:
        The decoded text, or an empty string when no image was provided.
    """
    # Gradio passes None when Submit is clicked with an empty image input;
    # running the transform on None would raise, so answer with no text.
    if image is None:
        return ""

    # Preprocess and add a leading batch dimension of size 1.
    batch = transform(image).unsqueeze(0)
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        preds = model(batch)
    return ctc_decode(preds)
37
+
38
# Gradio UI: one image input (delivered to ocr as a PIL image) and one text
# output showing the recognised string.
iface = gr.Interface(
    title="CRNN OCR",
    fn=ocr,
    inputs=gr.Image(
        label="Upload and crop image",
        type="pil",
        interactive=True,
    ),
    outputs="text",
)

# Start the web server for the interface.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ gradio
4
+ Pillow