topdu committed on
Commit
2bea00a
·
verified ·
1 Parent(s): 1f46178

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -198
app.py CHANGED
@@ -1,198 +1,2 @@
1
- # @Author: OpenOCR
2
- # @Contact: 784990967@qq.com
3
- import os
4
- import gradio as gr # gradio==4.20.0
5
-
6
- os.environ['FLAGS_allocator_strategy'] = 'auto_growth'
7
- import cv2
8
- import numpy as np
9
- import json
10
- import time
11
- from PIL import Image
12
- from tools.infer_e2e import OpenOCR, check_and_download_font, draw_ocr_box_txt
13
-
14
- def initialize_ocr(model_type, drop_score):
15
- return OpenOCR(mode=model_type, drop_score=drop_score)
16
-
17
- # Default model type
18
- model_type = 'mobile'
19
- drop_score = 0.4
20
- text_sys = initialize_ocr(model_type, drop_score)
21
-
22
- # warm up 5 times
23
- if True:
24
- img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
25
- for i in range(5):
26
- res = text_sys(img_numpy=img)
27
-
28
- font_path = './simfang.ttf'
29
- font_path = check_and_download_font(font_path)
30
-
31
- def main(input_image,
32
- model_type_select,
33
- det_input_size_textbox=960,
34
- rec_drop_score=0.4,
35
- mask_thresh=0.3,
36
- box_thresh=0.6,
37
- unclip_ratio=1.5,
38
- det_score_mode='slow'):
39
- global text_sys, model_type
40
-
41
- # Update OCR model if the model type changes
42
- if model_type_select != model_type:
43
- model_type = model_type_select
44
- text_sys = initialize_ocr(model_type, rec_drop_score)
45
-
46
- img = input_image[:, :, ::-1]
47
- starttime = time.time()
48
- results, time_dict, mask = text_sys(img_numpy=img,
49
- return_mask=True,
50
- det_input_size=int(det_input_size_textbox),
51
- thresh=mask_thresh,
52
- box_thresh=box_thresh,
53
- unclip_ratio=unclip_ratio,
54
- score_mode=det_score_mode)
55
- elapse = time.time() - starttime
56
- save_pred = json.dumps(results[0], ensure_ascii=False)
57
- image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
58
- boxes = [res['points'] for res in results[0]]
59
- txts = [res['transcription'] for res in results[0]]
60
- scores = [res['score'] for res in results[0]]
61
- draw_img = draw_ocr_box_txt(
62
- image,
63
- boxes,
64
- txts,
65
- scores,
66
- drop_score=rec_drop_score,
67
- font_path=font_path,
68
- )
69
- mask = mask[0, 0, :, :] > mask_thresh
70
- return save_pred, elapse, draw_img, mask.astype('uint8') * 255
71
-
72
- def get_all_file_names_including_subdirs(dir_path):
73
- all_file_names = []
74
-
75
- for root, dirs, files in os.walk(dir_path):
76
- for file_name in files:
77
- all_file_names.append(os.path.join(root, file_name))
78
-
79
- file_names_only = [os.path.basename(file) for file in all_file_names]
80
- return file_names_only
81
-
82
- def list_image_paths(directory):
83
- image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')
84
-
85
- image_paths = []
86
-
87
- for root, dirs, files in os.walk(directory):
88
- for file in files:
89
- if file.lower().endswith(image_extensions):
90
- relative_path = os.path.relpath(os.path.join(root, file),
91
- directory)
92
- full_path = os.path.join(directory, relative_path)
93
- image_paths.append(full_path)
94
- image_paths = sorted(image_paths)
95
- return image_paths
96
-
97
- def find_file_in_current_dir_and_subdirs(file_name):
98
- for root, dirs, files in os.walk('.'):
99
- if file_name in files:
100
- relative_path = os.path.join(root, file_name)
101
- return relative_path
102
-
103
- e2e_img_example = list_image_paths('./OCR_e2e_img')
104
-
105
- if __name__ == '__main__':
106
- css = '.image-container img { width: 100%; max-height: 320px;}'
107
-
108
- with gr.Blocks(css=css) as demo:
109
- gr.HTML("""
110
- <h1 style='text-align: center;'><a href="https://github.com/Topdu/OpenOCR">OpenOCR</a></h1>
111
- <p style='text-align: center;'>A general OCR system with accuracy and efficiency (created by <a href="https://github.com/Topdu/OpenOCR">OCR Team</a>, <a href="https://fvl.fudan.edu.cn">FVL Lab</a>) <a href="https://github.com/Topdu/OpenOCR/tree/main?tab=readme-ov-file#quick-start">[Local Deployment]</a></p>""")
112
- with gr.Row():
113
- with gr.Column(scale=1):
114
- input_image = gr.Image(label='Input image',
115
- elem_classes=['image-container'])
116
-
117
- examples = gr.Examples(examples=e2e_img_example,
118
- inputs=input_image,
119
- label='Examples')
120
- downstream = gr.Button('Run')
121
-
122
- # 添加参数调节组件
123
- with gr.Column():
124
- with gr.Row():
125
- det_input_size_textbox = gr.Number(
126
- label='Detection Input Size',
127
- value=960,
128
- info='The longest side of the detection network input size, defaults to 960.')
129
- det_score_mode_dropdown = gr.Dropdown(
130
- ["slow", "fast"],
131
- value="slow",
132
- label="Det Score Mode",
133
- info="The confidence calculation mode of the text box, the default is slow. Slow mode is slower but more accurate. Fast mode is faster but less accurate."
134
- )
135
- with gr.Row():
136
- rec_drop_score_slider = gr.Slider(
137
- 0.0,
138
- 1.0,
139
- value=0.01,
140
- step=0.01,
141
- label="Recognition Drop Score",
142
- info="Recognition confidence threshold, default value is 0.01. Recognition results and corresponding text boxes lower than this threshold are discarded.")
143
- mask_thresh_slider = gr.Slider(
144
- 0.0,
145
- 1.0,
146
- value=0.3,
147
- step=0.01,
148
- label="Mask Threshold",
149
- info="Mask threshold for binarizing masks, defaults to 0.3, turn it down if there is text truncation.")
150
- with gr.Row():
151
- box_thresh_slider = gr.Slider(
152
- 0.0,
153
- 1.0,
154
- value=0.6,
155
- step=0.01,
156
- label="Box Threshold",
157
- info="Text Box Confidence Threshold, default value is 0.6, turn it down if there is text being missed.")
158
- unclip_ratio_slider = gr.Slider(
159
- 1.5,
160
- 2.0,
161
- value=1.5,
162
- step=0.05,
163
- label="Unclip Ratio",
164
- info="Expansion factor for parsing text boxes, default value is 1.5. The larger the value, the larger the text box.")
165
-
166
- # 模型选择组件
167
- model_type_dropdown = gr.Dropdown(
168
- ['mobile', 'server'],
169
- value='mobile',
170
- label='Model Type',
171
- info='Select the type of OCR model: high efficiency model mobile, high accuracy model server.'
172
- )
173
-
174
- with gr.Column(scale=1):
175
- img_mask = gr.Image(label='mask',
176
- interactive=False,
177
- elem_classes=['image-container'])
178
- img_output = gr.Image(label=' ',
179
- interactive=False,
180
- elem_classes=['image-container'])
181
-
182
- output = gr.Textbox(label='Result')
183
- confidence = gr.Textbox(label='Latency')
184
-
185
- downstream.click(fn=main,
186
- inputs=[
187
- input_image, model_type_dropdown, det_input_size_textbox, rec_drop_score_slider,
188
- mask_thresh_slider, box_thresh_slider,
189
- unclip_ratio_slider, det_score_mode_dropdown
190
- ],
191
- outputs=[
192
- output,
193
- confidence,
194
- img_output,
195
- img_mask,
196
- ])
197
-
198
- demo.launch(share=True)
 
1
+ from openocr.demo_gradio import launch_demo
2
+ launch_demo()