File size: 3,712 Bytes
6d6af66 8b775e5 6d6af66 db7a2e8 8b775e5 4589caf ebb9438 8b775e5 db7a2e8 8b775e5 6d6af66 8b775e5 6d6af66 db7a2e8 6d6af66 db7a2e8 6d6af66 db7a2e8 8b775e5 6d6af66 8b775e5 6d6af66 8b775e5 6d6af66 8b775e5 6d6af66 8b775e5 fa63dda 7a086ec 6d6af66 8b775e5 64b3bfb 389b598 ebb9438 daf8121 4589caf daf8121 e2feaed 4589caf ebb9438 daf8121 4589caf daf8121 4589caf ebb9438 e2feaed e726d75 8b775e5 4589caf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import atexit
import functools
from queue import Queue
from threading import Event, Thread
from paddleocr import PaddleOCR
from PIL import Image
import gradio as gr
# Per-language settings, keyed by PaddleOCR language code. "num_workers" is
# the number of worker threads started for that language's model manager.
# Insertion order matters: it is the order the codes appear in the UI dropdown.
LANG_CONFIG = {
    code: {"num_workers": worker_count}
    for code, worker_count in (
        ("ch", 2),
        ("en", 2),
        ("fr", 1),
        ("german", 1),
        ("korean", 1),
        ("japan", 1),
    )
}

# Passed to gr.Interface as ``concurrency_limit``.
CONCURRENCY_LIMIT = 8
class PaddleOCRModelManager(object):
    """Pool of worker threads, each owning one model, fed by a shared queue.

    Every worker builds its own model with ``model_factory`` and then serves
    requests taken from a common queue, so up to ``num_workers`` inferences
    can be in flight at once while each model instance is only ever used by
    the thread that created it.
    """

    def __init__(self,
                 num_workers,
                 model_factory):
        """Start ``num_workers`` workers, building their models one at a time.

        Args:
            num_workers: Number of worker threads (and models) to create.
            model_factory: Zero-argument callable returning an object that
                exposes ``ocr(*args, **kwargs)``.

        Raises:
            BaseException: Whatever ``model_factory`` raised in a worker.
                Workers that were already running are stopped first.
        """
        super().__init__()
        self._model_factory = model_factory
        self._queue = Queue()
        self._workers = []
        self._model_initialized_event = Event()
        # Set by a worker whose model_factory call failed; read by __init__
        # only after the event has been signalled.
        self._init_error = None
        for _ in range(num_workers):
            # Daemon threads: the interpreter joins non-daemon threads before
            # it runs atexit handlers, so a non-daemon worker blocked on the
            # queue would keep the process from ever exiting (and from ever
            # reaching an atexit-registered close()).
            worker = Thread(target=self._worker, daemon=True)
            worker.start()
            # Wait for this worker's model before starting the next one, so
            # models are constructed strictly one after another.
            # NOTE(review): presumably model construction is not safe to run
            # concurrently - confirm before parallelising.
            self._model_initialized_event.wait()
            self._model_initialized_event.clear()
            if self._init_error is not None:
                error = self._init_error
                # Stop the workers that did start, then surface the failure
                # here instead of blocking forever on the event.
                self.close()
                raise error
            self._workers.append(worker)

    def infer(self, *args, **kwargs):
        """Run ``model.ocr(*args, **kwargs)`` on a worker and return its result.

        Blocks until a worker has processed the request.

        Raises:
            Exception: Whatever the model's ``ocr`` call raised, re-raised in
                the calling thread.
        """
        # A private single-slot queue carries exactly one reply back to us.
        reply_queue = Queue(maxsize=1)
        self._queue.put((args, kwargs, reply_queue))
        success, payload = reply_queue.get()
        if success:
            return payload
        raise payload

    def close(self):
        """Ask every worker to stop and wait until all of them have exited."""
        # One ``None`` sentinel per worker; each worker consumes exactly one.
        for _ in self._workers:
            self._queue.put(None)
        for worker in self._workers:
            worker.join()

    def _worker(self):
        """Thread body: build a model, then serve queued requests until told to stop."""
        try:
            model = self._model_factory()
        except BaseException as e:
            # Not swallowed: handed to __init__, which re-raises it.
            self._init_error = e
            return
        finally:
            # Always wake __init__, whether or not the model could be built.
            self._model_initialized_event.set()
        while True:
            item = self._queue.get()
            if item is None:
                break
            args, kwargs, result_queue = item
            try:
                result = model.ocr(*args, **kwargs)
                result_queue.put((True, result))
            except Exception as e:
                # Forward the failure to the caller blocked in infer().
                result_queue.put((False, e))
            finally:
                self._queue.task_done()
def create_model(lang):
    """Build a PaddleOCR engine for ``lang``.

    The engine is created with ``use_angle_cls`` enabled and ``use_gpu``
    disabled.
    """
    engine_options = dict(lang=lang, use_angle_cls=True, use_gpu=False)
    return PaddleOCR(**engine_options)
# One model manager per configured language, created eagerly at import time.
# functools.partial binds the language code so each manager's factory builds
# models for its own language.
model_managers = {
    lang_code: PaddleOCRModelManager(
        settings["num_workers"],
        functools.partial(create_model, lang=lang_code),
    )
    for lang_code, settings in LANG_CONFIG.items()
}
@atexit.register
def close_model_managers():
    """Close every entry of ``model_managers``.

    Registered with ``atexit`` (the decorator returns the function unchanged),
    so it is invoked at normal interpreter termination.
    """
    for pool in model_managers.values():
        pool.close()
def inference(img, lang):
    """Run OCR on one image and return the recognized lines.

    Args:
        img: Image input forwarded unchanged to the model's ``ocr`` call
            (the UI's image component is configured to supply a file path).
        lang: Key into ``model_managers`` selecting which language's models
            handle the request.

    Returns:
        A list of ``{"text": ..., "box": ...}`` dicts, one per recognized
        line; an empty list when nothing was recognized.

    Raises:
        KeyError: If ``lang`` has no model manager.
    """
    ocr = model_managers[lang]
    pages = ocr.infer(img, cls=True)
    # Only a single image is submitted, so only the first page is relevant.
    # PaddleOCR reports a page with no detected text as ``None``; iterating
    # that would raise TypeError, so treat it (and an empty reply) as
    # "no lines found".
    result = pages[0] if pages else None
    if not result:
        return []
    # Each line is indexed as line[0] -> bounding box and line[1][0] -> text.
    return [{"text": line[1][0], "box": line[0]} for line in result]
# Static text, sample inputs and styling shown in the demo page.
title = "PaddleOCR"
description = '''
- Gradio demo for PaddleOCR. PaddleOCR demo supports Chinese, English, French, German, Korean and Japanese.
- To use it, simply upload your image and choose a language from the dropdown menu, or click one of the examples to load them. Read more at the links below.
- [Docs](https://paddlepaddle.github.io/PaddleOCR/), [Github Repository](https://github.com/PaddlePaddle/PaddleOCR).
'''
# Each example row is [image file, language code], matching the two inputs.
examples = [
    ["en_example.jpg", "en"],
    ["cn_example.jpg", "ch"],
    ["jp_example.jpg", "japan"],
]
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

# Inputs: the image (handed to ``inference`` as a file path) and the language
# code, offered in LANG_CONFIG order with English preselected.
input_components = [
    gr.Image(type="filepath", label="Input"),
    gr.Dropdown(choices=list(LANG_CONFIG), value="en", label="Language"),
]
# Output: the list returned by ``inference``, rendered as JSON.
results_view = gr.JSON(label="OCR Results")

demo = gr.Interface(
    fn=inference,
    inputs=input_components,
    outputs=results_view,
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
    css=css,
    concurrency_limit=CONCURRENCY_LIMIT,
)
demo.launch(debug=False)
|