|
|
import atexit |
|
|
import functools |
|
|
from queue import Queue |
|
|
from threading import Event, Thread |
|
|
|
|
|
from paddleocr import PaddleOCR |
|
|
from PIL import Image |
|
|
import gradio as gr |
|
|
|
|
|
# Per-language settings, keyed by the PaddleOCR language code.
# "num_workers" is the number of worker threads (one model each) to start
# for that language.
LANG_CONFIG = {
    "ch": {"num_workers": 2},
    "en": {"num_workers": 2},
    "fr": {"num_workers": 1},
    "german": {"num_workers": 1},
    "korean": {"num_workers": 1},
    "japan": {"num_workers": 1},
}

# Passed to the Gradio interface as its `concurrency_limit`.
CONCURRENCY_LIMIT = 8
|
|
|
|
|
|
|
|
class PaddleOCRModelManager(object):
    """Pool of worker threads that each own one model and share a request queue.

    Every worker builds its own model with ``model_factory`` and then serves
    requests pulled from a common queue, so a single model instance is only
    ever used by the thread that created it.
    """

    def __init__(self,
                 num_workers,
                 model_factory):
        """Start ``num_workers`` workers, one at a time.

        Args:
            num_workers: Number of worker threads (and models) to create.
            model_factory: Zero-argument callable returning an object with an
                ``ocr(*args, **kwargs)`` method.

        Raises:
            Exception: Whatever ``model_factory`` raised inside a worker.
                Workers that were already started are shut down first.
        """
        super().__init__()
        self._model_factory = model_factory
        self._queue = Queue()
        self._workers = []
        self._model_initialized_event = Event()
        # Set by a worker whose model construction failed; read by __init__.
        self._init_error = None
        for _ in range(num_workers):
            # Daemon threads: the interpreter joins non-daemon threads
            # *before* running atexit callbacks, so a non-daemon worker
            # blocked on the queue would keep an atexit-registered close()
            # from ever running and hang the process at exit.
            worker = Thread(target=self._worker, daemon=True)
            worker.start()
            # Models are built strictly one after another: wait for this
            # worker to report before starting the next one.
            self._model_initialized_event.wait()
            self._model_initialized_event.clear()
            if self._init_error is not None:
                error = self._init_error
                # Stop the workers that did start, then surface the failure
                # instead of blocking forever on the event.
                self.close()
                raise error
            self._workers.append(worker)

    def infer(self, *args, **kwargs):
        """Run ``model.ocr(*args, **kwargs)`` on a worker and return its result.

        Blocks until a worker has processed the request.

        Raises:
            Exception: The exception raised by ``model.ocr`` in the worker,
                re-raised in the calling thread.
        """
        # One-slot queue used as a per-request reply channel.
        result_queue = Queue(maxsize=1)
        self._queue.put((args, kwargs, result_queue))
        success, payload = result_queue.get()
        if success:
            return payload
        raise payload

    def close(self):
        """Ask every worker to stop and wait for them to finish.

        Requests already queued are processed before the workers exit.
        Calling this more than once is harmless.
        """
        # Detach the list first so a repeated call does not enqueue stray
        # sentinels for workers that are already gone.
        workers, self._workers = self._workers, []
        for _ in workers:
            # One ``None`` sentinel per worker.
            self._queue.put(None)
        for worker in workers:
            worker.join()

    def _worker(self):
        """Thread body: build a model, then serve requests until a sentinel."""
        try:
            model = self._model_factory()
        except Exception as e:
            # Hand the failure to __init__, which is waiting on the event.
            self._init_error = e
            return
        finally:
            # Always wake __init__, whether construction succeeded or not.
            self._model_initialized_event.set()
        while True:
            item = self._queue.get()
            if item is None:
                break
            args, kwargs, result_queue = item
            try:
                result = model.ocr(*args, **kwargs)
                result_queue.put((True, result))
            except Exception as e:
                # Ship the exception back so infer() can re-raise it.
                result_queue.put((False, e))
            finally:
                self._queue.task_done()
|
|
|
|
|
|
|
|
def create_model(lang):
    """Build a PaddleOCR instance for ``lang``.

    The model is created with the angle classifier enabled and GPU use
    turned off.
    """
    ocr_options = {
        "lang": lang,
        "use_angle_cls": True,
        "use_gpu": False,
    }
    return PaddleOCR(**ocr_options)
|
|
|
|
|
|
|
|
# One manager (worker pool) per configured language, built at import time.
model_managers = {}
for lang, config in LANG_CONFIG.items():
    # partial() pins the language so each worker can call the factory
    # with no arguments.
    model_manager = PaddleOCRModelManager(
        num_workers=config["num_workers"],
        model_factory=functools.partial(create_model, lang=lang),
    )
    model_managers[lang] = model_manager
|
|
|
|
|
|
|
|
# atexit.register returns the function it is given, so using it as a
# decorator registers the hook and leaves the name bound as before.
@atexit.register
def close_model_managers():
    """Close every manager in ``model_managers`` at interpreter exit."""
    for pool in model_managers.values():
        pool.close()
|
|
|
|
|
|
|
|
def inference(img, lang):
    """Run OCR on one image and return the recognized lines.

    Args:
        img: Image input forwarded unchanged to the model's ``ocr`` call
            (the UI supplies a file path).
        lang: Key into ``model_managers`` selecting the language-specific
            worker pool.

    Returns:
        A list of ``{"text": ..., "box": ...}`` dicts, one per recognized
        line; an empty list when no text was found.

    Raises:
        gr.Error: If no image was supplied.
        KeyError: If ``lang`` has no configured manager.
    """
    if img is None:
        # Submitting without an image would otherwise fail inside the model
        # with an opaque error; tell the user what to do instead.
        raise gr.Error("Please upload an image first.")

    ocr = model_managers[lang]
    # Results come back as one entry per input image; only one was sent.
    result = ocr.infer(img, cls=True)[0]

    # PaddleOCR reports an image with no detected text as None (older
    # releases: an empty list); iterating None would raise TypeError.
    if not result:
        return []

    # Each line is indexed as line[0] -> box, line[1][0] -> text.
    return [{"text": line[1][0], "box": line[0]} for line in result]
|
|
|
|
|
|
|
|
# Text shown at the top of the demo page.
title = 'PaddleOCR'
description = '''
- Gradio demo for PaddleOCR. PaddleOCR demo supports Chinese, English, French, German, Korean and Japanese.
- To use it, simply upload your image and choose a language from the dropdown menu, or click one of the examples to load them. Read more at the links below.
- [Docs](https://paddlepaddle.github.io/PaddleOCR/), [Github Repository](https://github.com/PaddlePaddle/PaddleOCR).
'''

# Clickable examples: [image file, language code].
examples = [
    ['en_example.jpg', 'en'],
    ['cn_example.jpg', 'ch'],
    ['jp_example.jpg', 'japan'],
]

css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

# Wire `inference` to an image + language input and a JSON output, then
# start serving.
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Image(type='filepath', label='Input'),
        gr.Dropdown(choices=list(LANG_CONFIG.keys()), value='en', label='Language'),
    ],
    outputs=gr.JSON(label='OCR Results'),
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
    css=css,
    concurrency_limit=CONCURRENCY_LIMIT,
)
demo.launch(debug=False)
|
|
|