ocr / app.py
alfin-efendy's picture
Set debug mode to False in Gradio interface launch
3161f06
import atexit
import functools
from queue import Queue
from threading import Event, Thread
from paddleocr import PaddleOCR
import gradio as gr
# Supported language codes mapped to their per-language settings.
# ``num_workers`` is the worker count handed to that language's
# PaddleOCRModelManager. Insertion order matters: it is also the order of the
# choices shown in the language dropdown.
LANG_CONFIG = {
    code: {"num_workers": worker_count}
    for code, worker_count in (
        ("ch", 2),
        ("en", 2),
        ("fr", 1),
        ("german", 1),
        ("korean", 1),
        ("japan", 1),
    )
}

# Passed to the Gradio interface as ``concurrency_limit``.
# NOTE(review): 8 equals the sum of num_workers above -- presumably
# intentional; confirm before changing either value.
CONCURRENCY_LIMIT = 8
class PaddleOCRModelManager(object):
    """Serve inference requests from a fixed pool of worker threads.

    Every worker thread builds its own model by calling ``model_factory()``
    and then serves requests taken from one shared queue, so a model instance
    is only ever used by the thread that created it.
    """

    def __init__(self,
                 num_workers,
                 model_factory):
        """Start ``num_workers`` workers, initializing their models one by one.

        Args:
            num_workers: Number of worker threads (one model instance each).
            model_factory: Zero-argument callable returning an object that
                exposes ``predict(*args, **kwargs)``.

        Raises:
            Exception: Whatever ``model_factory`` raised inside a worker.
                Workers that were already running are shut down first.
        """
        super().__init__()
        self._model_factory = model_factory
        self._queue = Queue()
        self._workers = []
        self._model_initialized_event = Event()
        # Written by a worker whose factory call failed; read here only after
        # that worker has set the event.
        self._init_error = None
        for _ in range(num_workers):
            # Daemon threads: the interpreter joins non-daemon threads before
            # it runs atexit handlers, so non-daemon workers blocked on the
            # queue would keep the process alive and an atexit-registered
            # close() would never get the chance to stop them.
            worker = Thread(target=self._worker, daemon=True)
            worker.start()
            # Wait for this worker's model before starting the next one, so
            # models are created strictly one at a time.
            self._model_initialized_event.wait()
            self._model_initialized_event.clear()
            if self._init_error is not None:
                # Surface the failure to the caller instead of hanging, and
                # do not leave the earlier workers running.
                worker.join()
                self.close()
                raise self._init_error
            self._workers.append(worker)

    def infer(self, *args, **kwargs):
        """Run ``model.predict(*args, **kwargs)`` on a worker and return its result.

        Blocks until a worker has processed the request.

        Raises:
            Exception: The exception raised by ``predict`` in the worker
                thread, re-raised in the calling thread.
        """
        # XXX: Should I use a more lightweight data structure, say, a future?
        result_queue = Queue(maxsize=1)
        self._queue.put((args, kwargs, result_queue))
        success, payload = result_queue.get()
        if not success:
            raise payload
        return payload

    def close(self):
        """Stop all workers and wait for them to exit. Safe to call repeatedly."""
        # Detach the list first so a second call does not enqueue stale
        # sentinels for threads that have already exited.
        workers, self._workers = self._workers, []
        for _ in workers:
            # One ``None`` sentinel per worker; each worker consumes exactly one.
            self._queue.put(None)
        for worker in workers:
            worker.join()

    def _worker(self):
        """Thread body: build a model, then serve queued requests until told to stop."""
        try:
            model = self._model_factory()
        except Exception as e:
            self._init_error = e
            return
        finally:
            # Always wake __init__, on success and on failure alike; otherwise
            # a failing factory would leave the constructor waiting forever.
            self._model_initialized_event.set()
        while True:
            item = self._queue.get()
            if item is None:
                break
            args, kwargs, result_queue = item
            try:
                result = model.predict(*args, **kwargs)
                result_queue.put((True, result))
            except Exception as e:
                # Hand the failure back to the caller blocked in infer().
                result_queue.put((False, e))
            finally:
                self._queue.task_done()
def create_model(lang):
    """Return a new ``PaddleOCR`` instance for the language code ``lang``.

    The three ``use_*`` switches (document orientation classification,
    document unwarping, text-line orientation) are all passed as ``False``.
    """
    disabled_stages = {
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "use_textline_orientation": False,
    }
    return PaddleOCR(lang=lang, **disabled_stages)
# One PaddleOCRModelManager per supported language, keyed by language code.
# Built eagerly at import time, in LANG_CONFIG order.
model_managers = {}
for lang, config in LANG_CONFIG.items():
    # Bind the language now; the manager calls the factory with no arguments
    # from inside each of its worker threads.
    model_manager = PaddleOCRModelManager(
        config["num_workers"],
        functools.partial(create_model, lang=lang),
    )
    model_managers[lang] = model_manager
# ``atexit.register`` returns the function it is given, so the decorator form
# registers the handler and leaves the name bound to the function.
@atexit.register
def close_model_managers():
    """Close every manager in ``model_managers``; runs at interpreter exit."""
    for manager in model_managers.values():
        manager.close()
def inference(img, lang):
    """Run OCR on ``img`` with the ``lang`` model and flatten the result.

    Args:
        img: Image input forwarded to the model as ``input=`` (the Gradio
            image component in this file supplies a file path).
        lang: Key into ``model_managers`` selecting which model pool to use.

    Returns:
        A list of ``{"text", "bbox", "score"}`` dicts, one per recognized
        item, across all pages in the order the model returned them.
    """
    pages = model_managers[lang].infer(input=img)

    entries = []
    for page in pages:
        # Each page is dict-like; missing keys fall back to empty lists.
        texts = page.get('rec_texts', [])
        boxes = page.get('rec_boxes', [])
        scores = page.get('rec_scores', [])
        # Boxes may arrive as an array-like object (likely a numpy array);
        # anything exposing ``tolist`` is converted to plain Python lists.
        if hasattr(boxes, 'tolist'):
            boxes = boxes.tolist()
        entries.extend(
            {"text": text, "bbox": bbox, "score": score}
            for text, bbox, score in zip(texts, boxes, scores)
        )
    return entries
# Static text and assets for the demo page.
title = 'PaddleOCR 3.0'
description = '''
- Gradio demo for PaddleOCR. PaddleOCR demo supports Chinese, English, French, German, Korean and Japanese.
- To use it, simply upload your image and choose a language from the dropdown menu, or click one of the examples to load them. Read more at the links below.
'''
# Each example row is [image path, language code], matching the input order below.
examples = [
    ['en_example.jpg', 'en'],
    ['cn_example.jpg', 'ch'],
    ['jp_example.jpg', 'japan'],
]
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

# Build the interface first, then launch it as a separate step.
demo = gr.Interface(
    fn=inference,
    inputs=[
        # The image is handed to ``inference`` as a file path.
        gr.Image(type='filepath', label='Input'),
        gr.Dropdown(choices=list(LANG_CONFIG), value='en', label='language'),
    ],
    outputs=gr.JSON(label='OCR Results'),
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
    css=css,
    concurrency_limit=CONCURRENCY_LIMIT,
    allow_flagging="never",
)
demo.launch(debug=False)