Update app.py
Browse files
app.py
CHANGED
|
@@ -1,30 +1,22 @@
|
|
| 1 |
import atexit
|
| 2 |
import functools
|
|
|
|
|
|
|
| 3 |
from queue import Queue
|
| 4 |
from threading import Event, Thread
|
| 5 |
|
| 6 |
-
from paddleocr import PaddleOCR
|
| 7 |
-
from PIL import Image
|
| 8 |
import gradio as gr
|
| 9 |
-
import numpy as np
|
| 10 |
-
import io
|
| 11 |
-
|
| 12 |
|
| 13 |
LANG_CONFIG = {
|
| 14 |
"ch": {"num_workers": 1},
|
| 15 |
"en": {"num_workers": 1},
|
| 16 |
-
# "fr": {"num_workers": 1},
|
| 17 |
-
# "german": {"num_workers": 1},
|
| 18 |
-
# "korean": {"num_workers": 1},
|
| 19 |
-
# "japan": {"num_workers": 1},
|
| 20 |
}
|
| 21 |
CONCURRENCY_LIMIT = 8
|
| 22 |
-
|
| 23 |
|
| 24 |
class PaddleOCRModelManager(object):
|
| 25 |
-
def __init__(self,
|
| 26 |
-
num_workers,
|
| 27 |
-
model_factory):
|
| 28 |
super().__init__()
|
| 29 |
self._model_factory = model_factory
|
| 30 |
self._queue = Queue()
|
|
@@ -38,7 +30,6 @@ class PaddleOCRModelManager(object):
|
|
| 38 |
self._workers.append(worker)
|
| 39 |
|
| 40 |
def infer(self, *args, **kwargs):
|
| 41 |
-
# XXX: Should I use a more lightweight data structure, say, a future?
|
| 42 |
result_queue = Queue(maxsize=1)
|
| 43 |
self._queue.put((args, kwargs, result_queue))
|
| 44 |
success, payload = result_queue.get()
|
|
@@ -69,67 +60,70 @@ class PaddleOCRModelManager(object):
|
|
| 69 |
finally:
|
| 70 |
self._queue.task_done()
|
| 71 |
|
| 72 |
-
|
| 73 |
def create_model(lang):
|
| 74 |
return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
|
| 75 |
|
| 76 |
-
|
| 77 |
model_managers = {}
|
| 78 |
for lang, config in LANG_CONFIG.items():
|
| 79 |
-
model_manager = PaddleOCRModelManager(
|
|
|
|
|
|
|
| 80 |
model_managers[lang] = model_manager
|
| 81 |
|
| 82 |
-
|
| 83 |
def close_model_managers():
|
| 84 |
for manager in model_managers.values():
|
| 85 |
manager.close()
|
| 86 |
|
| 87 |
-
|
| 88 |
-
# XXX: Not sure if gradio allows adding custom teardown logic
|
| 89 |
atexit.register(close_model_managers)
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
ocr = model_managers[lang]
|
| 110 |
-
|
| 111 |
-
|
| 112 |
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
title =
|
| 115 |
-
description =
|
| 116 |
-
- Gradio demo for PaddleOCR.
|
| 117 |
-
-
|
| 118 |
-
|
| 119 |
-
'''
|
| 120 |
|
| 121 |
examples = [
|
| 122 |
-
[
|
| 123 |
-
[
|
| 124 |
-
['jp_example.jpg','japan'],
|
| 125 |
]
|
| 126 |
|
| 127 |
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
|
|
|
|
| 128 |
gr.Interface(
|
| 129 |
inference,
|
| 130 |
[
|
| 131 |
-
gr.Image(type=
|
| 132 |
-
gr.Dropdown(choices=list(LANG_CONFIG.keys()), value=
|
| 133 |
],
|
| 134 |
gr.JSON(label="Output"),
|
| 135 |
title=title,
|
|
@@ -138,4 +132,4 @@ gr.Interface(
|
|
| 138 |
cache_examples=False,
|
| 139 |
css=css,
|
| 140 |
concurrency_limit=CONCURRENCY_LIMIT,
|
| 141 |
-
|
|
|
|
| 1 |
import atexit
|
| 2 |
import functools
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
from queue import Queue
|
| 6 |
from threading import Event, Thread
|
| 7 |
|
| 8 |
+
from paddleocr import PaddleOCR
|
|
|
|
| 9 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# Languages served by this demo; each entry configures how many worker
# threads (and thus model instances) are spawned for that language.
LANG_CONFIG = {
    "ch": {"num_workers": 1},
    "en": {"num_workers": 1},
}
# Max simultaneous Gradio requests (passed to gr.Interface below).
CONCURRENCY_LIMIT = 8
# Gradio's upload cache root; _safe_cleanup refuses to delete anything
# outside this prefix.  Trailing slash is significant for the prefix check.
TMP_PREFIX = "/tmp/gradio/"
|
| 17 |
|
| 18 |
class PaddleOCRModelManager(object):
|
| 19 |
+
def __init__(self, num_workers, model_factory):
|
|
|
|
|
|
|
| 20 |
super().__init__()
|
| 21 |
self._model_factory = model_factory
|
| 22 |
self._queue = Queue()
|
|
|
|
| 30 |
self._workers.append(worker)
|
| 31 |
|
| 32 |
def infer(self, *args, **kwargs):
|
|
|
|
| 33 |
result_queue = Queue(maxsize=1)
|
| 34 |
self._queue.put((args, kwargs, result_queue))
|
| 35 |
success, payload = result_queue.get()
|
|
|
|
| 60 |
finally:
|
| 61 |
self._queue.task_done()
|
| 62 |
|
|
|
|
| 63 |
def create_model(lang):
    """Build a CPU-only PaddleOCR engine with angle classification for *lang*."""
    engine = PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
    return engine
|
| 65 |
|
|
|
|
| 66 |
# One manager (owning its own worker pool of model instances) per language.
model_managers = {}
for lang, config in LANG_CONFIG.items():
    model_manager = PaddleOCRModelManager(
        config["num_workers"], functools.partial(create_model, lang=lang)
    )
    model_managers[lang] = model_manager
|
| 72 |
|
|
|
|
| 73 |
def close_model_managers():
    """Shut down the worker pool of every per-language model manager."""
    for mgr in model_managers.values():
        mgr.close()


# Best-effort teardown at interpreter exit (Gradio exposes no shutdown hook).
atexit.register(close_model_managers)
|
| 78 |
|
| 79 |
+
def _safe_cleanup(path_str: str):
|
| 80 |
+
# 仅清理 /tmp/gradio 下的文件与空目录,避免误删
|
| 81 |
+
try:
|
| 82 |
+
real = Path(os.path.realpath(path_str))
|
| 83 |
+
if str(real).startswith(TMP_PREFIX) and real.is_file():
|
| 84 |
+
real.unlink(missing_ok=True)
|
| 85 |
+
parent = real.parent
|
| 86 |
+
# 尝试删除空目录(/tmp/gradio/<hash>)
|
| 87 |
+
if str(parent).startswith(TMP_PREFIX):
|
| 88 |
+
try:
|
| 89 |
+
parent.rmdir()
|
| 90 |
+
except OSError:
|
| 91 |
+
pass
|
| 92 |
+
except Exception:
|
| 93 |
+
# 清理失败不影响主流程
|
| 94 |
+
pass
|
| 95 |
+
|
| 96 |
+
def inference(img_path: str, lang: str):
    """Run OCR on an uploaded image and return the first page's results.

    Args:
        img_path: Server-local file path; Gradio's ``Image(type='filepath')``
            delivers the component value as a path string.
        lang: Key into ``model_managers`` selecting the language model.

    Returns:
        The first element of the PaddleOCR result for the single input image.

    Raises:
        ValueError: If ``img_path`` is missing or not a string.
        KeyError: If ``lang`` is not a configured language.
    """
    # Gradio Image(type='filepath') passes a server-local path string.
    if not isinstance(img_path, str) or not img_path:
        raise ValueError("无效的图片路径")

    ocr = model_managers[lang]
    try:
        # PaddleOCR accepts a plain path string directly.
        result = ocr.infer(img_path, cls=True)[0]
    finally:
        # Clean up the temp upload even when inference fails, so failed
        # requests do not leak files in the Gradio cache directory.
        _safe_cleanup(img_path)
    return result
|
| 108 |
|
| 109 |
+
# UI copy rendered in the Gradio interface header.
title = "PaddleOCR"
description = """
- Gradio demo for PaddleOCR. Supports Chinese and English.
- Upload an image and choose language; returns structured JSON.
"""
|
|
|
|
| 114 |
|
| 115 |
# [image path, language key] pairs shown as clickable examples in the UI.
examples = [
    ["en_example.jpg", "en"],
    ["cn_example.jpg", "ch"],
]
|
| 119 |
|
| 120 |
# Enlarge both image panes so documents are legible without zooming.
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
|
| 121 |
+
|
| 122 |
gr.Interface(
|
| 123 |
inference,
|
| 124 |
[
|
| 125 |
+
gr.Image(type="filepath", label="Input"),
|
| 126 |
+
gr.Dropdown(choices=list(LANG_CONFIG.keys()), value="en", label="language"),
|
| 127 |
],
|
| 128 |
gr.JSON(label="Output"),
|
| 129 |
title=title,
|
|
|
|
| 132 |
cache_examples=False,
|
| 133 |
css=css,
|
| 134 |
concurrency_limit=CONCURRENCY_LIMIT,
|
| 135 |
+
).launch(debug=False)
|