Toughen1 committed on
Commit
47716d3
·
verified ·
1 Parent(s): 5dcc55e

支持Base64接口调用

Browse files
Files changed (1) hide show
  1. app.py +88 -28
app.py CHANGED
@@ -1,13 +1,20 @@
1
- import atexit
2
  import functools
 
 
3
  from queue import Queue
4
- from threading import Event, Thread
 
5
 
6
- from paddleocr import PaddleOCR, draw_ocr
 
 
7
  from PIL import Image
 
8
  import gradio as gr
 
 
9
 
10
-
11
  LANG_CONFIG = {
12
  "ch": {"num_workers": 2},
13
  "en": {"num_workers": 2},
@@ -16,27 +23,26 @@ LANG_CONFIG = {
16
  "korean": {"num_workers": 1},
17
  "japan": {"num_workers": 1},
18
  }
 
19
  CONCURRENCY_LIMIT = 8
20
 
21
 
22
- class PaddleOCRModelManager(object):
23
- def __init__(self,
24
- num_workers,
25
- model_factory):
26
- super().__init__()
27
  self._model_factory = model_factory
28
  self._queue = Queue()
29
  self._workers = []
30
  self._model_initialized_event = Event()
 
31
  for _ in range(num_workers):
32
- worker = Thread(target=self._worker, daemon=False)
33
  worker.start()
34
  self._model_initialized_event.wait()
35
  self._model_initialized_event.clear()
36
  self._workers.append(worker)
37
 
38
  def infer(self, *args, **kwargs):
39
- # XXX: Should I use a more lightweight data structure, say, a future?
40
  result_queue = Queue(maxsize=1)
41
  self._queue.put((args, kwargs, result_queue))
42
  success, payload = result_queue.get()
@@ -72,10 +78,11 @@ def create_model(lang):
72
  return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
73
 
74
 
75
- model_managers = {}
76
- for lang, config in LANG_CONFIG.items():
77
- model_manager = PaddleOCRModelManager(config["num_workers"], functools.partial(create_model, lang=lang))
78
- model_managers[lang] = model_manager
 
79
 
80
 
81
  def close_model_managers():
@@ -83,37 +90,37 @@ def close_model_managers():
83
  manager.close()
84
 
85
 
86
- # XXX: Not sure if gradio allows adding custom teardown logic
87
  atexit.register(close_model_managers)
88
 
89
-
90
  def inference(img, lang):
91
  ocr = model_managers[lang]
92
  result = ocr.infer(img, cls=True)[0]
93
- img_path = img
94
- image = Image.open(img_path).convert("RGB")
95
  boxes = [line[0] for line in result]
96
  txts = [line[1][0] for line in result]
97
  scores = [line[1][1] for line in result]
98
- im_show = draw_ocr(image, boxes, txts, scores,
99
- font_path="./simfang.ttf")
100
  return im_show
101
 
102
 
 
103
  title = 'PaddleOCR'
104
  description = '''
105
- - Gradio demo for PaddleOCR. PaddleOCR demo supports Chinese, English, French, German, Korean and Japanese.
106
- - To use it, simply upload your image and choose a language from the dropdown menu, or click one of the examples to load them. Read more at the links below.
107
- - [Docs](https://paddlepaddle.github.io/PaddleOCR/), [Github Repository](https://github.com/PaddlePaddle/PaddleOCR).
108
  '''
109
 
110
  examples = [
111
- ['en_example.jpg','en'],
112
- ['cn_example.jpg','ch'],
113
- ['jp_example.jpg','japan'],
114
  ]
115
 
116
  css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
 
117
  gr.Interface(
118
  inference,
119
  [
@@ -127,4 +134,57 @@ gr.Interface(
127
  cache_examples=False,
128
  css=css,
129
  concurrency_limit=CONCURRENCY_LIMIT,
130
- ).launch(debug=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import atexit
import base64
import functools
import io
import os
import tempfile
import threading
from queue import Queue
from threading import Event, Thread
from typing import List

# Third-party
import gradio as gr
import uvicorn
from fastapi import FastAPI, HTTPException
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image
from pydantic import BaseModel
16
 
17
+ # ---------- 配置 ----------
18
  LANG_CONFIG = {
19
  "ch": {"num_workers": 2},
20
  "en": {"num_workers": 2},
 
23
  "korean": {"num_workers": 1},
24
  "japan": {"num_workers": 1},
25
  }
26
+
27
  CONCURRENCY_LIMIT = 8
28
 
29
 
30
+ # ---------- 模型池管理 ----------
31
+ class PaddleOCRModelManager:
32
+ def __init__(self, num_workers, model_factory):
 
 
33
  self._model_factory = model_factory
34
  self._queue = Queue()
35
  self._workers = []
36
  self._model_initialized_event = Event()
37
+
38
  for _ in range(num_workers):
39
+ worker = Thread(target=self._worker, daemon=True)
40
  worker.start()
41
  self._model_initialized_event.wait()
42
  self._model_initialized_event.clear()
43
  self._workers.append(worker)
44
 
45
  def infer(self, *args, **kwargs):
 
46
  result_queue = Queue(maxsize=1)
47
  self._queue.put((args, kwargs, result_queue))
48
  success, payload = result_queue.get()
 
78
  return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
79
 
80
 
81
# ---------- Build the per-language model pool ----------
# One PaddleOCRModelManager per language, each owning the number of worker
# threads configured in LANG_CONFIG.
model_managers = {}
for _lang, _cfg in LANG_CONFIG.items():
    model_managers[_lang] = PaddleOCRModelManager(
        _cfg["num_workers"],
        functools.partial(create_model, lang=_lang),
    )
86
 
87
 
88
  def close_model_managers():
 
90
  manager.close()
91
 
92
 
 
93
  atexit.register(close_model_managers)
94
 
95
# ---------- Gradio inference function ----------
def inference(img, lang):
    """Run OCR on an uploaded image and return an annotated image.

    Args:
        img: Filesystem path to the input image (gradio supplies a filepath).
        lang: Language key into ``model_managers`` (e.g. "ch", "en", "japan").

    Returns:
        The image with detected boxes and recognized text drawn on it, as
        produced by ``draw_ocr``; the plain RGB image if nothing was detected.
    """
    ocr = model_managers[lang]
    result = ocr.infer(img, cls=True)[0]

    image = Image.open(img).convert("RGB")
    # NOTE(review): PaddleOCR is known to yield None (not []) for an image
    # with no detected text; without this guard the comprehensions below
    # raise TypeError. Confirm against the installed paddleocr version.
    if not result:
        return image

    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    scores = [line[1][1] for line in result]
    im_show = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
    return im_show
106
 
107
 
108
# ---------- Gradio Web UI ----------
title = 'PaddleOCR'

# UI copy shown under the title (Chinese, kept verbatim).
description = '''
- PaddleOCR Gradio demo 支持中、英、法、德、韩、日文图像文字识别。
- 上传图像并选择语言即可识别;也可以通过 API 接口以 base64 图片方式调用。
- 文档见:https://github.com/PaddlePaddle/PaddleOCR
'''

# Force a tall, full-width image area for both the input and output panes.
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

# (image path, language key) pairs selectable from the demo page.
examples = [
    ['en_example.jpg', 'en'],
    ['cn_example.jpg', 'ch'],
    ['jp_example.jpg', 'japan'],
]
123
+
124
  gr.Interface(
125
  inference,
126
  [
 
134
  cache_examples=False,
135
  css=css,
136
  concurrency_limit=CONCURRENCY_LIMIT,
137
+ ).launch(share=False, debug=False, prevent_thread_lock=True)
138
+
139
+
140
# ---------- FastAPI service (Base64 interface) ----------
_API_TITLE = "PaddleOCR REST API"
_API_DESCRIPTION = "Support base64 image OCR with multi-language"
_API_VERSION = "1.0.0"

app = FastAPI(
    title=_API_TITLE,
    description=_API_DESCRIPTION,
    version=_API_VERSION,
)
146
+
147
+
148
class PredictRequest(BaseModel):
    """Request body for /predict: a base64-encoded image plus a language key."""

    # Raw base64 payload; a leading "data:image/...;base64," prefix is tolerated.
    image_base64: str
    # One of the LANG_CONFIG keys, e.g. "ch", "en", "japan" (case-insensitive).
    lang: str
151
+
152
+
153
@app.post("/predict")
def predict(request: PredictRequest):
    """OCR a base64-encoded image.

    Accepts a raw base64 string or a ``data:image/...;base64,`` data URL.

    Returns:
        dict with ``texts`` (recognized strings), ``scores`` (confidences as
        plain floats) and ``image_base64`` (annotated image as a PNG data URL).

    Raises:
        HTTPException(400): unknown language or undecodable image payload.
    """
    # Deliberately a sync handler: FastAPI runs it in a worker thread, so the
    # blocking OCR call below does not stall the async event loop.
    lang = request.lang.lower()
    if lang not in model_managers:
        raise HTTPException(status_code=400, detail=f"Unsupported language: {lang}")

    try:
        # split(",")[-1] keeps only the payload of a data-URL, and is a no-op
        # for a bare base64 string.
        image_data = base64.b64decode(request.image_base64.split(",")[-1])
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid base64 image: {str(e)}")

    # Unique temp file per request: the previous fixed "/tmp/temp_image.png"
    # was clobbered by concurrent requests (and was not portable).
    fd, temp_path = tempfile.mkstemp(suffix=".png")
    try:
        os.close(fd)
        image.save(temp_path)
        ocr = model_managers[lang]
        result = ocr.infer(temp_path, cls=True)[0]
    finally:
        os.unlink(temp_path)

    # NOTE(review): PaddleOCR yields None for an image with no detected text;
    # fall back to returning the plain image rather than crashing.
    if result:
        boxes = [line[0] for line in result]
        txts = [line[1][0] for line in result]
        # float() keeps scores JSON-serializable even when they are numpy scalars.
        scores = [float(line[1][1]) for line in result]
        im_show = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
        # draw_ocr returns a numpy array, which has no .save(); convert first.
        if not isinstance(im_show, Image.Image):
            im_show = Image.fromarray(im_show)
    else:
        txts, scores = [], []
        im_show = image

    buf = io.BytesIO()
    im_show.save(buf, format="PNG")
    image_base64 = base64.b64encode(buf.getvalue()).decode("utf-8")

    return {
        "texts": txts,
        "scores": scores,
        "image_base64": "data:image/png;base64," + image_base64
    }
183
+
184
+
185
# ---------- Background FastAPI launcher ----------
def run_api():
    """Serve the FastAPI app on 0.0.0.0:7861 (blocking; meant for a thread)."""
    uvicorn.run(app, host="0.0.0.0", port=7861)
188
+
189
+
190
# NOTE(review): this is a daemon thread and launch() above was called with
# prevent_thread_lock=True, so if nothing keeps the main thread alive the
# process (and this API) exits immediately — confirm the hosting environment
# blocks elsewhere.
_api_thread = threading.Thread(target=run_api, daemon=True)
_api_thread.start()