ocr

Runtime error

App Files Files Community

Toughen1 committed on Jul 12, 2025

Commit

c049531

verified ·

1 Parent(s): 2f92370

1111

Browse files

Files changed (1) hide show

app.py +45 -85

app.py CHANGED Viewed

@@ -1,20 +1,17 @@
 import functools
-import io
-import base64
 from queue import Queue
-from threading import Thread, Event
-from typing import List
-import atexit
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from PIL import Image
 from paddleocr import PaddleOCR, draw_ocr
 import gradio as gr
-import uvicorn
-import threading
-# ---------- 配置 ----------
 LANG_CONFIG = {
     "ch": {"num_workers": 2},
     "en": {"num_workers": 2},
@@ -23,20 +20,18 @@ LANG_CONFIG = {
     "korean": {"num_workers": 1},
     "japan": {"num_workers": 1},
 }
 CONCURRENCY_LIMIT = 8
-# ---------- 模型池管理 ----------
-class PaddleOCRModelManager:
     def __init__(self, num_workers, model_factory):
         self._model_factory = model_factory
         self._queue = Queue()
         self._workers = []
         self._model_initialized_event = Event()
         for _ in range(num_workers):
-            worker = Thread(target=self._worker, daemon=True)
             worker.start()
             self._model_initialized_event.wait()
             self._model_initialized_event.clear()
@@ -73,44 +68,46 @@ class PaddleOCRModelManager:
             finally:
                 self._queue.task_done()
 def create_model(lang):
     return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
-# ---------- 初始化模型池 ----------
 model_managers = {
     lang: PaddleOCRModelManager(cfg["num_workers"], functools.partial(create_model, lang=lang))
     for lang, cfg in LANG_CONFIG.items()
 }
 def close_model_managers():
     for manager in model_managers.values():
         manager.close()
 atexit.register(close_model_managers)
-# ---------- Gradio 推理函数 ----------
-def inference(img, lang):
     ocr = model_managers[lang]
-    result = ocr.infer(img, cls=True)[0]
-    image = Image.open(img).convert("RGB")
     boxes = [line[0] for line in result]
     txts = [line[1][0] for line in result]
     scores = [line[1][1] for line in result]
     im_show = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
-    return im_show
-# ---------- Gradio Web UI ----------
-title = 'PaddleOCR'
 description = '''
-- PaddleOCR Gradio demo 支持中、英、法、德、韩、日文图像文字识别。
-- 上传图像并选择语言即可识别；也可以通过 API 接口以 base64 图片方式调用。
-- 文档见：https://github.com/PaddlePaddle/PaddleOCR
 '''
 examples = [
@@ -121,70 +118,33 @@ examples = [
 css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
-gr.Interface(
-    inference,
-    [
         gr.Image(type='filepath', label='Input'),
         gr.Dropdown(choices=list(LANG_CONFIG.keys()), value='en', label='language')
     ],
-    gr.Image(type='pil', label='Output'),
     title=title,
     description=description,
     examples=examples,
     cache_examples=False,
     css=css,
-    concurrency_limit=CONCURRENCY_LIMIT,
-).launch(share=False, debug=False, prevent_thread_lock=True)
-# ---------- FastAPI 接口（Base64） ----------
-app = FastAPI(
-    title="PaddleOCR REST API",
-    description="Support base64 image OCR with multi-language",
-    version="1.0.0"
 )
-class PredictRequest(BaseModel):
-    image_base64: str
-    lang: str
-@app.post("/predict")
-async def predict(request: PredictRequest):
-    lang = request.lang.lower()
-    if lang not in model_managers:
-        raise HTTPException(status_code=400, detail=f"Unsupported language: {lang}")
     try:
-        image_data = base64.b64decode(request.image_base64.split(",")[-1])
-        image = Image.open(io.BytesIO(image_data)).convert("RGB")
-        temp_path = "/tmp/temp_image.png"
-        image.save(temp_path)
     except Exception as e:
-        raise HTTPException(status_code=400, detail=f"Invalid base64 image: {str(e)}")
-    ocr = model_managers[lang]
-    result = ocr.infer(temp_path, cls=True)[0]
-    boxes = [line[0] for line in result]
-    txts = [line[1][0] for line in result]
-    scores = [line[1][1] for line in result]
-    im_show = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
-    buf = io.BytesIO()
-    im_show.save(buf, format="PNG")
-    image_base64 = base64.b64encode(buf.getvalue()).decode("utf-8")
-    return {
-        "texts": txts,
-        "scores": scores,
-        "image_base64": "data:image/png;base64," + image_base64
-    }
-# ---------- 后台启动 FastAPI ----------
-def run_api():
-    uvicorn.run(app, host="0.0.0.0", port=7861)
-threading.Thread(target=run_api, daemon=True).start()

+import atexit
 import functools
 from queue import Queue
+from threading import Event, Thread
 from paddleocr import PaddleOCR, draw_ocr
+from PIL import Image
+from io import BytesIO
+import base64
 import gradio as gr
+from fastapi import FastAPI, UploadFile, Form
+from pydantic import BaseModel
+# ========== 模型配置 ==========
 LANG_CONFIG = {
     "ch": {"num_workers": 2},
     "en": {"num_workers": 2},
     "korean": {"num_workers": 1},
     "japan": {"num_workers": 1},
 }
 CONCURRENCY_LIMIT = 8
+# ========== 模型池管理类 ==========
+class PaddleOCRModelManager(object):
     def __init__(self, num_workers, model_factory):
+        super().__init__()
         self._model_factory = model_factory
         self._queue = Queue()
         self._workers = []
         self._model_initialized_event = Event()
         for _ in range(num_workers):
+            worker = Thread(target=self._worker, daemon=False)
             worker.start()
             self._model_initialized_event.wait()
             self._model_initialized_event.clear()
             finally:
                 self._queue.task_done()
+# ========== OCR 模型初始化 ==========
 def create_model(lang):
     """Construct a PaddleOCR instance for the language code ``lang``.

     The instance is created with ``use_angle_cls=True`` and
     ``use_gpu=False``; every other PaddleOCR option keeps its default.
     """
     return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
 model_managers = {
     lang: PaddleOCRModelManager(cfg["num_workers"], functools.partial(create_model, lang=lang))
     for lang, cfg in LANG_CONFIG.items()
 }
 def close_model_managers():
     """Call ``close()`` on every manager stored in ``model_managers``.

     Registered with ``atexit`` immediately after this definition.
     NOTE(review): ``close()`` is defined in a part of the manager class that
     is not visible here -- presumably it stops the worker threads; confirm.
     """
     for manager in model_managers.values():
         manager.close()
 atexit.register(close_model_managers)
+# ========== 通用 OCR 推理函数 ==========
+def run_ocr(image: Image.Image, lang: str):
     ocr = model_managers[lang]
+    buffered = BytesIO()
+    image.save(buffered, format="PNG")
+    buffered.seek(0)
+    result = ocr.infer(buffered, cls=True)[0]
     boxes = [line[0] for line in result]
     txts = [line[1][0] for line in result]
     scores = [line[1][1] for line in result]
     im_show = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
+    return im_show, txts
+# ========== Gradio UI ==========
+def gradio_inference(img_path, lang):
+    image = Image.open(img_path).convert("RGB")
+    result_image, _ = run_ocr(image, lang)
+    return result_image
+title = "PaddleOCR"
 description = '''
+- Gradio demo for PaddleOCR with multi-language support.
+- Supports Chinese, English, French, German, Korean, and Japanese.
+- Upload an image or use the RESTful API below.
 '''
 examples = [
 css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
+gr_app = gr.Interface(
+    gradio_inference,
+    inputs=[
         gr.Image(type='filepath', label='Input'),
         gr.Dropdown(choices=list(LANG_CONFIG.keys()), value='en', label='language')
     ],
+    outputs=gr.Image(type='pil', label='Output'),
     title=title,
     description=description,
     examples=examples,
     cache_examples=False,
     css=css,
+    concurrency_limit=CONCURRENCY_LIMIT
 )
+# ========== FastAPI + REST OCR ==========
+app = FastAPI()
+@app.post("/api/ocr_base64")
+def ocr_base64(data: str = Form(...), lang: str = Form("ch")):
     try:
+        content = base64.b64decode(data)
+        image = Image.open(BytesIO(content)).convert("RGB")
+        _, texts = run_ocr(image, lang)
+        return {"success": True, "text": texts}
     except Exception as e:
+        return {"success": False, "error": str(e)}
+# Mount the Gradio UI onto the FastAPI app at the root path.
+# NOTE(review): no server start-up call (e.g. uvicorn.run or .launch()) is
+# visible in this view of the file -- presumably the ASGI `app` object is
+# served by an external runner; confirm.
+app = gr.mount_gradio_app(app, gr_app, path="/")