Spaces:

handsme
/

n1ocr-api

Running

App Files Community

handsme committed on about 1 month ago

Commit

e5564f3

1 Parent(s): 689f815

重构：PaddlePaddle → RapidOCR + PP-OCRv5 ONNX（轻量快速）

Browse files

Files changed (3) hide show

Dockerfile +16 -34
app.py +27 -38
requirements.txt +3 -3

Dockerfile CHANGED Viewed

@@ -1,54 +1,36 @@
-# 照搬 piika919/paddle-ui 的多阶段构建方式
-FROM python:3.10-slim as builder
-# 安装构建依赖
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
     libgl1 \
     libglib2.0-0 \
-    libsm6 \
-    libxext6 \
-    libxrender-dev \
     libgomp1 \
     && rm -rf /var/lib/apt/lists/*
-# 创建虚拟环境
-RUN python -m venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
-# 安装依赖
 COPY requirements.txt .
 RUN pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir -r requirements.txt
-# 最终镜像
-FROM python:3.10-slim
-# 运行时依赖
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    libgl1 \
-    libglib2.0-0 \
-    libsm6 \
-    libxext6 \
-    libxrender-dev \
-    libgomp1 \
-    curl \
-    && rm -rf /var/lib/apt/lists/*
-# 复制虚拟环境
-COPY --from=builder /opt/venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
-WORKDIR /app
 COPY app.py .
-# 预下载 PP-OCRv5 模型
-ENV DISABLE_MODEL_SOURCE_CHECK=True
-RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(lang='en', ocr_version='PP-OCRv5', device='cpu')"
 EXPOSE 7860
-HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
     CMD curl -f http://localhost:7860/ || exit 1
 CMD ["python", "app.py"]

+# PP-OCRv5 ONNX 轻量部署（RapidOCR + ONNX Runtime）
+FROM python:3.10-slim
+# 安装系统依赖
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libgl1 \
     libglib2.0-0 \
     libgomp1 \
+    curl \
     && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# 安装 Python 依赖
 COPY requirements.txt .
 RUN pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir -r requirements.txt
+# 复制代码
 COPY app.py .
+# 预下载 PP-OCRv5 ONNX 模型（避免首次请求慢）
+RUN python -c "
+from huggingface_hub import hf_hub_download
+hf_hub_download('monkt/paddleocr-onnx', 'detection/v5/det.onnx')
+hf_hub_download('monkt/paddleocr-onnx', 'languages/english/rec.onnx')
+hf_hub_download('monkt/paddleocr-onnx', 'languages/english/dict.txt')
+print('PP-OCRv5 ONNX models downloaded successfully')
+"
 EXPOSE 7860
+HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
     CMD curl -f http://localhost:7860/ || exit 1
 CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -4,19 +4,22 @@ import base64
 import io
 import numpy as np
 from PIL import Image
-from paddleocr import PaddleOCR
 from fastapi import FastAPI, Request
 from fastapi.responses import RedirectResponse
 import uvicorn
-# 照搬 piika919/paddle-ui 的初始化方式
-ocr_engine = PaddleOCR(
-    lang="en",
-    ocr_version="PP-OCRv5",
-    use_doc_orientation_classify=False,
-    use_doc_unwarping=False,
-    use_textline_orientation=False,
-    device="cpu",
 )
@@ -30,39 +33,24 @@ def ocr_recognize(image):
         else:
             img_array = image
-        # PP-OCRv5 使用 .predict()
-        results = ocr_engine.predict(img_array)
         lines = []
         raw_results = []
-        for res in results:
-            # 照搬 piika919 的结果解析方式，兼容 dict 和 object
-            def get_val(obj, key, default=None):
-                if isinstance(obj, dict) or hasattr(obj, 'keys'):
-                    return obj.get(key, default)
-                return getattr(obj, key, default)
-            rec_polys = get_val(res, "rec_polys")
-            rec_texts = get_val(res, "rec_texts")
-            rec_scores = get_val(res, "rec_scores")
-            if rec_texts is None:
-                continue
-            scores = rec_scores if rec_scores is not None else [1.0] * len(rec_texts)
-            polys = rec_polys if rec_polys is not None else [None] * len(rec_texts)
-            for i, (text, score) in enumerate(zip(rec_texts, scores)):
                 lines.append(text)
-                bbox = []
-                if i < len(polys) and polys[i] is not None:
-                    poly = polys[i]
-                    bbox = poly.tolist() if isinstance(poly, np.ndarray) else poly
                 raw_results.append({
                     "text": text,
-                    "confidence": round(float(score), 4),
-                    "bbox": bbox,
                 })
         return json.dumps({
@@ -70,6 +58,7 @@ def ocr_recognize(image):
             "lines": lines,
             "full_text": "\n".join(lines),
             "raw": raw_results,
         }, ensure_ascii=False, indent=2)
     except Exception as e:
         return json.dumps({"success": False, "error": str(e)}, ensure_ascii=False)
@@ -89,7 +78,7 @@ async def api_predict(request: Request):
         image_bytes = base64.b64decode(base64_str)
         image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
-        # 限制图片最大边为 2000px，防止大图导致 PaddlePaddle C++ 层内存溢出
         max_side = 2000
         w, h = image.size
         if max(w, h) > max_side:
@@ -103,8 +92,8 @@ async def api_predict(request: Request):
 # ---- Gradio 界面 ----
-with gr.Blocks(title="n1payocr API - PP-OCRv5", analytics_enabled=False) as demo:
-    gr.Markdown("# 🔍 n1payocr 文字识别引擎 (PP-OCRv5)")
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(type="pil", label="上传图片")
@@ -113,7 +102,7 @@ with gr.Blocks(title="n1payocr API - PP-OCRv5", analytics_enabled=False) as demo
             output_text = gr.Textbox(label="识别结果 (JSON)", lines=20)
     submit_btn.click(fn=ocr_recognize, inputs=input_image, outputs=output_text, api_name=False)
-# 挂载 Gradio 到 /ui 路径，show_api=False 隐藏 API 文档
 app = gr.mount_gradio_app(app, demo, path="/ui")

 import io
 import numpy as np
 from PIL import Image
+from huggingface_hub import hf_hub_download
+from rapidocr_onnxruntime import RapidOCR
 from fastapi import FastAPI, Request
 from fastapi.responses import RedirectResponse
 import uvicorn
+# 下载 PP-OCRv5 ONNX 模型
+det_path = hf_hub_download("monkt/paddleocr-onnx", "detection/v5/det.onnx")
+rec_path = hf_hub_download("monkt/paddleocr-onnx", "languages/english/rec.onnx")
+dict_path = hf_hub_download("monkt/paddleocr-onnx", "languages/english/dict.txt")
+# 初始化 RapidOCR（PP-OCRv5 ONNX 推理）
+ocr_engine = RapidOCR(
+    det_model_path=det_path,
+    rec_model_path=rec_path,
+    rec_keys_path=dict_path,
 )
         else:
             img_array = image
+        # RapidOCR 调用
+        result, elapsed = ocr_engine(img_array)
         lines = []
         raw_results = []
+        if result:
+            for item in result:
+                # RapidOCR 返回格式: (bbox, (text, confidence))
+                bbox = item[0]  # [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]
+                text = item[1][0]
+                confidence = item[1][1]
                 lines.append(text)
                 raw_results.append({
                     "text": text,
+                    "confidence": round(float(confidence), 4),
+                    "bbox": bbox if isinstance(bbox, list) else bbox.tolist(),
                 })
         return json.dumps({
             "lines": lines,
             "full_text": "\n".join(lines),
             "raw": raw_results,
+            "elapsed": round(elapsed, 3),
         }, ensure_ascii=False, indent=2)
     except Exception as e:
         return json.dumps({"success": False, "error": str(e)}, ensure_ascii=False)
         image_bytes = base64.b64decode(base64_str)
         image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        # 限制图片最大边为 2000px，防止大图导致内存溢出
         max_side = 2000
         w, h = image.size
         if max(w, h) > max_side:
 # ---- Gradio 界面 ----
+with gr.Blocks(title="n1payocr API - PP-OCRv5 ONNX", analytics_enabled=False) as demo:
+    gr.Markdown("# 🔍 n1payocr 文字识别引擎 (PP-OCRv5 ONNX)")
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(type="pil", label="上传图片")
             output_text = gr.Textbox(label="识别结果 (JSON)", lines=20)
     submit_btn.click(fn=ocr_recognize, inputs=input_image, outputs=output_text, api_name=False)
+# 挂载 Gradio 到 /ui 路径
 app = gr.mount_gradio_app(app, demo, path="/ui")

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
-# 照搬 piika919/paddle-ui 的依赖版本
-paddlepaddle==3.0.0
-paddleocr[all]>=2.9.0
 Pillow>=10.0.0
 numpy>=1.24.0
 opencv-python-headless>=4.8.0

+# RapidOCR + ONNX Runtime（替代 PaddlePaddle，轻量快速）
+rapidocr-onnxruntime>=1.4.0
+huggingface_hub
 Pillow>=10.0.0
 numpy>=1.24.0
 opencv-python-headless>=4.8.0