File size: 3,916 Bytes
7d3d21f
 
 
 
 
 
e5564f3
 
e27d531
e33ca39
e27d531
7d3d21f
e5564f3
 
 
 
 
 
 
 
 
 
7d3d21f
 
e33ca39
7d3d21f
4642d29
7d3d21f
 
 
 
 
 
 
 
e5564f3
 
7d3d21f
 
 
e33ca39
e5564f3
 
57d81fa
e5564f3
57d81fa
 
e33ca39
 
 
 
e5564f3
 
e33ca39
e27d531
a469cb9
 
 
e27d531
 
 
 
 
a469cb9
e27d531
7d3d21f
e27d531
7d3d21f
9445766
 
 
 
e33ca39
9445766
 
e33ca39
9445766
 
 
 
 
 
e33ca39
e5564f3
689f815
 
 
 
 
 
9445766
 
 
 
 
e33ca39
e27d531
e5564f3
 
7d3d21f
 
 
 
 
 
9595b9e
9445766
e5564f3
9445766
 
e33ca39
9445766
 
 
4068dd0
e33ca39
9445766
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import gradio as gr
import json
import base64
import io
import numpy as np
from PIL import Image
from huggingface_hub import hf_hub_download
from rapidocr_onnxruntime import RapidOCR
from fastapi import FastAPI, Request
from fastapi.responses import RedirectResponse
import uvicorn

# Fetch the PP-OCRv5 ONNX model files (detector, English recognizer and its
# character dictionary) from the Hugging Face Hub.
_MODEL_REPO = "monkt/paddleocr-onnx"
det_path = hf_hub_download(repo_id=_MODEL_REPO, filename="detection/v5/det.onnx")
rec_path = hf_hub_download(repo_id=_MODEL_REPO, filename="languages/english/rec.onnx")
dict_path = hf_hub_download(repo_id=_MODEL_REPO, filename="languages/english/dict.txt")

# Build the RapidOCR engine (PP-OCRv5 ONNX inference) on top of the
# downloaded files.
_engine_options = {
    "det_model_path": det_path,
    "rec_model_path": rec_path,
    "rec_keys_path": dict_path,
}
ocr_engine = RapidOCR(**_engine_options)


def ocr_recognize(image):
    """Run OCR on an image and return the outcome as a JSON string.

    Args:
        image: A ``PIL.Image.Image`` (converted to a NumPy array first) or any
            other object, which is handed to the OCR engine unchanged.
            ``None`` is reported as an error.

    Returns:
        A JSON string. On success it contains ``success=True``, ``lines``
        (recognized texts in engine order), ``full_text`` (the lines joined
        with newlines), ``raw`` (per-line text, confidence rounded to 4
        decimals, and bounding box) and ``elapsed`` (rounded to 3 decimals).
        An image in which nothing is recognized is still a success, with
        empty ``lines``. On failure it contains ``success=False`` and an
        ``error`` message. This function does not raise: every exception is
        folded into the failure payload.
    """
    if image is None:
        return json.dumps({"success": False, "error": "未提供图片"}, ensure_ascii=False)
    try:
        img_array = np.array(image) if isinstance(image, Image.Image) else image

        # The engine is called as ``engine(img) -> (result, elapsed)``.
        result, elapsed = ocr_engine(img_array)

        lines = []
        raw_results = []

        if result:
            for item in result:
                # Each item starts with (bbox, text, confidence); bbox is the
                # four corner points [[x1,y1],[x2,y2],[x3,y3],[x4,y4]].
                bbox, text, confidence = item[:3]

                lines.append(text)
                raw_results.append({
                    "text": text,
                    "confidence": round(float(confidence), 4),
                    "bbox": bbox if isinstance(bbox, list) else bbox.tolist(),
                })

        # ``elapsed`` may be a sequence of per-stage timings, a single number,
        # or None (RapidOCR returns (None, None) when nothing is detected).
        # Without the None guard, round(None, ...) would turn a valid
        # "no text" outcome into a failure response.
        if elapsed is None:
            total_elapsed = 0.0
        elif isinstance(elapsed, (list, tuple)):
            total_elapsed = sum(elapsed)
        else:
            total_elapsed = elapsed

        return json.dumps({
            "success": True,
            "lines": lines,
            "full_text": "\n".join(lines),
            "raw": raw_results,
            # float() keeps the value JSON-serializable even if the engine
            # reports a NumPy scalar.
            "elapsed": round(float(total_elapsed), 3),
        }, ensure_ascii=False, indent=2)
    except Exception as e:
        return json.dumps({"success": False, "error": str(e)}, ensure_ascii=False)


# ---- FastAPI plain JSON endpoint ----
app = FastAPI()


@app.post("/api/predict")
async def api_predict(request: Request):
    """Accept a base64-encoded image and return the OCR result.

    The JSON request body must look like ``{"data": ["<image>"]}`` where
    ``<image>`` is either bare base64 or a data URI
    (``data:image/png;base64,<payload>``). Only the first entry of ``data``
    is used.

    Args:
        request: The incoming request; its body is parsed as JSON.

    Returns:
        ``{"data": [json_string]}`` where ``json_string`` is the string
        produced by ``ocr_recognize`` or, if anything fails before or during
        recognition, a JSON string with ``success=False`` and an ``error``
        message. The handler itself does not raise.
    """
    try:
        body = await request.json()
        entries = body.get("data") if isinstance(body, dict) else None
        if not isinstance(entries, list) or not entries or not isinstance(entries[0], str):
            # Fail with an explicit message instead of a cryptic
            # "list index out of range" / AttributeError.
            raise ValueError('request body must look like {"data": ["<base64 image>"]}')

        # For a data URI keep only the payload after the last comma; bare
        # base64 contains no comma, so split() leaves it unchanged.
        base64_str = entries[0].split(",")[-1]
        image_bytes = base64.b64decode(base64_str)
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

        # Cap the longest side at 2000px so large images do not exhaust memory.
        max_side = 2000
        w, h = image.size
        longest = max(w, h)
        if longest > max_side:
            scale = max_side / longest
            # Clamp to 1px: for extreme aspect ratios int() would truncate the
            # short side to 0, which Image.resize rejects.
            new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
            image = image.resize(new_size, Image.LANCZOS)

        # NOTE(review): ocr_recognize is synchronous and is called directly
        # inside this async handler; consider offloading it to a worker
        # thread if concurrent requests matter.
        result_json_str = ocr_recognize(image)
        return {"data": [result_json_str]}
    except Exception as e:
        return {"data": [json.dumps({"success": False, "error": str(e)}, ensure_ascii=False)]}


# ---- Gradio UI ----
# Two-column page: image upload plus submit button on the left, JSON result
# text box on the right. Components are created inside nested context
# managers, so the statement order below is significant.
with gr.Blocks(title="n1payocr API - PP-OCRv5 ONNX", analytics_enabled=False) as demo:
    gr.Markdown("# 🔍 n1payocr 文字识别引擎 (PP-OCRv5 ONNX)")
    with gr.Row():
        with gr.Column():
            # type="pil": presumably makes Gradio hand ocr_recognize a PIL
            # image, which ocr_recognize converts to a NumPy array.
            input_image = gr.Image(type="pil", label="上传图片")
            submit_btn = gr.Button("识别", variant="primary")
        with gr.Column():
            output_text = gr.Textbox(label="识别结果 (JSON)", lines=20)
    # The button feeds the uploaded image to ocr_recognize and shows the
    # returned JSON string in the text box.
    # NOTE(review): api_name=False presumably keeps this handler out of
    # Gradio's auto-generated API — confirm against the Gradio docs.
    submit_btn.click(fn=ocr_recognize, inputs=input_image, outputs=output_text, api_name=False)

# Mount the Gradio app on the FastAPI app under the /ui path
app = gr.mount_gradio_app(app, demo, path="/ui")


@app.get("/")
async def root():
    """Redirect requests for the site root to the UI served under /ui."""
    ui_redirect = RedirectResponse(url="/ui")
    return ui_redirect


# When executed as a script, serve the combined FastAPI + Gradio app with
# uvicorn, bound to 0.0.0.0 on port 7860.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)