File size: 3,916 Bytes
7d3d21f e5564f3 e27d531 e33ca39 e27d531 7d3d21f e5564f3 7d3d21f e33ca39 7d3d21f 4642d29 7d3d21f e5564f3 7d3d21f e33ca39 e5564f3 57d81fa e5564f3 57d81fa e33ca39 e5564f3 e33ca39 e27d531 a469cb9 e27d531 a469cb9 e27d531 7d3d21f e27d531 7d3d21f 9445766 e33ca39 9445766 e33ca39 9445766 e33ca39 e5564f3 689f815 9445766 e33ca39 e27d531 e5564f3 7d3d21f 9595b9e 9445766 e5564f3 9445766 e33ca39 9445766 4068dd0 e33ca39 9445766 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | import gradio as gr
import json
import base64
import io
import numpy as np
from PIL import Image
from huggingface_hub import hf_hub_download
from rapidocr_onnxruntime import RapidOCR
from fastapi import FastAPI, Request
from fastapi.responses import RedirectResponse
import uvicorn
# Fetch the PP-OCRv5 ONNX model files (detector, English recognizer and its
# character dictionary) from the Hugging Face Hub.
_MODEL_REPO = "monkt/paddleocr-onnx"
det_path = hf_hub_download(_MODEL_REPO, "detection/v5/det.onnx")
rec_path = hf_hub_download(_MODEL_REPO, "languages/english/rec.onnx")
dict_path = hf_hub_download(_MODEL_REPO, "languages/english/dict.txt")

# Build the shared RapidOCR engine on top of the downloaded ONNX models.
_engine_options = {
    "det_model_path": det_path,
    "rec_model_path": rec_path,
    "rec_keys_path": dict_path,
}
ocr_engine = RapidOCR(**_engine_options)
def ocr_recognize(image):
    """Recognize the text in an image and return the outcome as a JSON string.

    Args:
        image: A ``PIL.Image.Image`` (converted to a NumPy array first) or any
            other object, which is handed to the OCR engine unchanged.
            ``None`` means that no image was supplied.

    Returns:
        A JSON string. On success it contains ``success`` (true), ``lines``
        (recognized texts), ``full_text`` (the lines joined by newlines),
        ``raw`` (one ``text``/``confidence``/``bbox`` entry per line) and
        ``elapsed`` (total time reported by the engine, rounded to 3 places).
        On failure it contains ``success`` (false) and ``error``.
    """
    if image is None:
        return json.dumps({"success": False, "error": "未提供图片"}, ensure_ascii=False)
    try:
        img_array = np.array(image) if isinstance(image, Image.Image) else image

        result, elapsed = ocr_engine(img_array)

        lines = []
        raw_results = []
        # Each entry is (bbox, text, confidence); bbox is four [x, y] corners.
        # `result` may be None/empty when nothing was recognized.
        for item in result or []:
            bbox, text, confidence = item[0], item[1], item[2]
            lines.append(text)
            raw_results.append({
                "text": text,
                "confidence": round(float(confidence), 4),
                # Anything that is not already a list (ndarray, tuple, ...)
                # is converted so that it is JSON serializable.
                "bbox": bbox if isinstance(bbox, list) else np.asarray(bbox).tolist(),
            })

        # `elapsed` may be a per-stage sequence, a single number, or None
        # when the engine produced no result; an image without text must
        # still be reported as a success with empty output.
        if elapsed is None:
            total_elapsed = 0.0
        elif isinstance(elapsed, (list, tuple)):
            total_elapsed = sum(elapsed)
        else:
            total_elapsed = elapsed

        return json.dumps({
            "success": True,
            "lines": lines,
            "full_text": "\n".join(lines),
            "raw": raw_results,
            "elapsed": round(float(total_elapsed), 3),
        }, ensure_ascii=False, indent=2)
    except Exception as e:
        # Top-level boundary: callers always receive JSON, never a traceback.
        return json.dumps({"success": False, "error": str(e)}, ensure_ascii=False)
# ---- Plain FastAPI endpoint ----
# ASGI application that serves the JSON API route declared below.
app = FastAPI()
@app.post("/api/predict")
async def api_predict(request: Request):
    """Run OCR on a base64-encoded image supplied in a JSON request body.

    The body must look like ``{"data": ["<image>"]}``, where the first
    element of ``data`` is either a bare base64 string or a data URI such as
    ``data:image/png;base64,...``. Images whose longer side exceeds 2000 px
    are downscaled before recognition.

    Returns:
        ``{"data": [<json string>]}``. The JSON string is the output of
        ``ocr_recognize``; if the request cannot be processed it is
        ``{"success": false, "error": "<message>"}`` instead.
    """
    try:
        body = await request.json()

        # Validate the payload shape up front so that a malformed request
        # yields a meaningful message instead of "list index out of range".
        payload = body.get("data") if isinstance(body, dict) else None
        if not isinstance(payload, list) or not payload or not isinstance(payload[0], str):
            raise ValueError(
                'request body must be JSON of the form {"data": ["<base64 image>"]}'
            )
        data_uri = payload[0]

        # Drop an optional "data:...;base64," prefix; without a comma the
        # whole string is treated as base64.
        base64_str = data_uri.rpartition(",")[2]
        image_bytes = base64.b64decode(base64_str)
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

        # Cap the longer side at 2000 px so that large images do not exhaust
        # memory.
        max_side = 2000
        w, h = image.size
        longest = max(w, h)
        if longest > max_side:
            scale = max_side / longest
            # Clamp to 1 px: for extreme aspect ratios int() would otherwise
            # produce a zero-sized side, which resize() rejects.
            new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
            image = image.resize(new_size, Image.LANCZOS)

        # NOTE(review): ocr_recognize is a synchronous call made directly
        # from this coroutine — consider offloading it to a worker thread.
        result_json_str = ocr_recognize(image)
        return {"data": [result_json_str]}
    except Exception as e:
        # Top-level boundary: report the failure in the same envelope, with
        # non-ASCII text left readable as ocr_recognize does.
        return {"data": [json.dumps({"success": False, "error": str(e)}, ensure_ascii=False)]}
# ---- Gradio UI ----
with gr.Blocks(
    title="n1payocr API - PP-OCRv5 ONNX",
    analytics_enabled=False,
) as demo:
    gr.Markdown("# 🔍 n1payocr 文字识别引擎 (PP-OCRv5 ONNX)")
    with gr.Row():
        # First column: image upload and the button that triggers OCR.
        with gr.Column():
            input_image = gr.Image(type="pil", label="上传图片")
            submit_btn = gr.Button("识别", variant="primary")
        # Second column: the JSON string returned by ocr_recognize.
        with gr.Column():
            output_text = gr.Textbox(label="识别结果 (JSON)", lines=20)
    # Wire the button to the OCR function.
    submit_btn.click(
        fn=ocr_recognize,
        inputs=input_image,
        outputs=output_text,
        api_name=False,
    )

# Mount the Gradio app on the FastAPI application under the /ui path.
app = gr.mount_gradio_app(app, demo, path="/ui")
@app.get("/")
async def root():
    """Redirect requests for the site root to the Gradio UI at /ui."""
    ui_redirect = RedirectResponse(url="/ui")
    return ui_redirect
if __name__ == "__main__":
    # When run as a script, serve the combined FastAPI + Gradio app on all
    # interfaces at port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )
|