Spaces:

HycJack
/

fastapi_rapidocr

Sleeping

App Files Files Community

HycJack committed on Sep 23, 2025

Commit

7b3952c

1 Parent(s): c084450

add ocr_table

Browse files

Files changed (3) hide show

main.py +54 -0
requirements.txt +1 -0
test.py +25 -0

main.py CHANGED Viewed

@@ -2,6 +2,7 @@ from fastapi import FastAPI, File, UploadFile
 from PIL import Image
 from fastapi.middleware.cors import CORSMiddleware
 from rapidocr_onnxruntime import RapidOCR
 import io
 import numpy as np
 import pandas as pd
@@ -40,3 +41,56 @@ async def ocr(file: UploadFile = File(...)):
         columns=("box", "rec", "score"),
     )
     return out_df.to_dict(orient='records')

 from PIL import Image
 from fastapi.middleware.cors import CORSMiddleware
 from rapidocr_onnxruntime import RapidOCR
+from rapid_table import ModelType, RapidTable, RapidTableInput, RapidTableOutput
 import io
 import numpy as np
 import pandas as pd
         columns=("box", "rec", "score"),
     )
     return out_df.to_dict(orient='records')
+@app.post("/ocr_table")
+async def ocr_table(file: UploadFile = File(...)):
+    # ------------------- ① 参数校验 -------------------
+    if not file.filename:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="上传的文件没有文件名",
+        )
+    # 只接受常见的图片 MIME 类型，防止恶意上传非图片文件
+    allowed_mime = {"image/jpeg", "image/png", "image/bmp", "image/tiff"}
+    if file.content_type not in allowed_mime:
+        raise HTTPException(
+            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
+            detail=f"不支持的文件类型: {file.content_type}",
+        )
+    # ------------------- ② 保存文件 -------------------
+    # 生成唯一文件名（保留原始后缀，方便调试）
+    suffix = Path(file.filename).suffix.lower()
+    # 如果上传的文件没有后缀，默认使用 .png
+    if not suffix:
+        suffix = ".png"
+    unique_name = f"{uuid.uuid4().hex}{suffix}"
+    tmp_path = TMP_DIR / unique_name
+    try:
+        # 读取全部字节并写入磁盘（使用 async 读取，写入同步即可）
+        contents = await file.read()
+        with open(tmp_path, "wb") as f:
+            f.write(contents)
+        logger.info(f"文件已保存至 {tmp_path}")
+        # 使用示例
+        input_args = RapidTableInput(model_type=ModelType.PPSTRUCTURE_ZH)
+        table_engine = RapidTable(input_args)
+        table_results = table_engine(tmp_path)
+        print(table_results.pred_html)
+        return table_results
+    finally:
+        # ------------------- ⑥ 清理临时文件 -------------------
+        # 为了防止磁盘被塞满，尽量在请求结束后删除文件
+        try:
+            if tmp_path.exists():
+                tmp_path.unlink()
+                logger.debug(f"已删除临时文件 {tmp_path}")
+        except Exception as exc:
+            logger.warning(f"删除临时文件 {tmp_path} 失败: {exc}")
+    return None

requirements.txt CHANGED Viewed

@@ -5,4 +5,5 @@ pandas
 Pillow
 onnxruntime
 rapidocr_onnxruntime
 python-multipart

 Pillow
 onnxruntime
 rapidocr_onnxruntime
+rapid_table
 python-multipart

test.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from openai import OpenAI
+# If runnning this service with proxy, you might need to unset `http(s)_proxy`.
+base_url = "https://maas.hikvision.com.cn/v1"
+api_key = "sk-20f1cbcdae0e4b789ee06923c201d6a8"
+client = OpenAI(base_url=base_url, api_key=api_key)
+response = client.chat.completions.create(
+    model="gpt-oss-120b",
+    messages=[
+        {
+            "role": "user",
+            "content": "what is your model",
+        }
+    ],
+    stream=True,
+)
+for chunk in response:
+    if chunk.choices[0].delta.content is not None:
+        print(chunk.choices[0].delta.content, end="", flush=True)
+    elif chunk.choices[0].finish_reason == "stop":
+        print()
+    else:
+        pass