HycJack committed on
Commit
7b3952c
·
1 Parent(s): c084450

add ocr_table

Browse files
Files changed (3) hide show
  1. main.py +54 -0
  2. requirements.txt +1 -0
  3. test.py +25 -0
main.py CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI, File, UploadFile
2
  from PIL import Image
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from rapidocr_onnxruntime import RapidOCR
 
5
  import io
6
  import numpy as np
7
  import pandas as pd
@@ -40,3 +41,56 @@ async def ocr(file: UploadFile = File(...)):
40
  columns=("box", "rec", "score"),
41
  )
42
  return out_df.to_dict(orient='records')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from PIL import Image
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from rapidocr_onnxruntime import RapidOCR
5
+ from rapid_table import ModelType, RapidTable, RapidTableInput, RapidTableOutput
6
  import io
7
  import numpy as np
8
  import pandas as pd
 
41
  columns=("box", "rec", "score"),
42
  )
43
  return out_df.to_dict(orient='records')
44
+
45
+
46
# Lazily created RapidTable engine, shared by every request so the engine is
# constructed once instead of on each call.
_TABLE_ENGINE = None


def _get_table_engine():
    """Return the shared RapidTable engine, building it on first use."""
    global _TABLE_ENGINE
    if _TABLE_ENGINE is None:
        input_args = RapidTableInput(model_type=ModelType.PPSTRUCTURE_ZH)
        _TABLE_ENGINE = RapidTable(input_args)
    return _TABLE_ENGINE


@app.post("/ocr_table")
async def ocr_table(file: UploadFile = File(...)):
    """Run table recognition on an uploaded image.

    The upload is validated, written to a uniquely named temporary file under
    ``TMP_DIR``, passed to the RapidTable engine, and the temporary file is
    removed again whether or not recognition succeeded.

    Args:
        file: The uploaded image (JPEG, PNG, BMP or TIFF).

    Returns:
        The object returned by the RapidTable engine for the image.

    Raises:
        HTTPException: 400 if the upload has no filename, 415 if its content
            type is not one of the accepted image MIME types.
    """
    # Function-scope imports: the module's visible import block does not bring
    # these names in, so import them here to keep the endpoint self-contained.
    import uuid
    from pathlib import Path

    from fastapi import HTTPException, status

    # NOTE(review): `TMP_DIR` (a pathlib.Path) and `logger` are expected to be
    # defined at module level; they are not visible in this diff - confirm.

    # ------------------- 1. Validate the request -------------------
    if not file.filename:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="上传的文件没有文件名",
        )

    # Accept only common image MIME types to reject non-image uploads early.
    allowed_mime = {"image/jpeg", "image/png", "image/bmp", "image/tiff"}
    if file.content_type not in allowed_mime:
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail=f"不支持的文件类型: {file.content_type}",
        )

    # ------------------- 2. Save the upload -------------------
    # Unique file name; the original suffix is kept to ease debugging and
    # defaults to .png when the upload has none.
    suffix = Path(file.filename).suffix.lower() or ".png"
    tmp_path = TMP_DIR / f"{uuid.uuid4().hex}{suffix}"

    try:
        # Read asynchronously; a synchronous write to disk is sufficient.
        contents = await file.read()
        with open(tmp_path, "wb") as f:
            f.write(contents)

        logger.info(f"文件已保存至 {tmp_path}")

        # ------------------- 3. Recognise the table -------------------
        table_results = _get_table_engine()(tmp_path)

        print(table_results.pred_html)
        # NOTE(review): FastAPI has to serialise this object for the response;
        # confirm that its fields are JSON-encodable.
        return table_results
    finally:
        # ------------------- 4. Remove the temporary file -------------------
        # Best-effort cleanup so the disk does not fill up. There must be no
        # `return` in this block: it would replace the result above and
        # silently swallow any exception raised in the `try` body.
        try:
            tmp_path.unlink()
        except FileNotFoundError:
            # Nothing was written (e.g. reading the upload failed).
            pass
        except OSError as exc:
            logger.warning(f"删除临时文件 {tmp_path} 失败: {exc}")
        else:
            logger.debug(f"已删除临时文件 {tmp_path}")
requirements.txt CHANGED
@@ -5,4 +5,5 @@ pandas
5
  Pillow
6
  onnxruntime
7
  rapidocr_onnxruntime
 
8
  python-multipart
 
5
  Pillow
6
  onnxruntime
7
  rapidocr_onnxruntime
8
+ rapid_table
9
  python-multipart
test.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Manual smoke test: stream one chat completion from an OpenAI-compatible API.

The API key is read from the ``OPENAI_API_KEY`` environment variable and the
endpoint from ``OPENAI_BASE_URL`` (falling back to ``DEFAULT_BASE_URL``), so
no credential is stored in the repository.
"""
import os

from openai import OpenAI

# If running this service with proxy, you might need to unset `http(s)_proxy`.
DEFAULT_BASE_URL = "https://maas.hikvision.com.cn/v1"


def main():
    """Send a single prompt and print the streamed answer to stdout."""
    base_url = os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL)
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        # Secrets must never be committed; any key that was previously
        # hard-coded in this file should be treated as leaked and rotated.
        raise SystemExit("Set the OPENAI_API_KEY environment variable first.")

    client = OpenAI(base_url=base_url, api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-oss-120b",
        messages=[
            {
                "role": "user",
                "content": "what is your model",
            }
        ],
        stream=True,
    )

    for chunk in response:
        # Some stream chunks carry no choices; skip them instead of indexing
        # into an empty list.
        if not chunk.choices:
            continue
        choice = chunk.choices[0]
        if choice.delta.content is not None:
            print(choice.delta.content, end="", flush=True)
        elif choice.finish_reason == "stop":
            print()


# Guarded so that importing this module (e.g. by a test collector that picks
# up `test.py`) does not trigger a network call.
if __name__ == "__main__":
    main()