# Captured from a Hugging Face Spaces file view (Space status: Sleeping). File size: 8,314 Bytes
#from paddleocr import PaddleOCR, PPStructureV3
from paddleocr import PPStructureV3
import base64
import cv2
import numpy as np
import uvicorn
import math
from fastapi import FastAPI, HTTPException
from fastapi.responses import PlainTextResponse
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel
from collections.abc import Mapping, Sequence
from dataclasses import is_dataclass, asdict
# --- Configuration & Model Loading ---
class OcrInput(BaseModel):
    """Request body accepted by the OCR endpoints."""

    # Base64 text of the encoded image; the endpoint hands it to decode_image().
    image_base64: str
# FastAPI application object; the endpoint functions below register on it
# through the @app.get / @app.post decorators.
app = FastAPI(
    title="PaddleOCR-VL API",
    description="A custom REST API for PaddleOCR running on Hugging Face Spaces",
)
print("**Loading PaddleOCR model...**")
# --- Initialize the structure-analysis engine (created once, at import time) ---
# The standard OCR engine that would back an /ocr endpoint is currently disabled:
#ocr_engine = PaddleOCR(use_angle_cls=False, lang='en')
# Structure analysis engine (used by the /structure endpoint).
# Document orientation classification and unwarping are switched off explicitly;
# no other option is passed, so everything else is left at the library default.
structure_engine = PPStructureV3(use_doc_orientation_classify=False, use_doc_unwarping=False)
print("**Model loaded successfully.**")
# --- Helper Function for Image Decoding ---
def decode_image(image_base64: str):
    """Decode a base64 string into a numpy image array.

    Args:
        image_base64: Base64 text of an encoded image file. A leading
            data-URL header such as ``data:image/png;base64,`` is tolerated
            and stripped before decoding.

    Returns:
        The array produced by ``cv2.imdecode(..., cv2.IMREAD_COLOR)``, or
        ``None`` when the text is not valid base64, decodes to zero bytes, or
        ``cv2.imdecode`` itself cannot decode the bytes. Callers treat
        ``None`` as "invalid image data".
    """
    payload = image_base64.strip()

    # Clients frequently send a data URL. ':' and ',' are outside the base64
    # alphabet, so this prefix can never be part of genuine image data; left in
    # place it would be decoded as garbage bytes in front of the image.
    if payload.startswith("data:") and "," in payload:
        payload = payload.split(",", 1)[1]

    try:
        img_data = base64.b64decode(payload)
    except ValueError:
        # binascii.Error (bad padding / malformed input) subclasses ValueError.
        return None

    if not img_data:
        # Never hand an empty buffer to cv2.imdecode; report "no image" instead.
        return None

    np_arr = np.frombuffer(img_data, np.uint8)
    return cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
def sanitize(obj):
    """Replace non-finite floats (NaN, +/-inf) with ``None``, recursively.

    Lists and dicts are rebuilt with their elements / values sanitized; dict
    keys are kept as they are. Any other value, including tuples, is returned
    unchanged.
    """
    if isinstance(obj, dict):
        return {key: sanitize(val) for key, val in obj.items()}
    if isinstance(obj, list):
        return list(map(sanitize, obj))
    if not isinstance(obj, float):
        return obj
    # Only floats get here: keep finite numbers, blank out NaN and infinities.
    if math.isfinite(obj):
        return obj
    return None
def to_jsonable(obj, exclude_keys=("img",), include_img=False, img_mode="shape"):
    """Convert *obj* into JSON-compatible data, dropping or summarising images.

    Args:
        obj: Value to convert. Numpy arrays and scalars, mappings, lists,
            tuples, dataclasses, objects exposing ``to_dict()`` and objects
            with a ``__dict__`` are converted recursively.
        exclude_keys: Mapping keys / attribute names that are left out.
        include_img: When true, an excluded ``"img"`` entry holding a numpy
            array is replaced by a summary instead of simply disappearing.
        img_mode: ``"shape"`` emits ``img_shape``; ``"base64"`` emits
            ``img_base64`` (PNG bytes, base64 encoded).

    Returns:
        The output of ``jsonable_encoder`` applied to the converted structure.
    """

    def _encode_img(img_value, mode):
        # Build the replacement entry for an excluded image; {} means "omit".
        if mode == "shape" and isinstance(img_value, np.ndarray):
            return {"img_shape": tuple(img_value.shape)}
        if mode == "base64" and isinstance(img_value, np.ndarray):
            ok, buf = cv2.imencode(".png", img_value)
            if ok:
                return {"img_base64": base64.b64encode(buf).decode("ascii")}
        return {}

    # numpy scalar families and the builtin type each one collapses to
    scalar_casts = (
        (np.floating, float),
        (np.integer, int),
        (np.bool_, bool),
    )

    def encode(value):
        # numpy types
        if isinstance(value, np.ndarray):
            return value.tolist()
        for np_type, cast in scalar_casts:
            if isinstance(value, np_type):
                return cast(value)

        # mappings (dict-like)
        if isinstance(value, Mapping):
            encoded = {}
            for key, item in value.items():
                if key not in exclude_keys:
                    encoded[key] = encode(item)
                elif include_img and key == "img":
                    encoded.update(_encode_img(item, img_mode))
            return encoded

        # sequences (but not str/bytes)
        if isinstance(value, (list, tuple)):
            return [encode(item) for item in value]

        # dataclasses
        if is_dataclass(value):
            return encode(asdict(value))

        # objects with to_dict(): best effort; on any failure fall through to
        # the __dict__ handling below
        if hasattr(value, "to_dict") and callable(value.to_dict):
            try:
                return encode(value.to_dict())
            except Exception:
                pass

        # objects with __dict__
        if hasattr(value, "__dict__"):
            attrs = {
                name: attr
                for name, attr in vars(value).items()
                if name not in exclude_keys
            }
            # an excluded 'img' attribute may still contribute a summary entry
            img_is_excluded = any(name == "img" for name in exclude_keys)
            if include_img and img_is_excluded and hasattr(value, "img"):
                attrs.update(_encode_img(getattr(value, "img"), img_mode))
            return encode(attrs)

        # leave everything else to FastAPI's encoder (safe primitives, None, etc.)
        return value

    # final pass through jsonable_encoder to normalize remaining primitives
    numpy_encoders = {
        np.ndarray: lambda x: x.tolist(),
        np.integer: int,
        np.floating: float,
        np.bool_: bool,
    }
    return jsonable_encoder(encode(obj), custom_encoder=numpy_encoders)
def clean_ppstructure_result(result, exclude_keys=("img",), include_img=False, img_mode="shape"):
    """Recursively convert a structure-analysis result into plain Python data.

    Container contents always take priority over instance attributes: a
    ``dict`` subclass is cleaned through its items and a list/tuple/set
    (or subclass) through its elements, even though such subclasses also
    carry a ``__dict__``. Only objects that are not containers are cleaned
    through ``vars()``.

    Args:
        result: Value to clean (list, tuple, set, dict, object with
            ``__dict__``, numpy array / scalar, or any other value).
        exclude_keys: Dict keys / attribute names that are dropped.
        include_img: When true, an excluded ``"img"`` entry holding a numpy
            array is replaced by a summary entry instead of being dropped.
        img_mode: ``"shape"`` stores the array shape under ``img_shape``;
            ``"base64"`` stores a base64 PNG under ``img_base64``.

    Returns:
        Lists for list/tuple/set input, dicts for dict/object input, builtin
        ``float``/``int``/``bool`` for numpy scalars, nested lists for numpy
        arrays, and the value itself for anything else.
    """
    if isinstance(result, (list, tuple, set)):
        return [
            clean_ppstructure_result(item, exclude_keys, include_img, img_mode)
            for item in result
        ]

    if isinstance(result, dict):
        # Checked before __dict__: a dict subclass has both, and reading
        # vars() there would silently discard the mapping contents.
        items = result.items()
    elif hasattr(result, "__dict__"):
        items = vars(result).items()
    else:
        items = None

    if items is not None:
        cleaned = {}
        for key, value in items:
            if key in exclude_keys:
                # Excluded entries vanish, except that an image array may
                # leave a summary behind when the caller asked for one.
                if include_img and key == "img" and isinstance(value, np.ndarray):
                    if img_mode == "shape":
                        cleaned["img_shape"] = value.shape
                    elif img_mode == "base64":
                        ok, buf = cv2.imencode(".png", value)
                        if ok:
                            cleaned["img_base64"] = base64.b64encode(buf).decode("ascii")
                continue
            cleaned[key] = clean_ppstructure_result(value, exclude_keys, include_img, img_mode)
        return cleaned

    if isinstance(result, np.ndarray):
        return result.tolist()
    if isinstance(result, np.floating):
        return float(result)
    if isinstance(result, np.integer):
        return int(result)
    if isinstance(result, np.bool_):
        return bool(result)
    return result
# --- API Endpoints ---
@app.get("/")
def read_root():
    """Health check: report that the API process is up."""
    payload = {
        "status": "ok",
        "message": "PaddleOCR-VL API is running from python",
    }
    return payload
@app.get("/test")
def test_endpoint():
    """Connectivity probe that returns a fixed confirmation message."""
    confirmation = "Hugging Face - successful GET of /test"
    return {"message": confirmation}
#@app.post("/ocr")
#def run_ocr(ocr_input: OcrInput):
# """Endpoint for traditional text detection and recognition."""
# try:
# print("** Have received /ocr request **")
# img = decode_image(ocr_input.image_base64)
# if img is None:
# raise HTTPException(status_code=400, detail="Invalid image data. Could not decode.")
#
# print("** Request for standard OCR received. Running ocr_engine.**")
# raw_result = ocr_engine.ocr(img)
#
# print("** OCR Complete. converting to JSON **")
# json_safe_result = clean_ppstructure_result(raw_result)
# print("** Converted to JSON, now returning response.**")
# return {"result": json_safe_result}
#
# except HTTPException:
# raise
# except Exception as e:
# raise HTTPException(status_code=500, detail=f"An error occurred in /ocr: {str(e)}")
@app.post("/structure")
def run_structure_analysis(ocr_input: OcrInput):
    """Endpoint for layout analysis, table recognition, and text extraction (PPStructure).

    Decodes the submitted image, runs ``structure_engine.predict`` on it and
    returns the ``parsing_res_list`` of the first result in JSON-safe form.

    Args:
        ocr_input: Request body carrying the base64-encoded image.

    Returns:
        ``{"result": [...]}``; the list is empty when the engine produced no
        result or no parsed blocks.

    Raises:
        HTTPException: 400 when the image data cannot be decoded, 500 for any
            other failure during analysis or conversion.
    """
    try:
        print("** /structure called. Decoding image **")
        try:
            img = decode_image(ocr_input.image_base64)
        except ValueError:
            # Malformed base64 (binascii.Error is a ValueError) is a client
            # error, so report it as 400 rather than letting it become a 500.
            img = None
        if img is None:
            raise HTTPException(status_code=400, detail="Invalid image data. Could not decode.")

        print("** Request for structure analysis received. Running structure_engine.**")
        raw_result = structure_engine.predict(img)

        # Only the first result is used. Both it and its block list may be
        # missing or empty (e.g. nothing detected), which must not be an error.
        first_result = raw_result[0] if raw_result else None
        parsing_res_list = None
        if first_result is not None:
            parsing_res_list = first_result.get('parsing_res_list')
        if not parsing_res_list:
            parsing_res_list = []

        print("!! Start of raw data !!")
        print(type(first_result))
        print(type(parsing_res_list))
        print(parsing_res_list)
        if parsing_res_list:
            # Guarded: indexing [0] on an empty list used to raise here.
            print(type(parsing_res_list[0]))
            print(parsing_res_list[0])
        print("!! End of raw data !!")

        print("** Structure Analysis Complete. Converting to JSON-safe **")
        json_safe_result = sanitize(clean_ppstructure_result(parsing_res_list))
        return {"result": to_jsonable(json_safe_result, exclude_keys=("img",), include_img=False)}
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) pass through untouched.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred in /structure: {str(e)}",
        ) from e
# --- Run the App ---
if __name__ == "__main__":
    # Serve the app on all interfaces, port 7860, when run as a script.
    uvicorn.run(app, host="0.0.0.0", port=7860)