# paddleocrvl / app.py
# ahd75's picture
# Update app.py
# cdd0b9c verified
#from paddleocr import PaddleOCR, PPStructureV3
from paddleocr import PPStructureV3
import base64
import cv2
import numpy as np
import uvicorn
import math
from fastapi import FastAPI, HTTPException
from fastapi.responses import PlainTextResponse
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel
from collections.abc import Mapping, Sequence
from dataclasses import is_dataclass, asdict
# --- Configuration & Model Loading ---
class OcrInput(BaseModel):
    """Request body accepted by the /structure endpoint."""

    # Base64 text of the encoded image; it is passed to decode_image().
    image_base64: str
# FastAPI application object; the route decorators further down register on it.
app = FastAPI(
    title="PaddleOCR-VL API",
    description="A custom REST API for PaddleOCR running on Hugging Face Spaces",
)
print("**Loading PaddleOCR model...**")
# --- Initialize the engine(s) at import time ---
# 1. Standard OCR engine (for the /ocr endpoint) -- currently disabled.
#ocr_engine = PaddleOCR(use_angle_cls=False, lang='en')
# 2. Structure analysis engine (for the /structure endpoint).
# Built once at module import so every request reuses the same instance.
# Document-orientation classification and unwarping are switched off here;
# no other options are passed, so everything else uses the library defaults.
structure_engine = PPStructureV3(use_doc_orientation_classify=False, use_doc_unwarping=False)
print("**Model loaded successfully.**")
# --- Helper Function for Image Decoding ---
def decode_image(image_base64: str):
    """Decode a base64 string into an OpenCV image array.

    Args:
        image_base64: Base64 text of an encoded image file. A leading
            ``data:<mime>;base64,`` prefix is tolerated and stripped.

    Returns:
        The array produced by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``,
        or ``None`` when the input is not valid base64, decodes to zero
        bytes, or is not an image OpenCV can decode. Returning ``None``
        for every kind of bad input lets callers answer with a single
        "invalid image" error instead of an unexpected exception.
    """
    if isinstance(image_base64, str) and image_base64.startswith("data:"):
        # Data URI: the payload is everything after the first comma. A bare
        # base64 string can never start with "data:" (':' is not in the
        # alphabet), so this cannot misfire on valid plain input.
        image_base64 = image_base64.partition(",")[2]
    try:
        img_data = base64.b64decode(image_base64)
    except ValueError:
        # binascii.Error (bad padding/length) is a ValueError subclass, and
        # non-ASCII text raises a plain ValueError.
        return None
    if not img_data:
        # OpenCV raises on an empty buffer rather than returning None.
        return None
    np_arr = np.frombuffer(img_data, np.uint8)
    return cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
def sanitize(obj):
    """Replace non-finite floats (NaN, +/-inf) with ``None``.

    Recurses into lists and dict values only; dict keys and every other
    type (tuples included) are returned unchanged.
    """
    if isinstance(obj, dict):
        return {key: sanitize(val) for key, val in obj.items()}
    if isinstance(obj, list):
        return list(map(sanitize, obj))
    if isinstance(obj, float) and not math.isfinite(obj):
        return None
    return obj
def to_jsonable(obj, exclude_keys=("img",), include_img=False, img_mode="shape"):
    """Convert *obj* into data that FastAPI can serialize as JSON.

    The value is walked recursively: numpy arrays become nested lists,
    numpy scalars become builtins, mappings become dicts, lists/tuples
    become lists, and dataclasses, objects exposing ``to_dict()`` and
    objects with a ``__dict__`` are expanded into dicts. The walked result
    is then handed to ``jsonable_encoder`` for a final pass.

    Args:
        obj: Value to convert.
        exclude_keys: Mapping keys / attribute names left out of the output.
        include_img: When true, an excluded ``"img"`` entry holding a numpy
            array is replaced by a summary entry instead of being dropped.
        img_mode: ``"shape"`` emits ``img_shape``; ``"base64"`` emits
            ``img_base64`` (PNG-encoded). Any other value emits nothing.

    Returns:
        The JSON-compatible structure returned by ``jsonable_encoder``.
    """
    # numpy scalar families paired with the builtin each one collapses to
    scalar_casts = ((np.floating, float), (np.integer, int), (np.bool_, bool))

    def _encode_img(img_value, mode):
        # Summary entry for an image array; empty dict when not applicable.
        if not isinstance(img_value, np.ndarray):
            return {}
        if mode == "shape":
            return {"img_shape": tuple(img_value.shape)}
        if mode == "base64":
            ok, buf = cv2.imencode(".png", img_value)
            if ok:
                return {"img_base64": base64.b64encode(buf).decode("ascii")}
        return {}

    def encode(value):
        # numpy types
        if isinstance(value, np.ndarray):
            return value.tolist()
        for np_type, cast in scalar_casts:
            if isinstance(value, np_type):
                return cast(value)

        # mappings (dict-like)
        if isinstance(value, Mapping):
            encoded = {}
            for key, item in value.items():
                if key not in exclude_keys:
                    encoded[key] = encode(item)
                elif include_img and key == "img":
                    encoded.update(_encode_img(item, img_mode))
            return encoded

        # lists and tuples
        if isinstance(value, (list, tuple)):
            return list(map(encode, value))

        # dataclasses
        if is_dataclass(value):
            return encode(asdict(value))

        # objects with to_dict(); any failure falls through to the next strategy
        to_dict = getattr(value, "to_dict", None)
        if callable(to_dict):
            try:
                return encode(to_dict())
            except Exception:
                pass

        # objects with __dict__
        if hasattr(value, "__dict__"):
            attrs = {}
            for name, item in vars(value).items():
                if name not in exclude_keys:
                    attrs[name] = item
            # an excluded 'img' attribute may still contribute a summary entry
            if include_img and "img" in exclude_keys and hasattr(value, "img"):
                attrs.update(_encode_img(getattr(value, "img"), img_mode))
            return encode(attrs)

        # everything else is left for FastAPI's encoder (primitives, None, etc.)
        return value

    # final pass through jsonable_encoder to normalize whatever remains
    numpy_encoders = {
        np.ndarray: lambda x: x.tolist(),
        np.integer: int,
        np.floating: float,
        np.bool_: bool,
    }
    return jsonable_encoder(encode(obj), custom_encoder=numpy_encoders)
def clean_ppstructure_result(result, exclude_keys=("img",), include_img=False, img_mode="shape"):
    """Recursively convert a structure-analysis result into plain Python data.

    Lists, tuples and sets become lists; mappings and objects with a
    ``__dict__`` become dicts; numpy arrays become nested lists and numpy
    scalars become builtins. Anything else is returned unchanged.

    Container and numpy checks run *before* the ``__dict__`` check, so a
    dict subclass that also carries instance attributes (``OrderedDict``,
    result classes deriving from ``dict``) is cleaned from its mapping
    content rather than from ``vars()``.

    Args:
        result: Value to clean.
        exclude_keys: Keys / attribute names dropped from dict output.
        include_img: When true, an excluded ``"img"`` entry holding a numpy
            array is replaced by a summary entry instead of being dropped.
        img_mode: ``"shape"`` stores ``img_shape`` (the array's shape
            tuple); ``"base64"`` stores ``img_base64`` (PNG-encoded).

    Returns:
        The cleaned structure.
    """
    def recurse(value):
        return clean_ppstructure_result(value, exclude_keys, include_img, img_mode)

    if isinstance(result, (list, tuple, set)):
        return [recurse(item) for item in result]

    if isinstance(result, np.ndarray):
        return result.tolist()
    if isinstance(result, np.floating):
        return float(result)
    if isinstance(result, np.integer):
        return int(result)
    if isinstance(result, np.bool_):
        return bool(result)

    if isinstance(result, Mapping):
        # Mapping content wins over instance attributes.
        items = result.items()
    elif hasattr(result, "__dict__"):
        items = vars(result).items()
    else:
        return result

    cleaned = {}
    for key, value in items:
        if key not in exclude_keys:
            cleaned[key] = recurse(value)
        elif include_img and key == "img" and isinstance(value, np.ndarray):
            if img_mode == "shape":
                cleaned["img_shape"] = value.shape
            elif img_mode == "base64":
                ok, buf = cv2.imencode(".png", value)
                if ok:
                    cleaned["img_base64"] = base64.b64encode(buf).decode("ascii")
    return cleaned
# --- API Endpoints ---
@app.get("/")
def read_root():
    """Return a fixed status payload for GET /."""
    payload = {"status": "ok", "message": "PaddleOCR-VL API is running from python"}
    return payload
@app.get("/test")
def test_endpoint():
    """Return a fixed confirmation message for GET /test."""
    reply = {"message": "Hugging Face - successful GET of /test"}
    return reply
#@app.post("/ocr")
#def run_ocr(ocr_input: OcrInput):
# """Endpoint for traditional text detection and recognition."""
# try:
# print("** Have recieved /ocr request **")
# img = decode_image(ocr_input.image_base64)
# if img is None:
# raise HTTPException(status_code=400, detail="Invalid image data. Could not decode.")
#
# print("** Request for standard OCR received. Running ocr_engine.**")
# raw_result = ocr_engine.ocr(img)
#
# print("** OCR Complete. converting to JSON **")
# json_safe_result = clean_ppstructure_result(raw_result)
# print("** Converted to JSON, now returning response.**")
# return {"result": json_safe_result}
#
# except HTTPException:
# raise
# except Exception as e:
# raise HTTPException(status_code=500, detail=f"An error occurred in /ocr: {str(e)}")
@app.post("/structure")
def run_structure_analysis(ocr_input: OcrInput):
    """Endpoint for layout analysis, table recognition, and text extraction (PPStructure).

    Decodes the base64 image from the request, runs ``structure_engine`` on
    it, and returns the first result's ``parsing_res_list`` converted to
    JSON-safe data.

    Args:
        ocr_input: Request body carrying ``image_base64``.

    Returns:
        ``{"result": [...]}``; the list is empty when the engine returns no
        result or no ``parsing_res_list`` entries.

    Raises:
        HTTPException: 400 when the image cannot be decoded, 500 for any
            other failure during analysis or conversion.
    """
    try:
        print("** /structure called. Decoding image **")
        img = decode_image(ocr_input.image_base64)
        if img is None:
            raise HTTPException(status_code=400, detail="Invalid image data. Could not decode.")
        print("** Request for structure analysis received. Running structure_engine.**")
        raw_result = structure_engine.predict(img)
        # Only the first result is used. Assumes predict() returns an
        # indexable sequence of dict-like results -- TODO confirm.
        first_result = raw_result[0] if raw_result else None
        parsing_res_list = first_result.get('parsing_res_list') if first_result is not None else None
        if not parsing_res_list:
            # Nothing detected (or key missing): use an empty list so the
            # debug output and conversion below cannot raise on indexing.
            parsing_res_list = []
        print("!! Start of raw data !!")
        print(type(first_result))
        print(type(parsing_res_list))
        print(parsing_res_list)
        if parsing_res_list:
            print(type(parsing_res_list[0]))
            print(parsing_res_list[0])
        print("!! End of raw data !!")
        print("** Structure Analysis Complete. Converting to JSON-safe **")
        # Flatten objects/numpy values first, then null out NaN/inf floats,
        # which are not valid JSON numbers.
        json_safe_result = sanitize(clean_ppstructure_result(parsing_res_list))
        return {"result": to_jsonable(json_safe_result, exclude_keys=("img",), include_img=False)}
    except HTTPException:
        # Pass deliberate HTTP errors (the 400 above) through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred in /structure: {str(e)}") from e
# --- Run the App ---
if __name__ == "__main__":
    # Runs only when the file is executed as a script: serve the app on all
    # interfaces, port 7860.
    # NOTE(review): 7860 is presumably the port the hosting Space expects -- confirm.
    uvicorn.run(app, host="0.0.0.0", port=7860)