Spaces:

ahd75
/

paddleocrvl

Sleeping

File size: 8,314 Bytes

cdd0b9c
 
3d9375d
 
 
 
eb3ccdd
cc9b893
22270c0
207f627
3d9375d
95aedfb
d06eee7
 
207f627
3d9375d
 
 
 
 
 
 
 
 
 
9eec6a0
735f30a
 
cdd0b9c
735f30a
 
 
210c73b
d248bb7
9eec6a0
3d9375d
735f30a
 
 
 
 
 
 
 
d043f56
 
 
 
 
 
 
 
d06eee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207f627
acd143d
582f181
acd143d
 
 
 
 
aa1faaf
acd143d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa1faaf
acd143d
aa1faaf
acd143d
aa1faaf
acd143d
7253ef8
acd143d
 
582f181
66fce79
3d9375d
 
 
 
dff8ea2
3d9375d
1dffa15
 
 
 
cdd0b9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735f30a
cc9b893
735f30a
 
 
22270c0
735f30a
 
cc9b893
22270c0
 
582f181
6c3fb23
96da143
a6d675a
32e00ae
96da143
09d4b12
 
6165cf5
6c3fb23
cc9b893
d043f56
d06eee7
c8e5c9f
dd5bddb
cc9b893
 
 
3d9375d
cc9b893
3d9375d

#from paddleocr import PaddleOCR, PPStructureV3
from paddleocr import PPStructureV3
import base64
import cv2
import numpy as np
import uvicorn
import math
from fastapi import FastAPI, HTTPException
from fastapi.responses import PlainTextResponse
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel

from collections.abc import Mapping, Sequence
from dataclasses import is_dataclass, asdict

# --- Configuration & Model Loading ---

# Request body accepted by the /structure endpoint: a JSON object with one field.
# (Documented with comments rather than a docstring so the generated API schema
# for this model is left exactly as it was.)
class OcrInput(BaseModel):
    # Base64 text of an encoded image file; decode_image() base64-decodes it and
    # passes the resulting bytes to cv2.imdecode.
    image_base64: str

# Application object: the route decorators further down register their handlers
# on it, and the __main__ guard at the bottom of the file hands it to uvicorn.
app = FastAPI(
    title="PaddleOCR-VL API",
    description="A custom REST API for PaddleOCR running on Hugging Face Spaces",
)

# The engine is constructed once, at import time, and the same instance is
# reused by every request handled by this process.
print("**Loading PaddleOCR model...**")
## --- INITIALIZE THE ENGINE ---
# 1. Standard OCR Engine (for /ocr endpoint) -- currently disabled, together
#    with the commented-out /ocr endpoint further down.
#ocr_engine = PaddleOCR(use_angle_cls=False, lang='en')

# 2. Structure Analysis Engine (for /structure endpoint)
# Document-orientation classification and document unwarping are switched off
# through the two constructor flags; no other option is overridden here.
structure_engine = PPStructureV3(use_doc_orientation_classify=False, use_doc_unwarping=False)

print("**Model loaded successfully.**")

# --- Helper Function for Image Decoding ---
def decode_image(image_base64: str):
    """Decode a base64 string into an image array.

    Accepts either bare base64 text or a ``data:<type>;base64,<payload>`` URL.

    Args:
        image_base64: Base64 text of an encoded image file.

    Returns:
        The value produced by ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` for the
        decoded bytes, or ``None`` when the text is not decodable base64 or
        decodes to zero bytes. Callers treat ``None`` as "invalid image".
    """
    payload = image_base64.strip()
    if payload.startswith("data:"):
        # Data URL: the base64 payload is everything after the first comma.
        _, _, payload = payload.partition(",")

    try:
        img_data = base64.b64decode(payload)
    except ValueError:
        # binascii.Error (bad padding / bad length) is a ValueError subclass, as
        # is the error raised for non-ASCII text. Report it as "not an image"
        # so the endpoint answers 400 instead of a generic 500.
        return None

    if not img_data:
        # Zero bytes can never be an image; bail out before involving OpenCV.
        return None

    np_arr = np.frombuffer(img_data, np.uint8)
    return cv2.imdecode(np_arr, cv2.IMREAD_COLOR)

def sanitize(obj):
    """Return *obj* with every non-finite float replaced by ``None``.

    Dicts and lists are rebuilt recursively (keys are kept as they are); a
    float is returned unchanged when finite and as ``None`` when it is NaN or
    +/-infinity. Any other value, including tuples, is returned untouched.
    """
    if isinstance(obj, dict):
        cleaned = {}
        for key, item in obj.items():
            cleaned[key] = sanitize(item)
        return cleaned

    if isinstance(obj, list):
        return list(map(sanitize, obj))

    if isinstance(obj, float):
        if math.isfinite(obj):
            return obj
        return None

    return obj

def to_jsonable(obj, exclude_keys=("img",), include_img=False, img_mode="shape"):
    """Reduce *obj* to plain containers and scalars, then run jsonable_encoder.

    Conversion rules, tried in this order for every value:
    numpy arrays and scalars become builtin lists/floats/ints/bools; mappings
    become dicts; lists and tuples become lists; dataclasses go through
    ``asdict``; objects exposing ``to_dict()`` use it (falling through if it
    raises); objects with ``__dict__`` are converted from their attributes.
    Anything else is passed on unchanged to ``jsonable_encoder``.

    Keys/attributes named in *exclude_keys* are dropped. When *include_img* is
    true, an excluded ``"img"`` entry holding a numpy array is replaced by
    ``img_shape`` (``img_mode == "shape"``) or by a PNG encoded as
    ``img_base64`` (``img_mode == "base64"``).
    """
    # Checked in this order; each numpy scalar family maps to one builtin.
    numpy_scalar_casts = (
        (np.floating, float),
        (np.integer, int),
        (np.bool_, bool),
    )

    def _img_summary(image, mode):
        # Build the replacement entry for an excluded image; {} when the value
        # is not an array, the mode is unknown, or PNG encoding fails.
        if not isinstance(image, np.ndarray):
            return {}
        if mode == "shape":
            return {"img_shape": tuple(image.shape)}
        if mode == "base64":
            encoded_ok, png_bytes = cv2.imencode(".png", image)
            if encoded_ok:
                return {"img_base64": base64.b64encode(png_bytes).decode("ascii")}
        return {}

    def convert(node):
        # numpy types
        if isinstance(node, np.ndarray):
            return node.tolist()
        for np_kind, builtin in numpy_scalar_casts:
            if isinstance(node, np_kind):
                return builtin(node)

        # mappings (dict-like)
        if isinstance(node, Mapping):
            converted = {}
            for key, item in node.items():
                if key not in exclude_keys:
                    converted[key] = convert(item)
                elif include_img and key == "img":
                    converted.update(_img_summary(item, img_mode))
            return converted

        # sequences (but not str/bytes)
        if isinstance(node, (list, tuple)):
            return list(map(convert, node))

        # dataclasses
        if is_dataclass(node):
            return convert(asdict(node))

        # objects with to_dict(); on any failure fall through to __dict__
        to_dict = getattr(node, "to_dict", None)
        if callable(to_dict):
            try:
                return convert(to_dict())
            except Exception:
                pass

        # objects with __dict__
        if hasattr(node, "__dict__"):
            attrs = {}
            for name, item in vars(node).items():
                if name not in exclude_keys:
                    attrs[name] = item
            # excluded 'img' attribute may still contribute a summary entry
            if include_img:
                for name in exclude_keys:
                    if name == "img" and hasattr(node, name):
                        attrs.update(_img_summary(getattr(node, name), img_mode))
            return convert(attrs)

        # everything else (safe primitives, None, ...) is left to FastAPI's encoder
        return node

    # final pass through jsonable_encoder to normalize remaining primitives
    numpy_encoders = {
        np.ndarray: lambda x: x.tolist(),
        np.integer: int,
        np.floating: float,
        np.bool_: bool,
    }
    return jsonable_encoder(convert(obj), custom_encoder=numpy_encoders)
    
def clean_ppstructure_result(result, exclude_keys=("img",), include_img=False, img_mode="shape"):
    """Recursively convert a result structure into plain Python containers.

    Args:
        result: Value to convert. Lists, tuples and sets become lists; mappings
            and objects with ``__dict__`` become dicts; numpy arrays and
            scalars become builtin lists/floats/ints/bools; anything else is
            returned unchanged.
        exclude_keys: Keys/attribute names dropped from mappings and objects.
        include_img: When true, an excluded ``"img"`` entry that holds a numpy
            array is replaced by a summary entry instead of being dropped.
        img_mode: ``"shape"`` stores the array shape under ``img_shape``;
            ``"base64"`` stores a PNG encoding under ``img_base64`` (omitted
            if encoding fails). Other values add nothing.

    Returns:
        The converted structure.
    """
    if isinstance(result, list):
        return [clean_ppstructure_result(item, exclude_keys, include_img, img_mode) for item in result]

    # A mapping must be recognised *before* the __dict__ test: instances of
    # dict subclasses also carry a __dict__, and reading vars() from them would
    # return their (usually empty) instance attributes and silently drop the
    # actual mapping contents.
    if isinstance(result, Mapping):
        items = result.items()
    elif hasattr(result, "__dict__"):
        items = vars(result).items()
    else:
        items = None

    if items is not None:
        cleaned = {}
        for key, value in items:
            if key not in exclude_keys:
                cleaned[key] = clean_ppstructure_result(value, exclude_keys, include_img, img_mode)
            elif include_img and key == "img" and isinstance(value, np.ndarray):
                # Excluded image: optionally keep a lightweight stand-in.
                if img_mode == "shape":
                    cleaned["img_shape"] = value.shape
                elif img_mode == "base64":
                    ok, buf = cv2.imencode(".png", value)
                    if ok:
                        cleaned["img_base64"] = base64.b64encode(buf).decode("ascii")
        return cleaned

    if isinstance(result, (tuple, set)):
        return [clean_ppstructure_result(v, exclude_keys, include_img, img_mode) for v in result]

    # numpy containers and scalars -> builtin equivalents
    if isinstance(result, np.ndarray):
        return result.tolist()
    if isinstance(result, np.floating):
        return float(result)
    if isinstance(result, np.integer):
        return int(result)
    if isinstance(result, np.bool_):
        return bool(result)

    return result


# --- API Endpoints ---

@app.get("/")
def read_root():
    # Root route: fixed payload reporting that the service is up.
    body = dict(status="ok", message="PaddleOCR-VL API is running from python")
    return body

@app.get("/test")
def test_endpoint():
    # Fixed reply used to confirm that a GET request to /test gets through.
    reply = {}
    reply["message"] = "Hugging Face - successful GET of /test"
    return reply

#@app.post("/ocr")
#def run_ocr(ocr_input: OcrInput):
#   """Endpoint for traditional text detection and recognition."""
#    try:
#        print("** Have recieved /ocr request **")
#        img = decode_image(ocr_input.image_base64)
#        if img is None:
 #           raise HTTPException(status_code=400, detail="Invalid image data. Could not decode.")
#
#        print("** Request for standard OCR received. Running ocr_engine.**")
#        raw_result = ocr_engine.ocr(img)
#
#        print("** OCR Complete. converting to JSON **")
#        json_safe_result = clean_ppstructure_result(raw_result)
#        print("** Converted to JSON, now returning response.**")
#        return {"result": json_safe_result}
#
#    except HTTPException:
#        raise
#    except Exception as e:
#        raise HTTPException(status_code=500, detail=f"An error occurred in /ocr: {str(e)}")
        
@app.post("/structure")
def run_structure_analysis(ocr_input: OcrInput):
    """Endpoint for layout analysis, table recognition, and text extraction (PPStructure)."""
    # Flow: decode the base64 image -> run structure_engine.predict -> take the
    # 'parsing_res_list' of the first result entry -> convert it to JSON-safe data.
    # Responses: {"result": [...]} on success (an empty list when nothing was
    # detected), HTTP 400 when the image cannot be decoded, HTTP 500 on any
    # other failure.
    try:
        print("** /structure called. Decoding image **")
        img = decode_image(ocr_input.image_base64)
        if img is None:
            raise HTTPException(status_code=400, detail="Invalid image data. Could not decode.")

        print("** Request for structure analysis received. Running structure_engine.**")
        raw_result = structure_engine.predict(img)

        # Guard every lookup: an empty result, a missing 'parsing_res_list' or a
        # page with no detected blocks must yield an empty answer, not a 500
        # caused by indexing [0] on nothing.
        has_results = raw_result is not None and len(raw_result) > 0
        first_page = raw_result[0] if has_results else None
        blocks = first_page.get('parsing_res_list') if first_page is not None else None
        if blocks is None:
            blocks = []

        print("!! Start of raw data !!")
        print(type(first_page))
        print(type(blocks))
        print(blocks)
        if len(blocks) > 0:
            print(type(blocks[0]))
            print(blocks[0])
        print("!! End of raw data !!")

        print("** Structure Analysis Complete. Converting to JSON-safe **")
        json_safe_result = sanitize(clean_ppstructure_result(blocks))
        return {"result": to_jsonable(json_safe_result, exclude_keys=("img",), include_img=False)}

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) pass through untouched.
        raise
    except Exception as e:
        # Keep the original exception chained so the traceback stays available.
        raise HTTPException(status_code=500, detail=f"An error occurred in /structure: {str(e)}") from e
# --- Run the App ---

# Start the server only when this file is executed directly, not when imported.
if __name__ == "__main__":
    # Listens on all interfaces, port 7860.
    # NOTE(review): 7860 is presumably the port the Hugging Face Space is
    # configured to expose -- confirm against the Space settings.
    uvicorn.run(app, host="0.0.0.0", port=7860)