"""REST API exposing PaddleOCR's PPStructureV3 document-structure pipeline.

The service accepts a base64-encoded image on ``POST /structure``, runs the
structure engine on it and returns the parsed blocks as JSON-safe data.
"""
# from paddleocr import PaddleOCR, PPStructureV3
import base64
import math
from collections.abc import Mapping, Sequence
from dataclasses import asdict, is_dataclass

from paddleocr import PPStructureV3
import cv2
import numpy as np
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import PlainTextResponse
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel


# --- Configuration & Model Loading ---

class OcrInput(BaseModel):
    """Request body: a single image encoded as a base64 string."""

    image_base64: str


app = FastAPI(
    title="PaddleOCR-VL API",
    description="A custom REST API for PaddleOCR running on Hugging Face Spaces",
)

print("**Loading PaddleOCR model...**")

# --- INITIALIZE ENGINES ---
# 1. Standard OCR engine (for the disabled /ocr endpoint below).
# ocr_engine = PaddleOCR(use_angle_cls=False, lang='en')

# 2. Structure analysis engine (for the /structure endpoint), constructed with
#    document orientation classification and document unwarping switched off.
structure_engine = PPStructureV3(
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
)

print("**Model loaded successfully.**")


# --- Helper Functions ---

def decode_image(image_base64: str):
    """Decode a base64 string into a BGR numpy image array.

    Args:
        image_base64: Base64 text of an encoded image file.

    Returns:
        The decoded image as returned by ``cv2.imdecode`` with
        ``cv2.IMREAD_COLOR``, or ``None`` when the input is not valid base64,
        decodes to zero bytes, or is not an image OpenCV can decode.
    """
    try:
        img_data = base64.b64decode(image_base64)
    except (ValueError, TypeError):
        # binascii.Error (bad padding etc.) is a ValueError subclass; report
        # it to the caller as "could not decode" rather than crashing.
        return None

    np_arr = np.frombuffer(img_data, np.uint8)
    if np_arr.size == 0:
        # cv2.imdecode raises on an empty buffer instead of returning None.
        return None
    return cv2.imdecode(np_arr, cv2.IMREAD_COLOR)


def sanitize(obj):
    """Recursively replace non-finite floats (NaN, +/-inf) with ``None``.

    Only ``list`` and ``dict`` containers are traversed; every other value is
    returned unchanged.
    """
    if isinstance(obj, float):
        return obj if math.isfinite(obj) else None
    if isinstance(obj, list):
        return [sanitize(v) for v in obj]
    if isinstance(obj, dict):
        return {k: sanitize(v) for k, v in obj.items()}
    return obj


def _encode_img(img_value, mode):
    """Return the replacement entries for an excluded ``img`` value.

    Args:
        img_value: The value stored under the ``img`` key/attribute.
        mode: ``"shape"`` to emit ``img_shape``, ``"base64"`` to emit a
            PNG-encoded ``img_base64`` string.

    Returns:
        A dict with zero or one entry. It is empty when ``img_value`` is not a
        numpy array, the mode is unknown, or PNG encoding fails.
    """
    if not isinstance(img_value, np.ndarray):
        return {}
    if mode == "shape":
        return {"img_shape": tuple(img_value.shape)}
    if mode == "base64":
        ok, buf = cv2.imencode(".png", img_value)
        if ok:
            return {"img_base64": base64.b64encode(buf).decode("ascii")}
    return {}


def to_jsonable(obj, exclude_keys=("img",), include_img=False, img_mode="shape"):
    """Convert an arbitrary result object into JSON-serializable data.

    Args:
        obj: Value to convert; numpy types, mappings, lists/tuples,
            dataclass instances, objects with ``to_dict()`` and objects with
            ``__dict__`` are expanded recursively.
        exclude_keys: Mapping keys / attribute names dropped from the output.
        include_img: When true, an excluded ``"img"`` entry is replaced by the
            summary produced for ``img_mode`` instead of being dropped.
        img_mode: ``"shape"`` or ``"base64"``; see ``include_img``.

    Returns:
        The converted structure after a final ``jsonable_encoder`` pass.
    """

    def encode(value):
        # numpy types
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.floating):
            return float(value)
        if isinstance(value, np.integer):
            return int(value)
        if isinstance(value, np.bool_):
            return bool(value)

        # mappings (dict-like)
        if isinstance(value, Mapping):
            out = {}
            for k, v in value.items():
                if k in exclude_keys:
                    if include_img and k == "img":
                        out.update(_encode_img(v, img_mode))
                    continue
                out[k] = encode(v)
            return out

        # lists and tuples (str/bytes are deliberately not treated as sequences)
        if isinstance(value, (list, tuple)):
            return [encode(v) for v in value]

        # dataclass instances; is_dataclass() is also true for the class
        # itself, on which asdict() would raise.
        if is_dataclass(value) and not isinstance(value, type):
            return encode(asdict(value))

        # objects with to_dict()
        if hasattr(value, "to_dict") and callable(value.to_dict):
            try:
                return encode(value.to_dict())
            except Exception:
                # Best effort: fall through to the __dict__ handling below.
                pass

        # objects with __dict__
        if hasattr(value, "__dict__"):
            obj_dict = {k: v for k, v in vars(value).items() if k not in exclude_keys}
            # Re-add a summary for an excluded 'img' attribute if requested.
            for k in exclude_keys:
                if include_img and k == "img" and hasattr(value, k):
                    obj_dict.update(_encode_img(getattr(value, k), img_mode))
            return encode(obj_dict)

        # leave everything else to FastAPI's encoder (safe primitives, None, etc.)
        return value

    # final pass through jsonable_encoder to normalize remaining primitives
    return jsonable_encoder(
        encode(obj),
        custom_encoder={
            np.ndarray: lambda x: x.tolist(),
            np.integer: int,
            np.floating: float,
            np.bool_: bool,
        },
    )


def clean_ppstructure_result(result, exclude_keys=("img",), include_img=False, img_mode="shape"):
    """Recursively convert a structure-engine result into plain Python data.

    Mappings and objects with ``__dict__`` become dicts, lists/tuples/sets
    become lists, and numpy arrays and scalars become native Python values.
    Mappings are checked before ``__dict__`` so that dict subclasses carrying
    instance attributes are serialized from their mapping content.

    Args:
        result: Value to convert.
        exclude_keys: Keys / attribute names dropped from dict output.
        include_img: When true, an excluded ``"img"`` entry is replaced by
            ``img_shape`` or ``img_base64`` depending on ``img_mode``.
        img_mode: ``"shape"`` or ``"base64"``.

    Returns:
        The converted value; anything not recognized is returned unchanged.
    """

    def recurse(value):
        return clean_ppstructure_result(value, exclude_keys, include_img, img_mode)

    def clean_items(items):
        cleaned = {}
        for key, value in items:
            if key in exclude_keys:
                if include_img and key == "img":
                    cleaned.update(_encode_img(value, img_mode))
                continue
            cleaned[key] = recurse(value)
        return cleaned

    if isinstance(result, Mapping):
        return clean_items(result.items())
    if isinstance(result, (list, tuple, set)):
        return [recurse(item) for item in result]
    if isinstance(result, np.ndarray):
        return result.tolist()
    if isinstance(result, np.floating):
        return float(result)
    if isinstance(result, np.integer):
        return int(result)
    if isinstance(result, np.bool_):
        return bool(result)
    if hasattr(result, "__dict__"):
        return clean_items(vars(result).items())
    return result


# --- API Endpoints ---

@app.get("/")
def read_root():
    """Health-check endpoint."""
    return {"status": "ok", "message": "PaddleOCR-VL API is running from python"}


@app.get("/test")
def test_endpoint():
    """Simple GET endpoint for connectivity testing."""
    return {"message": "Hugging Face - successful GET of /test"}


# Disabled: traditional OCR endpoint (requires ocr_engine above).
# @app.post("/ocr")
# def run_ocr(ocr_input: OcrInput):
#     """Endpoint for traditional text detection and recognition."""
#     try:
#         print("** Have recieved /ocr request **")
#         img = decode_image(ocr_input.image_base64)
#         if img is None:
#             raise HTTPException(status_code=400, detail="Invalid image data. Could not decode.")
#
#         print("** Request for standard OCR received. Running ocr_engine.**")
#         raw_result = ocr_engine.ocr(img)
#
#         print("** OCR Complete. converting to JSON **")
#         json_safe_result = clean_ppstructure_result(raw_result)
#         print("** Converted to JSON, now returning response.**")
#         return {"result": json_safe_result}
#
#     except HTTPException:
#         raise
#     except Exception as e:
#         raise HTTPException(status_code=500, detail=f"An error occurred in /ocr: {str(e)}")


@app.post("/structure")
def run_structure_analysis(ocr_input: OcrInput):
    """Endpoint for layout analysis, table recognition, and text extraction (PPStructure).

    Returns:
        ``{"result": [...]}`` holding the JSON-safe ``parsing_res_list`` of the
        first prediction result; an empty list when the engine yields no
        result or no parsed blocks.

    Raises:
        HTTPException: 400 when the image cannot be decoded, 500 for any
            other failure while running the engine or converting its output.
    """
    try:
        print("** /structure called. Decoding image **")
        img = decode_image(ocr_input.image_base64)
        if img is None:
            raise HTTPException(status_code=400, detail="Invalid image data. Could not decode.")

        print("** Request for structure analysis received. Running structure_engine.**")
        raw_result = structure_engine.predict(img)

        # Guard every index: a page with no detected blocks must not crash.
        first_result = raw_result[0] if raw_result else None
        parsing_res_list = None
        if first_result is not None:
            parsing_res_list = first_result.get('parsing_res_list')

        print("!! Start of raw data !!")
        print(type(first_result))
        print(type(parsing_res_list))
        print(parsing_res_list)
        if parsing_res_list:
            print(type(parsing_res_list[0]))
            print(parsing_res_list[0])
        print("!! End of raw data !!")

        if parsing_res_list is None:
            parsing_res_list = []

        print("** Structure Analysis Complete. Converting to JSON-safe **")
        json_safe_result = sanitize(clean_ppstructure_result(parsing_res_list))
        return {"result": to_jsonable(json_safe_result, exclude_keys=("img",), include_img=False)}

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred in /structure: {str(e)}",
        ) from e


# --- Run the App ---

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)