# Hugging Face Spaces page header captured with the scrape ("Spaces: Sleeping") — not code.
| #from paddleocr import PaddleOCR, PPStructureV3 | |
| from paddleocr import PPStructureV3 | |
| import base64 | |
| import cv2 | |
| import numpy as np | |
| import uvicorn | |
| import math | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.responses import PlainTextResponse | |
| from fastapi.encoders import jsonable_encoder | |
| from pydantic import BaseModel | |
| from collections.abc import Mapping, Sequence | |
| from dataclasses import is_dataclass, asdict | |
| # --- Configuration & Model Loading --- | |
class OcrInput(BaseModel):
    """Request body for the OCR endpoints: a single base64-encoded image."""
    # Base64 string of the raw image bytes; decoded by decode_image() below.
    image_base64: str
# FastAPI application instance; route handlers below attach to it.
app = FastAPI(
    title="PaddleOCR-VL API",
    description="A custom REST API for PaddleOCR running on Hugging Face Spaces",
)
print("**Loading PaddleOCR model...**")
## --- INITIALIZE BOTH ENGINES ---
# 1. Standard OCR Engine (for /ocr endpoint)
#ocr_engine = PaddleOCR(use_angle_cls=False, lang='en')
# 2. Structure Analysis Engine (for /structure endpoint)
# We set layout=True and table=True to perform comprehensive structure analysis
# Loaded once at module import so every request reuses the same engine.
# Orientation classification and unwarping are disabled, presumably for speed — confirm.
structure_engine = PPStructureV3(use_doc_orientation_classify=False, use_doc_unwarping=False)
print("**Model loaded successfully.**")
| # --- Helper Function for Image Decoding --- | |
def decode_image(image_base64: str):
    """Decode a base64 string into a BGR numpy image array.

    Returns None when the payload is not valid base64 or not a decodable
    image, so callers can translate that into an HTTP 400 instead of the
    raw binascii.Error propagating as a 500.
    """
    try:
        # binascii.Error (bad padding) is a subclass of ValueError.
        img_data = base64.b64decode(image_base64)
    except (ValueError, TypeError):
        return None
    np_arr = np.frombuffer(img_data, np.uint8)
    # cv2.imdecode returns None for bytes that are not a supported image format.
    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
    return img
def sanitize(obj):
    """Recursively replace non-finite floats (NaN/±Inf) with None so the
    structure serializes as strict JSON (which forbids NaN/Infinity).

    Handles nested lists, tuples, and dicts. Tuples (e.g. image shapes)
    previously passed through untouched, so a NaN inside one could leak
    into the response; they are now sanitized and returned as lists,
    which matches JSON's array type anyway.
    """
    if isinstance(obj, float):
        # json.dumps would emit invalid 'NaN'/'Infinity' tokens; map to null.
        return obj if math.isfinite(obj) else None
    if isinstance(obj, (list, tuple)):
        return [sanitize(v) for v in obj]
    if isinstance(obj, dict):
        return {k: sanitize(v) for k, v in obj.items()}
    return obj
def to_jsonable(obj, exclude_keys=("img",), include_img=False, img_mode="shape"):
    """Convert an arbitrary PaddleOCR result object into JSON-serializable data.

    Walks the object recursively, converting numpy scalars/arrays to native
    Python values, mappings/sequences/dataclasses to dicts/lists, and objects
    exposing to_dict() or __dict__ to plain dicts. Keys listed in exclude_keys
    (raw image arrays by default) are dropped, unless include_img is True, in
    which case 'img' entries are replaced by a lightweight stand-in selected
    by img_mode: "shape" (the array's shape) or "base64" (PNG-encoded data).
    A final jsonable_encoder pass normalizes any remaining primitives.
    """
    def encode(value):
        # numpy types first — these are the most common leaves in OCR output.
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.floating):
            return float(value)
        if isinstance(value, np.integer):
            return int(value)
        if isinstance(value, np.bool_):
            return bool(value)
        # mappings (dict-like)
        if isinstance(value, Mapping):
            out = {}
            for k, v in value.items():
                if k in exclude_keys:
                    # Excluded key: optionally keep a stand-in for 'img'.
                    if include_img and k == "img":
                        out.update(_encode_img(v, img_mode))
                    continue
                out[k] = encode(v)
            return out
        # sequences (but not str/bytes, which must stay intact)
        if isinstance(value, (list, tuple)):
            return [encode(v) for v in value]
        # dataclasses -> dict, then recurse
        if is_dataclass(value):
            return encode(asdict(value))
        # objects with to_dict(); best-effort — fall through on any failure
        if hasattr(value, "to_dict") and callable(value.to_dict):
            try:
                return encode(value.to_dict())
            except Exception:
                pass
        # generic objects: serialize their __dict__, respecting exclude_keys
        if hasattr(value, "__dict__"):
            obj_dict = {k: v for k, v in vars(value).items() if k not in exclude_keys}
            # handle excluded keys like 'img' (re-attach a stand-in if requested)
            for k in exclude_keys:
                if include_img and k == "img" and hasattr(value, k):
                    obj_dict.update(_encode_img(getattr(value, k), img_mode))
            return encode(obj_dict)
        # leave everything else to FastAPI's encoder (safe primitives, None, etc.)
        return value
    def _encode_img(img_value, mode):
        # Returns a small dict to merge into the parent, or {} when the value
        # is not an ndarray / the PNG encode fails.
        if mode == "shape" and isinstance(img_value, np.ndarray):
            return {"img_shape": tuple(img_value.shape)}
        if mode == "base64" and isinstance(img_value, np.ndarray):
            ok, buf = cv2.imencode(".png", img_value)
            if ok:
                return {"img_base64": base64.b64encode(buf).decode("ascii")}
        return {}
    # final pass through jsonable_encoder to normalize remaining primitives
    return jsonable_encoder(encode(obj), custom_encoder={
        np.ndarray: lambda x: x.tolist(),
        np.integer: int,
        np.floating: float,
        np.bool_: bool,
    })
def clean_ppstructure_result(result, exclude_keys=("img",), include_img=False, img_mode="shape"):
    """Recursively convert a PPStructure result into JSON-friendly Python types.

    Numpy scalars/arrays become native values/lists, tuples and sets become
    lists, and any key named in exclude_keys (raw image arrays by default) is
    dropped — or, when include_img is True, summarized per img_mode as either
    its shape ("shape") or a base64-encoded PNG ("base64").
    """
    opts = (exclude_keys, include_img, img_mode)
    if isinstance(result, list):
        return [clean_ppstructure_result(entry, *opts) for entry in result]
    if isinstance(result, dict) or hasattr(result, "__dict__"):
        # Plain dicts iterate their own items; result objects expose __dict__.
        mapping = vars(result) if hasattr(result, "__dict__") else result
        cleaned = {}
        for key, value in mapping.items():
            if key not in exclude_keys:
                cleaned[key] = clean_ppstructure_result(value, *opts)
                continue
            # Excluded key: optionally keep a lightweight stand-in for 'img'.
            if not (include_img and key == "img"):
                continue
            if img_mode == "shape" and isinstance(value, np.ndarray):
                cleaned["img_shape"] = value.shape
            elif img_mode == "base64" and isinstance(value, np.ndarray):
                ok, buf = cv2.imencode(".png", value)
                if ok:
                    cleaned["img_base64"] = base64.b64encode(buf).decode("ascii")
        return cleaned
    if isinstance(result, (tuple, set)):
        return [clean_ppstructure_result(entry, *opts) for entry in result]
    if isinstance(result, np.ndarray):
        return result.tolist()
    if isinstance(result, np.floating):
        return float(result)
    if isinstance(result, np.integer):
        return int(result)
    if isinstance(result, np.bool_):
        return bool(result)
    return result
| # --- API Endpoints --- | |
@app.get("/")
def read_root():
    """Health-check endpoint: confirms the API process is up and the model loaded."""
    # Without the route decorator this handler was never registered with FastAPI
    # (the commented-out /ocr endpoint shows the intended @app.* pattern).
    return {"status": "ok", "message": "PaddleOCR-VL API is running from python"}
@app.get("/test")
def test_endpoint():
    """Simple GET endpoint for connectivity testing from the Space."""
    # Route path "/test" matches the message string; the decorator was missing,
    # so FastAPI never exposed this handler.
    return {"message": "Hugging Face - successful GET of /test"}
| #@app.post("/ocr") | |
| #def run_ocr(ocr_input: OcrInput): | |
| # """Endpoint for traditional text detection and recognition.""" | |
| # try: | |
| # print("** Have recieved /ocr request **") | |
| # img = decode_image(ocr_input.image_base64) | |
| # if img is None: | |
| # raise HTTPException(status_code=400, detail="Invalid image data. Could not decode.") | |
| # | |
| # print("** Request for standard OCR received. Running ocr_engine.**") | |
| # raw_result = ocr_engine.ocr(img) | |
| # | |
| # print("** OCR Complete. converting to JSON **") | |
| # json_safe_result = clean_ppstructure_result(raw_result) | |
| # print("** Converted to JSON, now returning response.**") | |
| # return {"result": json_safe_result} | |
| # | |
| # except HTTPException: | |
| # raise | |
| # except Exception as e: | |
| # raise HTTPException(status_code=500, detail=f"An error occurred in /ocr: {str(e)}") | |
@app.post("/structure")
def run_structure_analysis(ocr_input: OcrInput):
    """Endpoint for layout analysis, table recognition, and text extraction (PPStructure).

    Decodes the base64 image, runs PPStructureV3, and returns the cleaned,
    JSON-safe 'parsing_res_list' of the first result page.

    Raises:
        HTTPException 400: the payload could not be decoded into an image.
        HTTPException 500: the engine returned nothing or failed internally.
    """
    try:
        print("** /structure called. Decoding image **")
        img = decode_image(ocr_input.image_base64)
        if img is None:
            raise HTTPException(status_code=400, detail="Invalid image data. Could not decode.")

        print("** Request for structure analysis received. Running structure_engine.**")
        raw_result = structure_engine.predict(img)

        # Guard before indexing: the old debug prints indexed raw_result[0] and
        # parsing_res_list[0] unconditionally, crashing with a 500 on empty output.
        if not raw_result:
            raise HTTPException(status_code=500, detail="Structure engine returned no results.")
        parsing_res_list = raw_result[0].get('parsing_res_list')

        print("** Structure Analysis Complete. Converting to JSON-safe **")
        # Strip raw image arrays / numpy types, then null out NaN/Inf floats,
        # then normalize any leftovers through the FastAPI encoder.
        json_safe_result = sanitize(clean_ppstructure_result(parsing_res_list))
        return {"result": to_jsonable(json_safe_result, exclude_keys=("img",), include_img=False)}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred in /structure: {str(e)}")
| # --- Run the App --- | |
if __name__ == "__main__":
    # Serve on all interfaces, port 7860 — the port this Space is configured for.
    uvicorn.run(app, host="0.0.0.0", port=7860)