Spaces:
Sleeping
Sleeping
| """ | |
| PaddleOCR-VL-1.5 Bridge Server (HF Spaces Edition) | |
| ==================================================== | |
| Deploys on Hugging Face Spaces as a FastAPI app. | |
| Connects to vLLM Docker running on your GPU server. | |
| Architecture: | |
| Gradio App (another HF Space or any client) | |
| | | |
| This HF Space (Bridge, port 7860) | |
| | | |
| Your GPU Server (vLLM Docker, 117.54.141.62:8000) | |
| HF Space Settings → Variables and secrets: | |
| VLLM_SERVER_URL = http://117.54.141.62:8000/v1 | |
| API_KEY = (optional, for auth) | |
| Your GPU Server: | |
| docker run --rm --gpus all -p 8000:8000 -v ~/.cache/paddleocr:/root/.cache ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleocr-genai-vllm-server:latest-nvidia-gpu paddleocr genai_server --model_name PaddleOCR-VL-1.5-0.9B --host 0.0.0.0 --port 8000 --backend vllm | |
| Gradio App HF Space env: | |
| API_URL = https://<your-bridge-space>.hf.space/api/ocr | |
| """ | |
| import base64 | |
| import json | |
| import os | |
| import tempfile | |
| import traceback | |
| from typing import Any, Dict, Optional | |
| import uvicorn | |
| from fastapi import FastAPI, File, Header, HTTPException, Request, UploadFile | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from openai import OpenAI | |
| # ============================================================================= | |
| # Configuration | |
| # ============================================================================= | |
# Runtime settings, each overridable through an environment variable of the
# same name (the listening port is read from PORT).
VLLM_SERVER_URL = os.getenv("VLLM_SERVER_URL", "http://117.54.141.62:8000/v1")
VLLM_MODEL_NAME = os.getenv("VLLM_MODEL_NAME", "PaddleOCR-VL-1.5-0.9B")
# HF Spaces default port
BRIDGE_PORT = int(os.getenv("PORT", "7860"))
API_KEY = os.getenv("API_KEY", "")
| # ============================================================================= | |
| # Initialize OpenAI client (for element-level recognition) | |
| # ============================================================================= | |
# Shared OpenAI-compatible client pointed at the vLLM server. The API key is
# the placeholder string "EMPTY"; the timeout is 600 (seconds).
_OPENAI_CLIENT_OPTIONS = {
    "api_key": "EMPTY",
    "base_url": VLLM_SERVER_URL,
    "timeout": 600,
}
openai_client = OpenAI(**_OPENAI_CLIENT_OPTIONS)
| # ============================================================================= | |
| # PaddleOCR pipeline (for full document parsing with layout detection) | |
| # ============================================================================= | |
# Module-level cache for the PaddleOCR pipeline; filled on first use.
pipeline = None


def get_pipeline():
    """Return the shared PaddleOCRVL pipeline, creating it on first call.

    The ``paddleocr`` import is deferred until the pipeline is first needed,
    so importing this module does not require it.
    """
    global pipeline
    if pipeline is not None:
        return pipeline

    from paddleocr import PaddleOCRVL

    pipeline = PaddleOCRVL(
        vl_rec_backend="vllm-server",
        vl_rec_server_url=VLLM_SERVER_URL,
    )
    return pipeline
| # ============================================================================= | |
| # FastAPI App | |
| # ============================================================================= | |
app = FastAPI(
    title="PaddleOCR-VL-1.5 Bridge API",
    description="Full document parsing API — bridge between Gradio UI and vLLM server",
    version="1.0.0",
)

# CORS: any origin may call the API. Authentication is done with an explicit
# "Authorization: Bearer ..." header rather than cookies, so credentialed
# requests are not enabled. The FastAPI documentation states that wildcard
# origins/methods/headers cannot be combined with allow_credentials=True.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
| # ============================================================================= | |
| # Auth | |
| # ============================================================================= | |
def verify_auth(authorization: Optional[str] = None):
    """Check the ``Authorization`` header against the configured API key.

    Auth is disabled when ``API_KEY`` is empty or whitespace-only; in that
    case every request is accepted. Otherwise the header must be exactly
    ``Bearer <API_KEY>`` (with surrounding whitespace stripped from the key).

    Args:
        authorization: Raw value of the ``Authorization`` header, or None
            when the header was not sent.

    Raises:
        HTTPException: 401 when auth is enabled and the header is missing or
            does not match.
    """
    import hmac

    # Strip once and use the stripped key for both the "enabled" check and
    # the comparison, so a key with a trailing newline still matches.
    expected_key = (API_KEY or "").strip()
    if not expected_key:
        return

    expected = f"Bearer {expected_key}".encode("utf-8")
    supplied = (authorization or "").encode("utf-8")
    # Constant-time comparison avoids leaking the key through timing.
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Unauthorized")
| # ============================================================================= | |
| # Helpers | |
| # ============================================================================= | |
# Maps the API's promptLabel values to the prompt text sent to the model.
TASK_PROMPTS = dict(
    ocr="OCR:",
    formula="Formula Recognition:",
    table="Table Recognition:",
    chart="Chart Recognition:",
    spotting="Spotting:",
    seal="Seal Recognition:",
)
def save_temp_image(file_data: str) -> str:
    """Write an image given as URL, data URI, or bare base64 to a temp file.

    Args:
        file_data: One of
            * an ``http://`` / ``https://`` URL, which is downloaded;
            * a ``data:<mime>;base64,<payload>`` URI;
            * a bare base64 string (treated as PNG).

    Returns:
        Path of the newly created temporary file. The caller is responsible
        for deleting it.

    Raises:
        ValueError: If a ``data:`` URI has no ``,`` separator.
        binascii.Error: If the base64 payload cannot be decoded.
        requests.HTTPError: If the download returns an error status.
    """
    if file_data.startswith(("http://", "https://")):
        # NOTE(security): the URL comes straight from the request body, so the
        # server will fetch arbitrary addresses (SSRF). Restrict allowed hosts
        # if this service can reach private networks.
        import requests as req

        resp = req.get(file_data, timeout=120)
        resp.raise_for_status()
        content = resp.content
        mime = resp.headers.get("content-type", "image/png")
    else:
        mime = "image/png"
        payload = file_data
        if file_data.startswith("data:"):
            # Split "data:image/jpeg;base64,<payload>" into header and payload.
            header, sep, payload = file_data.partition(",")
            if not sep:
                raise ValueError("Malformed data URI: missing ',' separator")
            mime = header[len("data:"):].split(";", 1)[0] or mime
        content = base64.b64decode(payload)

    # Pick the file suffix from the MIME type; anything unrecognised is PNG.
    mime = mime.lower()
    ext = ".png"
    if "jpeg" in mime or "jpg" in mime:
        ext = ".jpg"
    elif "webp" in mime:
        ext = ".webp"
    elif "bmp" in mime:
        ext = ".bmp"

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
    try:
        with tmp:
            tmp.write(content)
    except Exception:
        # Do not leave a partial file behind when the write fails.
        if os.path.exists(tmp.name):
            os.unlink(tmp.name)
        raise
    return tmp.name
def element_level_recognition(file_data: str, prompt_label: str) -> Dict[str, Any]:
    """Run a single recognition task on one image via a direct vLLM call.

    Args:
        file_data: Image as an ``http(s)://`` URL, a ``data:`` URI, or a bare
            base64 string (sent as ``data:image/png;base64,...``).
        prompt_label: Key into ``TASK_PROMPTS``; unknown labels fall back to
            the ``"OCR:"`` prompt.

    Returns:
        A response dict with ``errorCode`` 0 and a single entry in
        ``result.layoutParsingResults`` holding the model output as markdown
        text. For ``prompt_label == "spotting"`` the output is additionally
        parsed into ``prunedResult.spotting_res``; otherwise that is ``{}``.
    """
    # URLs and ready-made data URIs are passed through untouched; prefixing a
    # data URI again would produce an invalid image URL.
    if file_data.startswith(("http://", "https://", "data:")):
        image_url = file_data
    else:
        image_url = f"data:image/png;base64,{file_data}"

    task_prompt = TASK_PROMPTS.get(prompt_label, "OCR:")
    response = openai_client.chat.completions.create(
        model=VLLM_MODEL_NAME,
        messages=[{
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": task_prompt},
            ],
        }],
        temperature=0.0,
    )

    # The completion content may be None; normalise to an empty string so the
    # markdown field is always text.
    result_text = response.choices[0].message.content or ""
    spotting_res = _parse_spotting(result_text) if prompt_label == "spotting" else {}

    return {
        "errorCode": 0,
        "result": {
            "layoutParsingResults": [{
                "markdown": {"text": result_text, "images": {}},
                "outputImages": {},
                "prunedResult": {"spotting_res": spotting_res},
            }]
        },
    }
def _read_first_match(directory: str, suffix: str) -> Optional[str]:
    """Return the text of the first file in *directory* ending with *suffix*, or None."""
    for entry in os.listdir(directory):
        if entry.endswith(suffix):
            with open(os.path.join(directory, entry), "r", encoding="utf-8") as fh:
                return fh.read()
    return None


def full_document_parsing(file_data: str, use_chart_recognition: bool = False,
                          use_doc_unwarping: bool = True,
                          use_doc_orientation_classify: bool = True) -> Dict[str, Any]:
    """Full document parsing with layout detection + VLM recognition.

    The image is written to a temporary file, run through the PaddleOCR
    pipeline, and each result is exported to markdown and JSON which are then
    read back into the response.

    Args:
        file_data: Image as URL or base64 (see ``save_temp_image``).
        use_chart_recognition: Accepted for API compatibility.
        use_doc_unwarping: Accepted for API compatibility.
        use_doc_orientation_classify: Accepted for API compatibility.

    NOTE(review): the three ``use_*`` flags are currently not forwarded to
    ``pipe.predict``; confirm against the PaddleOCR API before wiring them in.

    Returns:
        A response dict with ``errorCode`` 0 and one entry per pipeline
        result in ``result.layoutParsingResults``. When the pipeline yields
        nothing, a single empty-markdown placeholder entry is returned.
    """
    tmp_path = save_temp_image(file_data)
    try:
        pipe = get_pipeline()
        results = []
        for res in pipe.predict(tmp_path):
            # A scratch directory per result, removed as soon as its files
            # have been read (previously these directories were never deleted).
            with tempfile.TemporaryDirectory() as output_dir:
                res.save_to_json(save_path=output_dir)
                res.save_to_markdown(save_path=output_dir)
                md_text = _read_first_match(output_dir, ".md") or ""
                raw_json = _read_first_match(output_dir, ".json")
            json_data = json.loads(raw_json) if raw_json is not None else {}
            results.append({
                "markdown": {"text": md_text, "images": {}},
                "outputImages": {},
                "jsonData": json_data,
            })

        if not results:
            results = [{
                "markdown": {"text": "", "images": {}},
                "outputImages": {},
            }]
        return {
            "errorCode": 0,
            "result": {"layoutParsingResults": results},
        }
    finally:
        # Always remove the downloaded/decoded input image.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
| def _parse_spotting(text: str) -> dict: | |
| try: | |
| return json.loads(text) | |
| except (json.JSONDecodeError, TypeError): | |
| return {"raw_text": text} | |
| # ============================================================================= | |
| # Endpoints | |
| # ============================================================================= | |
# Registered explicitly: without a route decorator this handler is never
# attached to `app` and the service index is unreachable.
@app.get("/")
async def root():
    """Service index: name, status, and the list of available endpoints."""
    return {
        "service": "PaddleOCR-VL-1.5 Bridge API",
        "status": "running",
        "endpoints": [
            "/health",
            "/api/ocr",
            "/api/parse",
            "/api/parse/markdown",
            "/v1/chat/completions",
            "/docs",
        ],
    }
# Registered explicitly: the startup banner advertises GET /health, but
# without a route decorator this handler is never attached to `app`.
@app.get("/health")
async def health():
    """Health check: reports status "ok" plus the configured model name and vLLM URL.

    This only echoes configuration; it does not contact the vLLM server.
    """
    return {"status": "ok", "model": VLLM_MODEL_NAME, "vllm_url": VLLM_SERVER_URL}
# Registered explicitly: the startup banner advertises POST /api/ocr, but
# without a route decorator this handler is never attached to `app`.
@app.post("/api/ocr")
async def ocr_endpoint(request: Request, authorization: Optional[str] = Header(None)):
    """
    Main OCR endpoint — compatible with the Gradio app.

    Body:
    {
        "file": "base64_or_url",
        "useLayoutDetection": true/false,
        "promptLabel": "ocr|formula|table|chart|spotting|seal",
        "useChartRecognition": false,
        "useDocUnwarping": true,
        "useDocOrientationClassify": true
    }

    Returns the result dict of the selected recognition path. Processing
    failures are reported in-band as {"errorCode": -1, "errorMsg": ...}
    rather than as an HTTP error.

    Raises:
        HTTPException: 401 on failed auth; 400 when the body is not a JSON
            object or the 'file' field is missing or not a string.
    """
    verify_auth(authorization)

    try:
        body = await request.json()
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid JSON body")
    # Valid JSON can still be a list or scalar, which has no .get().
    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="JSON body must be an object")

    file_data = body.get("file", "")
    if not file_data:
        raise HTTPException(status_code=400, detail="Missing 'file' field")
    if not isinstance(file_data, str):
        raise HTTPException(status_code=400, detail="'file' must be a base64 string or URL")

    use_layout = body.get("useLayoutDetection", False)
    prompt_label = body.get("promptLabel", "ocr")
    use_chart = body.get("useChartRecognition", False)
    use_unwarp = body.get("useDocUnwarping", True)
    use_orient = body.get("useDocOrientationClassify", True)

    try:
        if use_layout:
            return full_document_parsing(file_data, use_chart, use_unwarp, use_orient)
        return element_level_recognition(file_data, prompt_label)
    except Exception as e:
        # Boundary handler: log the traceback and report the error in-band.
        traceback.print_exc()
        return {"errorCode": -1, "errorMsg": str(e)}
# Registered explicitly: the docstring and startup banner advertise
# POST /api/parse, but without a route decorator the handler is never
# attached to `app`.
@app.post("/api/parse")
async def parse_file(
    file: UploadFile = File(...),
    use_layout_detection: bool = True,
    prompt_label: str = "ocr",
    authorization: Optional[str] = Header(None)
):
    """
    File upload endpoint.

    curl -X POST https://<space>.hf.space/api/parse -F "file=@document.png"

    The upload is base64-encoded and handed to full document parsing when
    `use_layout_detection` is true, otherwise to element-level recognition
    with `prompt_label`. Processing failures are reported in-band as
    {"errorCode": -1, "errorMsg": ...}.

    Raises:
        HTTPException: 401 on failed auth; 400 when the uploaded file is empty.
    """
    verify_auth(authorization)

    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")
    b64 = base64.b64encode(content).decode("utf-8")

    try:
        if use_layout_detection:
            return full_document_parsing(b64)
        return element_level_recognition(b64, prompt_label)
    except Exception as e:
        # Boundary handler: log the traceback and report the error in-band.
        traceback.print_exc()
        return {"errorCode": -1, "errorMsg": str(e)}
# Registered explicitly: the docstring and startup banner advertise
# POST /api/parse/markdown, but without a route decorator the handler is
# never attached to `app`.
@app.post("/api/parse/markdown")
async def parse_to_markdown(
    file: UploadFile = File(...),
    authorization: Optional[str] = Header(None)
):
    """
    Returns just markdown text.

    curl -X POST https://<space>.hf.space/api/parse/markdown -F "file=@document.png"

    Runs full document parsing on the upload and joins the non-empty markdown
    of each page with a "---" separator. `page_count` counts all pages,
    including those without text.

    Raises:
        HTTPException: 401 on failed auth; 400 when the uploaded file is
            empty; 500 when parsing fails.
    """
    verify_auth(authorization)

    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")
    b64 = base64.b64encode(content).decode("utf-8")

    try:
        result = full_document_parsing(b64)
        pages = result.get("result", {}).get("layoutParsingResults", [])
        page_texts = (p.get("markdown", {}).get("text", "") for p in pages)
        markdown_parts = [text for text in page_texts if text]
        return {
            "status": "ok",
            "markdown": "\n\n---\n\n".join(markdown_parts),
            "page_count": len(pages)
        }
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e
# Registered explicitly: the startup banner advertises
# POST /v1/chat/completions, but without a route decorator the handler is
# never attached to `app`.
@app.post("/v1/chat/completions")
async def proxy_chat_completions(request: Request, authorization: Optional[str] = Header(None)):
    """Proxy to vLLM for direct OpenAI-compatible calls.

    The JSON request body is forwarded unchanged to
    ``{VLLM_SERVER_URL}/chat/completions`` and the backend's JSON reply is
    returned with the backend's HTTP status code.

    Raises:
        HTTPException: 401 on failed auth; 400 when the request body is not
            valid JSON; 502 when the backend cannot be reached or replies
            with something other than JSON.
    """
    verify_auth(authorization)

    import httpx
    from fastapi.responses import JSONResponse

    try:
        body = await request.json()
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid JSON body")

    target = f"{VLLM_SERVER_URL.rstrip('/')}/chat/completions"
    try:
        async with httpx.AsyncClient(timeout=600) as client:
            resp = await client.post(
                target,
                json=body,
                headers={"Content-Type": "application/json"},
            )
    except httpx.HTTPError as exc:
        raise HTTPException(status_code=502, detail=f"vLLM backend request failed: {exc}") from exc

    try:
        payload = resp.json()
    except ValueError as exc:
        raise HTTPException(status_code=502, detail="vLLM backend returned a non-JSON response") from exc

    # Preserve the backend's status so upstream errors are not reported as 200.
    return JSONResponse(content=payload, status_code=resp.status_code)
| # ============================================================================= | |
| # Entry point | |
| # ============================================================================= | |
if __name__ == "__main__":
    # Startup banner. The box is built programmatically so the borders stay
    # aligned: inner width 60 = 16-character label column + 44-character
    # value column.
    _width = 60
    # Mirror verify_auth: a whitespace-only key leaves auth disabled.
    _auth_state = "ENABLED" if API_KEY and API_KEY.strip() else "DISABLED"
    _endpoints = [
        ("GET", "/health", "Health check"),
        ("GET", "/docs", "Swagger UI"),
        ("POST", "/api/ocr", "Gradio-compatible API"),
        ("POST", "/api/parse", "File upload API"),
        ("POST", "/api/parse/markdown", "Simple markdown output"),
        ("POST", "/v1/chat/completions", "vLLM proxy (OpenAI format)"),
    ]
    _sections = [
        ["  PaddleOCR-VL-1.5 Bridge Server (HF Spaces)"],
        [
            f"  Bridge API:   http://0.0.0.0:{BRIDGE_PORT}",
            f"  vLLM backend: {VLLM_SERVER_URL}",
            f"  Model:        {VLLM_MODEL_NAME}",
            f"  Auth:         {_auth_state}",
        ],
        ["  Endpoints:"]
        + [f"    {method:<4} {path:<20} - {desc}" for method, path, desc in _endpoints],
    ]

    _rule = "═" * _width
    _banner = ["╔" + _rule + "╗"]
    for _idx, _section in enumerate(_sections):
        if _idx:
            _banner.append("╠" + _rule + "╣")
        _banner.extend(f"║{_row:<{_width}}║" for _row in _section)
    _banner.append("╚" + _rule + "╝")
    print("\n" + "\n".join(_banner) + "\n")

    uvicorn.run(app, host="0.0.0.0", port=BRIDGE_PORT)