devappsmi committed on
Commit
84ef6a4
Β·
verified Β·
1 Parent(s): 3b79541

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -22
app.py CHANGED
@@ -14,24 +14,21 @@ Architecture:
14
  HF Space Settings β†’ Variables and secrets:
15
  VLLM_SERVER_URL = http://117.54.141.62:8000/v1
16
  API_KEY = (optional, for auth)
17
-
18
- Your GPU Server:
19
- docker run --rm --gpus all -p 8000:8000 -v ~/.cache/paddleocr:/root/.cache ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleocr-genai-vllm-server:latest-nvidia-gpu paddleocr genai_server --model_name PaddleOCR-VL-1.5-0.9B --host 0.0.0.0 --port 8000 --backend vllm
20
-
21
- Gradio App HF Space env:
22
- API_URL = https://<your-bridge-space>.hf.space/api/ocr
23
  """
24
 
25
  import base64
26
  import json
27
  import os
 
28
  import tempfile
29
  import traceback
 
30
  from typing import Any, Dict, Optional
31
 
32
  import uvicorn
33
  from fastapi import FastAPI, File, Header, HTTPException, Request, UploadFile
34
  from fastapi.middleware.cors import CORSMiddleware
 
35
  from openai import OpenAI
36
 
37
  # =============================================================================
@@ -39,11 +36,21 @@ from openai import OpenAI
39
  # =============================================================================
40
  VLLM_SERVER_URL = os.environ.get("VLLM_SERVER_URL", "http://117.54.141.62:8000/v1")
41
  VLLM_MODEL_NAME = os.environ.get("VLLM_MODEL_NAME", "PaddleOCR-VL-1.5-0.9B")
42
- BRIDGE_PORT = int(os.environ.get("PORT", "7860")) # HF Spaces default port
43
  API_KEY = os.environ.get("API_KEY", "")
 
 
 
 
 
 
 
 
 
 
44
 
45
  # =============================================================================
46
- # Initialize OpenAI client (for element-level recognition)
47
  # =============================================================================
48
  openai_client = OpenAI(
49
  api_key="EMPTY",
@@ -52,7 +59,7 @@ openai_client = OpenAI(
52
  )
53
 
54
  # =============================================================================
55
- # PaddleOCR pipeline (for full document parsing with layout detection)
56
  # =============================================================================
57
  pipeline = None
58
 
@@ -86,6 +93,9 @@ app.add_middleware(
86
  allow_headers=["*"],
87
  )
88
 
 
 
 
89
 
90
  # =============================================================================
91
  # Auth
@@ -108,6 +118,8 @@ TASK_PROMPTS = {
108
  "seal": "Seal Recognition:",
109
  }
110
 
 
 
111
 
112
  def save_temp_image(file_data: str) -> str:
113
  """Save base64 or URL image to temp file."""
@@ -134,6 +146,32 @@ def save_temp_image(file_data: str) -> str:
134
  return tmp.name
135
 
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  def element_level_recognition(file_data: str, prompt_label: str) -> Dict[str, Any]:
138
  """Element-level recognition via direct vLLM call."""
139
  if file_data.startswith(("http://", "https://")):
@@ -176,6 +214,7 @@ def full_document_parsing(file_data: str, use_chart_recognition: bool = False,
176
  use_doc_orientation_classify: bool = True) -> Dict[str, Any]:
177
  """Full document parsing with layout detection + VLM recognition."""
178
  tmp_path = save_temp_image(file_data)
 
179
 
180
  try:
181
  pipe = get_pipeline()
@@ -184,24 +223,45 @@ def full_document_parsing(file_data: str, use_chart_recognition: bool = False,
184
  results = []
185
  for i, res in enumerate(output):
186
  output_dir = tempfile.mkdtemp()
 
 
187
  res.save_to_json(save_path=output_dir)
188
  res.save_to_markdown(save_path=output_dir)
189
 
 
 
 
 
 
 
 
190
  md_text = ""
191
  md_files = [f for f in os.listdir(output_dir) if f.endswith(".md")]
192
  if md_files:
193
  with open(os.path.join(output_dir, md_files[0]), "r", encoding="utf-8") as f:
194
  md_text = f.read()
195
 
 
196
  json_data = {}
197
  json_files = [f for f in os.listdir(output_dir) if f.endswith(".json")]
198
  if json_files:
199
  with open(os.path.join(output_dir, json_files[0]), "r", encoding="utf-8") as f:
200
  json_data = json.load(f)
201
 
 
 
 
 
 
 
 
 
 
 
 
202
  results.append({
203
- "markdown": {"text": md_text, "images": {}},
204
- "outputImages": {},
205
  "jsonData": json_data
206
  })
207
 
@@ -293,11 +353,7 @@ async def parse_file(
293
  prompt_label: str = "ocr",
294
  authorization: Optional[str] = Header(None)
295
  ):
296
- """
297
- File upload endpoint.
298
-
299
- curl -X POST https://<space>.hf.space/api/parse -F "file=@document.png"
300
- """
301
  verify_auth(authorization)
302
  content = await file.read()
303
  b64 = base64.b64encode(content).decode("utf-8")
@@ -317,11 +373,7 @@ async def parse_to_markdown(
317
  file: UploadFile = File(...),
318
  authorization: Optional[str] = Header(None)
319
  ):
320
- """
321
- Returns just markdown text.
322
-
323
- curl -X POST https://<space>.hf.space/api/parse/markdown -F "file=@document.png"
324
- """
325
  verify_auth(authorization)
326
  content = await file.read()
327
  b64 = base64.b64encode(content).decode("utf-8")
@@ -369,6 +421,7 @@ if __name__ == "__main__":
369
  β•‘ vLLM backend: {VLLM_SERVER_URL:<44s}β•‘
370
  β•‘ Model: {VLLM_MODEL_NAME:<44s}β•‘
371
  β•‘ Auth: {"ENABLED" if API_KEY else "DISABLED":<44s}β•‘
 
372
  ╠══════════════════════════════════════════════════════════════╣
373
  β•‘ Endpoints: β•‘
374
  β•‘ GET /health - Health check β•‘
@@ -377,6 +430,7 @@ if __name__ == "__main__":
377
  β•‘ POST /api/parse - File upload API β•‘
378
  β•‘ POST /api/parse/markdown - Simple markdown output β•‘
379
  β•‘ POST /v1/chat/completions - vLLM proxy (OpenAI format) β•‘
 
380
  β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
381
  """)
382
- uvicorn.run(app, host="0.0.0.0", port=BRIDGE_PORT)
 
14
  HF Space Settings β†’ Variables and secrets:
15
  VLLM_SERVER_URL = http://117.54.141.62:8000/v1
16
  API_KEY = (optional, for auth)
 
 
 
 
 
 
17
  """
18
 
19
  import base64
20
  import json
21
  import os
22
+ import shutil
23
  import tempfile
24
  import traceback
25
+ import uuid
26
  from typing import Any, Dict, Optional
27
 
28
  import uvicorn
29
  from fastapi import FastAPI, File, Header, HTTPException, Request, UploadFile
30
  from fastapi.middleware.cors import CORSMiddleware
31
+ from fastapi.staticfiles import StaticFiles
32
  from openai import OpenAI
33
 
34
  # =============================================================================
 
36
  # =============================================================================
37
  VLLM_SERVER_URL = os.environ.get("VLLM_SERVER_URL", "http://117.54.141.62:8000/v1")
38
  VLLM_MODEL_NAME = os.environ.get("VLLM_MODEL_NAME", "PaddleOCR-VL-1.5-0.9B")
39
+ BRIDGE_PORT = int(os.environ.get("PORT", "7860"))
40
  API_KEY = os.environ.get("API_KEY", "")
41
+ # Public base URL for serving static files (auto-detect from HF Space)
42
+ SPACE_HOST = os.environ.get("SPACE_HOST", "")
43
+ if SPACE_HOST:
44
+ PUBLIC_BASE_URL = f"https://{SPACE_HOST}"
45
+ else:
46
+ PUBLIC_BASE_URL = os.environ.get("PUBLIC_BASE_URL", f"http://localhost:{BRIDGE_PORT}")
47
+
48
+ # Directory to store and serve output images
49
+ STATIC_DIR = "/tmp/ocr_outputs"
50
+ os.makedirs(STATIC_DIR, exist_ok=True)
51
 
52
  # =============================================================================
53
+ # Initialize OpenAI client
54
  # =============================================================================
55
  openai_client = OpenAI(
56
  api_key="EMPTY",
 
59
  )
60
 
61
  # =============================================================================
62
+ # PaddleOCR pipeline
63
  # =============================================================================
64
  pipeline = None
65
 
 
93
  allow_headers=["*"],
94
  )
95
 
96
+ # Serve static files (output images)
97
+ app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
98
+
99
 
100
  # =============================================================================
101
  # Auth
 
118
  "seal": "Seal Recognition:",
119
  }
120
 
121
+ IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}
122
+
123
 
124
  def save_temp_image(file_data: str) -> str:
125
  """Save base64 or URL image to temp file."""
 
146
  return tmp.name
147
 
148
 
149
def collect_output_images(output_dir: str, request_id: str) -> Dict[str, str]:
    """
    Collect image files produced by the OCR pipeline and publish them.

    Walks ``output_dir`` recursively, copies every file whose extension is in
    ``IMAGE_EXTENSIONS`` into a per-request subdirectory of ``STATIC_DIR``
    (served by the app's ``/static`` mount), and returns a mapping of
    ``{filename: public_url}``.

    Args:
        output_dir: Directory the pipeline wrote its outputs to.
        request_id: Unique id used to namespace this request's static files.

    Returns:
        Dict mapping each image filename to its publicly reachable URL.
        Empty if ``output_dir`` does not exist or contains no images.
    """
    output_images: Dict[str, str] = {}
    if not os.path.exists(output_dir):
        return output_images

    # Namespace this request's files so concurrent requests cannot collide.
    static_subdir = os.path.join(STATIC_DIR, request_id)
    os.makedirs(static_subdir, exist_ok=True)

    for root, _dirs, files in os.walk(output_dir):
        for filename in files:
            ext = os.path.splitext(filename)[1].lower()
            if ext in IMAGE_EXTENSIONS:
                src_path = os.path.join(root, filename)
                dst_path = os.path.join(static_subdir, filename)
                shutil.copy2(src_path, dst_path)
                # BUG FIX: the URL previously ended in a literal placeholder
                # instead of the filename, so every entry pointed at the same
                # non-existent path. Include the actual filename.
                public_url = f"{PUBLIC_BASE_URL}/static/{request_id}/{filename}"
                output_images[filename] = public_url

    return output_images
173
+
174
+
175
  def element_level_recognition(file_data: str, prompt_label: str) -> Dict[str, Any]:
176
  """Element-level recognition via direct vLLM call."""
177
  if file_data.startswith(("http://", "https://")):
 
214
  use_doc_orientation_classify: bool = True) -> Dict[str, Any]:
215
  """Full document parsing with layout detection + VLM recognition."""
216
  tmp_path = save_temp_image(file_data)
217
+ request_id = str(uuid.uuid4())[:12]
218
 
219
  try:
220
  pipe = get_pipeline()
 
223
  results = []
224
  for i, res in enumerate(output):
225
  output_dir = tempfile.mkdtemp()
226
+
227
+ # Save all outputs (json, markdown, images)
228
  res.save_to_json(save_path=output_dir)
229
  res.save_to_markdown(save_path=output_dir)
230
 
231
+ # Try to save visualization image
232
+ try:
233
+ res.save_to_img(save_path=output_dir)
234
+ except Exception:
235
+ pass
236
+
237
+ # Read markdown
238
  md_text = ""
239
  md_files = [f for f in os.listdir(output_dir) if f.endswith(".md")]
240
  if md_files:
241
  with open(os.path.join(output_dir, md_files[0]), "r", encoding="utf-8") as f:
242
  md_text = f.read()
243
 
244
+ # Read JSON
245
  json_data = {}
246
  json_files = [f for f in os.listdir(output_dir) if f.endswith(".json")]
247
  if json_files:
248
  with open(os.path.join(output_dir, json_files[0]), "r", encoding="utf-8") as f:
249
  json_data = json.load(f)
250
 
251
+ # Collect and serve output images
252
+ page_request_id = f"{request_id}_page{i}"
253
+ output_images = collect_output_images(output_dir, page_request_id)
254
+
255
+ # Also check for images referenced in markdown
256
+ md_images = {}
257
+ for fname, url in output_images.items():
258
+ # Replace local paths in markdown with public URLs
259
+ md_text = md_text.replace(fname, url)
260
+ md_images[fname] = url
261
+
262
  results.append({
263
+ "markdown": {"text": md_text, "images": md_images},
264
+ "outputImages": output_images,
265
  "jsonData": json_data
266
  })
267
 
 
353
  prompt_label: str = "ocr",
354
  authorization: Optional[str] = Header(None)
355
  ):
356
+ """File upload endpoint."""
 
 
 
 
357
  verify_auth(authorization)
358
  content = await file.read()
359
  b64 = base64.b64encode(content).decode("utf-8")
 
373
  file: UploadFile = File(...),
374
  authorization: Optional[str] = Header(None)
375
  ):
376
+ """Returns just markdown text."""
 
 
 
 
377
  verify_auth(authorization)
378
  content = await file.read()
379
  b64 = base64.b64encode(content).decode("utf-8")
 
421
  β•‘ vLLM backend: {VLLM_SERVER_URL:<44s}β•‘
422
  β•‘ Model: {VLLM_MODEL_NAME:<44s}β•‘
423
  β•‘ Auth: {"ENABLED" if API_KEY else "DISABLED":<44s}β•‘
424
+ β•‘ Static URL: {PUBLIC_BASE_URL:<44s}β•‘
425
  ╠══════════════════════════════════════════════════════════════╣
426
  β•‘ Endpoints: β•‘
427
  β•‘ GET /health - Health check β•‘
 
430
  β•‘ POST /api/parse - File upload API β•‘
431
  β•‘ POST /api/parse/markdown - Simple markdown output β•‘
432
  β•‘ POST /v1/chat/completions - vLLM proxy (OpenAI format) β•‘
433
+ β•‘ GET /static/... - Output images β•‘
434
  β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
435
  """)
436
+ uvicorn.run(app, host="0.0.0.0", port=BRIDGE_PORT)