Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,24 +14,21 @@ Architecture:
|
|
| 14 |
HF Space Settings β Variables and secrets:
|
| 15 |
VLLM_SERVER_URL = http://117.54.141.62:8000/v1
|
| 16 |
API_KEY = (optional, for auth)
|
| 17 |
-
|
| 18 |
-
Your GPU Server:
|
| 19 |
-
docker run --rm --gpus all -p 8000:8000 -v ~/.cache/paddleocr:/root/.cache ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleocr-genai-vllm-server:latest-nvidia-gpu paddleocr genai_server --model_name PaddleOCR-VL-1.5-0.9B --host 0.0.0.0 --port 8000 --backend vllm
|
| 20 |
-
|
| 21 |
-
Gradio App HF Space env:
|
| 22 |
-
API_URL = https://<your-bridge-space>.hf.space/api/ocr
|
| 23 |
"""
|
| 24 |
|
| 25 |
import base64
|
| 26 |
import json
|
| 27 |
import os
|
|
|
|
| 28 |
import tempfile
|
| 29 |
import traceback
|
|
|
|
| 30 |
from typing import Any, Dict, Optional
|
| 31 |
|
| 32 |
import uvicorn
|
| 33 |
from fastapi import FastAPI, File, Header, HTTPException, Request, UploadFile
|
| 34 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 35 |
from openai import OpenAI
|
| 36 |
|
| 37 |
# =============================================================================
|
|
@@ -39,11 +36,21 @@ from openai import OpenAI
|
|
| 39 |
# =============================================================================
|
| 40 |
VLLM_SERVER_URL = os.environ.get("VLLM_SERVER_URL", "http://117.54.141.62:8000/v1")
|
| 41 |
VLLM_MODEL_NAME = os.environ.get("VLLM_MODEL_NAME", "PaddleOCR-VL-1.5-0.9B")
|
| 42 |
-
BRIDGE_PORT = int(os.environ.get("PORT", "7860"))
|
| 43 |
API_KEY = os.environ.get("API_KEY", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# =============================================================================
|
| 46 |
-
# Initialize OpenAI client
|
| 47 |
# =============================================================================
|
| 48 |
openai_client = OpenAI(
|
| 49 |
api_key="EMPTY",
|
|
@@ -52,7 +59,7 @@ openai_client = OpenAI(
|
|
| 52 |
)
|
| 53 |
|
| 54 |
# =============================================================================
|
| 55 |
-
# PaddleOCR pipeline
|
| 56 |
# =============================================================================
|
| 57 |
pipeline = None
|
| 58 |
|
|
@@ -86,6 +93,9 @@ app.add_middleware(
|
|
| 86 |
allow_headers=["*"],
|
| 87 |
)
|
| 88 |
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
# =============================================================================
|
| 91 |
# Auth
|
|
@@ -108,6 +118,8 @@ TASK_PROMPTS = {
|
|
| 108 |
"seal": "Seal Recognition:",
|
| 109 |
}
|
| 110 |
|
|
|
|
|
|
|
| 111 |
|
| 112 |
def save_temp_image(file_data: str) -> str:
|
| 113 |
"""Save base64 or URL image to temp file."""
|
|
@@ -134,6 +146,32 @@ def save_temp_image(file_data: str) -> str:
|
|
| 134 |
return tmp.name
|
| 135 |
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
def element_level_recognition(file_data: str, prompt_label: str) -> Dict[str, Any]:
|
| 138 |
"""Element-level recognition via direct vLLM call."""
|
| 139 |
if file_data.startswith(("http://", "https://")):
|
|
@@ -176,6 +214,7 @@ def full_document_parsing(file_data: str, use_chart_recognition: bool = False,
|
|
| 176 |
use_doc_orientation_classify: bool = True) -> Dict[str, Any]:
|
| 177 |
"""Full document parsing with layout detection + VLM recognition."""
|
| 178 |
tmp_path = save_temp_image(file_data)
|
|
|
|
| 179 |
|
| 180 |
try:
|
| 181 |
pipe = get_pipeline()
|
|
@@ -184,24 +223,45 @@ def full_document_parsing(file_data: str, use_chart_recognition: bool = False,
|
|
| 184 |
results = []
|
| 185 |
for i, res in enumerate(output):
|
| 186 |
output_dir = tempfile.mkdtemp()
|
|
|
|
|
|
|
| 187 |
res.save_to_json(save_path=output_dir)
|
| 188 |
res.save_to_markdown(save_path=output_dir)
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
md_text = ""
|
| 191 |
md_files = [f for f in os.listdir(output_dir) if f.endswith(".md")]
|
| 192 |
if md_files:
|
| 193 |
with open(os.path.join(output_dir, md_files[0]), "r", encoding="utf-8") as f:
|
| 194 |
md_text = f.read()
|
| 195 |
|
|
|
|
| 196 |
json_data = {}
|
| 197 |
json_files = [f for f in os.listdir(output_dir) if f.endswith(".json")]
|
| 198 |
if json_files:
|
| 199 |
with open(os.path.join(output_dir, json_files[0]), "r", encoding="utf-8") as f:
|
| 200 |
json_data = json.load(f)
|
| 201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
results.append({
|
| 203 |
-
"markdown": {"text": md_text, "images":
|
| 204 |
-
"outputImages":
|
| 205 |
"jsonData": json_data
|
| 206 |
})
|
| 207 |
|
|
@@ -293,11 +353,7 @@ async def parse_file(
|
|
| 293 |
prompt_label: str = "ocr",
|
| 294 |
authorization: Optional[str] = Header(None)
|
| 295 |
):
|
| 296 |
-
"""
|
| 297 |
-
File upload endpoint.
|
| 298 |
-
|
| 299 |
-
curl -X POST https://<space>.hf.space/api/parse -F "file=@document.png"
|
| 300 |
-
"""
|
| 301 |
verify_auth(authorization)
|
| 302 |
content = await file.read()
|
| 303 |
b64 = base64.b64encode(content).decode("utf-8")
|
|
@@ -317,11 +373,7 @@ async def parse_to_markdown(
|
|
| 317 |
file: UploadFile = File(...),
|
| 318 |
authorization: Optional[str] = Header(None)
|
| 319 |
):
|
| 320 |
-
"""
|
| 321 |
-
Returns just markdown text.
|
| 322 |
-
|
| 323 |
-
curl -X POST https://<space>.hf.space/api/parse/markdown -F "file=@document.png"
|
| 324 |
-
"""
|
| 325 |
verify_auth(authorization)
|
| 326 |
content = await file.read()
|
| 327 |
b64 = base64.b64encode(content).decode("utf-8")
|
|
@@ -369,6 +421,7 @@ if __name__ == "__main__":
|
|
| 369 |
║ vLLM backend: {VLLM_SERVER_URL:<44s}║
|
| 370 |
║ Model: {VLLM_MODEL_NAME:<44s}║
|
| 371 |
║ Auth: {"ENABLED" if API_KEY else "DISABLED":<44s}║
|
|
|
|
| 372 |
β βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£
|
| 373 |
║ Endpoints: ║
|
| 374 |
║ GET /health - Health check ║
|
|
@@ -377,6 +430,7 @@ if __name__ == "__main__":
|
|
| 377 |
║ POST /api/parse - File upload API ║
|
| 378 |
║ POST /api/parse/markdown - Simple markdown output ║
|
| 379 |
║ POST /v1/chat/completions - vLLM proxy (OpenAI format) ║
|
|
|
|
| 380 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 381 |
""")
|
| 382 |
-
uvicorn.run(app, host="0.0.0.0", port=BRIDGE_PORT)
|
|
|
|
| 14 |
HF Space Settings β Variables and secrets:
|
| 15 |
VLLM_SERVER_URL = http://117.54.141.62:8000/v1
|
| 16 |
API_KEY = (optional, for auth)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
"""
|
| 18 |
|
| 19 |
import base64
|
| 20 |
import json
|
| 21 |
import os
|
| 22 |
+
import shutil
|
| 23 |
import tempfile
|
| 24 |
import traceback
|
| 25 |
+
import uuid
|
| 26 |
from typing import Any, Dict, Optional
|
| 27 |
|
| 28 |
import uvicorn
|
| 29 |
from fastapi import FastAPI, File, Header, HTTPException, Request, UploadFile
|
| 30 |
from fastapi.middleware.cors import CORSMiddleware
|
| 31 |
+
from fastapi.staticfiles import StaticFiles
|
| 32 |
from openai import OpenAI
|
| 33 |
|
| 34 |
# =============================================================================
|
|
|
|
| 36 |
# =============================================================================
|
| 37 |
VLLM_SERVER_URL = os.environ.get("VLLM_SERVER_URL", "http://117.54.141.62:8000/v1")
|
| 38 |
VLLM_MODEL_NAME = os.environ.get("VLLM_MODEL_NAME", "PaddleOCR-VL-1.5-0.9B")
|
| 39 |
+
BRIDGE_PORT = int(os.environ.get("PORT", "7860"))
|
| 40 |
API_KEY = os.environ.get("API_KEY", "")
|
| 41 |
+
# Public base URL for serving static files (auto-detect from HF Space)
|
| 42 |
+
SPACE_HOST = os.environ.get("SPACE_HOST", "")
|
| 43 |
+
if SPACE_HOST:
|
| 44 |
+
PUBLIC_BASE_URL = f"https://{SPACE_HOST}"
|
| 45 |
+
else:
|
| 46 |
+
PUBLIC_BASE_URL = os.environ.get("PUBLIC_BASE_URL", f"http://localhost:{BRIDGE_PORT}")
|
| 47 |
+
|
| 48 |
+
# Directory to store and serve output images
|
| 49 |
+
STATIC_DIR = "/tmp/ocr_outputs"
|
| 50 |
+
os.makedirs(STATIC_DIR, exist_ok=True)
|
| 51 |
|
| 52 |
# =============================================================================
|
| 53 |
+
# Initialize OpenAI client
|
| 54 |
# =============================================================================
|
| 55 |
openai_client = OpenAI(
|
| 56 |
api_key="EMPTY",
|
|
|
|
| 59 |
)
|
| 60 |
|
| 61 |
# =============================================================================
|
| 62 |
+
# PaddleOCR pipeline
|
| 63 |
# =============================================================================
|
| 64 |
pipeline = None
|
| 65 |
|
|
|
|
| 93 |
allow_headers=["*"],
|
| 94 |
)
|
| 95 |
|
| 96 |
+
# Serve static files (output images)
|
| 97 |
+
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
| 98 |
+
|
| 99 |
|
| 100 |
# =============================================================================
|
| 101 |
# Auth
|
|
|
|
| 118 |
"seal": "Seal Recognition:",
|
| 119 |
}
|
| 120 |
|
| 121 |
+
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}
|
| 122 |
+
|
| 123 |
|
| 124 |
def save_temp_image(file_data: str) -> str:
|
| 125 |
"""Save base64 or URL image to temp file."""
|
|
|
|
| 146 |
return tmp.name
|
| 147 |
|
| 148 |
|
| 149 |
+
def collect_output_images(output_dir: str, request_id: str) -> Dict[str, str]:
|
| 150 |
+
"""
|
| 151 |
+
Find all image files in the output directory,
|
| 152 |
+
copy them to the static dir, and return a dict of {name: public_url}.
|
| 153 |
+
"""
|
| 154 |
+
output_images = {}
|
| 155 |
+
if not os.path.exists(output_dir):
|
| 156 |
+
return output_images
|
| 157 |
+
|
| 158 |
+
# Create a subdirectory for this request
|
| 159 |
+
static_subdir = os.path.join(STATIC_DIR, request_id)
|
| 160 |
+
os.makedirs(static_subdir, exist_ok=True)
|
| 161 |
+
|
| 162 |
+
for root, dirs, files in os.walk(output_dir):
|
| 163 |
+
for filename in files:
|
| 164 |
+
ext = os.path.splitext(filename)[1].lower()
|
| 165 |
+
if ext in IMAGE_EXTENSIONS:
|
| 166 |
+
src_path = os.path.join(root, filename)
|
| 167 |
+
dst_path = os.path.join(static_subdir, filename)
|
| 168 |
+
shutil.copy2(src_path, dst_path)
|
| 169 |
+
public_url = f"{PUBLIC_BASE_URL}/static/{request_id}/{filename}"
|
| 170 |
+
output_images[filename] = public_url
|
| 171 |
+
|
| 172 |
+
return output_images
|
| 173 |
+
|
| 174 |
+
|
| 175 |
def element_level_recognition(file_data: str, prompt_label: str) -> Dict[str, Any]:
|
| 176 |
"""Element-level recognition via direct vLLM call."""
|
| 177 |
if file_data.startswith(("http://", "https://")):
|
|
|
|
| 214 |
use_doc_orientation_classify: bool = True) -> Dict[str, Any]:
|
| 215 |
"""Full document parsing with layout detection + VLM recognition."""
|
| 216 |
tmp_path = save_temp_image(file_data)
|
| 217 |
+
request_id = str(uuid.uuid4())[:12]
|
| 218 |
|
| 219 |
try:
|
| 220 |
pipe = get_pipeline()
|
|
|
|
| 223 |
results = []
|
| 224 |
for i, res in enumerate(output):
|
| 225 |
output_dir = tempfile.mkdtemp()
|
| 226 |
+
|
| 227 |
+
# Save all outputs (json, markdown, images)
|
| 228 |
res.save_to_json(save_path=output_dir)
|
| 229 |
res.save_to_markdown(save_path=output_dir)
|
| 230 |
|
| 231 |
+
# Try to save visualization image
|
| 232 |
+
try:
|
| 233 |
+
res.save_to_img(save_path=output_dir)
|
| 234 |
+
except Exception:
|
| 235 |
+
pass
|
| 236 |
+
|
| 237 |
+
# Read markdown
|
| 238 |
md_text = ""
|
| 239 |
md_files = [f for f in os.listdir(output_dir) if f.endswith(".md")]
|
| 240 |
if md_files:
|
| 241 |
with open(os.path.join(output_dir, md_files[0]), "r", encoding="utf-8") as f:
|
| 242 |
md_text = f.read()
|
| 243 |
|
| 244 |
+
# Read JSON
|
| 245 |
json_data = {}
|
| 246 |
json_files = [f for f in os.listdir(output_dir) if f.endswith(".json")]
|
| 247 |
if json_files:
|
| 248 |
with open(os.path.join(output_dir, json_files[0]), "r", encoding="utf-8") as f:
|
| 249 |
json_data = json.load(f)
|
| 250 |
|
| 251 |
+
# Collect and serve output images
|
| 252 |
+
page_request_id = f"{request_id}_page{i}"
|
| 253 |
+
output_images = collect_output_images(output_dir, page_request_id)
|
| 254 |
+
|
| 255 |
+
# Also check for images referenced in markdown
|
| 256 |
+
md_images = {}
|
| 257 |
+
for fname, url in output_images.items():
|
| 258 |
+
# Replace local paths in markdown with public URLs
|
| 259 |
+
md_text = md_text.replace(fname, url)
|
| 260 |
+
md_images[fname] = url
|
| 261 |
+
|
| 262 |
results.append({
|
| 263 |
+
"markdown": {"text": md_text, "images": md_images},
|
| 264 |
+
"outputImages": output_images,
|
| 265 |
"jsonData": json_data
|
| 266 |
})
|
| 267 |
|
|
|
|
| 353 |
prompt_label: str = "ocr",
|
| 354 |
authorization: Optional[str] = Header(None)
|
| 355 |
):
|
| 356 |
+
"""File upload endpoint."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
verify_auth(authorization)
|
| 358 |
content = await file.read()
|
| 359 |
b64 = base64.b64encode(content).decode("utf-8")
|
|
|
|
| 373 |
file: UploadFile = File(...),
|
| 374 |
authorization: Optional[str] = Header(None)
|
| 375 |
):
|
| 376 |
+
"""Returns just markdown text."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
verify_auth(authorization)
|
| 378 |
content = await file.read()
|
| 379 |
b64 = base64.b64encode(content).decode("utf-8")
|
|
|
|
| 421 |
║ vLLM backend: {VLLM_SERVER_URL:<44s}║
|
| 422 |
║ Model: {VLLM_MODEL_NAME:<44s}║
|
| 423 |
║ Auth: {"ENABLED" if API_KEY else "DISABLED":<44s}║
|
| 424 |
+
║ Static URL: {PUBLIC_BASE_URL:<44s}║
|
| 425 |
β βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£
|
| 426 |
β Endpoints: β
|
| 427 |
β GET /health - Health check β
|
|
|
|
| 430 |
║ POST /api/parse - File upload API ║
|
| 431 |
║ POST /api/parse/markdown - Simple markdown output ║
|
| 432 |
║ POST /v1/chat/completions - vLLM proxy (OpenAI format) ║
|
| 433 |
+
║ GET /static/... - Output images ║
|
| 434 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 435 |
""")
|
| 436 |
+
uvicorn.run(app, host="0.0.0.0", port=BRIDGE_PORT)
|