devappsmi committed on
Commit
84ef6a4
Β·
verified Β·
1 Parent(s): 3b79541

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -22
app.py CHANGED
@@ -14,24 +14,21 @@ Architecture:
14
  HF Space Settings β†’ Variables and secrets:
15
  VLLM_SERVER_URL = http://117.54.141.62:8000/v1
16
  API_KEY = (optional, for auth)
17
-
18
- Your GPU Server:
19
- docker run --rm --gpus all -p 8000:8000 -v ~/.cache/paddleocr:/root/.cache ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleocr-genai-vllm-server:latest-nvidia-gpu paddleocr genai_server --model_name PaddleOCR-VL-1.5-0.9B --host 0.0.0.0 --port 8000 --backend vllm
20
-
21
- Gradio App HF Space env:
22
- API_URL = https://<your-bridge-space>.hf.space/api/ocr
23
  """
24
 
25
  import base64
26
  import json
27
  import os
 
28
  import tempfile
29
  import traceback
 
30
  from typing import Any, Dict, Optional
31
 
32
  import uvicorn
33
  from fastapi import FastAPI, File, Header, HTTPException, Request, UploadFile
34
  from fastapi.middleware.cors import CORSMiddleware
 
35
  from openai import OpenAI
36
 
37
  # =============================================================================
@@ -39,11 +36,21 @@ from openai import OpenAI
39
  # =============================================================================
40
  VLLM_SERVER_URL = os.environ.get("VLLM_SERVER_URL", "http://117.54.141.62:8000/v1")
41
  VLLM_MODEL_NAME = os.environ.get("VLLM_MODEL_NAME", "PaddleOCR-VL-1.5-0.9B")
42
- BRIDGE_PORT = int(os.environ.get("PORT", "7860")) # HF Spaces default port
43
  API_KEY = os.environ.get("API_KEY", "")
 
 
 
 
 
 
 
 
 
 
44
 
45
  # =============================================================================
46
- # Initialize OpenAI client (for element-level recognition)
47
  # =============================================================================
48
  openai_client = OpenAI(
49
  api_key="EMPTY",
@@ -52,7 +59,7 @@ openai_client = OpenAI(
52
  )
53
 
54
  # =============================================================================
55
- # PaddleOCR pipeline (for full document parsing with layout detection)
56
  # =============================================================================
57
  pipeline = None
58
 
@@ -86,6 +93,9 @@ app.add_middleware(
86
  allow_headers=["*"],
87
  )
88
 
 
 
 
89
 
90
  # =============================================================================
91
  # Auth
@@ -108,6 +118,8 @@ TASK_PROMPTS = {
108
  "seal": "Seal Recognition:",
109
  }
110
 
 
 
111
 
112
  def save_temp_image(file_data: str) -> str:
113
  """Save base64 or URL image to temp file."""
@@ -134,6 +146,32 @@ def save_temp_image(file_data: str) -> str:
134
  return tmp.name
135
 
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  def element_level_recognition(file_data: str, prompt_label: str) -> Dict[str, Any]:
138
  """Element-level recognition via direct vLLM call."""
139
  if file_data.startswith(("http://", "https://")):
@@ -176,6 +214,7 @@ def full_document_parsing(file_data: str, use_chart_recognition: bool = False,
176
  use_doc_orientation_classify: bool = True) -> Dict[str, Any]:
177
  """Full document parsing with layout detection + VLM recognition."""
178
  tmp_path = save_temp_image(file_data)
 
179
 
180
  try:
181
  pipe = get_pipeline()
@@ -184,24 +223,45 @@ def full_document_parsing(file_data: str, use_chart_recognition: bool = False,
184
  results = []
185
  for i, res in enumerate(output):
186
  output_dir = tempfile.mkdtemp()
 
 
187
  res.save_to_json(save_path=output_dir)
188
  res.save_to_markdown(save_path=output_dir)
189
 
 
 
 
 
 
 
 
190
  md_text = ""
191
  md_files = [f for f in os.listdir(output_dir) if f.endswith(".md")]
192
  if md_files:
193
  with open(os.path.join(output_dir, md_files[0]), "r", encoding="utf-8") as f:
194
  md_text = f.read()
195
 
 
196
  json_data = {}
197
  json_files = [f for f in os.listdir(output_dir) if f.endswith(".json")]
198
  if json_files:
199
  with open(os.path.join(output_dir, json_files[0]), "r", encoding="utf-8") as f:
200
  json_data = json.load(f)
201
 
 
 
 
 
 
 
 
 
 
 
 
202
  results.append({
203
- "markdown": {"text": md_text, "images": {}},
204
- "outputImages": {},
205
  "jsonData": json_data
206
  })
207
 
@@ -293,11 +353,7 @@ async def parse_file(
293
  prompt_label: str = "ocr",
294
  authorization: Optional[str] = Header(None)
295
  ):
296
- """
297
- File upload endpoint.
298
-
299
- curl -X POST https://<space>.hf.space/api/parse -F "file=@document.png"
300
- """
301
  verify_auth(authorization)
302
  content = await file.read()
303
  b64 = base64.b64encode(content).decode("utf-8")
@@ -317,11 +373,7 @@ async def parse_to_markdown(
317
  file: UploadFile = File(...),
318
  authorization: Optional[str] = Header(None)
319
  ):
320
- """
321
- Returns just markdown text.
322
-
323
- curl -X POST https://<space>.hf.space/api/parse/markdown -F "file=@document.png"
324
- """
325
  verify_auth(authorization)
326
  content = await file.read()
327
  b64 = base64.b64encode(content).decode("utf-8")
@@ -369,6 +421,7 @@ if __name__ == "__main__":
369
  β•‘ vLLM backend: {VLLM_SERVER_URL:<44s}β•‘
370
  β•‘ Model: {VLLM_MODEL_NAME:<44s}β•‘
371
  β•‘ Auth: {"ENABLED" if API_KEY else "DISABLED":<44s}β•‘
 
372
  ╠══════════════════════════════════════════════════════════════╣
373
  β•‘ Endpoints: β•‘
374
  β•‘ GET /health - Health check β•‘
@@ -377,6 +430,7 @@ if __name__ == "__main__":
377
  β•‘ POST /api/parse - File upload API β•‘
378
  β•‘ POST /api/parse/markdown - Simple markdown output β•‘
379
  β•‘ POST /v1/chat/completions - vLLM proxy (OpenAI format) β•‘
 
380
  β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
381
  """)
382
- uvicorn.run(app, host="0.0.0.0", port=BRIDGE_PORT)
 
14
  HF Space Settings β†’ Variables and secrets:
15
  VLLM_SERVER_URL = http://117.54.141.62:8000/v1
16
  API_KEY = (optional, for auth)
 
 
 
 
 
 
17
  """
18
 
19
  import base64
20
  import json
21
  import os
22
+ import shutil
23
  import tempfile
24
  import traceback
25
+ import uuid
26
  from typing import Any, Dict, Optional
27
 
28
  import uvicorn
29
  from fastapi import FastAPI, File, Header, HTTPException, Request, UploadFile
30
  from fastapi.middleware.cors import CORSMiddleware
31
+ from fastapi.staticfiles import StaticFiles
32
  from openai import OpenAI
33
 
34
  # =============================================================================
 
36
  # =============================================================================
37
  VLLM_SERVER_URL = os.environ.get("VLLM_SERVER_URL", "http://117.54.141.62:8000/v1")
38
  VLLM_MODEL_NAME = os.environ.get("VLLM_MODEL_NAME", "PaddleOCR-VL-1.5-0.9B")
39
+ BRIDGE_PORT = int(os.environ.get("PORT", "7860"))
40
  API_KEY = os.environ.get("API_KEY", "")
41
+ # Public base URL for serving static files (auto-detect from HF Space)
42
+ SPACE_HOST = os.environ.get("SPACE_HOST", "")
43
+ if SPACE_HOST:
44
+ PUBLIC_BASE_URL = f"https://{SPACE_HOST}"
45
+ else:
46
+ PUBLIC_BASE_URL = os.environ.get("PUBLIC_BASE_URL", f"http://localhost:{BRIDGE_PORT}")
47
+
48
+ # Directory to store and serve output images
49
+ STATIC_DIR = "/tmp/ocr_outputs"
50
+ os.makedirs(STATIC_DIR, exist_ok=True)
51
 
52
  # =============================================================================
53
+ # Initialize OpenAI client
54
  # =============================================================================
55
  openai_client = OpenAI(
56
  api_key="EMPTY",
 
59
  )
60
 
61
  # =============================================================================
62
+ # PaddleOCR pipeline
63
  # =============================================================================
64
  pipeline = None
65
 
 
93
  allow_headers=["*"],
94
  )
95
 
96
+ # Serve static files (output images)
97
+ app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
98
+
99
 
100
  # =============================================================================
101
  # Auth
 
118
  "seal": "Seal Recognition:",
119
  }
120
 
121
+ IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}
122
+
123
 
124
  def save_temp_image(file_data: str) -> str:
125
  """Save base64 or URL image to temp file."""
 
146
  return tmp.name
147
 
148
 
149
def collect_output_images(output_dir: str, request_id: str) -> Dict[str, str]:
    """
    Collect image files produced by the OCR pipeline and publish them.

    Walks ``output_dir`` recursively, copies every file whose extension is in
    ``IMAGE_EXTENSIONS`` into a per-request subdirectory of ``STATIC_DIR``
    (served by the app's ``/static`` mount), and returns a mapping of
    ``{filename: public_url}``.

    Args:
        output_dir: Directory the pipeline wrote its outputs to.
        request_id: Unique id used to namespace this request's static files.

    Returns:
        Dict mapping each image filename to its publicly reachable URL.
        Empty if ``output_dir`` does not exist or contains no images.
    """
    output_images: Dict[str, str] = {}
    if not os.path.exists(output_dir):
        return output_images

    # Namespace this request's files so concurrent requests cannot collide.
    static_subdir = os.path.join(STATIC_DIR, request_id)
    os.makedirs(static_subdir, exist_ok=True)

    for root, _dirs, files in os.walk(output_dir):
        for filename in files:
            ext = os.path.splitext(filename)[1].lower()
            if ext in IMAGE_EXTENSIONS:
                src_path = os.path.join(root, filename)
                dst_path = os.path.join(static_subdir, filename)
                shutil.copy2(src_path, dst_path)
                # BUG FIX: the URL previously ended in a literal placeholder
                # instead of the filename, so every entry pointed at the same
                # non-existent path. Include the actual filename.
                public_url = f"{PUBLIC_BASE_URL}/static/{request_id}/{filename}"
                output_images[filename] = public_url

    return output_images
173
+
174
+
175
  def element_level_recognition(file_data: str, prompt_label: str) -> Dict[str, Any]:
176
  """Element-level recognition via direct vLLM call."""
177
  if file_data.startswith(("http://", "https://")):
 
214
  use_doc_orientation_classify: bool = True) -> Dict[str, Any]:
215
  """Full document parsing with layout detection + VLM recognition."""
216
  tmp_path = save_temp_image(file_data)
217
+ request_id = str(uuid.uuid4())[:12]
218
 
219
  try:
220
  pipe = get_pipeline()
 
223
  results = []
224
  for i, res in enumerate(output):
225
  output_dir = tempfile.mkdtemp()
226
+
227
+ # Save all outputs (json, markdown, images)
228
  res.save_to_json(save_path=output_dir)
229
  res.save_to_markdown(save_path=output_dir)
230
 
231
+ # Try to save visualization image
232
+ try:
233
+ res.save_to_img(save_path=output_dir)
234
+ except Exception:
235
+ pass
236
+
237
+ # Read markdown
238
  md_text = ""
239
  md_files = [f for f in os.listdir(output_dir) if f.endswith(".md")]
240
  if md_files:
241
  with open(os.path.join(output_dir, md_files[0]), "r", encoding="utf-8") as f:
242
  md_text = f.read()
243
 
244
+ # Read JSON
245
  json_data = {}
246
  json_files = [f for f in os.listdir(output_dir) if f.endswith(".json")]
247
  if json_files:
248
  with open(os.path.join(output_dir, json_files[0]), "r", encoding="utf-8") as f:
249
  json_data = json.load(f)
250
 
251
+ # Collect and serve output images
252
+ page_request_id = f"{request_id}_page{i}"
253
+ output_images = collect_output_images(output_dir, page_request_id)
254
+
255
+ # Also check for images referenced in markdown
256
+ md_images = {}
257
+ for fname, url in output_images.items():
258
+ # Replace local paths in markdown with public URLs
259
+ md_text = md_text.replace(fname, url)
260
+ md_images[fname] = url
261
+
262
  results.append({
263
+ "markdown": {"text": md_text, "images": md_images},
264
+ "outputImages": output_images,
265
  "jsonData": json_data
266
  })
267
 
 
353
  prompt_label: str = "ocr",
354
  authorization: Optional[str] = Header(None)
355
  ):
356
+ """File upload endpoint."""
 
 
 
 
357
  verify_auth(authorization)
358
  content = await file.read()
359
  b64 = base64.b64encode(content).decode("utf-8")
 
373
  file: UploadFile = File(...),
374
  authorization: Optional[str] = Header(None)
375
  ):
376
+ """Returns just markdown text."""
 
 
 
 
377
  verify_auth(authorization)
378
  content = await file.read()
379
  b64 = base64.b64encode(content).decode("utf-8")
 
421
  β•‘ vLLM backend: {VLLM_SERVER_URL:<44s}β•‘
422
  β•‘ Model: {VLLM_MODEL_NAME:<44s}β•‘
423
  β•‘ Auth: {"ENABLED" if API_KEY else "DISABLED":<44s}β•‘
424
+ β•‘ Static URL: {PUBLIC_BASE_URL:<44s}β•‘
425
  ╠══════════════════════════════════════════════════════════════╣
426
  β•‘ Endpoints: β•‘
427
  β•‘ GET /health - Health check β•‘
 
430
  β•‘ POST /api/parse - File upload API β•‘
431
  β•‘ POST /api/parse/markdown - Simple markdown output β•‘
432
  β•‘ POST /v1/chat/completions - vLLM proxy (OpenAI format) β•‘
433
+ β•‘ GET /static/... - Output images β•‘
434
  β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
435
  """)
436
+ uvicorn.run(app, host="0.0.0.0", port=BRIDGE_PORT)