Spaces:
Paused
Paused
Add concurrent processing support using thread pool executor
Browse files
api.py
CHANGED
|
@@ -9,6 +9,8 @@ import tempfile
|
|
| 9 |
from pathlib import Path
|
| 10 |
from typing import Optional
|
| 11 |
from urllib.parse import urlparse
|
|
|
|
|
|
|
| 12 |
|
| 13 |
from fastapi import FastAPI, File, UploadFile, HTTPException, Query
|
| 14 |
from fastapi.responses import JSONResponse
|
|
@@ -33,13 +35,18 @@ app.add_middleware(
|
|
| 33 |
allow_headers=["*"],
|
| 34 |
)
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
@app.on_event("startup")
|
| 38 |
async def startup_event():
|
| 39 |
"""Authenticate with Hugging Face and pre-load models if possible."""
|
| 40 |
# Authenticate with Hugging Face if token is available
|
| 41 |
# HF Spaces automatically provides HF_TOKEN, but we also check HUGGINGFACE_TOKEN
|
| 42 |
-
hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
|
|
|
|
| 43 |
if hf_token:
|
| 44 |
try:
|
| 45 |
from huggingface_hub import login
|
|
@@ -50,7 +57,7 @@ async def startup_event():
|
|
| 50 |
else:
|
| 51 |
print("⚠ Warning: No HF_TOKEN found. Gated models may not work.")
|
| 52 |
print(" Set HF_TOKEN in Space Settings → Secrets for gated model access.")
|
| 53 |
-
|
| 54 |
# Check if stamp model exists
|
| 55 |
stamp_model_path = Path("stamp_detector/stamp_model.pt")
|
| 56 |
if stamp_model_path.exists():
|
|
@@ -116,15 +123,18 @@ async def process_pdf(
|
|
| 116 |
temp_pdf.write(content)
|
| 117 |
temp_pdf_path = temp_pdf.name
|
| 118 |
|
| 119 |
-
# Process the PDF
|
| 120 |
try:
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
| 128 |
)
|
| 129 |
|
| 130 |
# Return the result as JSON
|
|
@@ -197,15 +207,18 @@ async def process_pdf_advanced(
|
|
| 197 |
temp_pdf.write(content)
|
| 198 |
temp_pdf_path = temp_pdf.name
|
| 199 |
|
| 200 |
-
# Process the PDF
|
| 201 |
try:
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
| 209 |
)
|
| 210 |
|
| 211 |
# Return the result as JSON
|
|
@@ -349,15 +362,18 @@ async def process_pdf_from_url(
|
|
| 349 |
detail=f"Error fetching PDF from URL: {str(e)}"
|
| 350 |
)
|
| 351 |
|
| 352 |
-
# Process the PDF
|
| 353 |
try:
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
|
|
|
|
|
|
|
|
|
| 361 |
)
|
| 362 |
|
| 363 |
# Return the result as JSON
|
|
|
|
| 9 |
from pathlib import Path
|
| 10 |
from typing import Optional
|
| 11 |
from urllib.parse import urlparse
|
| 12 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 13 |
+
import asyncio
|
| 14 |
|
| 15 |
from fastapi import FastAPI, File, UploadFile, HTTPException, Query
|
| 16 |
from fastapi.responses import JSONResponse
|
|
|
|
| 35 |
allow_headers=["*"],
|
| 36 |
)
|
| 37 |
|
| 38 |
+
# Thread pool executor for running blocking CPU/GPU operations concurrently
|
| 39 |
+
# This allows multiple PDFs to be processed in parallel
|
| 40 |
+
executor = ThreadPoolExecutor(max_workers=4) # Adjust based on your GPU/CPU capacity
|
| 41 |
+
|
| 42 |
|
| 43 |
@app.on_event("startup")
|
| 44 |
async def startup_event():
|
| 45 |
"""Authenticate with Hugging Face and pre-load models if possible."""
|
| 46 |
# Authenticate with Hugging Face if token is available
|
| 47 |
# HF Spaces automatically provides HF_TOKEN, but we also check HUGGINGFACE_TOKEN
|
| 48 |
+
hf_token = os.environ.get(
|
| 49 |
+
"HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
|
| 50 |
if hf_token:
|
| 51 |
try:
|
| 52 |
from huggingface_hub import login
|
|
|
|
| 57 |
else:
|
| 58 |
print("⚠ Warning: No HF_TOKEN found. Gated models may not work.")
|
| 59 |
print(" Set HF_TOKEN in Space Settings → Secrets for gated model access.")
|
| 60 |
+
|
| 61 |
# Check if stamp model exists
|
| 62 |
stamp_model_path = Path("stamp_detector/stamp_model.pt")
|
| 63 |
if stamp_model_path.exists():
|
|
|
|
| 123 |
temp_pdf.write(content)
|
| 124 |
temp_pdf_path = temp_pdf.name
|
| 125 |
|
| 126 |
+
# Process the PDF in a thread pool to allow concurrent requests
|
| 127 |
try:
|
| 128 |
+
loop = asyncio.get_event_loop()
|
| 129 |
+
result = await loop.run_in_executor(
|
| 130 |
+
executor,
|
| 131 |
+
process_pdf_pipeline,
|
| 132 |
+
temp_pdf_path,
|
| 133 |
+
tempfile.gettempdir(), # Use temp directory
|
| 134 |
+
"stamp_detector/stamp_model.pt",
|
| 135 |
+
stamp_conf,
|
| 136 |
+
dpi,
|
| 137 |
+
False # save_intermediate
|
| 138 |
)
|
| 139 |
|
| 140 |
# Return the result as JSON
|
|
|
|
| 207 |
temp_pdf.write(content)
|
| 208 |
temp_pdf_path = temp_pdf.name
|
| 209 |
|
| 210 |
+
# Process the PDF in a thread pool to allow concurrent requests
|
| 211 |
try:
|
| 212 |
+
loop = asyncio.get_event_loop()
|
| 213 |
+
result = await loop.run_in_executor(
|
| 214 |
+
executor,
|
| 215 |
+
process_pdf_pipeline,
|
| 216 |
+
temp_pdf_path,
|
| 217 |
+
tempfile.gettempdir(), # Use temp directory
|
| 218 |
+
stamp_model_path,
|
| 219 |
+
stamp_conf,
|
| 220 |
+
dpi,
|
| 221 |
+
False # save_intermediate
|
| 222 |
)
|
| 223 |
|
| 224 |
# Return the result as JSON
|
|
|
|
| 362 |
detail=f"Error fetching PDF from URL: {str(e)}"
|
| 363 |
)
|
| 364 |
|
| 365 |
+
# Process the PDF in a thread pool to allow concurrent requests
|
| 366 |
try:
|
| 367 |
+
loop = asyncio.get_event_loop()
|
| 368 |
+
result = await loop.run_in_executor(
|
| 369 |
+
executor,
|
| 370 |
+
process_pdf_pipeline,
|
| 371 |
+
temp_pdf_path,
|
| 372 |
+
tempfile.gettempdir(),
|
| 373 |
+
stamp_model_path,
|
| 374 |
+
stamp_conf,
|
| 375 |
+
dpi,
|
| 376 |
+
False # save_intermediate
|
| 377 |
)
|
| 378 |
|
| 379 |
# Return the result as JSON
|