bekzhanK1 committed on
Commit
fd9c6ee
·
1 Parent(s): d0e8746

Add concurrent processing support using thread pool executor

Browse files
Files changed (1) hide show
  1. api.py +42 -26
api.py CHANGED
@@ -9,6 +9,8 @@ import tempfile
9
  from pathlib import Path
10
  from typing import Optional
11
  from urllib.parse import urlparse
 
 
12
 
13
  from fastapi import FastAPI, File, UploadFile, HTTPException, Query
14
  from fastapi.responses import JSONResponse
@@ -33,13 +35,18 @@ app.add_middleware(
33
  allow_headers=["*"],
34
  )
35
 
 
 
 
 
36
 
37
  @app.on_event("startup")
38
  async def startup_event():
39
  """Authenticate with Hugging Face and pre-load models if possible."""
40
  # Authenticate with Hugging Face if token is available
41
  # HF Spaces automatically provides HF_TOKEN, but we also check HUGGINGFACE_TOKEN
42
- hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
 
43
  if hf_token:
44
  try:
45
  from huggingface_hub import login
@@ -50,7 +57,7 @@ async def startup_event():
50
  else:
51
  print("⚠ Warning: No HF_TOKEN found. Gated models may not work.")
52
  print(" Set HF_TOKEN in Space Settings → Secrets for gated model access.")
53
-
54
  # Check if stamp model exists
55
  stamp_model_path = Path("stamp_detector/stamp_model.pt")
56
  if stamp_model_path.exists():
@@ -116,15 +123,18 @@ async def process_pdf(
116
  temp_pdf.write(content)
117
  temp_pdf_path = temp_pdf.name
118
 
119
- # Process the PDF
120
  try:
121
- result = process_pdf_pipeline(
122
- pdf_path=temp_pdf_path,
123
- output_dir=tempfile.gettempdir(), # Use temp directory
124
- stamp_model_path="stamp_detector/stamp_model.pt",
125
- stamp_conf=stamp_conf,
126
- dpi=dpi,
127
- save_intermediate=False
 
 
 
128
  )
129
 
130
  # Return the result as JSON
@@ -197,15 +207,18 @@ async def process_pdf_advanced(
197
  temp_pdf.write(content)
198
  temp_pdf_path = temp_pdf.name
199
 
200
- # Process the PDF
201
  try:
202
- result = process_pdf_pipeline(
203
- pdf_path=temp_pdf_path,
204
- output_dir=tempfile.gettempdir(), # Use temp directory
205
- stamp_model_path=stamp_model_path,
206
- stamp_conf=stamp_conf,
207
- dpi=dpi,
208
- save_intermediate=False
 
 
 
209
  )
210
 
211
  # Return the result as JSON
@@ -349,15 +362,18 @@ async def process_pdf_from_url(
349
  detail=f"Error fetching PDF from URL: {str(e)}"
350
  )
351
 
352
- # Process the PDF
353
  try:
354
- result = process_pdf_pipeline(
355
- pdf_path=temp_pdf_path,
356
- output_dir=tempfile.gettempdir(),
357
- stamp_model_path=stamp_model_path,
358
- stamp_conf=stamp_conf,
359
- dpi=dpi,
360
- save_intermediate=False
 
 
 
361
  )
362
 
363
  # Return the result as JSON
 
9
  from pathlib import Path
10
  from typing import Optional
11
  from urllib.parse import urlparse
12
+ from concurrent.futures import ThreadPoolExecutor
13
+ import asyncio
14
 
15
  from fastapi import FastAPI, File, UploadFile, HTTPException, Query
16
  from fastapi.responses import JSONResponse
 
35
  allow_headers=["*"],
36
  )
37
 
38
+ # Thread pool executor for running blocking CPU/GPU operations concurrently
39
+ # This allows multiple PDFs to be processed in parallel
40
+ executor = ThreadPoolExecutor(max_workers=4) # Adjust based on your GPU/CPU capacity
41
+
42
 
43
  @app.on_event("startup")
44
  async def startup_event():
45
  """Authenticate with Hugging Face and pre-load models if possible."""
46
  # Authenticate with Hugging Face if token is available
47
  # HF Spaces automatically provides HF_TOKEN, but we also check HUGGINGFACE_TOKEN
48
+ hf_token = os.environ.get(
49
+ "HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
50
  if hf_token:
51
  try:
52
  from huggingface_hub import login
 
57
  else:
58
  print("⚠ Warning: No HF_TOKEN found. Gated models may not work.")
59
  print(" Set HF_TOKEN in Space Settings → Secrets for gated model access.")
60
+
61
  # Check if stamp model exists
62
  stamp_model_path = Path("stamp_detector/stamp_model.pt")
63
  if stamp_model_path.exists():
 
123
  temp_pdf.write(content)
124
  temp_pdf_path = temp_pdf.name
125
 
126
+ # Process the PDF in a thread pool to allow concurrent requests
127
  try:
128
+ loop = asyncio.get_event_loop()
129
+ result = await loop.run_in_executor(
130
+ executor,
131
+ process_pdf_pipeline,
132
+ temp_pdf_path,
133
+ tempfile.gettempdir(), # Use temp directory
134
+ "stamp_detector/stamp_model.pt",
135
+ stamp_conf,
136
+ dpi,
137
+ False # save_intermediate
138
  )
139
 
140
  # Return the result as JSON
 
207
  temp_pdf.write(content)
208
  temp_pdf_path = temp_pdf.name
209
 
210
+ # Process the PDF in a thread pool to allow concurrent requests
211
  try:
212
+ loop = asyncio.get_event_loop()
213
+ result = await loop.run_in_executor(
214
+ executor,
215
+ process_pdf_pipeline,
216
+ temp_pdf_path,
217
+ tempfile.gettempdir(), # Use temp directory
218
+ stamp_model_path,
219
+ stamp_conf,
220
+ dpi,
221
+ False # save_intermediate
222
  )
223
 
224
  # Return the result as JSON
 
362
  detail=f"Error fetching PDF from URL: {str(e)}"
363
  )
364
 
365
+ # Process the PDF in a thread pool to allow concurrent requests
366
  try:
367
+ loop = asyncio.get_event_loop()
368
+ result = await loop.run_in_executor(
369
+ executor,
370
+ process_pdf_pipeline,
371
+ temp_pdf_path,
372
+ tempfile.gettempdir(),
373
+ stamp_model_path,
374
+ stamp_conf,
375
+ dpi,
376
+ False # save_intermediate
377
  )
378
 
379
  # Return the result as JSON