Namra-Satva committed on
Commit
085d554
·
verified ·
1 Parent(s): a43cb8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -15
app.py CHANGED
@@ -9,6 +9,7 @@ from PIL import Image
9
  from model_utils import extract_invoice_data_from_image
10
  from typing import List
11
  import asyncio
 
12
 
13
  os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
14
  os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
@@ -25,6 +26,7 @@ UPLOAD_DIR = "/tmp/uploads"
25
  os.makedirs(UPLOAD_DIR, exist_ok=True)
26
  ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
27
  MAX_FILES_PER_REQUEST = 10
 
28
 
29
  app.add_middleware(
30
  CORSMiddleware,
@@ -54,21 +56,22 @@ async def process_single_file(file: UploadFile) -> dict:
54
  # Save uploaded file temporarily
55
  with open(file_path, "wb") as f:
56
  shutil.copyfileobj(file.file, f)
57
-
58
- if file_ext == ".pdf":
59
- # Convert PDF's first page to image
60
- images = convert_from_path(file_path, dpi=300)
61
- if not images:
62
- return {"error": f"No pages found in PDF: {file.filename}"}
63
- img = resize_to_640(images[0])
64
- image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
65
- img.save(image_path)
66
- else:
67
- image_path = file_path
68
-
69
- # Run inference
70
- extracted_data = extract_invoice_data_from_image(image_path)
71
- return {"filename": file.filename, "data": extracted_data}
 
72
 
73
  except Exception as ex:
74
  return {"error": f"Processing failed for {file.filename}: {str(ex)}"}
 
9
  from model_utils import extract_invoice_data_from_image
10
  from typing import List
11
  import asyncio
12
+ from concurrent.futures import ThreadPoolExecutor
13
 
14
  os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
15
  os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
 
26
  os.makedirs(UPLOAD_DIR, exist_ok=True)
27
  ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
28
  MAX_FILES_PER_REQUEST = 10
29
+ MAX_WORKERS = min(MAX_FILES_PER_REQUEST, os.cpu_count() or 4) # Dynamic thread limit
30
 
31
  app.add_middleware(
32
  CORSMiddleware,
 
56
  # Save uploaded file temporarily
57
  with open(file_path, "wb") as f:
58
  shutil.copyfileobj(file.file, f)
59
+ loop = asyncio.get_event_loop()
60
+ with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
61
+ if file_ext == ".pdf":
62
+ # Convert PDF to image in a thread
63
+ images = await loop.run_in_executor(executor, convert_from_path, file_path, 300)
64
+ if not images:
65
+ return {"error": f"No pages found in PDF: {file.filename}"}
66
+ img = resize_to_640(images[0])
67
+ image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
68
+ await loop.run_in_executor(executor, img.save, image_path)
69
+ else:
70
+ image_path = file_path
71
+
72
+ # Run inference in a thread
73
+ extracted_data = await loop.run_in_executor(executor, extract_invoice_data_from_image, image_path)
74
+ return {"filename": file.filename, "data": extracted_data}
75
 
76
  except Exception as ex:
77
  return {"error": f"Processing failed for {file.filename}: {str(ex)}"}