Namra-Satva committed on
Commit
c05b853
·
verified ·
1 Parent(s): 085d554

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -18
app.py CHANGED
@@ -9,7 +9,6 @@ from PIL import Image
9
  from model_utils import extract_invoice_data_from_image
10
  from typing import List
11
  import asyncio
12
- from concurrent.futures import ThreadPoolExecutor
13
 
14
  os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
15
  os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
@@ -26,7 +25,6 @@ UPLOAD_DIR = "/tmp/uploads"
26
  os.makedirs(UPLOAD_DIR, exist_ok=True)
27
  ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
28
  MAX_FILES_PER_REQUEST = 10
29
- MAX_WORKERS = min(MAX_FILES_PER_REQUEST, os.cpu_count() or 4) # Dynamic thread limit
30
 
31
  app.add_middleware(
32
  CORSMiddleware,
@@ -56,22 +54,21 @@ async def process_single_file(file: UploadFile) -> dict:
56
  # Save uploaded file temporarily
57
  with open(file_path, "wb") as f:
58
  shutil.copyfileobj(file.file, f)
59
- loop = asyncio.get_event_loop()
60
- with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
61
- if file_ext == ".pdf":
62
- # Convert PDF to image in a thread
63
- images = await loop.run_in_executor(executor, convert_from_path, file_path, 300)
64
- if not images:
65
- return {"error": f"No pages found in PDF: {file.filename}"}
66
- img = resize_to_640(images[0])
67
- image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
68
- await loop.run_in_executor(executor, img.save, image_path)
69
- else:
70
- image_path = file_path
71
-
72
- # Run inference in a thread
73
- extracted_data = await loop.run_in_executor(executor, extract_invoice_data_from_image, image_path)
74
- return {"filename": file.filename, "data": extracted_data}
75
 
76
  except Exception as ex:
77
  return {"error": f"Processing failed for {file.filename}: {str(ex)}"}
 
9
  from model_utils import extract_invoice_data_from_image
10
  from typing import List
11
  import asyncio
 
12
 
13
  os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
14
  os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
 
25
  os.makedirs(UPLOAD_DIR, exist_ok=True)
26
  ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
27
  MAX_FILES_PER_REQUEST = 10
 
28
 
29
  app.add_middleware(
30
  CORSMiddleware,
 
54
  # Save uploaded file temporarily
55
  with open(file_path, "wb") as f:
56
  shutil.copyfileobj(file.file, f)
57
+
58
+ if file_ext == ".pdf":
59
+ # Convert PDF's first page to image
60
+ images = convert_from_path(file_path, dpi=300)
61
+ if not images:
62
+ return {"error": f"No pages found in PDF: {file.filename}"}
63
+ img = resize_to_640(images[0])
64
+ image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
65
+ img.save(image_path)
66
+ else:
67
+ image_path = file_path
68
+
69
+ # Run inference
70
+ extracted_data = extract_invoice_data_from_image(image_path)
71
+ return {"filename": file.filename, "data": extracted_data}
 
72
 
73
  except Exception as ex:
74
  return {"error": f"Processing failed for {file.filename}: {str(ex)}"}