Namra-Satva committed on
Commit
a43cb8d
·
verified ·
1 Parent(s): 5dfe83f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -18
app.py CHANGED
@@ -9,7 +9,6 @@ from PIL import Image
9
  from model_utils import extract_invoice_data_from_image
10
  from typing import List
11
  import asyncio
12
- from concurrent.futures import ThreadPoolExecutor
13
 
14
  os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
15
  os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
@@ -26,7 +25,6 @@ UPLOAD_DIR = "/tmp/uploads"
26
  os.makedirs(UPLOAD_DIR, exist_ok=True)
27
  ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
28
  MAX_FILES_PER_REQUEST = 10
29
- MAX_WORKERS = min(MAX_FILES_PER_REQUEST, os.cpu_count() or 4)
30
 
31
  app.add_middleware(
32
  CORSMiddleware,
@@ -57,22 +55,20 @@ async def process_single_file(file: UploadFile) -> dict:
57
  with open(file_path, "wb") as f:
58
  shutil.copyfileobj(file.file, f)
59
 
60
- loop = asyncio.get_event_loop()
61
- with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
62
- if file_ext == ".pdf":
63
- # Convert PDF to image in a thread
64
- images = await loop.run_in_executor(executor, convert_from_path, file_path, 300)
65
- if not images:
66
- return {"error": f"No pages found in PDF: {file.filename}"}
67
- img = resize_to_640(images[0])
68
- image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
69
- await loop.run_in_executor(executor, img.save, image_path)
70
- else:
71
- image_path = file_path
72
-
73
- # Run inference in a thread
74
- extracted_data = await loop.run_in_executor(executor, extract_invoice_data_from_image, image_path)
75
- return {"filename": file.filename, "data": extracted_data}
76
 
77
  except Exception as ex:
78
  return {"error": f"Processing failed for {file.filename}: {str(ex)}"}
 
9
  from model_utils import extract_invoice_data_from_image
10
  from typing import List
11
  import asyncio
 
12
 
13
  os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
14
  os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
 
25
  os.makedirs(UPLOAD_DIR, exist_ok=True)
26
  ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
27
  MAX_FILES_PER_REQUEST = 10
 
28
 
29
  app.add_middleware(
30
  CORSMiddleware,
 
55
  with open(file_path, "wb") as f:
56
  shutil.copyfileobj(file.file, f)
57
 
58
+ if file_ext == ".pdf":
59
+ # Convert PDF's first page to image
60
+ images = convert_from_path(file_path, dpi=300)
61
+ if not images:
62
+ return {"error": f"No pages found in PDF: {file.filename}"}
63
+ img = resize_to_640(images[0])
64
+ image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
65
+ img.save(image_path)
66
+ else:
67
+ image_path = file_path
68
+
69
+ # Run inference
70
+ extracted_data = extract_invoice_data_from_image(image_path)
71
+ return {"filename": file.filename, "data": extracted_data}
 
 
72
 
73
  except Exception as ex:
74
  return {"error": f"Processing failed for {file.filename}: {str(ex)}"}