Spaces:

satvaSolutions
/

invoice-ocr-api

Sleeping

Namra-Satva committed on Apr 17, 2025

Commit

ec2fbb0

verified ·

1 Parent(s): c05b853

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -51,24 +51,24 @@ async def process_single_file(file: UploadFile) -> dict:
     image_path = None
     try:
-        # Save uploaded file temporarily
         with open(file_path, "wb") as f:
             shutil.copyfileobj(file.file, f)
-        if file_ext == ".pdf":
-            # Convert PDF's first page to image
-            images = convert_from_path(file_path, dpi=300)
-            if not images:
-                return {"error": f"No pages found in PDF: {file.filename}"}
-            img = resize_to_640(images[0])
-            image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
-            img.save(image_path)
-        else:
-            image_path = file_path
-        # Run inference
-        extracted_data = extract_invoice_data_from_image(image_path)
-        return {"filename": file.filename, "data": extracted_data}
     except Exception as ex:
         return {"error": f"Processing failed for {file.filename}: {str(ex)}"}

     image_path = None
     try:
         with open(file_path, "wb") as f:
             shutil.copyfileobj(file.file, f)
+        loop = asyncio.get_event_loop()
+        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+            if file_ext == ".pdf":
+                # Convert PDF to image in a thread
+                images = await loop.run_in_executor(executor, convert_from_path, file_path, 300)
+                if not images:
+                    return {"error": f"No pages found in PDF: {file.filename}"}
+                img = resize_to_640(images[0])
+                image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
+                await loop.run_in_executor(executor, img.save, image_path)
+            else:
+                image_path = file_path
+            # Run inference in a thread
+            extracted_data = await loop.run_in_executor(executor, extract_invoice_data_from_image, image_path)
+            return {"filename": file.filename, "data": extracted_data}
     except Exception as ex:
         return {"error": f"Processing failed for {file.filename}: {str(ex)}"}