Spaces:

satvaSolutions
/

invoice-ocr-api

Sleeping

App Files Files Community

Namra-Satva committed on Apr 17, 2025

Commit

c05b853

verified ·

1 Parent(s): 085d554

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -18

app.py CHANGED Viewed

@@ -9,7 +9,6 @@ from PIL import Image
 from model_utils import extract_invoice_data_from_image
 from typing import List
 import asyncio
-from concurrent.futures import ThreadPoolExecutor
 os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
 os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
@@ -26,7 +25,6 @@ UPLOAD_DIR = "/tmp/uploads"
 os.makedirs(UPLOAD_DIR, exist_ok=True)
 ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
 MAX_FILES_PER_REQUEST = 10
-MAX_WORKERS = min(MAX_FILES_PER_REQUEST, os.cpu_count() or 4)  # Dynamic thread limit
 app.add_middleware(
     CORSMiddleware,
@@ -56,22 +54,21 @@ async def process_single_file(file: UploadFile) -> dict:
         # Save uploaded file temporarily
         with open(file_path, "wb") as f:
             shutil.copyfileobj(file.file, f)
-        loop = asyncio.get_event_loop()
-        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
-            if file_ext == ".pdf":
-                # Convert PDF to image in a thread
-                images = await loop.run_in_executor(executor, convert_from_path, file_path, 300)
-                if not images:
-                    return {"error": f"No pages found in PDF: {file.filename}"}
-                img = resize_to_640(images[0])
-                image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
-                await loop.run_in_executor(executor, img.save, image_path)
-            else:
-                image_path = file_path
-            # Run inference in a thread
-            extracted_data = await loop.run_in_executor(executor, extract_invoice_data_from_image, image_path)
-            return {"filename": file.filename, "data": extracted_data}
     except Exception as ex:
         return {"error": f"Processing failed for {file.filename}: {str(ex)}"}

 from model_utils import extract_invoice_data_from_image
 from typing import List
 import asyncio
 os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
 os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
 os.makedirs(UPLOAD_DIR, exist_ok=True)
 ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
 MAX_FILES_PER_REQUEST = 10
 app.add_middleware(
     CORSMiddleware,
         # Save uploaded file temporarily
         with open(file_path, "wb") as f:
             shutil.copyfileobj(file.file, f)
+        if file_ext == ".pdf":
+            # Convert PDF's first page to image
+            images = convert_from_path(file_path, dpi=300)
+            if not images:
+                return {"error": f"No pages found in PDF: {file.filename}"}
+            img = resize_to_640(images[0])
+            image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
+            img.save(image_path)
+        else:
+            image_path = file_path
+        # Run inference
+        extracted_data = extract_invoice_data_from_image(image_path)
+        return {"filename": file.filename, "data": extracted_data}
     except Exception as ex:
         return {"error": f"Processing failed for {file.filename}: {str(ex)}"}