Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ from PIL import Image
|
|
| 9 |
from model_utils import extract_invoice_data_from_image
|
| 10 |
from typing import List
|
| 11 |
import asyncio
|
|
|
|
| 12 |
|
| 13 |
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
|
| 14 |
os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
|
|
@@ -25,6 +26,7 @@ UPLOAD_DIR = "/tmp/uploads"
|
|
| 25 |
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
| 26 |
ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
|
| 27 |
MAX_FILES_PER_REQUEST = 10
|
|
|
|
| 28 |
|
| 29 |
app.add_middleware(
|
| 30 |
CORSMiddleware,
|
|
@@ -54,21 +56,22 @@ async def process_single_file(file: UploadFile) -> dict:
|
|
| 54 |
# Save uploaded file temporarily
|
| 55 |
with open(file_path, "wb") as f:
|
| 56 |
shutil.copyfileobj(file.file, f)
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
| 72 |
|
| 73 |
except Exception as ex:
|
| 74 |
return {"error": f"Processing failed for {file.filename}: {str(ex)}"}
|
|
|
|
| 9 |
from model_utils import extract_invoice_data_from_image
|
| 10 |
from typing import List
|
| 11 |
import asyncio
|
| 12 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 13 |
|
| 14 |
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
|
| 15 |
os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
|
|
|
|
| 26 |
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
| 27 |
ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
|
| 28 |
MAX_FILES_PER_REQUEST = 10
|
| 29 |
+
MAX_WORKERS = min(MAX_FILES_PER_REQUEST, os.cpu_count() or 4) # Dynamic thread limit
|
| 30 |
|
| 31 |
app.add_middleware(
|
| 32 |
CORSMiddleware,
|
|
|
|
| 56 |
# Save uploaded file temporarily
|
| 57 |
with open(file_path, "wb") as f:
|
| 58 |
shutil.copyfileobj(file.file, f)
|
| 59 |
+
loop = asyncio.get_event_loop()
|
| 60 |
+
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
| 61 |
+
if file_ext == ".pdf":
|
| 62 |
+
# Convert the PDF pages to images in a worker thread (only the first page is used below)
|
| 63 |
+
images = await loop.run_in_executor(executor, convert_from_path, file_path, 300)
|
| 64 |
+
if not images:
|
| 65 |
+
return {"error": f"No pages found in PDF: {file.filename}"}
|
| 66 |
+
img = resize_to_640(images[0])
|
| 67 |
+
image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
|
| 68 |
+
await loop.run_in_executor(executor, img.save, image_path)
|
| 69 |
+
else:
|
| 70 |
+
image_path = file_path
|
| 71 |
+
|
| 72 |
+
# Run inference in a thread
|
| 73 |
+
extracted_data = await loop.run_in_executor(executor, extract_invoice_data_from_image, image_path)
|
| 74 |
+
return {"filename": file.filename, "data": extracted_data}
|
| 75 |
|
| 76 |
except Exception as ex:
|
| 77 |
return {"error": f"Processing failed for {file.filename}: {str(ex)}"}
|