Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,6 @@ from PIL import Image
|
|
| 9 |
from model_utils import extract_invoice_data_from_image
|
| 10 |
from typing import List
|
| 11 |
import asyncio
|
| 12 |
-
from concurrent.futures import ThreadPoolExecutor
|
| 13 |
|
| 14 |
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
|
| 15 |
os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
|
|
@@ -26,7 +25,6 @@ UPLOAD_DIR = "/tmp/uploads"
|
|
| 26 |
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
| 27 |
ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
|
| 28 |
MAX_FILES_PER_REQUEST = 10
|
| 29 |
-
MAX_WORKERS = min(MAX_FILES_PER_REQUEST, os.cpu_count() or 4) # Dynamic thread limit
|
| 30 |
|
| 31 |
app.add_middleware(
|
| 32 |
CORSMiddleware,
|
|
@@ -56,22 +54,21 @@ async def process_single_file(file: UploadFile) -> dict:
|
|
| 56 |
# Save uploaded file temporarily
|
| 57 |
with open(file_path, "wb") as f:
|
| 58 |
shutil.copyfileobj(file.file, f)
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
return {"filename": file.filename, "data": extracted_data}
|
| 75 |
|
| 76 |
except Exception as ex:
|
| 77 |
return {"error": f"Processing failed for {file.filename}: {str(ex)}"}
|
|
|
|
| 9 |
from model_utils import extract_invoice_data_from_image
|
| 10 |
from typing import List
|
| 11 |
import asyncio
|
|
|
|
| 12 |
|
| 13 |
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
|
| 14 |
os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
|
|
|
|
| 25 |
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
| 26 |
ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
|
| 27 |
MAX_FILES_PER_REQUEST = 10
|
|
|
|
| 28 |
|
| 29 |
app.add_middleware(
|
| 30 |
CORSMiddleware,
|
|
|
|
| 54 |
# Save uploaded file temporarily
|
| 55 |
with open(file_path, "wb") as f:
|
| 56 |
shutil.copyfileobj(file.file, f)
|
| 57 |
+
|
| 58 |
+
if file_ext == ".pdf":
|
| 59 |
+
# Convert PDF's first page to image
|
| 60 |
+
images = convert_from_path(file_path, dpi=300)
|
| 61 |
+
if not images:
|
| 62 |
+
return {"error": f"No pages found in PDF: {file.filename}"}
|
| 63 |
+
img = resize_to_640(images[0])
|
| 64 |
+
image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
|
| 65 |
+
img.save(image_path)
|
| 66 |
+
else:
|
| 67 |
+
image_path = file_path
|
| 68 |
+
|
| 69 |
+
# Run inference
|
| 70 |
+
extracted_data = extract_invoice_data_from_image(image_path)
|
| 71 |
+
return {"filename": file.filename, "data": extracted_data}
|
|
|
|
| 72 |
|
| 73 |
except Exception as ex:
|
| 74 |
return {"error": f"Processing failed for {file.filename}: {str(ex)}"}
|