Spaces:
Sleeping
Sleeping
File size: 3,369 Bytes
5e52ed2 e7f8326 5e52ed2 8992be6 b4fa10f 8992be6 b65136b e7f8326 55d9c6b e7f8326 8992be6 b4fa10f fc1fbb4 88d3b0d fc1fbb4 e7f8326 8992be6 b4fa10f 5e52ed2 b4fa10f 5e52ed2 8992be6 b4fa10f 5e52ed2 1a6a4fd 8992be6 5e52ed2 1a6a4fd 5e52ed2 b4fa10f 5e52ed2 8992be6 b4fa10f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
from fastapi import FastAPI, File, UploadFile,HTTPException
from fastapi.responses import JSONResponse
import shutil
import os
from fastapi.middleware.cors import CORSMiddleware
import uuid
from pdf2image import convert_from_path
from PIL import Image
from model_utils import extract_invoice_data_from_image
from typing import List
import asyncio
# Point the config/cache locations used by matplotlib, ultralytics, XDG-aware
# tools and fontconfig at paths under /tmp (presumably because the default
# locations are not writable in the deployment environment — confirm).
# NOTE(review): these variables are assigned after the imports above; any
# library that reads them at import time will not see these values — verify.
# NOTE(review): FONTCONFIG_PATH is "/tmp" while the directory created below is
# "/tmp/fontconfig" — confirm which of the two is intended.
os.environ.update(
    {
        "MPLCONFIGDIR": "/tmp/matplotlib",
        "YOLO_CONFIG_DIR": "/tmp/ultralytics",
        "XDG_CACHE_HOME": "/tmp",
        "FONTCONFIG_PATH": "/tmp",
    }
)
for _scratch_dir in ("/tmp/matplotlib", "/tmp/ultralytics", "/tmp/fontconfig"):
    os.makedirs(_scratch_dir, exist_ok=True)
# Application object; the middleware and the route below are registered on it.
app = FastAPI()
# Scratch directory where uploads (and images rendered from PDFs) are written
# while a request is being processed; created at import time.
UPLOAD_DIR = "/tmp/uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)
# File extensions accepted by process_single_file (compared after lower-casing).
ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
# Intended upper bound on the number of files in one /extract-invoice request.
MAX_FILES_PER_REQUEST = 10
# Register CORS middleware that accepts every origin, HTTP method and header,
# with credentials enabled.
# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive policy — confirm it is intended, or list the trusted origins
# explicitly.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
def resize_to_640(img: Image.Image) -> Image.Image:
    """Return *img* resized to a width of 640 px, preserving aspect ratio.

    Args:
        img: Source image; its width must be non-zero.

    Returns:
        The result of ``img.resize`` with width 640 and the proportionally
        scaled height (truncated to an integer, but never below 1 px), using
        LANCZOS resampling.

    Raises:
        ValueError: If *img* has zero width, so no scale factor can be computed.
    """
    target_width = 640
    width, height = img.size
    if width <= 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError("Cannot resize an image with zero width")
    scale = target_width / float(width)
    # Clamp to 1 px: for extremely wide inputs the truncated height would be 0,
    # which is not a usable image size.
    new_height = max(1, int(float(height) * scale))
    return img.resize((target_width, new_height), Image.LANCZOS)
async def process_single_file(file: UploadFile) -> dict:
    """Save one upload to disk, run invoice extraction on it, and clean up.

    PDFs are rasterized (first page only, 300 dpi), resized to 640 px wide and
    saved as a temporary PNG before inference; image uploads are passed to the
    extractor as saved.

    NOTE: this coroutine contains no ``await``; all file, PDF and inference
    work runs synchronously on the calling event loop.

    Args:
        file: The uploaded file. Its extension (case-insensitive) must be in
            ``ALLOWED_EXTENSIONS``.

    Returns:
        ``{"filename": ..., "data": ...}`` on success, or ``{"error": ...}``
        when the PDF has no pages or any step of processing fails.

    Raises:
        HTTPException: 400 if the extension is missing or unsupported.
    """
    # ``filename`` may be None for uploads sent without a name; treat that as
    # "no extension" so it is rejected with a 400 rather than a TypeError.
    file_ext = os.path.splitext(file.filename or "")[-1].lower()
    if file_ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(status_code=400, detail=f"Unsupported format: {file.filename}. Supported: .png, .jpg, .jpeg, .pdf")

    # Store under a random name so client-supplied names never reach the
    # filesystem and parallel uploads cannot collide.
    unique_filename = f"{uuid.uuid4().hex}{file_ext}"
    file_path = os.path.join(UPLOAD_DIR, unique_filename)
    image_path = None
    try:
        # Save uploaded file temporarily
        with open(file_path, "wb") as f:
            shutil.copyfileobj(file.file, f)

        if file_ext == ".pdf":
            # Only the first page is used, so render just that page instead of
            # rasterizing the whole document at 300 dpi.
            images = convert_from_path(file_path, dpi=300, first_page=1, last_page=1)
            if not images:
                return {"error": f"No pages found in PDF: {file.filename}"}
            img = resize_to_640(images[0])
            image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
            img.save(image_path)
        else:
            image_path = file_path

        # Run inference
        extracted_data = extract_invoice_data_from_image(image_path)
        return {"filename": file.filename, "data": extracted_data}
    except Exception as ex:
        # Per-file failures are reported in the result instead of aborting the
        # whole request.
        return {"error": f"Processing failed for {file.filename}: {str(ex)}"}
    finally:
        # Clean up temp files
        _remove_temp_file(file_path)
        if image_path and image_path != file_path:
            _remove_temp_file(image_path)


def _remove_temp_file(path: str) -> None:
    """Delete *path* on a best-effort basis, ignoring filesystem errors."""
    try:
        os.remove(path)
    except OSError:
        # Covers a missing file as well as permission problems; a failed
        # cleanup must not replace the result being returned to the caller.
        pass
@app.post("/extract-invoice")
async def extract_invoice(files: List[UploadFile] = File(..., max_files=MAX_FILES_PER_REQUEST)):
    """Extract invoice data from every uploaded file and report a summary.

    Args:
        files: Uploaded invoice files (images or PDFs), at most
            ``MAX_FILES_PER_REQUEST`` per request.

    Returns:
        A JSON response with ``success``, a human-readable ``message`` holding
        the succeeded/failed counts, and ``data``: one result dict per file in
        upload order (entries containing an ``"error"`` key are failures).

    Raises:
        HTTPException: 400 if no files were uploaded or the per-request limit
            is exceeded; also propagated from ``process_single_file``.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded")
    # NOTE(review): ``max_files`` does not appear among File()'s documented
    # validation parameters, so the limit is enforced explicitly here.
    if len(files) > MAX_FILES_PER_REQUEST:
        raise HTTPException(
            status_code=400,
            detail=f"Too many files: {len(files)} uploaded, at most {MAX_FILES_PER_REQUEST} allowed",
        )

    # Schedule every file through gather; results come back in upload order.
    results = await asyncio.gather(*(process_single_file(file) for file in files))

    # Aggregate results
    error_count = sum(1 for r in results if "error" in r)
    success_count = len(results) - error_count
    return JSONResponse(content={
        "success": True,
        "message": f"Processed {len(files)} invoices. {success_count} succeeded, {error_count} failed.",
        "data": results,
    })