File size: 3,369 Bytes
5e52ed2
e7f8326
 
 
5e52ed2
 
8992be6
 
 
b4fa10f
 
8992be6
b65136b
 
 
 
 
 
 
 
 
e7f8326
 
55d9c6b
e7f8326
8992be6
b4fa10f
 
fc1fbb4
 
88d3b0d
fc1fbb4
 
 
 
e7f8326
8992be6
 
 
 
 
 
b4fa10f
5e52ed2
 
 
b4fa10f
5e52ed2
 
8992be6
b4fa10f
5e52ed2
 
1a6a4fd
8992be6
5e52ed2
 
1a6a4fd
 
 
 
 
 
 
 
 
 
 
 
 
 
5e52ed2
 
b4fa10f
5e52ed2
 
8992be6
 
 
b4fa10f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os

# Point library config/cache locations at /tmp. This is done before the
# third-party and project imports below because such settings are commonly
# read once, when the consuming library is first imported; assigning them
# after those imports may have no effect.
# NOTE(review): presumably /tmp is used because it is the writable location
# in the deployment environment -- confirm.
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
os.environ["XDG_CACHE_HOME"] = "/tmp"
os.environ["FONTCONFIG_PATH"] = "/tmp"

# Create the redirected directories up front so they exist on first use.
os.makedirs("/tmp/matplotlib", exist_ok=True)
os.makedirs("/tmp/ultralytics", exist_ok=True)
os.makedirs("/tmp/fontconfig", exist_ok=True)

# The remaining imports intentionally follow the environment setup above
# (hence the E402 suppressions).
import asyncio  # noqa: E402
import shutil  # noqa: E402
import uuid  # noqa: E402
from typing import List  # noqa: E402

from fastapi import FastAPI, File, UploadFile, HTTPException  # noqa: E402
from fastapi.middleware.cors import CORSMiddleware  # noqa: E402
from fastapi.responses import JSONResponse  # noqa: E402
from pdf2image import convert_from_path  # noqa: E402
from PIL import Image  # noqa: E402

from model_utils import extract_invoice_data_from_image  # noqa: E402

# Upload constraints.
MAX_FILES_PER_REQUEST = 10
ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg", ".pdf"}

# Scratch directory for uploaded files; created at import time.
UPLOAD_DIR = "/tmp/uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)

# The ASGI application.
app = FastAPI()

# CORS: accept any origin, method and header, with credentials enabled.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard values with allow_credentials=True -- confirm whether credentialed
# cross-origin requests are actually required, and if so list the origins
# explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

def resize_to_640(img: Image.Image) -> Image.Image:
    """Return a copy of *img* scaled to a width of 640 px, keeping its aspect ratio.

    The height is multiplied by the same factor as the width and truncated to
    an integer. It is clamped to a minimum of 1 px, because a very wide and
    short image would otherwise truncate to a height of 0, which is not a
    usable target size for ``Image.resize``.

    Args:
        img: Source image; its width must be non-zero.

    Returns:
        A new image that is 640 px wide, resampled with the LANCZOS filter.

    Raises:
        ZeroDivisionError: If the source image has a width of 0.
    """
    target_width = 640
    src_width, src_height = img.size
    scale = target_width / float(src_width)
    # int() truncates toward zero; never let the result collapse to 0.
    target_height = max(1, int(float(src_height) * scale))
    return img.resize((target_width, target_height), Image.LANCZOS)


async def process_single_file(file: UploadFile) -> dict:
    """Run invoice extraction on a single uploaded image or PDF.

    The upload is written to a uniquely named temporary file under
    ``UPLOAD_DIR``. For a PDF, the first page is rendered, resized with
    ``resize_to_640`` and saved as a temporary PNG; other formats are handed
    to the model as uploaded. Temporary files are removed before returning.

    NOTE(review): nothing in this coroutine is awaited -- the file copy, PDF
    rendering and inference are plain blocking calls -- so it does not yield
    to the event loop while it runs. Offloading to a worker thread would fix
    that, but only if ``extract_invoice_data_from_image`` is safe to call
    from several threads at once; confirm before changing.

    Args:
        file: The uploaded file. Its filename extension must be one of
            ``ALLOWED_EXTENSIONS``.

    Returns:
        ``{"filename": ..., "data": ...}`` on success, or ``{"error": ...}``
        when the PDF has no pages or processing raises.

    Raises:
        HTTPException: Status 400 if the filename is missing or its extension
            is not supported.
    """
    # An upload may arrive without a filename; treat that as "no extension"
    # so it is rejected with a 400 below instead of failing inside splitext().
    file_ext = os.path.splitext(file.filename or "")[-1].lower()

    if file_ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(status_code=400, detail=f"Unsupported format: {file.filename}. Supported: .png, .jpg, .jpeg, .pdf")

    unique_filename = f"{uuid.uuid4().hex}{file_ext}"
    file_path = os.path.join(UPLOAD_DIR, unique_filename)
    image_path = None

    try:
        # Save uploaded file temporarily
        with open(file_path, "wb") as f:
            shutil.copyfileobj(file.file, f)

        if file_ext == ".pdf":
            # Only the first page is used, so render just that page rather
            # than rasterising the whole document at 300 dpi.
            images = convert_from_path(file_path, dpi=300, first_page=1, last_page=1)
            if not images:
                return {"error": f"No pages found in PDF: {file.filename}"}
            img = resize_to_640(images[0])
            image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
            img.save(image_path)
        else:
            image_path = file_path

        # Run inference
        extracted_data = extract_invoice_data_from_image(image_path)
        return {"filename": file.filename, "data": extracted_data}

    except Exception as ex:
        # Deliberate catch-all: a failure on one file is reported in its
        # result entry rather than failing the whole request.
        return {"error": f"Processing failed for {file.filename}: {str(ex)}"}

    finally:
        # Clean up temp files. The set collapses the image case, where
        # image_path and file_path are the same path. A file that is already
        # gone (or was never created) is not an error.
        for temp_path in {file_path, image_path}:
            if temp_path is None:
                continue
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass

@app.post("/extract-invoice")
async def extract_invoice(files: List[UploadFile] = File(..., max_files=MAX_FILES_PER_REQUEST)):
    """Extract invoice data from one or more uploaded files.

    Each file is handed to ``process_single_file``; the per-file results
    (success or error entries) are returned together with a summary message.

    Args:
        files: The uploaded files, at most ``MAX_FILES_PER_REQUEST``.

    Returns:
        A JSON response with ``success``, a human-readable ``message`` and
        ``data``, the list of per-file results in upload order.

    Raises:
        HTTPException: Status 400 if no files were uploaded, if more than
            ``MAX_FILES_PER_REQUEST`` files were uploaded, or if
            ``process_single_file`` rejects a file's format.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded")

    # The `max_files` keyword given to File() above is not a validation
    # constraint that File() documents or enforces, so the limit has to be
    # checked explicitly here.
    if len(files) > MAX_FILES_PER_REQUEST:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Too many files: {len(files)} uploaded, "
                f"at most {MAX_FILES_PER_REQUEST} allowed per request"
            ),
        )

    # One task per file; gather() returns the results in the same order as
    # `files` and propagates the first exception raised by any task.
    tasks = [process_single_file(file) for file in files]
    results = await asyncio.gather(*tasks)

    # Aggregate results: an entry counts as failed when it has an "error" key.
    success_count = sum(1 for r in results if "error" not in r)
    error_count = len(results) - success_count

    return JSONResponse(content={
        "success": True,
        "message": f"Processed {len(files)} invoices. {success_count} succeeded, {error_count} failed.",
        "data": results
    })