Namra-Satva committed on
Commit
b4fa10f
·
verified ·
1 Parent(s): b8d1763

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -16
app.py CHANGED
@@ -7,6 +7,8 @@ import uuid
7
  from pdf2image import convert_from_path
8
  from PIL import Image
9
  from model_utils import extract_invoice_data_from_image
 
 
10
 
11
  os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
12
  os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
@@ -22,6 +24,8 @@ app = FastAPI()
22
  UPLOAD_DIR = "/tmp/uploads"
23
  os.makedirs(UPLOAD_DIR, exist_ok=True)
24
  ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
 
 
25
  app.add_middleware(
26
  CORSMiddleware,
27
  allow_origins=["*"],
@@ -36,24 +40,26 @@ def resize_to_640(img: Image.Image) -> Image.Image:
36
  return img.resize((640, h_size), Image.LANCZOS)
37
 
38
 
39
- @app.post("/extract-invoice")
40
- async def extract_invoice(file: UploadFile = File(...)):
41
  file_ext = os.path.splitext(file.filename)[-1].lower()
42
 
43
  if file_ext not in ALLOWED_EXTENSIONS:
44
- raise HTTPException(status_code=400, detail="Supported formats: .png, .jpg, .jpeg, .pdf")
45
 
46
  unique_filename = f"{uuid.uuid4().hex}{file_ext}"
47
  file_path = os.path.join(UPLOAD_DIR, unique_filename)
 
48
 
49
  try:
50
- # Save uploaded file
51
  with open(file_path, "wb") as f:
52
  shutil.copyfileobj(file.file, f)
53
 
54
  if file_ext == ".pdf":
55
  # Convert PDF's first page to image
56
  images = convert_from_path(file_path, dpi=300)
 
 
57
  img = resize_to_640(images[0])
58
  image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
59
  img.save(image_path)
@@ -62,22 +68,33 @@ async def extract_invoice(file: UploadFile = File(...)):
62
 
63
  # Run inference
64
  extracted_data = extract_invoice_data_from_image(image_path)
65
-
66
- return JSONResponse(content={
67
- "success": True,
68
- "message": "Invoice data extracted successfully.",
69
- "data": extracted_data
70
- })
71
 
72
  except Exception as ex:
73
- return JSONResponse(
74
- status_code=500,
75
- content={"success": False, "error": f"Internal Server Error: {str(ex)}"}
76
- )
77
 
78
  finally:
79
  # Clean up temp files
80
  if os.path.exists(file_path):
81
  os.remove(file_path)
82
- if file_ext == ".pdf" and os.path.exists(image_path):
83
- os.remove(image_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from pdf2image import convert_from_path
8
  from PIL import Image
9
  from model_utils import extract_invoice_data_from_image
10
+ from typing import List
11
+ import asyncio
12
 
13
  os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
14
  os.environ["YOLO_CONFIG_DIR"] = "/tmp/ultralytics"
 
24
  UPLOAD_DIR = "/tmp/uploads"
25
  os.makedirs(UPLOAD_DIR, exist_ok=True)
26
  ALLOWED_EXTENSIONS = {".png", ".jpg", ".jpeg",".pdf"}
27
+ MAX_FILES_PER_REQUEST = 10
28
+
29
  app.add_middleware(
30
  CORSMiddleware,
31
  allow_origins=["*"],
 
40
  return img.resize((640, h_size), Image.LANCZOS)
41
 
42
 
43
+ async def process_single_file(file: UploadFile) -> dict:
 
44
  file_ext = os.path.splitext(file.filename)[-1].lower()
45
 
46
  if file_ext not in ALLOWED_EXTENSIONS:
47
+ raise HTTPException(status_code=400, detail=f"Unsupported format: {file.filename}. Supported: .png, .jpg, .jpeg, .pdf")
48
 
49
  unique_filename = f"{uuid.uuid4().hex}{file_ext}"
50
  file_path = os.path.join(UPLOAD_DIR, unique_filename)
51
+ image_path = None
52
 
53
  try:
54
+ # Save uploaded file temporarily
55
  with open(file_path, "wb") as f:
56
  shutil.copyfileobj(file.file, f)
57
 
58
  if file_ext == ".pdf":
59
  # Convert PDF's first page to image
60
  images = convert_from_path(file_path, dpi=300)
61
+ if not images:
62
+ return {"error": f"No pages found in PDF: {file.filename}"}
63
  img = resize_to_640(images[0])
64
  image_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}.png")
65
  img.save(image_path)
 
68
 
69
  # Run inference
70
  extracted_data = extract_invoice_data_from_image(image_path)
71
+ return {"filename": file.filename, "data": extracted_data}
 
 
 
 
 
72
 
73
  except Exception as ex:
74
+ return {"error": f"Processing failed for {file.filename}: {str(ex)}"}
 
 
 
75
 
76
  finally:
77
  # Clean up temp files
78
  if os.path.exists(file_path):
79
  os.remove(file_path)
80
+ if image_path and os.path.exists(image_path) and image_path != file_path:
81
+ os.remove(image_path)
82
+
83
+ @app.post("/extract-invoice")
84
+ async def extract_invoice(files: List[UploadFile] = File(..., max_files=MAX_FILES_PER_REQUEST)):
85
+ if not files:
86
+ raise HTTPException(status_code=400, detail="No files uploaded")
87
+
88
+ # Process files concurrently
89
+ tasks = [process_single_file(file) for file in files]
90
+ results = await asyncio.gather(*tasks)
91
+
92
+ # Aggregate results
93
+ success_count = sum(1 for r in results if "error" not in r)
94
+ error_count = len(results) - success_count
95
+
96
+ return JSONResponse(content={
97
+ "success": True,
98
+ "message": f"Processed {len(files)} invoices. {success_count} succeeded, {error_count} failed.",
99
+ "data": results
100
+ })