# Demo_Build / main.py
# blessedpug's picture
# Pre-Docker backup
# a4a400b
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import JSONResponse, FileResponse
from typing import List, Optional
from PIL import Image
import tempfile
import os
import shutil
import json # Added json import
# Corrected and consolidated imports from pipeline
from pipeline import (
extract_info,
# extract_info_batch, # This function in pipeline.py takes file paths, FastAPI will call extract_info individually
extract_reimbursement_form_info,
extract_medical_info,
extract_medical_info_batch
)
# Assuming models.py contains necessary Pydantic models, though not directly used in this file for request validation beyond FastAPI's
# from models import ReceiptData, ChildFeeForm
# ASGI application instance; the route decorators below register their handlers on it.
app = FastAPI()
# Create the "outputs" directory (relative to the current working directory) at
# import time; exist_ok=True makes this a no-op when it already exists.
# NOTE(review): presumably the pipeline writes its generated files here - confirm.
os.makedirs("outputs", exist_ok=True)
@app.get("/")
async def read_root():
    """Return the static welcome payload served at the API root."""
    welcome_payload = {"message": "Welcome to the Document Processing API"}
    return welcome_payload
def _strip_json_fence(raw: str) -> str:
    """Return *raw* without a surrounding Markdown ```json code fence.

    The opening and closing fence markers are removed by their own length
    instead of a fixed slice, so the payload is never truncated when the
    closing fence is not preceded by exactly one character or when the
    response carries leading/trailing whitespace. Text without a fence is
    returned stripped of surrounding whitespace only.
    """
    fence_open = "```json"
    fence_close = "```"
    text = raw.strip()
    if text.startswith(fence_open):
        text = text[len(fence_open):]
        if text.endswith(fence_close):
            text = text[:-len(fence_close)]
    return text.strip()


@app.post("/extract_receipt_info_batch/")
async def extract_receipt_batch_endpoint(files: List[UploadFile] = File(...)):
    """Extract receipt data from every uploaded image, one result per file.

    Each upload is processed independently: a failure on one file is recorded
    in that file's entry and does not abort the rest of the batch.

    Args:
        files: Uploaded files; only those whose content type starts with
            ``image/`` are passed to ``extract_info``.

    Returns:
        A JSON array with one object per upload, holding ``filename`` and
        either ``data`` (the parsed JSON produced by ``extract_info``) or
        ``error`` (a message describing why the file was skipped or failed).

    Raises:
        HTTPException: 400 when no files were uploaded.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded.")

    results = []
    for file_upload in files:
        try:
            # content_type may be missing on a multipart part; treat that as
            # "not an image" instead of failing on None.startswith.
            if not (file_upload.content_type or "").startswith("image/"):
                results.append({"filename": file_upload.filename, "error": "File is not an image."})
                continue
            pil_image = Image.open(file_upload.file)
            result_json_str = extract_info(pil_image)
            # extract_info may wrap its JSON in a Markdown code fence.
            parsed = json.loads(_strip_json_fence(result_json_str))
            results.append({"filename": file_upload.filename, "data": parsed})
        except Exception as e:
            # Deliberately broad: this is the per-file boundary of a
            # best-effort batch, so any failure becomes that file's error entry.
            results.append({"filename": file_upload.filename, "error": str(e)})
        finally:
            file_upload.file.close()  # Always release the upload's file handle.
    return JSONResponse(content=results)
@app.post("/extract_reimbursement_form_batch/")
async def extract_reimbursement_form_batch_endpoint(
    files: List[UploadFile] = File(...),
    emp_name: str = Form(...),
    emp_code: str = Form(...),
    department: str = Form(...),
    form_name: str = Form(...),
):
    """Build a reimbursement-form PDF from a batch of uploaded images.

    All uploads must be images; the first invalid or unreadable file aborts
    the whole request with a 400.

    Args:
        files: Uploaded image files, opened with PIL and handed to
            ``extract_reimbursement_form_info`` as a list.
        emp_name: Forwarded unchanged to ``extract_reimbursement_form_info``.
        emp_code: Forwarded unchanged to ``extract_reimbursement_form_info``.
        department: Forwarded unchanged to ``extract_reimbursement_form_info``.
        form_name: Forwarded unchanged to ``extract_reimbursement_form_info``.

    Returns:
        A ``FileResponse`` serving the generated PDF, or a 500 ``JSONResponse``
        with ``error``/``detail`` keys when processing raises unexpectedly.

    Raises:
        HTTPException: 400 when no files were uploaded, a file is not an
            image, or PIL cannot open a file; 500 when no PDF path is
            returned or the returned path does not exist.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded for child fee processing.")

    # A single try/finally covers both image loading and PDF generation so the
    # uploads are closed exactly once on every exit path.
    try:
        pil_images = []
        for file_upload in files:
            # Checked outside the Image.open handler so this HTTPException is
            # not caught and re-wrapped into a second, garbled error message.
            if not (file_upload.content_type or "").startswith("image/"):
                raise HTTPException(status_code=400, detail=f"File '{file_upload.filename}' is not an image.")
            try:
                pil_images.append(Image.open(file_upload.file))
            except Exception as e:
                raise HTTPException(
                    status_code=400,
                    detail=f"Error processing file '{file_upload.filename}': {str(e)}",
                ) from e

        pdf_path = extract_reimbursement_form_info(
            img_inputs=pil_images,
            emp_name=emp_name,
            emp_code=emp_code,
            department=department,
            form_name=form_name,
        )
        if pdf_path and os.path.exists(pdf_path):
            return FileResponse(pdf_path, media_type='application/pdf', filename=os.path.basename(pdf_path))
        # No usable path came back from the pipeline (falsy or missing file).
        raise HTTPException(status_code=500, detail="Failed to generate PDF. No items might have been extracted or an internal error occurred.")
    except HTTPException:
        # Let deliberate HTTP errors through untouched.
        raise
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": "Failed to process child fee form batch", "detail": str(e)})
    finally:
        # PIL reads lazily from the upload streams, so they stay open until
        # processing has finished and are closed here.
        for uploaded in files:
            if hasattr(uploaded, 'file') and not uploaded.file.closed:
                uploaded.file.close()
@app.post("/extract_medical_info_batch/")
async def extract_medical_batch_endpoint(
    files: List[UploadFile] = File(...),
    emp_name: str = Form(...),
    emp_code: str = Form(...),
    department: str = Form(...),
    designation: str = Form(...),
    company: str = Form(...),
    extension_no: str = Form(...),
):
    """Generate a consolidated HTML medical form from a batch of uploaded images.

    Uploads are copied into a private temporary directory and handed to
    ``extract_medical_info_batch`` as objects exposing a ``name`` path. The
    temporary directory is always removed before the handler returns.

    Args:
        files: Uploaded image files; every file must have an ``image/*``
            content type or the whole request is rejected.
        emp_name: Forwarded unchanged to ``extract_medical_info_batch``.
        emp_code: Forwarded unchanged to ``extract_medical_info_batch``.
        department: Forwarded unchanged to ``extract_medical_info_batch``.
        designation: Forwarded unchanged to ``extract_medical_info_batch``.
        company: Forwarded unchanged to ``extract_medical_info_batch``.
        extension_no: Forwarded unchanged to ``extract_medical_info_batch``.

    Returns:
        A ``FileResponse`` serving the generated HTML (status 400 when the
        returned file name contains ``error_no_medical_form_images``, else
        200), or a 500 ``JSONResponse`` with ``error``/``detail`` keys when
        processing raises unexpectedly.

    Raises:
        HTTPException: 400 when no files were uploaded or a file is not an
            image; 500 when no HTML path is returned or it does not exist.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded.")

    class MockFileObject:
        """Pairs a temp-file path (``name``) with the client's original filename.

        NOTE(review): presumably the pipeline only reads ``name`` - confirm.
        """

        def __init__(self, path, original_filename):
            self.name = path
            self.original_filename = original_filename

    temp_dir = tempfile.mkdtemp()
    try:
        # Validate the whole batch first so nothing is written to disk for a
        # request that will be rejected. A missing content type counts as
        # "not an image" rather than surfacing as a 500.
        for file_upload in files:
            if not (file_upload.content_type or "").startswith("image/"):
                raise HTTPException(status_code=400, detail=f"File '{file_upload.filename}' is not an image.")

        temp_files_info = []
        for index, file_upload in enumerate(files):
            safe_filename = os.path.basename(file_upload.filename) if file_upload.filename else "unknown_file"
            if safe_filename in ("", ".", ".."):
                # These would resolve to a directory rather than a file.
                safe_filename = "unknown_file"
            # One sub-directory per upload: files sharing a basename no longer
            # overwrite each other, and the basename seen downstream is kept.
            upload_dir = os.path.join(temp_dir, str(index))
            os.makedirs(upload_dir)
            temp_file_path = os.path.join(upload_dir, safe_filename)
            try:
                with open(temp_file_path, "wb") as f_temp:
                    shutil.copyfileobj(file_upload.file, f_temp)
            finally:
                file_upload.file.close()
            temp_files_info.append(MockFileObject(temp_file_path, file_upload.filename))

        html_path = extract_medical_info_batch(
            image_file_list=temp_files_info,  # Pass list of MockFileObjects
            emp_name=emp_name,
            emp_code=emp_code,
            department=department,
            designation=designation,
            company=company,
            extension_no=extension_no,
        )
        if html_path and os.path.exists(html_path):
            status_code_to_return = 200
            # The pipeline's "no images" error page is served with a 400.
            if "error_no_medical_form_images" in os.path.basename(html_path):
                status_code_to_return = 400
            return FileResponse(html_path, media_type='text/html', filename=os.path.basename(html_path), status_code=status_code_to_return)
        # No usable path came back from the pipeline (falsy or missing file).
        raise HTTPException(status_code=500, detail="Failed to generate consolidated HTML medical form. This could be due to no images or an internal error during HTML generation.")
    except HTTPException:
        # Let deliberate HTTP errors through untouched.
        raise
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": "Failed to process batch medical forms", "detail": str(e)})
    finally:
        # Single cleanup point for every exit path: close any upload that was
        # not reached, then remove the temp directory and everything in it.
        for uploaded in files:
            if hasattr(uploaded, 'file') and not uploaded.file.closed:
                uploaded.file.close()
        try:
            shutil.rmtree(temp_dir)
        except Exception as e_clean_dir:
            # Best-effort: a failed cleanup must not mask the real response.
            print(f"Error final cleanup of temp directory {temp_dir}: {e_clean_dir}")
# Ensure no trailing comments like "# We will add more endpoints below"