Spaces:

blessedpug
/

Demo_Build

Sleeping

App Files Files Community

Demo_Build / main.py

blessedpug

Pre-Docker backup

a4a400b 8 months ago

raw

history blame contribute delete

10.3 kB

	from fastapi import FastAPI, File, UploadFile, Form, HTTPException
	from fastapi.responses import JSONResponse, FileResponse
	from typing import List, Optional
	from PIL import Image
	import tempfile
	import os
	import shutil
	import json # Added json import

	# Corrected and consolidated imports from pipeline
	from pipeline import (
	extract_info,
	# extract_info_batch, # This function in pipeline.py takes file paths, FastAPI will call extract_info individually
	extract_reimbursement_form_info,
	extract_medical_info,
	extract_medical_info_batch
	)
	# models.py is assumed to contain the Pydantic models; they are not imported here because this file relies only on FastAPI's own request validation.
	# from models import ReceiptData, ChildFeeForm

	# ASGI application object; the route decorators below register their endpoints on it.
	app = FastAPI()

	# Create the "outputs" directory at import time; exist_ok=True makes this a no-op when it already exists.
	os.makedirs("outputs", exist_ok=True)

	@app.get("/")
	async def read_root():
	    """Landing route: answer GET / with a short welcome payload."""
	    greeting = {"message": "Welcome to the Document Processing API"}
	    return greeting

	def _strip_json_fence(text):
	    """Return *text* with a surrounding ```json ... ``` Markdown fence removed.

	    Text that does not start with the fence is returned with only its
	    surrounding whitespace stripped. The closing fence is removed only when
	    it is actually present, so a missing or differently spaced closing
	    fence no longer eats characters of the JSON payload.
	    """
	    cleaned = text.strip()
	    if not cleaned.startswith("```json"):
	        return cleaned
	    cleaned = cleaned[len("```json"):].strip()
	    if cleaned.endswith("```"):
	        cleaned = cleaned[:-3]
	    return cleaned.strip()


	@app.post("/extract_receipt_info_batch/")
	async def extract_receipt_batch_endpoint(files: List[UploadFile] = File(...)):
	    """Run ``extract_info`` on every uploaded image and return per-file results.

	    Args:
	        files: Uploaded files; each one is processed independently.

	    Returns:
	        A JSONResponse holding a list with one entry per upload, either
	        ``{"filename": ..., "data": <parsed JSON>}`` on success or
	        ``{"filename": ..., "error": <message>}`` on failure.

	    Raises:
	        HTTPException: 400 when the upload list is empty.
	    """
	    if not files:
	        raise HTTPException(status_code=400, detail="No files uploaded.")

	    results = []
	    for file_upload in files:
	        try:
	            # content_type may be missing (None); treat that as "not an image"
	            # instead of failing with an AttributeError.
	            if not (file_upload.content_type or "").startswith("image/"):
	                results.append({"filename": file_upload.filename, "error": "File is not an image."})
	                continue

	            pil_image = Image.open(file_upload.file)
	            result_json_str = extract_info(pil_image)

	            # The extractor's output may be wrapped in a ```json fence; unwrap before parsing.
	            parsed = json.loads(_strip_json_fence(result_json_str))
	            results.append({"filename": file_upload.filename, "data": parsed})
	        except Exception as e:
	            # Best-effort batch: one failing file must not abort the others.
	            results.append({"filename": file_upload.filename, "error": str(e)})
	        finally:
	            file_upload.file.close()  # Ensure file is closed

	    return JSONResponse(content=results)

	def _close_uploads(uploads):
	    """Close the underlying stream of every upload in *uploads* that is still open."""
	    for upload in uploads:
	        stream = getattr(upload, "file", None)
	        if stream is not None and not stream.closed:
	            stream.close()


	@app.post("/extract_reimbursement_form_batch/")
	async def extract_reimbursement_form_batch_endpoint(
	    files: List[UploadFile] = File(...),
	    emp_name: str = Form(...),
	    emp_code: str = Form(...),
	    department: str = Form(...),
	    form_name: str = Form(...)
	):
	    """Build a reimbursement-form PDF from a batch of uploaded images.

	    All uploads are opened as PIL images and handed, together with the
	    employee fields, to ``extract_reimbursement_form_info``. Its return value
	    is treated as the path of the generated PDF.

	    Args:
	        files: Uploaded image files; a single non-image rejects the whole batch.
	        emp_name, emp_code, department, form_name: Form fields forwarded
	            unchanged to ``extract_reimbursement_form_info``.

	    Returns:
	        A FileResponse serving the PDF, or a 500 JSONResponse with
	        ``error``/``detail`` keys when processing raises an unexpected exception.

	    Raises:
	        HTTPException: 400 for an empty batch, a non-image upload or an
	            unreadable image; 500 when no PDF path is returned or the file
	            does not exist.
	    """
	    if not files:
	        raise HTTPException(status_code=400, detail="No files uploaded for child fee processing.")

	    pil_images = []
	    for file_upload in files:
	        # Validate outside the try block so this HTTPException reaches the
	        # client as written instead of being caught and re-wrapped below.
	        if not (file_upload.content_type or "").startswith("image/"):
	            _close_uploads(files)
	            raise HTTPException(status_code=400, detail=f"File '{file_upload.filename}' is not an image.")
	        try:
	            pil_images.append(Image.open(file_upload.file))
	        except Exception as e:
	            # Release every upload stream before rejecting the request.
	            _close_uploads(files)
	            raise HTTPException(
	                status_code=400,
	                detail=f"Error processing file '{file_upload.filename}': {str(e)}",
	            ) from e
	        # The streams stay open here: the PIL images created above still
	        # reference them. They are closed in the finally block below.

	    try:
	        pdf_path = extract_reimbursement_form_info(
	            img_inputs=pil_images,
	            emp_name=emp_name,
	            emp_code=emp_code,
	            department=department,
	            form_name=form_name
	        )

	        if pdf_path and os.path.exists(pdf_path):
	            return FileResponse(pdf_path, media_type='application/pdf', filename=os.path.basename(pdf_path))
	        # No usable path came back (None/empty, or the file is missing on disk).
	        raise HTTPException(status_code=500, detail="Failed to generate PDF. No items might have been extracted or an internal error occurred.")
	    except HTTPException:
	        raise
	    except Exception as e:
	        return JSONResponse(status_code=500, content={"error": "Failed to process child fee form batch", "detail": str(e)})
	    finally:
	        # Ensure all uploaded files are closed
	        _close_uploads(files)


	@app.post("/extract_medical_info_batch/")
	async def extract_medical_batch_endpoint(
	    files: List[UploadFile] = File(...),
	    emp_name: str = Form(...),
	    emp_code: str = Form(...),
	    department: str = Form(...),
	    designation: str = Form(...),
	    company: str = Form(...),
	    extension_no: str = Form(...)
	):
	    """Build a consolidated medical-form HTML page from a batch of uploaded images.

	    Each upload is copied into a private temporary directory and passed to
	    ``extract_medical_info_batch`` as an object exposing the on-disk path via
	    ``name``. The temporary directory is removed before the function returns.

	    Args:
	        files: Uploaded image files; a single non-image rejects the whole batch.
	        emp_name, emp_code, department, designation, company, extension_no:
	            Form fields forwarded unchanged to ``extract_medical_info_batch``.

	    Returns:
	        A FileResponse serving the HTML file. The status is 400 when the file
	        name contains "error_no_medical_form_images", otherwise 200. A 500
	        JSONResponse with ``error``/``detail`` keys is returned when
	        processing raises an unexpected exception.

	    Raises:
	        HTTPException: 400 for an empty batch or a non-image upload; 500 when
	            no HTML path is returned or the file does not exist.
	    """
	    if not files:
	        raise HTTPException(status_code=400, detail="No files uploaded.")

	    class MockFileObject:
	        """Minimal file-like stand-in: carries the temp path as ``name``."""

	        def __init__(self, path, original_filename):
	            self.name = path
	            self.original_filename = original_filename

	    temp_files_info = []
	    temp_dir = tempfile.mkdtemp()

	    try:
	        for index, file_upload in enumerate(files):
	            try:
	                # content_type may be missing (None); treat that as "not an image".
	                if not (file_upload.content_type or "").startswith("image/"):
	                    raise HTTPException(status_code=400, detail=f"File '{file_upload.filename}' is not an image.")

	                # Keep only the final path component, and fall back to a fixed
	                # name when nothing usable is left (empty, "." or "..").
	                safe_filename = os.path.basename(file_upload.filename or "")
	                if safe_filename in ("", ".", ".."):
	                    safe_filename = "unknown_file"

	                # One sub-directory per upload: two uploads with the same file
	                # name no longer overwrite each other, and the basename the
	                # pipeline sees is unchanged.
	                upload_dir = os.path.join(temp_dir, str(index))
	                os.makedirs(upload_dir)
	                temp_file_path = os.path.join(upload_dir, safe_filename)

	                with open(temp_file_path, "wb") as f_temp:
	                    shutil.copyfileobj(file_upload.file, f_temp)

	                temp_files_info.append(MockFileObject(temp_file_path, file_upload.filename))
	            finally:
	                file_upload.file.close()

	        html_path = extract_medical_info_batch(
	            image_file_list=temp_files_info,  # Pass list of MockFileObjects
	            emp_name=emp_name,
	            emp_code=emp_code,
	            department=department,
	            designation=designation,
	            company=company,
	            extension_no=extension_no
	        )

	        if html_path and os.path.exists(html_path):
	            # A file whose name marks the "no images" error page is served with 400.
	            if "error_no_medical_form_images" in os.path.basename(html_path):
	                status_code_to_return = 400
	            else:
	                status_code_to_return = 200
	            # NOTE(review): the temp directory is deleted in the finally block
	            # before the response body is sent; this assumes the pipeline
	            # writes html_path outside temp_dir - confirm.
	            return FileResponse(html_path, media_type='text/html', filename=os.path.basename(html_path), status_code=status_code_to_return)

	        # No usable path came back (None/empty, or the file is missing on disk).
	        raise HTTPException(status_code=500, detail="Failed to generate consolidated HTML medical form. This could be due to no images or an internal error during HTML generation.")

	    except HTTPException:
	        raise
	    except Exception as e:
	        return JSONResponse(status_code=500, content={"error": "Failed to process batch medical forms", "detail": str(e)})
	    finally:
	        # Single cleanup point for every exit path (success, HTTP error, crash).
	        if os.path.exists(temp_dir):
	            try:
	                shutil.rmtree(temp_dir)
	            except Exception as e_clean_dir:
	                print(f"Error final cleanup of temp directory {temp_dir}: {e_clean_dir}")

	# Ensure no trailing comments like "# We will add more endpoints below"