blessedpug committed on
Commit
a1a13bb
·
1 Parent(s): 8b27fa0

Implemented FastAPI endpoints - Implemented batch processing for pdf forms

Browse files
Files changed (4) hide show
  1. app.py +11 -10
  2. data.json +88 -0
  3. main.py +216 -0
  4. pipeline.py +103 -57
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from pipeline import extract_info_batch, extract_child_fee_info, extract_medical_info, extract_medical_info_batch
3
  from PIL import Image
4
 
5
 
@@ -20,6 +20,8 @@ with gr.Blocks() as demo:
20
  )
21
 
22
  with gr.Column(scale=2):
 
 
23
  batch_output_box = gr.Markdown(
24
  value="Upload Images to extract information",
25
  label="Batch Extracted Info",
@@ -37,13 +39,12 @@ with gr.Blocks() as demo:
37
  with gr.Tab("Reimbursement Forms"):
38
  with gr.Row():
39
  with gr.Column(scale=2):
40
- img_input = gr.Image(
41
- type="pil",
42
- label="Image Upload",
43
- elem_id="upload-img",
44
- show_label=False,
45
- height=512,
46
- width=512
47
  )
48
 
49
  with gr.Column(scale=2):
@@ -65,8 +66,8 @@ with gr.Blocks() as demo:
65
 
66
 
67
  upload_btn.click(
68
- fn=extract_child_fee_info,
69
- inputs=[img_input, emp_name, emp_code, department,form_name],
70
  outputs=preview_output
71
  )
72
 
 
1
  import gradio as gr
2
+ from pipeline import extract_info_batch, extract_reimbursement_form_info, extract_medical_info, extract_medical_info_batch
3
  from PIL import Image
4
 
5
 
 
20
  )
21
 
22
  with gr.Column(scale=2):
23
+ gr.Markdown("## Receipt Reimbursement Portal")
24
+
25
  batch_output_box = gr.Markdown(
26
  value="Upload Images to extract information",
27
  label="Batch Extracted Info",
 
39
  with gr.Tab("Reimbursement Forms"):
40
  with gr.Row():
41
  with gr.Column(scale=2):
42
+ reimbursement_img_input = gr.File(
43
+ file_types=["image"],
44
+ label="Batch Image Upload",
45
+ elem_id="batch-upload-img",
46
+ show_label=True,
47
+ file_count="multiple"
 
48
  )
49
 
50
  with gr.Column(scale=2):
 
66
 
67
 
68
  upload_btn.click(
69
+ fn=extract_reimbursement_form_info,
70
+ inputs=[reimbursement_img_input, emp_name, emp_code, department,form_name],
71
  outputs=preview_output
72
  )
73
 
data.json CHANGED
@@ -90,5 +90,93 @@
90
  "amount": 9.48
91
  }
92
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  }
94
  ]
 
90
  "amount": 9.48
91
  }
92
  ]
93
+ },
94
+ {
95
+ "fraud_check": [],
96
+ "merchant": "CSH Pharmacy",
97
+ "date": "17/01/2025",
98
+ "total_amount": 500.0,
99
+ "items": [
100
+ {
101
+ "description": "Arinac Tab (w)",
102
+ "amount": 8.6
103
+ },
104
+ {
105
+ "description": "Tarivid 200mg Tab (w)",
106
+ "amount": 301.8
107
+ },
108
+ {
109
+ "description": "Soflin 10mg Tab 30,s",
110
+ "amount": 18.05
111
+ }
112
+ ]
113
+ },
114
+ {
115
+ "fraud_check": [],
116
+ "merchant": "CSH Pharmacy",
117
+ "date": "17/01/2025",
118
+ "total_amount": 1449.0,
119
+ "items": [
120
+ {
121
+ "description": "Bofalgan 1g/100ml Inj (w)",
122
+ "amount": 225.0
123
+ },
124
+ {
125
+ "description": "Oxidil Inj 1gm Iv N/p",
126
+ "amount": 450.0
127
+ },
128
+ {
129
+ "description": "Drip Sot Max Care",
130
+ "amount": 200.0
131
+ },
132
+ {
133
+ "description": "10cc Syringe (smd)",
134
+ "amount": 50.0
135
+ },
136
+ {
137
+ "description": "9% 100ml Medisol",
138
+ "amount": 93.23
139
+ },
140
+ {
141
+ "description": "Iv Branula 24g (b Braun) W B",
142
+ "amount": 430.0
143
+ }
144
+ ]
145
+ },
146
+ {
147
+ "fraud_check": [],
148
+ "merchant": "CHUGHTAI PHARMACY",
149
+ "date": "15/01/2025",
150
+ "total_amount": 1394.0,
151
+ "items": [
152
+ {
153
+ "description": "N/s Plasamine inf 0.9% 100ml",
154
+ "amount": 102.0
155
+ },
156
+ {
157
+ "description": "Drip Set (classic)",
158
+ "amount": 150.0
159
+ },
160
+ {
161
+ "description": "B. Braun Branula Introcan 24 G",
162
+ "amount": 430.0
163
+ },
164
+ {
165
+ "description": "Apple Syringe 10cc 100s",
166
+ "amount": 390.0
167
+ },
168
+ {
169
+ "description": "Oxidil inj 1 W 1 Gm 1 Vial",
170
+ "amount": 352.0
171
+ },
172
+ {
173
+ "description": "Neurobion inj 25 Ampx3",
174
+ "amount": 120.0
175
+ },
176
+ {
177
+ "description": "Bofalgan inj 1 Ampx100 ml",
178
+ "amount": 207.0
179
+ }
180
+ ]
181
  }
182
  ]
main.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException
2
+ from fastapi.responses import JSONResponse, FileResponse
3
+ from typing import List, Optional
4
+ from PIL import Image
5
+ import tempfile
6
+ import os
7
+ import shutil
8
+ import json # Added json import
9
+
10
+ # Corrected and consolidated imports from pipeline
11
+ from pipeline import (
12
+ extract_info,
13
+ # extract_info_batch, # This function in pipeline.py takes file paths, FastAPI will call extract_info individually
14
+ extract_reimbursement_form_info,
15
+ extract_medical_info,
16
+ extract_medical_info_batch
17
+ )
18
+ # Assuming models.py contains necessary Pydantic models, though not directly used in this file for request validation beyond FastAPI's
19
+ # from models import ReceiptData, ChildFeeForm
20
+
21
+ app = FastAPI()
22
+
23
+ # Ensure output directory exists
24
+ os.makedirs("outputs", exist_ok=True)
25
+
26
@app.get("/")
async def read_root():
    """Return a static greeting confirming the API is reachable."""
    greeting = {"message": "Welcome to the Document Processing API"}
    return greeting
29
+
30
@app.post("/extract_receipt_info_batch/")
async def extract_receipt_batch_endpoint(files: List[UploadFile] = File(...)):
    """Extract receipt data from each uploaded image.

    Every upload is processed independently: a failure on one file is
    recorded in that file's result entry and does not abort the batch.

    Returns:
        A JSON array with one object per upload, holding ``filename`` and
        either ``data`` (the parsed JSON produced by ``extract_info``) or
        ``error`` (a message describing why the file was skipped).

    Raises:
        HTTPException: 400 when no files are supplied.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded.")

    results = []
    for file_upload in files:
        try:
            # content_type is None when the client omits the header; treat
            # that as "not an image" instead of failing on None.startswith.
            if not (file_upload.content_type or "").startswith("image/"):
                results.append({"filename": file_upload.filename, "error": "File is not an image."})
                continue

            pil_image = Image.open(file_upload.file)
            payload = extract_info(pil_image).strip()

            # extract_info may wrap its JSON in a Markdown code fence. Strip
            # the fence by content rather than by fixed offsets so a missing
            # trailing newline cannot cut into the JSON itself.
            if payload.startswith("```"):
                payload = payload.strip("`").strip()
                if payload.startswith("json"):
                    payload = payload[4:].strip()

            results.append({"filename": file_upload.filename, "data": json.loads(payload)})
        except Exception as e:
            # Best-effort batch: report the failure for this file and move on.
            results.append({"filename": file_upload.filename, "error": str(e)})
        finally:
            file_upload.file.close()  # Ensure file is closed

    return JSONResponse(content=results)
56
+
57
@app.post("/extract_reimbursement_form_batch/")
async def extract_reimbursement_form_batch_endpoint(
    files: List[UploadFile] = File(...),
    emp_name: str = Form(...),
    emp_code: str = Form(...),
    department: str = Form(...),
    form_name: str = Form(...)
):
    """Fill one reimbursement PDF from a batch of uploaded bill images.

    All uploads must be images; the first invalid or unreadable file aborts
    the request with a 400. The opened images and the employee fields are
    passed to ``extract_reimbursement_form_info`` and the resulting PDF is
    returned as a download.

    Raises:
        HTTPException: 400 when no files are supplied, a file is not an
            image, or an image cannot be opened; 500 when no PDF path is
            returned or the path does not exist.

    Any other exception is reported as a 500 JSON body with ``error`` and
    ``detail`` keys.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded for reimbursement form processing.")

    try:
        pil_images = []
        for file_upload in files:
            # Checked outside the inner try so this 400 reaches the client
            # unchanged instead of being re-wrapped as an "Error processing
            # file" message. content_type may be None if the header is absent.
            if not (file_upload.content_type or "").startswith("image/"):
                raise HTTPException(status_code=400, detail=f"File '{file_upload.filename}' is not an image.")
            try:
                pil_images.append(Image.open(file_upload.file))
            except Exception as e:
                raise HTTPException(
                    status_code=400,
                    detail=f"Error processing file '{file_upload.filename}': {str(e)}"
                ) from e

        pdf_path = extract_reimbursement_form_info(
            img_inputs=pil_images,
            emp_name=emp_name,
            emp_code=emp_code,
            department=department,
            form_name=form_name
        )

        if pdf_path and os.path.exists(pdf_path):
            return FileResponse(pdf_path, media_type='application/pdf', filename=os.path.basename(pdf_path))

        # extract_reimbursement_form_info returned None or a missing path
        # (e.g. no items extracted, or PDF generation failed).
        raise HTTPException(status_code=500, detail="Failed to generate PDF. No items might have been extracted or an internal error occurred.")
    except HTTPException:
        raise
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": "Failed to process reimbursement form batch", "detail": str(e)})
    finally:
        # The uploads stay open while the images are in use, so they are
        # closed only here, once, on every exit path.
        for file_upload in files:
            if not file_upload.file.closed:
                file_upload.file.close()
110
+
111
+
112
@app.post("/extract_medical_info_batch/")
async def extract_medical_batch_endpoint(
    files: List[UploadFile] = File(...),
    emp_name: str = Form(...),
    emp_code: str = Form(...),
    department: str = Form(...),
    designation: str = Form(...),
    company: str = Form(...),
    extension_no: str = Form(...)
):
    """Build one consolidated HTML medical form from a batch of images.

    Each upload is copied into its own subdirectory of a fresh temporary
    directory, and the saved paths are handed to
    ``extract_medical_info_batch`` together with the employee fields. The
    temporary directory is removed on every exit path.

    Returns:
        The generated HTML file. The status is 400 when the file name
        contains ``error_no_medical_form_images``, otherwise 200.

    Raises:
        HTTPException: 400 when no files are supplied or a file is not an
            image; 500 when no HTML path is returned or it does not exist.

    Any other exception is reported as a 500 JSON body with ``error`` and
    ``detail`` keys.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files uploaded.")

    class MockFileObject:
        """Path holder passed to the batch function in place of a file.

        Presumably the batch function reads ``.name`` as the image path;
        verify against ``extract_medical_info_batch``.
        """

        def __init__(self, path, original_filename):
            self.name = path
            self.original_filename = original_filename

    temp_files_info = []
    temp_dir = tempfile.mkdtemp()

    try:
        for index, file_upload in enumerate(files):
            try:
                # content_type may be None if the client omits the header.
                if not (file_upload.content_type or "").startswith("image/"):
                    raise HTTPException(status_code=400, detail=f"File '{file_upload.filename}' is not an image.")

                # basename() drops any client-supplied directory components.
                safe_filename = os.path.basename(file_upload.filename or "") or "unknown_file"

                # One subdirectory per upload: two files with the same name
                # no longer overwrite each other, and the basename seen
                # downstream is unchanged.
                upload_dir = os.path.join(temp_dir, str(index))
                os.makedirs(upload_dir, exist_ok=True)
                temp_file_path = os.path.join(upload_dir, safe_filename)

                with open(temp_file_path, "wb") as f_temp:
                    shutil.copyfileobj(file_upload.file, f_temp)

                temp_files_info.append(MockFileObject(temp_file_path, file_upload.filename))
            finally:
                file_upload.file.close()

        html_path = extract_medical_info_batch(
            image_file_list=temp_files_info,  # Pass list of MockFileObjects
            emp_name=emp_name,
            emp_code=emp_code,
            department=department,
            designation=designation,
            company=company,
            extension_no=extension_no
        )

        if html_path and os.path.exists(html_path):
            status_code_to_return = 200
            if "error_no_medical_form_images" in os.path.basename(html_path):
                status_code_to_return = 400
            # NOTE(review): the response body is sent after this function
            # returns, i.e. after temp_dir is deleted below. This assumes
            # html_path lives outside temp_dir - confirm.
            return FileResponse(html_path, media_type='text/html', filename=os.path.basename(html_path), status_code=status_code_to_return)

        raise HTTPException(status_code=500, detail="Failed to generate consolidated HTML medical form. The batch function may have returned an invalid path or None, or the file doesn't exist.")
    except HTTPException:
        raise
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": "Failed to process batch medical forms", "detail": str(e)})
    finally:
        # Single cleanup point: removing the directory tree also removes
        # every saved upload, whichever path led here.
        try:
            shutil.rmtree(temp_dir)
        except OSError as e_clean_dir:
            print(f"Error final cleanup of temp directory {temp_dir}: {e_clean_dir}")
215
+
216
+ # Ensure no trailing comments like "# We will add more endpoints below"
pipeline.py CHANGED
@@ -10,6 +10,7 @@ from form_fill import fill_child_fee_pdf, fill_medical_pdf
10
  from fraud import process_receipt
11
  from datetime import datetime
12
  import html
 
13
 
14
 
15
  load_dotenv()
@@ -24,7 +25,7 @@ reciept_system_prompt = (
24
  " description: str\n"
25
  " amount: float\n\n"
26
  "class FraudData(BaseModel):\n"
27
- " fraud_detected: bool \n"
28
  " fraud_type: Optional[str] = None # Type of fraud if detected, e.g., \"duplicate\", \"suspicious\" \n\n"
29
  "class ReceiptData(BaseModel):\n"
30
  " fraud_check: Optional[List[FraudData]] = [] # Optional field for fraud detection, always set to empty list\n"
@@ -145,83 +146,128 @@ def extract_info(pil_img):
145
  return f"```json\n{json.dumps({'error': str(e), 'raw_output': raw_output}, indent=2)}\n```"
146
 
147
 
148
- def extract_child_fee_info(img_input, emp_name, emp_code, department, form_name):
149
- print(emp_name, emp_code, department)
150
- processed_image = preprocess_image(img_input)
151
- img_bytes = pil_to_bytes(processed_image)
152
- img_base64 = base64.b64encode(img_bytes.getvalue()).decode("utf-8")
153
- response = openai.chat.completions.create(
154
- model="gpt-4o",
155
- messages=[
156
- {"role": "system", "content": fee_bill_system_prompt},
157
- {"role": "user",
158
- "content": [
159
- {"type": "text", "text": "Here is a child fee bill image:"},
160
- {"type": "image_url", "image_url": {"url": "data:image/png;base64," + img_base64}}
161
- ]}
162
- ]
163
- )
164
- raw_output = response.choices[0].message.content
165
- try:
166
- if raw_output.startswith("```"):
167
- raw_output = raw_output.strip("` \n")
168
- if raw_output.startswith("json"):
169
- raw_output = raw_output[4:].strip()
170
- data = json.loads(raw_output)
171
- print(data)
172
- # Validate if needed:
173
- # ChildFeeForm(**data)
174
 
175
- # Extract bill_month from first item if available, else use empty string
176
- items = data.get("items", [])
177
- bill_month = ""
178
- if items and "bill_month" in items[0]:
179
- bill_month = items[0]["bill_month"]
180
 
181
 
182
- os.makedirs("outputs", exist_ok=True)
183
 
184
- if form_name == "Child Fee Reimbursement Form":
185
-
186
- output_pdf_path = f"outputs/filled_child_fee_reimbursement_form_{uuid.uuid4().hex}.pdf"
187
 
188
- elif form_name == "Internet Charges Form":
189
 
190
- output_pdf_path = f"outputs/filled_internet_charges_reimbursement_form_{uuid.uuid4().hex}.pdf"
 
 
 
 
 
 
191
 
192
- elif form_name == "Mobile Reimbursement Form":
 
 
 
 
 
 
 
 
 
193
 
194
- output_pdf_path = f"outputs/filled_mobile_reimbursement_form_{uuid.uuid4().hex}.pdf"
195
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
 
198
  filled_pdf_path = fill_child_fee_pdf(
199
  template_pdf_path="templates/REIMBURSEMENT FORM.pdf",
200
  output_pdf_path=output_pdf_path,
201
  emp_name=emp_name,
202
  emp_code=emp_code,
203
  department=department,
204
- bill_month=bill_month,
205
- items=items,
206
- total=data.get("total", "")
207
  )
208
-
209
- return filled_pdf_path # Return path to Gradio for download
210
  except Exception as e:
211
- print("ERROR:", e)
212
- return None # or f"Error: {str(e)}"
213
 
214
 
215
 
216
- def extract_info_batch(file_list):
217
- """
218
- Accepts a list of file objects/paths, processes each as a PIL image, and returns results.
219
- """
220
- results = []
221
- for file in file_list:
222
- img = Image.open(file)
223
- results.append(extract_info(img))
224
- return "\n\n".join(results)
225
 
226
 
227
 
 
10
  from fraud import process_receipt
11
  from datetime import datetime
12
  import html
13
+ from typing import List
14
 
15
 
16
  load_dotenv()
 
25
  " description: str\n"
26
  " amount: float\n\n"
27
  "class FraudData(BaseModel):\n"
28
+ " fraud_detected: bool # either True or False\n"
29
  " fraud_type: Optional[str] = None # Type of fraud if detected, e.g., \"duplicate\", \"suspicious\" \n\n"
30
  "class ReceiptData(BaseModel):\n"
31
  " fraud_check: Optional[List[FraudData]] = [] # Optional field for fraud detection, always set to empty list\n"
 
146
  return f"```json\n{json.dumps({'error': str(e), 'raw_output': raw_output}, indent=2)}\n```"
147
 
148
 
149
def extract_info_batch(file_list):
    """Run ``extract_info`` on every file and join the outputs.

    Args:
        file_list: File objects or paths that ``Image.open`` accepts. ``None``
            or an empty list yields an empty string.

    Returns:
        The per-file ``extract_info`` results joined by blank lines.
    """
    results = []
    # Tolerate None (e.g. nothing selected) instead of failing on iteration.
    for file in file_list or []:
        # The context manager closes the image's file handle once the
        # extraction for this file is done.
        with Image.open(file) as img:
            results.append(extract_info(img))
    return "\n\n".join(results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
 
 
 
 
 
159
 
160
 
 
161
 
 
 
 
162
 
 
163
 
164
def extract_reimbursement_form_info(img_inputs: List[Image.Image], emp_name: str, emp_code: str, department: str, form_name: str):
    """Consolidate line items from a batch of bill images into one filled PDF.

    Each image is preprocessed, sent to the model with
    ``fee_bill_system_prompt``, and the reply is parsed as JSON. The ``items``
    of every successfully parsed reply are merged and their ``amount`` values
    summed. An image that fails at any step is logged and skipped, and it
    contributes neither items nor amounts.

    Args:
        img_inputs: PIL images, or anything ``Image.open`` accepts.
        emp_name: Employee name written to the form.
        emp_code: Employee code written to the form.
        department: Department written to the form.
        form_name: Selects the output file name prefix; unrecognised names
            use the "unknown" prefix.

    Returns:
        The value returned by ``fill_child_fee_pdf`` (callers treat it as the
        PDF path), or ``None`` when no items were extracted from any image or
        PDF generation raised.
    """
    print(f"Processing child fee info for: {emp_name}, {emp_code}, {department}, Form: {form_name}")

    consolidated_items = []
    consolidated_total = 0.0
    first_bill_month_found = ""
    processed_image_count = 0

    for i, img_input_item in enumerate(img_inputs):
        print(f"Processing image {i+1} of {len(img_inputs)} for child fee form...")
        opened_here = None  # Set only when this function opened the image itself
        try:
            if isinstance(img_input_item, Image.Image):
                current_pil_img = img_input_item
            else:
                # Assume a path, filename, or file-like object Image.open() can handle.
                opened_here = current_pil_img = Image.open(img_input_item)

            processed_image = preprocess_image(current_pil_img)
            img_bytes = pil_to_bytes(processed_image)
            img_base64 = base64.b64encode(img_bytes.getvalue()).decode("utf-8")

            response = openai.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": fee_bill_system_prompt},
                    {"role": "user",
                     "content": [
                         {"type": "text", "text": f"Here is a child fee bill image (part {i+1} of a batch):"},
                         {"type": "image_url", "image_url": {"url": "data:image/png;base64," + img_base64}}
                     ]}
                ]
            )
            raw_output = response.choices[0].message.content
            print(f"Raw output from LLM for image {i+1}: {raw_output}")

            # Strip a Markdown code fence (and optional "json" tag) if present.
            if raw_output.startswith("```"):
                raw_output = raw_output.strip("` \n")
                if raw_output.startswith("json"):
                    raw_output = raw_output[4:].strip()

            data = json.loads(raw_output)
            print(f"Parsed data from LLM for image {i+1}: {data}")

            current_items = data.get("items") or []

            # Validate every amount before touching the shared accumulators,
            # so a bad value cannot leave this image's items merged while
            # their amounts are missing from the total.
            running_total = consolidated_total
            for item in current_items:
                running_total += float(item.get("amount", 0) or 0)

            consolidated_items.extend(current_items)
            consolidated_total = running_total

            if not first_bill_month_found and current_items and "bill_month" in current_items[0]:
                first_bill_month_found = current_items[0]["bill_month"]

            processed_image_count += 1

        except Exception as e:
            # One bad image must not sink the batch: log it and move on.
            print(f"ERROR processing image {i+1} for child fee form: {e}")
            continue
        finally:
            # Images supplied by the caller are left open; only close ours.
            if opened_here is not None:
                opened_here.close()

    if not consolidated_items:  # No items extracted from any image
        print("No items were extracted from any of the provided images for child fee form.")
        # Return None and let the caller decide how to report the failure.
        return None

    print(f"Consolidated {len(consolidated_items)} items from {processed_image_count} images.")
    print(f"Final total: {consolidated_total}, Bill month to use: {first_bill_month_found}")

    os.makedirs("outputs", exist_ok=True)

    # Mark the file name when several images were consolidated.
    file_suffix = f"{uuid.uuid4().hex}"
    if len(img_inputs) > 1:
        file_suffix = f"batch_{file_suffix}"

    filename_prefixes = {
        "Child Fee Reimbursement Form": "filled_child_fee_reimbursement_form",
        "Internet Charges Form": "filled_internet_charges_reimbursement_form",
        "Mobile Reimbursement Form": "filled_mobile_reimbursement_form",
    }
    prefix = filename_prefixes.get(form_name, "filled_unknown_reimbursement_form")
    output_pdf_path = f"outputs/{prefix}_{file_suffix}.pdf"

    try:
        filled_pdf_path = fill_child_fee_pdf(
            template_pdf_path="templates/REIMBURSEMENT FORM.pdf",
            output_pdf_path=output_pdf_path,
            emp_name=emp_name,
            emp_code=emp_code,
            department=department,
            bill_month=first_bill_month_found,
            items=consolidated_items,  # Use consolidated items
            total=consolidated_total  # Use consolidated total
        )
        return filled_pdf_path
    except Exception as e:
        print(f"ERROR during PDF generation for consolidated child fee form: {e}")
        return None
268
 
269
 
270
 
 
 
 
 
 
 
 
 
 
271
 
272
 
273