rairo committed on
Commit
b1b48cd
·
verified ·
1 Parent(s): 91c8199

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +289 -41
app.py CHANGED
@@ -178,7 +178,252 @@ Do not name the company if name is not there and return just the report and noth
178
  st.error(f"An unexpected error occurred during Gemini report generation: {e}") # Catch other potential errors
179
  return None
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  # Install required libraries:
183
  # pip install fpdf2 beautifulsoup4 markdown
184
 
@@ -571,12 +816,12 @@ def main():
571
  statement_type = st.selectbox("Select Financial Statement", ["Income Statement", "Cashflow Statement", "Balance Sheet"])
572
 
573
  if st.button("Generate Financial Report"):
574
- st.info(f"User clicked 'Generate Financial Report' for {statement_type} from {start_date} to {end_date}.") # Log button click
575
  if not all_transactions:
576
- st.error("No transactions available to generate report. Please upload files first.") # Log
577
  else:
578
  # Filter transactions by date
579
- st.info(f"Filtering {len(all_transactions)} transactions for the period {start_date} to {end_date}...") # Log filtering
580
  filtered_transactions = []
581
  for transaction in all_transactions:
582
  try:
@@ -584,53 +829,56 @@ def main():
584
  if start_date <= transaction_date <= end_date:
585
  filtered_transactions.append(transaction)
586
  except (ValueError, TypeError):
587
- st.warning(f"Could not parse date for transaction, skipping: {transaction}") # Log problematic transactions
588
  continue
589
 
590
  if not filtered_transactions:
591
- st.warning("No transactions found within the selected date range. Please adjust dates or upload relevant files.") # Log
592
  else:
593
- st.info(f"Found {len(filtered_transactions)} transactions within the selected date range.") # Log filtered count
594
  try:
595
  model1 = configure_gemini1(api_key)
596
- combined_json = {"transactions": filtered_transactions}
597
- with st.spinner("Generating financial report..."):
598
- report_text = generate_financial_report(model1, combined_json, start_date, end_date, statement_type)
599
- if report_text:
600
- st.success("Financial report generated successfully by Gemini!") # Log report text ready
601
-
602
- # Display the report as markdown
603
- st.markdown("### Financial Report Preview")
604
- st.markdown(report_text)
605
-
606
- # Create PDF from markdown
607
- try:
608
- st.info("Attempting to generate PDF from the report markdown.") # Log PDF start
609
- pdf_buffer = create_pdf_report(report_text)
610
- st.download_button(
611
- label="Download Financial Report as PDF",
612
- data=pdf_buffer.getvalue(),
613
- file_name=f"{statement_type.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d')}.pdf",
614
- mime="application/pdf"
615
- )
616
- st.success("PDF download button enabled.") # Log
617
- except Exception as e:
618
- st.error(f"Error generating PDF for download: {str(e)}") # Log PDF error
619
- st.info("For better PDF generation, please ensure NotoSans fonts are installed in the same directory.")
620
- st.exception(e) # Show traceback
621
- except exceptions.ServiceUnavailable as e:
622
- if e.response.status_code == 504:
623
- st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
624
  else:
625
- st.error(f"Error generating financial report due to Gemini API issue: {str(e)}") # Log API error
626
- st.exception(e) # Show traceback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
627
  except Exception as e:
628
- st.error(f"An unexpected error occurred while generating the financial report: {str(e)}") # Log general error
629
  if "504" in str(e):
630
- st.info("The Gemini API might be overloaded. Consider generating reports for smaller time periods.")
631
- elif len(filtered_transactions) > 500:
632
- st.info("For large datasets, consider generating reports for smaller time periods.")
633
- st.exception(e) # Show traceback
634
 
635
  if __name__ == "__main__":
636
  main()
 
178
  st.error(f"An unexpected error occurred during Gemini report generation: {e}") # Catch other potential errors
179
  return None
180
 
181
def chunk_transactions(transactions, batch_size=400):
    """Split transactions into consecutive batches of at most ``batch_size`` items.

    Args:
        transactions: Sliceable sequence of transactions (supports ``len`` and
            slicing).
        batch_size: Maximum number of transactions per batch. Must be >= 1.

    Returns:
        A list of slices of ``transactions`` in their original order. An empty
        input yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. A negative step would
            otherwise produce no batches at all and silently drop every
            transaction.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    batches = [
        transactions[start:start + batch_size]
        for start in range(0, len(transactions), batch_size)
    ]
    st.info(f"Split {len(transactions)} transactions into {len(batches)} batches of up to {batch_size} transactions each.")
    return batches
189
+
190
def generate_batch_summary(model, json_data, start_date, end_date, statement_type, batch_num, total_batches):
    """Ask the model for a structured JSON summary of one batch of transactions.

    Args:
        model: Object exposing ``generate_content(list)`` whose result has a
            ``text`` attribute.
        json_data: Dict with a ``"transactions"`` list; it is serialised with
            ``json.dumps`` and embedded in the prompt.
        start_date: Period start; must support ``strftime``.
        end_date: Period end; must support ``strftime``.
        statement_type: Not used by the batch prompt; kept so the signature
            lines up with the other report helpers.
        batch_num: 1-based index of this batch.
        total_batches: Total number of batches being processed.

    Returns:
        The raw response text (expected to contain the JSON structure described
        in the prompt), or ``None`` when the batch timed out with a 504 or any
        other non-API error occurred.

    Raises:
        exceptions.ServiceUnavailable: Re-raised when the API error is not a 504.
    """
    transaction_count = len(json_data['transactions'])
    period_start = start_date.strftime('%d/%m/%Y')
    period_end = end_date.strftime('%d/%m/%Y')

    st.info(f"Processing batch {batch_num}/{total_batches} with {transaction_count} transactions...")

    prompt = f"""Analyze this batch of transactions (batch {batch_num} of {total_batches}) for the period from {period_start} to {period_end}.

Transaction data:
{json.dumps(json_data)}

Create a structured summary focusing on aggregation and categorization. Return ONLY the following JSON structure:

{{
"batch_info": {{
"batch_number": {batch_num},
"total_batches": {total_batches},
"transaction_count": {transaction_count},
"date_range": "{period_start} to {period_end}"
}},
"financial_summary": {{
"total_income": 0,
"total_expenses": 0,
"net_position": 0
}},
"income_breakdown": {{
"by_customer": {{}},
"by_month": {{}}
}},
"expense_breakdown": {{
"by_category": {{}},
"by_month": {{}}
}},
"key_transactions": [
// Top 5 largest transactions (income and expense)
],
"monthly_totals": {{
// Format: "YYYY-MM": {{"income": 0, "expenses": 0, "net": 0}}
}}
}}

Focus on numerical aggregation and categorization. Be precise with calculations."""

    try:
        response = model.generate_content([prompt])
        # Pause between calls; presumably to stay under the API rate limit - TODO confirm.
        time.sleep(4)
        return response.text
    except exceptions.ServiceUnavailable as e:
        # Read the status defensively: the error may carry no response object
        # (or one without ``status_code``), and an AttributeError raised here
        # would hide the real API failure from the caller.
        status = getattr(getattr(e, "response", None), "status_code", None)
        if status is None:
            # Fall back to a ``code`` attribute if the exception exposes one.
            status = getattr(e, "code", None)

        if status == 504:
            st.error(f"Batch {batch_num} timed out. Skipping this batch.")
            return None

        st.error(f"API error processing batch {batch_num}: {e}")
        raise
    except Exception as e:
        st.error(f"Error processing batch {batch_num}: {e}")
        return None
245
+
246
def _add_amounts(target, source):
    """Add every ``key: amount`` pair of ``source`` onto the running totals in ``target``."""
    for key, amount in source.items():
        target[key] = target.get(key, 0) + amount


def consolidate_batch_summaries(batch_summaries, start_date, end_date, statement_type):
    """Combine multiple batch summaries into one aggregated data structure.

    Each summary is either a dict or a string containing a JSON object (any
    text before the first ``{`` or after the last ``}`` is ignored). A batch is
    merged all-or-nothing: if it cannot be parsed, or any of its values cannot
    be added to the running totals, the whole batch is skipped with a warning
    so that half-applied figures never reach the totals.

    Args:
        batch_summaries: List of per-batch summaries (dicts or JSON-bearing
            strings). Falsy entries are ignored.
        start_date: Period start; must support ``strftime``.
        end_date: Period end; must support ``strftime``.
        statement_type: Not used here; kept for signature parity with the
            other report helpers.

    Returns:
        Dict with the keys ``total_batches``, ``total_transactions``,
        ``date_range``, ``financial_summary``, ``income_breakdown``,
        ``expense_breakdown``, ``key_transactions`` and ``monthly_totals``.
        ``net_position`` is recomputed as total income minus total expenses.
    """
    import copy  # local import: only the staging step below needs it

    st.info(f"Consolidating {len(batch_summaries)} batch summaries...")

    consolidated = {
        "total_batches": len(batch_summaries),
        "total_transactions": 0,
        "date_range": f"{start_date.strftime('%d/%m/%Y')} to {end_date.strftime('%d/%m/%Y')}",
        "financial_summary": {
            "total_income": 0,
            "total_expenses": 0,
            "net_position": 0
        },
        "income_breakdown": {
            "by_customer": {},
            "by_month": {}
        },
        "expense_breakdown": {
            "by_category": {},
            "by_month": {}
        },
        "key_transactions": [],
        "monthly_totals": {}
    }
    merged_batches = 0

    for batch_data in batch_summaries:
        if not batch_data:
            continue

        try:
            # Extract the JSON object from the raw response text if needed.
            if isinstance(batch_data, str):
                start_idx = batch_data.find('{')
                end_idx = batch_data.rfind('}') + 1
                if start_idx == -1 or end_idx <= start_idx:
                    st.warning("Could not extract JSON from batch summary")
                    continue
                batch_data = json.loads(batch_data[start_idx:end_idx])

            # Merge into a copy so a failure part-way through leaves the
            # running totals untouched.
            staged = copy.deepcopy(consolidated)

            if 'financial_summary' in batch_data:
                fs = batch_data['financial_summary']
                staged['financial_summary']['total_income'] += fs.get('total_income', 0)
                staged['financial_summary']['total_expenses'] += fs.get('total_expenses', 0)

            if 'batch_info' in batch_data:
                staged['total_transactions'] += batch_data['batch_info'].get('transaction_count', 0)

            if 'income_breakdown' in batch_data:
                income = batch_data['income_breakdown']
                _add_amounts(staged['income_breakdown']['by_customer'], income.get('by_customer', {}))
                _add_amounts(staged['income_breakdown']['by_month'], income.get('by_month', {}))

            if 'expense_breakdown' in batch_data:
                expenses = batch_data['expense_breakdown']
                _add_amounts(staged['expense_breakdown']['by_category'], expenses.get('by_category', {}))
                _add_amounts(staged['expense_breakdown']['by_month'], expenses.get('by_month', {}))

            if 'key_transactions' in batch_data:
                staged['key_transactions'].extend(batch_data.get('key_transactions', []))

            if 'monthly_totals' in batch_data:
                for month, totals in batch_data['monthly_totals'].items():
                    month_entry = staged['monthly_totals'].setdefault(
                        month, {"income": 0, "expenses": 0, "net": 0}
                    )
                    month_entry['income'] += totals.get('income', 0)
                    month_entry['expenses'] += totals.get('expenses', 0)
                    month_entry['net'] += totals.get('net', 0)

        except json.JSONDecodeError as e:
            st.warning(f"Could not parse batch summary JSON: {e}")
            continue
        except Exception as e:
            st.warning(f"Error processing batch summary: {e}")
            continue

        # Only a fully merged batch replaces the running totals.
        consolidated = staged
        merged_batches += 1

    # Calculate final net position
    summary = consolidated['financial_summary']
    summary['net_position'] = summary['total_income'] - summary['total_expenses']

    skipped_batches = len(batch_summaries) - merged_batches
    if skipped_batches:
        st.warning(f"{skipped_batches} of {len(batch_summaries)} batch summaries were skipped and are not included in the totals.")

    st.success(f"Successfully consolidated data from {merged_batches} batches covering {consolidated['total_transactions']} transactions.")
    return consolidated
347
+
348
def generate_final_report(model, consolidated_data, statement_type):
    """Generate the final Markdown report from the consolidated batch data.

    Args:
        model: Object exposing ``generate_content(list)`` whose result has a
            ``text`` attribute.
        consolidated_data: JSON-serialisable aggregate of the batch summaries;
            it is embedded in the prompt via ``json.dumps(..., indent=2)``.
        statement_type: Name of the financial statement to produce; it is
            interpolated into the prompt.

    Returns:
        The report text, or ``None`` when the request timed out with a 504 or
        any other non-API error occurred.

    Raises:
        exceptions.ServiceUnavailable: Re-raised when the API error is not a 504.
    """
    st.info("Generating final comprehensive report from consolidated data...")

    prompt = f"""Using this consolidated financial data, generate a comprehensive {statement_type} report:

Consolidated Data:
{json.dumps(consolidated_data, indent=2)}

Generate a detailed {statement_type} report with the following requirements:

1. **Professional Format**: Use standard South African accounting format and terminology
2. **Clear Structure**: Organize with proper headings, subheadings, and sections
3. **Comprehensive Analysis**: Include:
- Executive Summary
- Detailed breakdown by categories/customers
- Monthly trend analysis
- Key performance indicators
- Notable transactions and patterns
4. **Visual Elements**: Use tables, proper formatting for better readability
5. **Insights**: Provide meaningful business insights based on the data
6. **Currency**: Use "R" for South African Rand where appropriate

Return the report in well-formatted Markdown. Do not include company name if not available.
Focus on creating a professional, comprehensive financial statement that provides clear insights into the business performance."""

    try:
        response = model.generate_content([prompt])
        # Pause after the call; presumably to stay under the API rate limit - TODO confirm.
        time.sleep(6)
        # Read the text before announcing success, so a failure while reading
        # it is not preceded by a success message.
        report_text = response.text
        st.success("Final comprehensive report generated successfully!")
        return report_text
    except exceptions.ServiceUnavailable as e:
        # Read the status defensively: the error may carry no response object
        # (or one without ``status_code``), and an AttributeError raised here
        # would hide the real API failure from the caller.
        status = getattr(getattr(e, "response", None), "status_code", None)
        if status is None:
            # Fall back to a ``code`` attribute if the exception exposes one.
            status = getattr(e, "code", None)

        if status == 504:
            st.error("Final report generation timed out. The consolidated data might be too large.")
            return None

        st.error(f"API error generating final report: {e}")
        raise
    except Exception as e:
        st.error(f"Error generating final report: {e}")
        return None
389
+
390
def generate_batched_financial_report(model, filtered_transactions, start_date, end_date, statement_type, batch_size=400):
    """Generate a financial report by summarising transactions batch by batch.

    Steps: split the transactions into batches, request a summary for each
    batch, consolidate the summaries, then request the final report from the
    consolidated data.

    Args:
        model: Model object passed through to the batch and final report calls.
        filtered_transactions: Sequence of transactions to report on.
        start_date: Period start, forwarded to the helpers.
        end_date: Period end, forwarded to the helpers.
        statement_type: Name of the financial statement to produce.
        batch_size: Maximum number of transactions per batch.

    Returns:
        The final report text, or ``None`` when no batch produced a summary or
        the final report could not be generated.
    """
    st.info(f"Starting batched financial report generation for {len(filtered_transactions)} transactions...")

    # Step 1: Split transactions into batches
    transaction_batches = chunk_transactions(filtered_transactions, batch_size)
    total_batches = len(transaction_batches)

    # Step 2: Process each batch
    batch_summaries = []
    progress_bar = st.progress(0)
    status_text = st.empty()

    for batch_num, batch in enumerate(transaction_batches, start=1):
        status_text.text(f"Processing batch {batch_num} of {total_batches}...")

        summary = generate_batch_summary(
            model, {"transactions": batch}, start_date, end_date,
            statement_type, batch_num, total_batches
        )
        if summary:
            batch_summaries.append(summary)

        # Advance only after the batch has finished, so the bar never shows
        # 100% while the last request is still running.
        progress_bar.progress(batch_num / total_batches)

    progress_bar.progress(1.0)
    status_text.text("All batches processed!")

    if not batch_summaries:
        st.error("No batch summaries were successfully generated.")
        return None

    # Make it visible that the report is built from incomplete data.
    failed_batches = total_batches - len(batch_summaries)
    if failed_batches:
        st.warning(f"{failed_batches} of {total_batches} batches could not be summarised; their transactions are missing from the report.")

    # Step 3: Consolidate batch summaries
    consolidated_data = consolidate_batch_summaries(batch_summaries, start_date, end_date, statement_type)

    # Step 4: Generate final comprehensive report
    return generate_final_report(model, consolidated_data, statement_type)
427
  # Install required libraries:
428
  # pip install fpdf2 beautifulsoup4 markdown
429
 
 
816
  statement_type = st.selectbox("Select Financial Statement", ["Income Statement", "Cashflow Statement", "Balance Sheet"])
817
 
818
  if st.button("Generate Financial Report"):
819
+ st.info(f"User clicked 'Generate Financial Report' for {statement_type} from {start_date} to {end_date}.")
820
  if not all_transactions:
821
+ st.error("No transactions available to generate report. Please upload files first.")
822
  else:
823
  # Filter transactions by date
824
+ st.info(f"Filtering {len(all_transactions)} transactions for the period {start_date} to {end_date}...")
825
  filtered_transactions = []
826
  for transaction in all_transactions:
827
  try:
 
829
  if start_date <= transaction_date <= end_date:
830
  filtered_transactions.append(transaction)
831
  except (ValueError, TypeError):
832
+ st.warning(f"Could not parse date for transaction, skipping: {transaction}")
833
  continue
834
 
835
  if not filtered_transactions:
836
+ st.warning("No transactions found within the selected date range. Please adjust dates or upload relevant files.")
837
  else:
838
+ st.info(f"Found {len(filtered_transactions)} transactions within the selected date range.")
839
  try:
840
  model1 = configure_gemini1(api_key)
841
+
842
+ # Decide whether to use batched or regular processing
843
+ if len(filtered_transactions) > 600:
844
+ st.info(f"Large dataset detected ({len(filtered_transactions)} transactions). Using batched processing...")
845
+ with st.spinner("Generating batched financial report..."):
846
+ report_text = generate_batched_financial_report(
847
+ model1, filtered_transactions, start_date, end_date, statement_type
848
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849
  else:
850
+ st.info("Using standard processing for smaller dataset...")
851
+ combined_json = {"transactions": filtered_transactions}
852
+ with st.spinner("Generating financial report..."):
853
+ report_text = generate_financial_report(model1, combined_json, start_date, end_date, statement_type)
854
+
855
+ if report_text:
856
+ st.success("Financial report generated successfully!")
857
+
858
+ # Display the report as markdown
859
+ st.markdown("### Financial Report Preview")
860
+ st.markdown(report_text)
861
+
862
+ # Create PDF from markdown
863
+ try:
864
+ st.info("Generating PDF from the report...")
865
+ pdf_buffer = create_pdf_report(report_text)
866
+ st.download_button(
867
+ label="Download Financial Report as PDF",
868
+ data=pdf_buffer.getvalue(),
869
+ file_name=f"{statement_type.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d')}.pdf",
870
+ mime="application/pdf"
871
+ )
872
+ st.success("PDF download ready.")
873
+ except Exception as e:
874
+ st.error(f"Error generating PDF: {str(e)}")
875
+ st.exception(e)
876
+
877
  except Exception as e:
878
+ st.error(f"Error generating financial report: {str(e)}")
879
  if "504" in str(e):
880
+ st.info("Consider using a smaller date range or fewer transactions.")
881
+ st.exception(e)
 
 
882
 
883
  if __name__ == "__main__":
884
  main()