rairo committed on
Commit
63d3cad
·
verified ·
1 Parent(s): 0f9dd99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -47
app.py CHANGED
@@ -2,7 +2,7 @@ import re
2
  import json
3
  import os
4
  import time
5
- from datetime import datetime, date
6
  from io import BytesIO
7
 
8
  import pandas as pd
@@ -10,9 +10,10 @@ import streamlit as st
10
  import google.generativeai as genai
11
  import pypdf
12
  from fpdf import FPDF
 
13
 
14
  # Configure API key for Gemini
15
- api_key = os.environ['Gemini']
16
 
17
  def configure_gemini(api_key):
18
  genai.configure(api_key=api_key)
@@ -40,7 +41,7 @@ def process_with_gemini(model, text):
40
  - City (In address of bank statement)
41
  - Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to ‘Other expenses’. If 'Type' is 'income' set Destination_of_funds to ‘income’.)
42
  - ignore opening or closing balances.
43
-
44
  Return ONLY valid JSON with this structure:
45
  {
46
  "transactions": [
@@ -55,39 +56,54 @@ def process_with_gemini(model, text):
55
  }
56
  ]
57
  }"""
58
- response = model.generate_content([prompt, text])
59
- time.sleep(5) # Sleep for 5 seconds to work around rate limit
60
- return response.text
 
 
 
 
 
 
 
61
 
62
  # Generate financial report from aggregated JSON transactions and chosen parameters
63
  def generate_financial_report(model, json_data, start_date, end_date, statement_type):
64
  prompt = f"""Based on the following transactions JSON data:
65
  {json.dumps(json_data)}
66
- For transactions between {start_date.strftime('%d/%m/%Y')} and {end_date.strftime('%d/%m/%Y')}, generate a detailed {statement_type} report presented in a standard accounting format relevant to Zimbabwe, but with improved readability and visual appeal.
67
 
68
  Specific Formatting and Content Requirements:
69
 
70
- Standard Accounting Structure (Zimbabwe Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
71
  Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
72
  Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "ZWL" for Zimbabwe Dollar if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
73
  Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
74
  Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
75
  Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
76
  Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}."""
77
- response = model.generate_content([prompt])
78
- time.sleep(5) # Sleep for 7 seconds to work around rate limit
79
- return response.text
 
 
 
 
 
 
 
 
80
 
81
  # Create a PDF file from the report text
82
  def create_pdf_report(report_text):
83
  pdf = FPDF()
84
  pdf.add_page()
85
  pdf.set_font("Arial", size=12)
86
-
87
  # Split report text into lines and add them to the PDF
88
  for line in report_text.split('\n'):
89
  pdf.multi_cell(0, 10, line)
90
-
91
  pdf_buffer = BytesIO()
92
  pdf.output(pdf_buffer)
93
  pdf_buffer.seek(0)
@@ -96,12 +112,16 @@ def create_pdf_report(report_text):
96
  def main():
97
  st.title("Quantitlytix AI")
98
  st.markdown("*Bank Statement Parser & Financial Report Generator*")
99
-
 
 
 
 
100
  # Sidebar: Select input type: Bulk PDF or CSV Upload
101
  input_type = st.sidebar.radio("Select Input Type", ("Bulk Bank Statement Upload", "CSV Upload"))
102
-
103
  all_transactions = []
104
-
105
  if input_type == "Bulk Bank Statement Upload":
106
  uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
107
  if uploaded_files:
@@ -115,16 +135,20 @@ def main():
115
  continue
116
  with st.spinner(f"Processing {uploaded_file.name}..."):
117
  json_response = process_with_gemini(model, pdf_text)
118
- start_idx = json_response.find('{')
119
- end_idx = json_response.rfind('}') + 1
120
- if start_idx == -1 or end_idx == -1:
121
- st.warning(f"Invalid JSON response for {uploaded_file.name}.")
122
- continue
123
- json_str = json_response[start_idx:end_idx]
124
- json_str = json_str.replace('```json', '').replace('```', '')
125
- data = json.loads(json_str)
126
- transactions = data.get('transactions', [])
127
- all_transactions.extend(transactions)
 
 
 
 
128
  except Exception as e:
129
  st.error(f"Error processing PDF documents: {str(e)}")
130
  st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
@@ -140,7 +164,7 @@ def main():
140
  all_transactions.extend(transactions)
141
  except Exception as e:
142
  st.error(f"Error processing CSV file: {str(e)}")
143
-
144
  # If transactions are loaded, show DataFrame
145
  if all_transactions:
146
  df = pd.DataFrame(all_transactions)
@@ -155,7 +179,7 @@ def main():
155
  st.dataframe(df)
156
  else:
157
  st.info("No transactions loaded yet.")
158
-
159
  # Financial report generation parameters
160
  st.write("### Generate Financial Report")
161
  col1, col2 = st.columns(2)
@@ -163,30 +187,54 @@ def main():
163
  start_date = st.date_input("Start Date", date(2024, 1, 1))
164
  with col2:
165
  end_date = st.date_input("End Date", date(2024, 12, 31))
166
-
167
  statement_type = st.selectbox("Select Financial Statement", ["Income Statement", "Cashflow Statement", "Balance Sheet"])
168
-
169
  if st.button("Generate Financial Report"):
170
  if not all_transactions:
171
  st.error("No transactions available to generate report.")
172
  else:
173
- try:
174
- model = configure_gemini(api_key)
175
- combined_json = {"transactions": all_transactions}
176
- with st.spinner("Generating financial report..."):
177
- report_text = generate_financial_report(model, combined_json, start_date, end_date, statement_type)
178
- st.success("Financial report generated!")
179
- st.text_area("Financial Report", report_text, height=300)
180
-
181
- pdf_buffer = create_pdf_report(report_text)
182
- st.download_button(
183
- label="Download Financial Report as PDF",
184
- data=pdf_buffer,
185
- file_name=f"financial_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
186
- mime="application/pdf"
187
- )
188
- except Exception as e:
189
- st.error(f"Error generating financial report: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
  if __name__ == "__main__":
192
  main()
 
2
  import json
3
  import os
4
  import time
5
+ from datetime import datetime, date, timedelta
6
  from io import BytesIO
7
 
8
  import pandas as pd
 
10
  import google.generativeai as genai
11
  import pypdf
12
  from fpdf import FPDF
13
+ from google.api_core import exceptions
14
 
15
  # Configure API key for Gemini
16
+ api_key = os.environ.get('Gemini')
17
 
18
  def configure_gemini(api_key):
19
  genai.configure(api_key=api_key)
 
41
  - City (In address of bank statement)
42
  - Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to ‘Other expenses’. If 'Type' is 'income' set Destination_of_funds to ‘income’.)
43
  - ignore opening or closing balances.
44
+
45
  Return ONLY valid JSON with this structure:
46
  {
47
  "transactions": [
 
56
  }
57
  ]
58
  }"""
59
+ try:
60
+ response = model.generate_content([prompt, text])
61
+ time.sleep(5) # Sleep for 5 seconds to work around rate limit
62
+ return response.text
63
+ except exceptions.ServiceUnavailable as e:
64
+ if e.response.status_code == 504:
65
+ st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
66
+ return None
67
+ else:
68
+ raise
69
 
70
  # Generate financial report from aggregated JSON transactions and chosen parameters
71
  def generate_financial_report(model, json_data, start_date, end_date, statement_type):
72
  prompt = f"""Based on the following transactions JSON data:
73
  {json.dumps(json_data)}
74
+ Generate a detailed {statement_type} report for the period from {start_date.strftime('%d/%m/%Y')} to {end_date.strftime('%d/%m/%Y')}. Present the report in a standard accounting format relevant to Zimbabwe, but with improved readability and visual appeal.
75
 
76
  Specific Formatting and Content Requirements:
77
 
78
+ Standard Accounting Structure (South Africa Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
79
  Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
80
  Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "ZWL" for Zimbabwe Dollar if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
81
  Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
82
  Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
83
  Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
84
  Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}."""
85
+ try:
86
+ response = model.generate_content([prompt])
87
+ time.sleep(7) # Sleep for 7 seconds to work around rate limit
88
+ return response.text
89
+ except exceptions.ServiceUnavailable as e:
90
+ if e.response.status_code == 504:
91
+ st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
92
+ st.session_state['last_error'] = "504" # Store the error in session state
93
+ return None
94
+ else:
95
+ raise
96
 
97
  # Create a PDF file from the report text
98
  def create_pdf_report(report_text):
99
  pdf = FPDF()
100
  pdf.add_page()
101
  pdf.set_font("Arial", size=12)
102
+
103
  # Split report text into lines and add them to the PDF
104
  for line in report_text.split('\n'):
105
  pdf.multi_cell(0, 10, line)
106
+
107
  pdf_buffer = BytesIO()
108
  pdf.output(pdf_buffer)
109
  pdf_buffer.seek(0)
 
112
  def main():
113
  st.title("Quantitlytix AI")
114
  st.markdown("*Bank Statement Parser & Financial Report Generator*")
115
+
116
+ # Initialize session state for last error
117
+ if 'last_error' not in st.session_state:
118
+ st.session_state['last_error'] = None
119
+
120
  # Sidebar: Select input type: Bulk PDF or CSV Upload
121
  input_type = st.sidebar.radio("Select Input Type", ("Bulk Bank Statement Upload", "CSV Upload"))
122
+
123
  all_transactions = []
124
+
125
  if input_type == "Bulk Bank Statement Upload":
126
  uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
127
  if uploaded_files:
 
135
  continue
136
  with st.spinner(f"Processing {uploaded_file.name}..."):
137
  json_response = process_with_gemini(model, pdf_text)
138
+ if json_response:
139
+ start_idx = json_response.find('{')
140
+ end_idx = json_response.rfind('}') + 1
141
+ if start_idx == -1 or end_idx == -1:
142
+ st.warning(f"Invalid JSON response for {uploaded_file.name}.")
143
+ continue
144
+ json_str = json_response[start_idx:end_idx]
145
+ json_str = json_str.replace('```json', '').replace('```', '')
146
+ try:
147
+ data = json.loads(json_str)
148
+ transactions = data.get('transactions', [])
149
+ all_transactions.extend(transactions)
150
+ except json.JSONDecodeError as e:
151
+ st.error(f"Error decoding JSON for {uploaded_file.name}: {e}")
152
  except Exception as e:
153
  st.error(f"Error processing PDF documents: {str(e)}")
154
  st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
 
164
  all_transactions.extend(transactions)
165
  except Exception as e:
166
  st.error(f"Error processing CSV file: {str(e)}")
167
+
168
  # If transactions are loaded, show DataFrame
169
  if all_transactions:
170
  df = pd.DataFrame(all_transactions)
 
179
  st.dataframe(df)
180
  else:
181
  st.info("No transactions loaded yet.")
182
+
183
  # Financial report generation parameters
184
  st.write("### Generate Financial Report")
185
  col1, col2 = st.columns(2)
 
187
  start_date = st.date_input("Start Date", date(2024, 1, 1))
188
  with col2:
189
  end_date = st.date_input("End Date", date(2024, 12, 31))
190
+
191
  statement_type = st.selectbox("Select Financial Statement", ["Income Statement", "Cashflow Statement", "Balance Sheet"])
192
+
193
  if st.button("Generate Financial Report"):
194
  if not all_transactions:
195
  st.error("No transactions available to generate report.")
196
  else:
197
+ # Filter transactions by date
198
+ filtered_transactions = []
199
+ for transaction in all_transactions:
200
+ try:
201
+ transaction_date = datetime.strptime(transaction.get('Date'), '%d/%m/%Y').date()
202
+ if start_date <= transaction_date <= end_date:
203
+ filtered_transactions.append(transaction)
204
+ except (ValueError, TypeError):
205
+ st.warning(f"Could not parse date for transaction: {transaction}")
206
+ continue
207
+
208
+ if not filtered_transactions:
209
+ st.warning("No transactions found within the selected date range.")
210
+ else:
211
+ try:
212
+ model = configure_gemini(api_key)
213
+ combined_json = {"transactions": filtered_transactions}
214
+ with st.spinner("Generating financial report..."):
215
+ report_text = generate_financial_report(model, combined_json, start_date, end_date, statement_type)
216
+ if report_text:
217
+ st.success("Financial report generated!")
218
+ st.text_area("Financial Report", report_text, height=300)
219
+
220
+ pdf_buffer = create_pdf_report(report_text)
221
+ st.download_button(
222
+ label="Download Financial Report as PDF",
223
+ data=pdf_buffer,
224
+ file_name=f"financial_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
225
+ mime="application/pdf"
226
+ )
227
+ except exceptions.ServiceUnavailable as e:
228
+ if e.response.status_code == 504:
229
+ st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
230
+ else:
231
+ st.error(f"Error generating financial report: {str(e)}")
232
+ except Exception as e:
233
+ st.error(f"Error generating financial report: {str(e)}")
234
+ if "504" in str(e):
235
+ st.info("The Gemini API might be overloaded. Consider reducing the time period for the report.")
236
+ elif len(filtered_transactions) > 500: # Example threshold, adjust as needed
237
+ st.info("For very large datasets, consider generating reports for smaller time periods (e.g., monthly) and combining them manually if a single comprehensive report fails.")
238
 
239
  if __name__ == "__main__":
240
  main()