rairo committed on
Commit
4a6a531
·
verified ·
1 Parent(s): 588e3c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -91
app.py CHANGED
@@ -2,7 +2,7 @@ import re
2
  import json
3
  import os
4
  import time
5
- from datetime import datetime
6
  from io import BytesIO
7
 
8
  import pandas as pd
@@ -32,40 +32,50 @@ def read_pdf(file_obj):
32
  # Process PDF text with Gemini to extract transactions as JSON
33
  def process_with_gemini(model, text):
34
  prompt = """Analyze this bank statement and extract transactions in JSON format with these fields:
35
- - Date (format DD/MM/YYYY)
36
- - Description
37
- - Amount (just the integer value)
38
- - Type (is 'income' if 'credit amount', else 'expense')
39
- - Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
40
- - City (In address of bank statement)
41
-
42
- Return ONLY valid JSON with this structure:
43
- {
44
- "transactions": [
45
- {
46
- "Date": "string",
47
- "Description": "string",
48
- "Customer_name": "string",
49
- "City": "string",
50
- "Amount": number,
51
- "Type": "string"
52
- }
53
- ]
54
- }"""
 
 
 
55
  response = model.generate_content([prompt, text])
56
  time.sleep(7) # Sleep for 7 seconds to work around rate limit
57
  return response.text
58
 
59
- # Generate financial report from aggregated JSON transactions and chosen sections
60
- def generate_financial_report(model, json_data, report_types):
61
  prompt = f"""Based on the following transactions JSON data:
62
  {json.dumps(json_data)}
 
63
 
64
- Generate a detailed financial report that includes the following sections: {', '.join(report_types)}.
65
- Ensure that each section is clearly formatted with headings and includes insights and summaries.
66
- Return the complete report as plain text."""
 
 
 
 
 
 
67
  response = model.generate_content([prompt])
68
- time.sleep(7) # Sleep for 7 seconds to work around rate limit
69
  return response.text
70
 
71
  # Create a PDF file from the report text
@@ -87,84 +97,96 @@ def main():
87
  st.title("Quantitlytix AI")
88
  st.markdown("*Bank Statement Parser & Financial Report Generator*")
89
 
90
- # Allow multiple PDF uploads
91
- uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
92
 
93
- # Debug: Show number of files uploaded
94
- if uploaded_files:
95
- st.write(f"{len(uploaded_files)} file(s) uploaded.")
96
 
97
- if uploaded_files:
98
- try:
99
- # Initialize the Gemini model
100
- model = configure_gemini(api_key)
101
-
102
- all_transactions = []
103
- for uploaded_file in uploaded_files:
104
- # Read PDF text directly from the uploaded file
105
- pdf_text = read_pdf(uploaded_file)
106
- if not pdf_text:
107
- st.warning(f"No text found in {uploaded_file.name}.")
108
- continue
109
-
110
- with st.spinner(f"Processing {uploaded_file.name}..."):
111
- json_response = process_with_gemini(model, pdf_text)
112
- # Extract valid JSON from the response
113
- start_idx = json_response.find('{')
114
- end_idx = json_response.rfind('}') + 1
115
- if start_idx == -1 or end_idx == -1:
116
- st.warning(f"Invalid JSON response for {uploaded_file.name}.")
117
  continue
118
- json_str = json_response[start_idx:end_idx]
119
- json_str = json_str.replace('```json', '').replace('```', '')
120
- data = json.loads(json_str)
121
- transactions = data.get('transactions', [])
122
- all_transactions.extend(transactions)
123
-
124
- # Combine transactions into one JSON object
125
- combined_json = {"transactions": all_transactions}
126
-
127
- # Display extracted transactions in a DataFrame if available
128
- if all_transactions:
129
- df = pd.DataFrame(all_transactions)
130
- # Convert amounts to numeric and format
131
- df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')
132
- df['Amount'] = df['Amount'].apply(lambda x: f"R {x:,.2f}" if x >= 0 else f"R ({abs(x):,.2f})")
133
- df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y', errors='coerce').dt.strftime('%d/%m/%Y')
134
- st.success("Extraction complete!")
135
- st.write("### Extracted Transactions")
136
- st.dataframe(df)
137
- else:
138
- st.warning("No transactions were extracted from the uploaded files.")
139
-
140
- # Allow user to select financial report sections
141
- st.write("### Generate Financial Report")
142
- report_options = st.multiselect(
143
- "Select financial report sections to include",
144
- ["By Date", "Income Statement", "Cashflow Statement", "Balance Sheet"],
145
- default=["By Date", "Income Statement", "Cashflow Statement", "Balance Sheet"]
146
- )
147
-
148
- if st.button("Generate Financial Report"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  with st.spinner("Generating financial report..."):
150
- report_text = generate_financial_report(model, combined_json, report_options)
151
  st.success("Financial report generated!")
152
  st.text_area("Financial Report", report_text, height=300)
153
 
154
- # Create PDF from the report text
155
  pdf_buffer = create_pdf_report(report_text)
156
-
157
- # Provide a download button for the PDF report
158
  st.download_button(
159
  label="Download Financial Report as PDF",
160
  data=pdf_buffer,
161
  file_name=f"financial_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
162
  mime="application/pdf"
163
  )
164
-
165
- except Exception as e:
166
- st.error(f"Error processing documents: {str(e)}")
167
- st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
168
 
169
  if __name__ == "__main__":
170
  main()
 
2
  import json
3
  import os
4
  import time
5
+ from datetime import datetime, date
6
  from io import BytesIO
7
 
8
  import pandas as pd
 
32
  # Process PDF text with Gemini to extract transactions as JSON
33
  def process_with_gemini(model, text):
34
  prompt = """Analyze this bank statement and extract transactions in JSON format with these fields:
35
+ - Date (format DD/MM/YYYY)
36
+ - Description
37
+ - Amount (just the integer value)
38
+ - Type (is 'income' if 'credit amount', else 'expense')
39
+ - Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
40
+ - City (In address of bank statement)
41
+ - Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to ‘Other expenses’. If 'Type' is 'income' set Destination_of_funds to ‘income’.)
42
+ - ignore opening or closing balances.
43
+
44
+ Return ONLY valid JSON with this structure:
45
+ {
46
+ "transactions": [
47
+ {
48
+ "Date": "string",
49
+ "Description": "string",
50
+ "Customer_name": "string",
51
+ "City": "string",
52
+ "Amount": number,
53
+ "Type": "string",
54
+ "Category_of_expense": "string"
55
+ }
56
+ ]
57
+ }"""
58
  response = model.generate_content([prompt, text])
59
  time.sleep(7) # Sleep for 7 seconds to work around rate limit
60
  return response.text
61
 
62
+ # Generate financial report from aggregated JSON transactions and chosen parameters
63
+ def generate_financial_report(model, json_data, start_date, end_date, statement_type):
64
  prompt = f"""Based on the following transactions JSON data:
65
  {json.dumps(json_data)}
66
+ For transactions between {start_date.strftime('%d/%m/%Y')} and {end_date.strftime('%d/%m/%Y')}, generate a detailed {statement_type} report presented in a standard accounting format relevant to Zimbabwe, but with improved readability and visual appeal.
67
 
68
+ Specific Formatting and Content Requirements:
69
+
70
+ Standard Accounting Structure (Zimbabwe Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
71
+ Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
72
+ Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "ZWL" for Zimbabwe Dollar if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
73
+ Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
74
+ Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
75
+ Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
76
+ Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}."""
77
  response = model.generate_content([prompt])
78
+ time.sleep(5) # Sleep for 5 seconds to work around rate limit
79
  return response.text
80
 
81
  # Create a PDF file from the report text
 
97
  st.title("Quantitlytix AI")
98
  st.markdown("*Bank Statement Parser & Financial Report Generator*")
99
 
100
+ # Sidebar: Select input type: Bulk PDF or CSV Upload
101
+ input_type = st.sidebar.radio("Select Input Type", ("Bulk Bank Statement Upload", "CSV Upload"))
102
 
103
+ all_transactions = []
 
 
104
 
105
+ if input_type == "Bulk Bank Statement Upload":
106
+ uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
107
+ if uploaded_files:
108
+ st.write(f"{len(uploaded_files)} PDF file(s) uploaded.")
109
+ try:
110
+ model = configure_gemini(api_key)
111
+ for uploaded_file in uploaded_files:
112
+ pdf_text = read_pdf(uploaded_file)
113
+ if not pdf_text:
114
+ st.warning(f"No text found in {uploaded_file.name}.")
 
 
 
 
 
 
 
 
 
 
115
  continue
116
+ with st.spinner(f"Processing {uploaded_file.name}..."):
117
+ json_response = process_with_gemini(model, pdf_text)
118
+ start_idx = json_response.find('{')
119
+ end_idx = json_response.rfind('}') + 1
120
+ if start_idx == -1 or end_idx == -1:
121
+ st.warning(f"Invalid JSON response for {uploaded_file.name}.")
122
+ continue
123
+ json_str = json_response[start_idx:end_idx]
124
+ json_str = json_str.replace('```json', '').replace('```', '')
125
+ data = json.loads(json_str)
126
+ transactions = data.get('transactions', [])
127
+ all_transactions.extend(transactions)
128
+ except Exception as e:
129
+ st.error(f"Error processing PDF documents: {str(e)}")
130
+ st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
131
+ elif input_type == "CSV Upload":
132
+ uploaded_csv = st.file_uploader("Upload CSV of transactions", type="csv")
133
+ if uploaded_csv:
134
+ try:
135
+ df = pd.read_csv(uploaded_csv)
136
+ st.write("CSV Data Preview:")
137
+ st.dataframe(df.head())
138
+ # Convert dataframe to list of transaction dictionaries
139
+ transactions = df.to_dict(orient='records')
140
+ all_transactions.extend(transactions)
141
+ except Exception as e:
142
+ st.error(f"Error processing CSV file: {str(e)}")
143
+
144
+ # If transactions are loaded, show DataFrame
145
+ if all_transactions:
146
+ df = pd.DataFrame(all_transactions)
147
+ try:
148
+ df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')
149
+ df['Amount'] = df['Amount'].apply(lambda x: f"R {x:,.2f}" if x >= 0 else f"R ({abs(x):,.2f})")
150
+ df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y', errors='coerce').dt.strftime('%d/%m/%Y')
151
+ except Exception as e:
152
+ st.warning("Some data could not be formatted correctly.")
153
+ st.success("Transactions loaded successfully!")
154
+ st.write("### Extracted Transactions")
155
+ st.dataframe(df)
156
+ else:
157
+ st.info("No transactions loaded yet.")
158
+
159
+ # Financial report generation parameters
160
+ st.write("### Generate Financial Report")
161
+ col1, col2 = st.columns(2)
162
+ with col1:
163
+ start_date = st.date_input("Start Date", date(2024, 1, 1))
164
+ with col2:
165
+ end_date = st.date_input("End Date", date(2024, 12, 31))
166
+
167
+ statement_type = st.selectbox("Select Financial Statement", ["Income Statement", "Cashflow Statement", "Balance Sheet"])
168
+
169
+ if st.button("Generate Financial Report"):
170
+ if not all_transactions:
171
+ st.error("No transactions available to generate report.")
172
+ else:
173
+ try:
174
+ model = configure_gemini(api_key)
175
+ combined_json = {"transactions": all_transactions}
176
  with st.spinner("Generating financial report..."):
177
+ report_text = generate_financial_report(model, combined_json, start_date, end_date, statement_type)
178
  st.success("Financial report generated!")
179
  st.text_area("Financial Report", report_text, height=300)
180
 
 
181
  pdf_buffer = create_pdf_report(report_text)
 
 
182
  st.download_button(
183
  label="Download Financial Report as PDF",
184
  data=pdf_buffer,
185
  file_name=f"financial_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
186
  mime="application/pdf"
187
  )
188
+ except Exception as e:
189
+ st.error(f"Error generating financial report: {str(e)}")
 
 
190
 
191
  if __name__ == "__main__":
192
  main()