Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import re
|
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
import time
|
| 5 |
-
from datetime import datetime, date
|
| 6 |
from io import BytesIO
|
| 7 |
|
| 8 |
import pandas as pd
|
|
@@ -10,9 +10,10 @@ import streamlit as st
|
|
| 10 |
import google.generativeai as genai
|
| 11 |
import pypdf
|
| 12 |
from fpdf import FPDF
|
|
|
|
| 13 |
|
| 14 |
# Configure API key for Gemini
|
| 15 |
-
api_key = os.environ
|
| 16 |
|
| 17 |
def configure_gemini(api_key):
|
| 18 |
genai.configure(api_key=api_key)
|
|
@@ -40,7 +41,7 @@ def process_with_gemini(model, text):
|
|
| 40 |
- City (In address of bank statement)
|
| 41 |
- Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to ‘Other expenses’. If 'Type' is 'income' set Destination_of_funds to ‘income’.)
|
| 42 |
- ignore opening or closing balances.
|
| 43 |
-
|
| 44 |
Return ONLY valid JSON with this structure:
|
| 45 |
{
|
| 46 |
"transactions": [
|
|
@@ -55,39 +56,54 @@ def process_with_gemini(model, text):
|
|
| 55 |
}
|
| 56 |
]
|
| 57 |
}"""
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
# Generate financial report from aggregated JSON transactions and chosen parameters
|
| 63 |
def generate_financial_report(model, json_data, start_date, end_date, statement_type):
|
| 64 |
prompt = f"""Based on the following transactions JSON data:
|
| 65 |
{json.dumps(json_data)}
|
| 66 |
-
|
| 67 |
|
| 68 |
Specific Formatting and Content Requirements:
|
| 69 |
|
| 70 |
-
Standard Accounting Structure (
|
| 71 |
Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
|
| 72 |
Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "ZWL" for Zimbabwe Dollar if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
|
| 73 |
Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
|
| 74 |
Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
|
| 75 |
Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
|
| 76 |
Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}."""
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
# Create a PDF file from the report text
|
| 82 |
def create_pdf_report(report_text):
|
| 83 |
pdf = FPDF()
|
| 84 |
pdf.add_page()
|
| 85 |
pdf.set_font("Arial", size=12)
|
| 86 |
-
|
| 87 |
# Split report text into lines and add them to the PDF
|
| 88 |
for line in report_text.split('\n'):
|
| 89 |
pdf.multi_cell(0, 10, line)
|
| 90 |
-
|
| 91 |
pdf_buffer = BytesIO()
|
| 92 |
pdf.output(pdf_buffer)
|
| 93 |
pdf_buffer.seek(0)
|
|
@@ -96,12 +112,16 @@ def create_pdf_report(report_text):
|
|
| 96 |
def main():
|
| 97 |
st.title("Quantitlytix AI")
|
| 98 |
st.markdown("*Bank Statement Parser & Financial Report Generator*")
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
# Sidebar: Select input type: Bulk PDF or CSV Upload
|
| 101 |
input_type = st.sidebar.radio("Select Input Type", ("Bulk Bank Statement Upload", "CSV Upload"))
|
| 102 |
-
|
| 103 |
all_transactions = []
|
| 104 |
-
|
| 105 |
if input_type == "Bulk Bank Statement Upload":
|
| 106 |
uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
|
| 107 |
if uploaded_files:
|
|
@@ -115,16 +135,20 @@ def main():
|
|
| 115 |
continue
|
| 116 |
with st.spinner(f"Processing {uploaded_file.name}..."):
|
| 117 |
json_response = process_with_gemini(model, pdf_text)
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
except Exception as e:
|
| 129 |
st.error(f"Error processing PDF documents: {str(e)}")
|
| 130 |
st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
|
|
@@ -140,7 +164,7 @@ def main():
|
|
| 140 |
all_transactions.extend(transactions)
|
| 141 |
except Exception as e:
|
| 142 |
st.error(f"Error processing CSV file: {str(e)}")
|
| 143 |
-
|
| 144 |
# If transactions are loaded, show DataFrame
|
| 145 |
if all_transactions:
|
| 146 |
df = pd.DataFrame(all_transactions)
|
|
@@ -155,7 +179,7 @@ def main():
|
|
| 155 |
st.dataframe(df)
|
| 156 |
else:
|
| 157 |
st.info("No transactions loaded yet.")
|
| 158 |
-
|
| 159 |
# Financial report generation parameters
|
| 160 |
st.write("### Generate Financial Report")
|
| 161 |
col1, col2 = st.columns(2)
|
|
@@ -163,30 +187,54 @@ def main():
|
|
| 163 |
start_date = st.date_input("Start Date", date(2024, 1, 1))
|
| 164 |
with col2:
|
| 165 |
end_date = st.date_input("End Date", date(2024, 12, 31))
|
| 166 |
-
|
| 167 |
statement_type = st.selectbox("Select Financial Statement", ["Income Statement", "Cashflow Statement", "Balance Sheet"])
|
| 168 |
-
|
| 169 |
if st.button("Generate Financial Report"):
|
| 170 |
if not all_transactions:
|
| 171 |
st.error("No transactions available to generate report.")
|
| 172 |
else:
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
if __name__ == "__main__":
|
| 192 |
main()
|
|
|
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
import time
|
| 5 |
+
from datetime import datetime, date, timedelta
|
| 6 |
from io import BytesIO
|
| 7 |
|
| 8 |
import pandas as pd
|
|
|
|
| 10 |
import google.generativeai as genai
|
| 11 |
import pypdf
|
| 12 |
from fpdf import FPDF
|
| 13 |
+
from google.api_core import exceptions
|
| 14 |
|
| 15 |
# Configure API key for Gemini
|
| 16 |
+
api_key = os.environ.get('Gemini')
|
| 17 |
|
| 18 |
def configure_gemini(api_key):
|
| 19 |
genai.configure(api_key=api_key)
|
|
|
|
| 41 |
- City (In address of bank statement)
|
| 42 |
- Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to ‘Other expenses’. If 'Type' is 'income' set Destination_of_funds to ‘income’.)
|
| 43 |
- ignore opening or closing balances.
|
| 44 |
+
|
| 45 |
Return ONLY valid JSON with this structure:
|
| 46 |
{
|
| 47 |
"transactions": [
|
|
|
|
| 56 |
}
|
| 57 |
]
|
| 58 |
}"""
|
| 59 |
+
try:
|
| 60 |
+
response = model.generate_content([prompt, text])
|
| 61 |
+
time.sleep(5) # Sleep for 5 seconds to work around rate limit
|
| 62 |
+
return response.text
|
| 63 |
+
except exceptions.ServiceUnavailable as e:
|
| 64 |
+
if e.response.status_code == 504:
|
| 65 |
+
st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
|
| 66 |
+
return None
|
| 67 |
+
else:
|
| 68 |
+
raise
|
| 69 |
|
| 70 |
# Generate financial report from aggregated JSON transactions and chosen parameters
|
| 71 |
def generate_financial_report(model, json_data, start_date, end_date, statement_type, rate_limit_delay=7):
    """Ask the Gemini model for a narrative financial statement.

    Builds a prompt that embeds the transactions as JSON together with the
    reporting period and the requested statement type, sends it to the
    model, and returns the generated text.

    Args:
        model: Object exposing ``generate_content(list)`` whose result has a
            ``text`` attribute.
        json_data: JSON-serialisable transactions payload; it is embedded in
            the prompt via ``json.dumps``.
        start_date: Start of the reporting period; must support
            ``strftime``.
        end_date: End of the reporting period; must support ``strftime``.
        statement_type: Name of the statement to generate (interpolated into
            the prompt as-is).
        rate_limit_delay: Seconds to pause after a successful call to work
            around the API rate limit. Defaults to the original 7 seconds.

    Returns:
        The generated report text, or ``None`` when the API timed out (504);
        in that case an error is shown through Streamlit and
        ``st.session_state['last_error']`` is set to ``"504"``.

    Raises:
        google.api_core.exceptions.ServiceUnavailable: Re-raised when the
            failure is not a 504 timeout.
    """
    # NOTE(review): the prompt mixes "Zimbabwe" and "South Africa" wording —
    # confirm the intended jurisdiction before changing the text.
    prompt = f"""Based on the following transactions JSON data:
{json.dumps(json_data)}
Generate a detailed {statement_type} report for the period from {start_date.strftime('%d/%m/%Y')} to {end_date.strftime('%d/%m/%Y')}. Present the report in a standard accounting format relevant to Zimbabwe, but with improved readability and visual appeal.

Specific Formatting and Content Requirements:

Standard Accounting Structure (South Africa Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "ZWL" for Zimbabwe Dollar if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}."""
    try:
        response = model.generate_content([prompt])
    except (exceptions.GatewayTimeout, exceptions.ServiceUnavailable) as e:
        # ServiceUnavailable is HTTP 503; genuine 504s surface as
        # GatewayTimeout / DeadlineExceeded. ``e.response`` may be None or a
        # gRPC call object without ``status_code``, so read it defensively
        # and fall back to the exception's own ``code``.
        http_response = getattr(e, "response", None)
        status = getattr(http_response, "status_code", None)
        if status is None:
            status = getattr(e, "code", None)
        if isinstance(e, exceptions.GatewayTimeout) or status == 504:
            st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
            st.session_state['last_error'] = "504"  # Store the error in session state
            return None
        raise
    if rate_limit_delay and rate_limit_delay > 0:
        time.sleep(rate_limit_delay)  # Pause to work around the API rate limit
    return response.text
|
| 96 |
|
| 97 |
# Create a PDF file from the report text
|
| 98 |
def create_pdf_report(report_text):
|
| 99 |
pdf = FPDF()
|
| 100 |
pdf.add_page()
|
| 101 |
pdf.set_font("Arial", size=12)
|
| 102 |
+
|
| 103 |
# Split report text into lines and add them to the PDF
|
| 104 |
for line in report_text.split('\n'):
|
| 105 |
pdf.multi_cell(0, 10, line)
|
| 106 |
+
|
| 107 |
pdf_buffer = BytesIO()
|
| 108 |
pdf.output(pdf_buffer)
|
| 109 |
pdf_buffer.seek(0)
|
|
|
|
| 112 |
def main():
|
| 113 |
st.title("Quantitlytix AI")
|
| 114 |
st.markdown("*Bank Statement Parser & Financial Report Generator*")
|
| 115 |
+
|
| 116 |
+
# Initialize session state for last error
|
| 117 |
+
if 'last_error' not in st.session_state:
|
| 118 |
+
st.session_state['last_error'] = None
|
| 119 |
+
|
| 120 |
# Sidebar: Select input type: Bulk PDF or CSV Upload
|
| 121 |
input_type = st.sidebar.radio("Select Input Type", ("Bulk Bank Statement Upload", "CSV Upload"))
|
| 122 |
+
|
| 123 |
all_transactions = []
|
| 124 |
+
|
| 125 |
if input_type == "Bulk Bank Statement Upload":
|
| 126 |
uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
|
| 127 |
if uploaded_files:
|
|
|
|
| 135 |
continue
|
| 136 |
with st.spinner(f"Processing {uploaded_file.name}..."):
|
| 137 |
json_response = process_with_gemini(model, pdf_text)
|
| 138 |
+
if json_response:
|
| 139 |
+
start_idx = json_response.find('{')
|
| 140 |
+
end_idx = json_response.rfind('}') + 1
|
| 141 |
+
if start_idx == -1 or end_idx == -1:
|
| 142 |
+
st.warning(f"Invalid JSON response for {uploaded_file.name}.")
|
| 143 |
+
continue
|
| 144 |
+
json_str = json_response[start_idx:end_idx]
|
| 145 |
+
json_str = json_str.replace('```json', '').replace('```', '')
|
| 146 |
+
try:
|
| 147 |
+
data = json.loads(json_str)
|
| 148 |
+
transactions = data.get('transactions', [])
|
| 149 |
+
all_transactions.extend(transactions)
|
| 150 |
+
except json.JSONDecodeError as e:
|
| 151 |
+
st.error(f"Error decoding JSON for {uploaded_file.name}: {e}")
|
| 152 |
except Exception as e:
|
| 153 |
st.error(f"Error processing PDF documents: {str(e)}")
|
| 154 |
st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
|
|
|
|
| 164 |
all_transactions.extend(transactions)
|
| 165 |
except Exception as e:
|
| 166 |
st.error(f"Error processing CSV file: {str(e)}")
|
| 167 |
+
|
| 168 |
# If transactions are loaded, show DataFrame
|
| 169 |
if all_transactions:
|
| 170 |
df = pd.DataFrame(all_transactions)
|
|
|
|
| 179 |
st.dataframe(df)
|
| 180 |
else:
|
| 181 |
st.info("No transactions loaded yet.")
|
| 182 |
+
|
| 183 |
# Financial report generation parameters
|
| 184 |
st.write("### Generate Financial Report")
|
| 185 |
col1, col2 = st.columns(2)
|
|
|
|
| 187 |
start_date = st.date_input("Start Date", date(2024, 1, 1))
|
| 188 |
with col2:
|
| 189 |
end_date = st.date_input("End Date", date(2024, 12, 31))
|
| 190 |
+
|
| 191 |
statement_type = st.selectbox("Select Financial Statement", ["Income Statement", "Cashflow Statement", "Balance Sheet"])
|
| 192 |
+
|
| 193 |
if st.button("Generate Financial Report"):
|
| 194 |
if not all_transactions:
|
| 195 |
st.error("No transactions available to generate report.")
|
| 196 |
else:
|
| 197 |
+
# Filter transactions by date
|
| 198 |
+
filtered_transactions = []
|
| 199 |
+
for transaction in all_transactions:
|
| 200 |
+
try:
|
| 201 |
+
transaction_date = datetime.strptime(transaction.get('Date'), '%d/%m/%Y').date()
|
| 202 |
+
if start_date <= transaction_date <= end_date:
|
| 203 |
+
filtered_transactions.append(transaction)
|
| 204 |
+
except (ValueError, TypeError):
|
| 205 |
+
st.warning(f"Could not parse date for transaction: {transaction}")
|
| 206 |
+
continue
|
| 207 |
+
|
| 208 |
+
if not filtered_transactions:
|
| 209 |
+
st.warning("No transactions found within the selected date range.")
|
| 210 |
+
else:
|
| 211 |
+
try:
|
| 212 |
+
model = configure_gemini(api_key)
|
| 213 |
+
combined_json = {"transactions": filtered_transactions}
|
| 214 |
+
with st.spinner("Generating financial report..."):
|
| 215 |
+
report_text = generate_financial_report(model, combined_json, start_date, end_date, statement_type)
|
| 216 |
+
if report_text:
|
| 217 |
+
st.success("Financial report generated!")
|
| 218 |
+
st.text_area("Financial Report", report_text, height=300)
|
| 219 |
+
|
| 220 |
+
pdf_buffer = create_pdf_report(report_text)
|
| 221 |
+
st.download_button(
|
| 222 |
+
label="Download Financial Report as PDF",
|
| 223 |
+
data=pdf_buffer,
|
| 224 |
+
file_name=f"financial_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
|
| 225 |
+
mime="application/pdf"
|
| 226 |
+
)
|
| 227 |
+
except exceptions.ServiceUnavailable as e:
|
| 228 |
+
if e.response.status_code == 504:
|
| 229 |
+
st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
|
| 230 |
+
else:
|
| 231 |
+
st.error(f"Error generating financial report: {str(e)}")
|
| 232 |
+
except Exception as e:
|
| 233 |
+
st.error(f"Error generating financial report: {str(e)}")
|
| 234 |
+
if "504" in str(e):
|
| 235 |
+
st.info("The Gemini API might be overloaded. Consider reducing the time period for the report.")
|
| 236 |
+
elif len(filtered_transactions) > 500: # Example threshold, adjust as needed
|
| 237 |
+
st.info("For very large datasets, consider generating reports for smaller time periods (e.g., monthly) and combining them manually if a single comprehensive report fails.")
|
| 238 |
|
| 239 |
if __name__ == "__main__":
|
| 240 |
main()
|