Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import re
|
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
import time
|
| 5 |
-
from datetime import datetime
|
| 6 |
from io import BytesIO
|
| 7 |
|
| 8 |
import pandas as pd
|
|
@@ -32,40 +32,50 @@ def read_pdf(file_obj):
|
|
| 32 |
# Process PDF text with Gemini to extract transactions as JSON
|
| 33 |
def process_with_gemini(model, text):
|
| 34 |
prompt = """Analyze this bank statement and extract transactions in JSON format with these fields:
|
| 35 |
-
- Date (format DD/MM/YYYY)
|
| 36 |
-
- Description
|
| 37 |
-
- Amount (just the integer value)
|
| 38 |
-
- Type (is 'income' if 'credit amount', else 'expense')
|
| 39 |
-
- Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
|
| 40 |
-
- City (In address of bank statement)
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
| 55 |
response = model.generate_content([prompt, text])
|
| 56 |
time.sleep(7) # Sleep for 7 seconds to work around rate limit
|
| 57 |
return response.text
|
| 58 |
|
| 59 |
-
# Generate financial report from aggregated JSON transactions and chosen
|
| 60 |
-
def generate_financial_report(model, json_data,
|
| 61 |
prompt = f"""Based on the following transactions JSON data:
|
| 62 |
{json.dumps(json_data)}
|
|
|
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
response = model.generate_content([prompt])
|
| 68 |
-
time.sleep(
|
| 69 |
return response.text
|
| 70 |
|
| 71 |
# Create a PDF file from the report text
|
|
@@ -87,84 +97,96 @@ def main():
|
|
| 87 |
st.title("Quantitlytix AI")
|
| 88 |
st.markdown("*Bank Statement Parser & Financial Report Generator*")
|
| 89 |
|
| 90 |
-
#
|
| 91 |
-
|
| 92 |
|
| 93 |
-
|
| 94 |
-
if uploaded_files:
|
| 95 |
-
st.write(f"{len(uploaded_files)} file(s) uploaded.")
|
| 96 |
|
| 97 |
-
if
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
st.warning(f"No text found in {uploaded_file.name}.")
|
| 108 |
-
continue
|
| 109 |
-
|
| 110 |
-
with st.spinner(f"Processing {uploaded_file.name}..."):
|
| 111 |
-
json_response = process_with_gemini(model, pdf_text)
|
| 112 |
-
# Extract valid JSON from the response
|
| 113 |
-
start_idx = json_response.find('{')
|
| 114 |
-
end_idx = json_response.rfind('}') + 1
|
| 115 |
-
if start_idx == -1 or end_idx == -1:
|
| 116 |
-
st.warning(f"Invalid JSON response for {uploaded_file.name}.")
|
| 117 |
continue
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
st.
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
with st.spinner("Generating financial report..."):
|
| 150 |
-
report_text = generate_financial_report(model, combined_json,
|
| 151 |
st.success("Financial report generated!")
|
| 152 |
st.text_area("Financial Report", report_text, height=300)
|
| 153 |
|
| 154 |
-
# Create PDF from the report text
|
| 155 |
pdf_buffer = create_pdf_report(report_text)
|
| 156 |
-
|
| 157 |
-
# Provide a download button for the PDF report
|
| 158 |
st.download_button(
|
| 159 |
label="Download Financial Report as PDF",
|
| 160 |
data=pdf_buffer,
|
| 161 |
file_name=f"financial_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
|
| 162 |
mime="application/pdf"
|
| 163 |
)
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
st.error(f"Error processing documents: {str(e)}")
|
| 167 |
-
st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
|
| 168 |
|
| 169 |
if __name__ == "__main__":
|
| 170 |
main()
|
|
|
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
import time
|
| 5 |
+
from datetime import datetime, date
|
| 6 |
from io import BytesIO
|
| 7 |
|
| 8 |
import pandas as pd
|
|
|
|
| 32 |
# Process PDF text with Gemini to extract transactions as JSON
|
| 33 |
def process_with_gemini(model, text):
|
| 34 |
prompt = """Analyze this bank statement and extract transactions in JSON format with these fields:
|
| 35 |
+
- Date (format DD/MM/YYYY)
|
| 36 |
+
- Description
|
| 37 |
+
- Amount (just the integer value)
|
| 38 |
+
- Type (is 'income' if 'credit amount', else 'expense')
|
| 39 |
+
- Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
|
| 40 |
+
- City (In address of bank statement)
|
| 41 |
+
- Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to ‘Other expenses’. If 'Type' is 'income' set Category_of_expense to ‘income’.)
|
| 42 |
+
- ignore opening or closing balances.
|
| 43 |
+
|
| 44 |
+
Return ONLY valid JSON with this structure:
|
| 45 |
+
{
|
| 46 |
+
"transactions": [
|
| 47 |
+
{
|
| 48 |
+
"Date": "string",
|
| 49 |
+
"Description": "string",
|
| 50 |
+
"Customer_name": "string",
|
| 51 |
+
"City": "string",
|
| 52 |
+
"Amount": number,
|
| 53 |
+
"Type": "string",
|
| 54 |
+
"Category_of_expense": "string"
|
| 55 |
+
}
|
| 56 |
+
]
|
| 57 |
+
}"""
|
| 58 |
response = model.generate_content([prompt, text])
|
| 59 |
time.sleep(7) # Sleep for 7 seconds to work around rate limit
|
| 60 |
return response.text
|
| 61 |
|
| 62 |
+
# Generate financial report from aggregated JSON transactions and chosen parameters
|
| 63 |
+
def generate_financial_report(model, json_data, start_date, end_date, statement_type):
|
| 64 |
prompt = f"""Based on the following transactions JSON data:
|
| 65 |
{json.dumps(json_data)}
|
| 66 |
+
For transactions between {start_date.strftime('%d/%m/%Y')} and {end_date.strftime('%d/%m/%Y')}, generate a detailed {statement_type} report presented in a standard accounting format relevant to Zimbabwe, but with improved readability and visual appeal.
|
| 67 |
|
| 68 |
+
Specific Formatting and Content Requirements:
|
| 69 |
+
|
| 70 |
+
Standard Accounting Structure (Zimbabwe Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
|
| 71 |
+
Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
|
| 72 |
+
Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "ZWL" for Zimbabwe Dollar if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
|
| 73 |
+
Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
|
| 74 |
+
Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
|
| 75 |
+
Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
|
| 76 |
+
Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}."""
|
| 77 |
response = model.generate_content([prompt])
|
| 78 |
+
time.sleep(5) # Sleep for 5 seconds to work around rate limit
|
| 79 |
return response.text
|
| 80 |
|
| 81 |
# Create a PDF file from the report text
|
|
|
|
| 97 |
st.title("Quantitlytix AI")
|
| 98 |
st.markdown("*Bank Statement Parser & Financial Report Generator*")
|
| 99 |
|
| 100 |
+
# Sidebar: Select input type: Bulk PDF or CSV Upload
|
| 101 |
+
input_type = st.sidebar.radio("Select Input Type", ("Bulk Bank Statement Upload", "CSV Upload"))
|
| 102 |
|
| 103 |
+
all_transactions = []
|
|
|
|
|
|
|
| 104 |
|
| 105 |
+
if input_type == "Bulk Bank Statement Upload":
|
| 106 |
+
uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
|
| 107 |
+
if uploaded_files:
|
| 108 |
+
st.write(f"{len(uploaded_files)} PDF file(s) uploaded.")
|
| 109 |
+
try:
|
| 110 |
+
model = configure_gemini(api_key)
|
| 111 |
+
for uploaded_file in uploaded_files:
|
| 112 |
+
pdf_text = read_pdf(uploaded_file)
|
| 113 |
+
if not pdf_text:
|
| 114 |
+
st.warning(f"No text found in {uploaded_file.name}.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
continue
|
| 116 |
+
with st.spinner(f"Processing {uploaded_file.name}..."):
|
| 117 |
+
json_response = process_with_gemini(model, pdf_text)
|
| 118 |
+
start_idx = json_response.find('{')
|
| 119 |
+
end_idx = json_response.rfind('}') + 1
|
| 120 |
+
if start_idx == -1 or end_idx == -1:
|
| 121 |
+
st.warning(f"Invalid JSON response for {uploaded_file.name}.")
|
| 122 |
+
continue
|
| 123 |
+
json_str = json_response[start_idx:end_idx]
|
| 124 |
+
json_str = json_str.replace('```json', '').replace('```', '')
|
| 125 |
+
data = json.loads(json_str)
|
| 126 |
+
transactions = data.get('transactions', [])
|
| 127 |
+
all_transactions.extend(transactions)
|
| 128 |
+
except Exception as e:
|
| 129 |
+
st.error(f"Error processing PDF documents: {str(e)}")
|
| 130 |
+
st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
|
| 131 |
+
elif input_type == "CSV Upload":
|
| 132 |
+
uploaded_csv = st.file_uploader("Upload CSV of transactions", type="csv")
|
| 133 |
+
if uploaded_csv:
|
| 134 |
+
try:
|
| 135 |
+
df = pd.read_csv(uploaded_csv)
|
| 136 |
+
st.write("CSV Data Preview:")
|
| 137 |
+
st.dataframe(df.head())
|
| 138 |
+
# Convert dataframe to list of transaction dictionaries
|
| 139 |
+
transactions = df.to_dict(orient='records')
|
| 140 |
+
all_transactions.extend(transactions)
|
| 141 |
+
except Exception as e:
|
| 142 |
+
st.error(f"Error processing CSV file: {str(e)}")
|
| 143 |
+
|
| 144 |
+
# If transactions are loaded, show DataFrame
|
| 145 |
+
if all_transactions:
|
| 146 |
+
df = pd.DataFrame(all_transactions)
|
| 147 |
+
try:
|
| 148 |
+
df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce')
|
| 149 |
+
df['Amount'] = df['Amount'].apply(lambda x: f"R {x:,.2f}" if x >= 0 else f"R ({abs(x):,.2f})")
|
| 150 |
+
df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y', errors='coerce').dt.strftime('%d/%m/%Y')
|
| 151 |
+
except Exception as e:
|
| 152 |
+
st.warning("Some data could not be formatted correctly.")
|
| 153 |
+
st.success("Transactions loaded successfully!")
|
| 154 |
+
st.write("### Extracted Transactions")
|
| 155 |
+
st.dataframe(df)
|
| 156 |
+
else:
|
| 157 |
+
st.info("No transactions loaded yet.")
|
| 158 |
+
|
| 159 |
+
# Financial report generation parameters
|
| 160 |
+
st.write("### Generate Financial Report")
|
| 161 |
+
col1, col2 = st.columns(2)
|
| 162 |
+
with col1:
|
| 163 |
+
start_date = st.date_input("Start Date", date(2024, 1, 1))
|
| 164 |
+
with col2:
|
| 165 |
+
end_date = st.date_input("End Date", date(2024, 12, 31))
|
| 166 |
+
|
| 167 |
+
statement_type = st.selectbox("Select Financial Statement", ["Income Statement", "Cashflow Statement", "Balance Sheet"])
|
| 168 |
+
|
| 169 |
+
if st.button("Generate Financial Report"):
|
| 170 |
+
if not all_transactions:
|
| 171 |
+
st.error("No transactions available to generate report.")
|
| 172 |
+
else:
|
| 173 |
+
try:
|
| 174 |
+
model = configure_gemini(api_key)
|
| 175 |
+
combined_json = {"transactions": all_transactions}
|
| 176 |
with st.spinner("Generating financial report..."):
|
| 177 |
+
report_text = generate_financial_report(model, combined_json, start_date, end_date, statement_type)
|
| 178 |
st.success("Financial report generated!")
|
| 179 |
st.text_area("Financial Report", report_text, height=300)
|
| 180 |
|
|
|
|
| 181 |
pdf_buffer = create_pdf_report(report_text)
|
|
|
|
|
|
|
| 182 |
st.download_button(
|
| 183 |
label="Download Financial Report as PDF",
|
| 184 |
data=pdf_buffer,
|
| 185 |
file_name=f"financial_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
|
| 186 |
mime="application/pdf"
|
| 187 |
)
|
| 188 |
+
except Exception as e:
|
| 189 |
+
st.error(f"Error generating financial report: {str(e)}")
|
|
|
|
|
|
|
| 190 |
|
| 191 |
if __name__ == "__main__":
|
| 192 |
main()
|