Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,14 +11,20 @@ import google.generativeai as genai
|
|
| 11 |
import pypdf
|
| 12 |
from fpdf import FPDF
|
| 13 |
from google.api_core import exceptions
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Configure API key for Gemini
|
| 16 |
api_key = os.environ.get('Gemini')
|
| 17 |
|
| 18 |
def configure_gemini(api_key):
|
| 19 |
genai.configure(api_key=api_key)
|
| 20 |
-
return genai.GenerativeModel('gemini-2.
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# Read PDF content from a file-like object (from Streamlit uploader)
|
| 23 |
def read_pdf(file_obj):
|
| 24 |
file_obj.seek(0) # Ensure the file pointer is at the start
|
|
@@ -39,7 +45,7 @@ def process_with_gemini(model, text):
|
|
| 39 |
- Type (is 'income' if 'credit amount', else 'expense')
|
| 40 |
- Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
|
| 41 |
- City (In address of bank statement)
|
| 42 |
-
- Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to
|
| 43 |
- ignore opening or closing balances.
|
| 44 |
|
| 45 |
Return ONLY valid JSON with this structure:
|
|
@@ -58,7 +64,7 @@ def process_with_gemini(model, text):
|
|
| 58 |
}"""
|
| 59 |
try:
|
| 60 |
response = model.generate_content([prompt, text])
|
| 61 |
-
time.sleep(
|
| 62 |
return response.text
|
| 63 |
except exceptions.ServiceUnavailable as e:
|
| 64 |
if e.response.status_code == 504:
|
|
@@ -81,7 +87,10 @@ Consistent Formatting: Maintain consistent formatting for monetary values (e.g.,
|
|
| 81 |
Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
|
| 82 |
Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
|
| 83 |
Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
|
| 84 |
-
Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}.
|
|
|
|
|
|
|
|
|
|
| 85 |
try:
|
| 86 |
response = model.generate_content([prompt])
|
| 87 |
time.sleep(7) # Sleep for 7 seconds to work around rate limit
|
|
@@ -94,31 +103,57 @@ Concise Summary: Provide a concluding summary paragraph that encapsulates the ov
|
|
| 94 |
else:
|
| 95 |
raise
|
| 96 |
|
| 97 |
-
# Create a PDF file from the report text
|
| 98 |
def create_pdf_report(report_text):
|
| 99 |
pdf = FPDF()
|
| 100 |
pdf.add_page()
|
| 101 |
-
pdf.set_font("Arial",
|
| 102 |
-
|
| 103 |
-
#
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
# Create BytesIO object
|
| 108 |
pdf_buffer = BytesIO()
|
| 109 |
|
| 110 |
-
#
|
| 111 |
-
# Fix: Use the dest parameter to write to bytes
|
| 112 |
-
pdf.output(dest='S').encode('latin-1') # Get PDF as string and encode to bytes
|
| 113 |
-
|
| 114 |
-
# Write the encoded bytes to our BytesIO buffer
|
| 115 |
pdf_buffer.write(pdf.output(dest='S').encode('latin-1'))
|
| 116 |
pdf_buffer.seek(0)
|
| 117 |
|
| 118 |
return pdf_buffer
|
| 119 |
|
| 120 |
|
| 121 |
-
|
| 122 |
def main():
|
| 123 |
st.title("Quantitlytix AI")
|
| 124 |
st.markdown("*Bank Statement Parser & Financial Report Generator*")
|
|
@@ -135,15 +170,27 @@ def main():
|
|
| 135 |
if input_type == "Bulk Bank Statement Upload":
|
| 136 |
uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
|
| 137 |
if uploaded_files:
|
| 138 |
-
|
|
|
|
| 139 |
try:
|
| 140 |
model = configure_gemini(api_key)
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
pdf_text = read_pdf(uploaded_file)
|
| 143 |
if not pdf_text:
|
| 144 |
st.warning(f"No text found in {uploaded_file.name}.")
|
| 145 |
continue
|
| 146 |
-
|
|
|
|
| 147 |
json_response = process_with_gemini(model, pdf_text)
|
| 148 |
if json_response:
|
| 149 |
start_idx = json_response.find('{')
|
|
@@ -159,6 +206,11 @@ def main():
|
|
| 159 |
all_transactions.extend(transactions)
|
| 160 |
except json.JSONDecodeError as e:
|
| 161 |
st.error(f"Error decoding JSON for {uploaded_file.name}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
except Exception as e:
|
| 163 |
st.error(f"Error processing PDF documents: {str(e)}")
|
| 164 |
st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
|
|
@@ -223,15 +275,18 @@ def main():
|
|
| 223 |
st.warning("No transactions found within the selected date range.")
|
| 224 |
else:
|
| 225 |
try:
|
| 226 |
-
|
| 227 |
combined_json = {"transactions": filtered_transactions}
|
| 228 |
with st.spinner("Generating financial report..."):
|
| 229 |
-
report_text = generate_financial_report(
|
| 230 |
if report_text:
|
| 231 |
st.success("Financial report generated!")
|
| 232 |
-
|
| 233 |
-
|
|
|
|
|
|
|
| 234 |
|
|
|
|
| 235 |
pdf_buffer = create_pdf_report(report_text)
|
| 236 |
st.download_button(
|
| 237 |
label="Download Financial Report as PDF",
|
|
|
|
| 11 |
import pypdf
|
| 12 |
from fpdf import FPDF
|
| 13 |
from google.api_core import exceptions
|
| 14 |
+
import markdown
|
| 15 |
+
from markdown.extensions.tables import TableExtension
|
| 16 |
|
| 17 |
# Configure API key for Gemini
|
| 18 |
api_key = os.environ.get('Gemini')
|
| 19 |
|
| 20 |
def configure_gemini(api_key, model_name='gemini-2.0-flash-thinking-exp'):
    """Configure the Gemini client and return a generative model handle.

    Args:
        api_key: API key forwarded to ``genai.configure``.
        model_name: Name of the Gemini model to instantiate. The default is
            the model this function has always returned, so existing
            single-argument callers are unaffected.

    Returns:
        A ``genai.GenerativeModel`` bound to ``model_name``.
    """
    genai.configure(api_key=api_key)
    return genai.GenerativeModel(model_name)


def configure_gemini1(api_key):
    """Configure the Gemini client and return the 'gemini-2.5-pro-exp-03-25' model.

    Kept as a separate entry point for existing callers; it only differs from
    ``configure_gemini`` in the model name, so it delegates instead of
    repeating the configuration code.

    Args:
        api_key: API key forwarded to ``genai.configure``.

    Returns:
        A ``genai.GenerativeModel`` for 'gemini-2.5-pro-exp-03-25'.
    """
    return configure_gemini(api_key, 'gemini-2.5-pro-exp-03-25')
|
| 27 |
+
|
| 28 |
# Read PDF content from a file-like object (from Streamlit uploader)
|
| 29 |
def read_pdf(file_obj):
|
| 30 |
file_obj.seek(0) # Ensure the file pointer is at the start
|
|
|
|
| 45 |
- Type (is 'income' if 'credit amount', else 'expense')
|
| 46 |
- Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
|
| 47 |
- City (In address of bank statement)
|
| 48 |
+
- Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to 'Other expenses'. If 'Type' is 'income' set Destination_of_funds to 'income'.)
|
| 49 |
- ignore opening or closing balances.
|
| 50 |
|
| 51 |
Return ONLY valid JSON with this structure:
|
|
|
|
| 64 |
}"""
|
| 65 |
try:
|
| 66 |
response = model.generate_content([prompt, text])
|
| 67 |
+
time.sleep(6) # Sleep for 6 seconds to work around rate limit
|
| 68 |
return response.text
|
| 69 |
except exceptions.ServiceUnavailable as e:
|
| 70 |
if e.response.status_code == 504:
|
|
|
|
| 87 |
Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
|
| 88 |
Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
|
| 89 |
Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
|
| 90 |
+
Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}.
|
| 91 |
+
|
| 92 |
+
Format the report in Markdown for better visual structure.
|
| 93 |
+
Do not name the company if name is not there and return just the report and nothing else."""
|
| 94 |
try:
|
| 95 |
response = model.generate_content([prompt])
|
| 96 |
time.sleep(7) # Sleep for 7 seconds to work around rate limit
|
|
|
|
| 103 |
else:
|
| 104 |
raise
|
| 105 |
|
| 106 |
+
# Create a PDF file from the markdown report text
|
| 107 |
# Byte 149 is the bullet glyph in the WinAnsi (cp1252) encoding used by FPDF's
# built-in fonts, and unlike U+2022 it survives a latin-1 encode.
_PDF_BULLET = chr(149)

# Typographic characters that commonly appear in generated text but are not
# representable in latin-1, mapped to close equivalents.
_PDF_CHAR_MAP = str.maketrans({
    '\u2018': "'",
    '\u2019': "'",
    '\u201c': '"',
    '\u201d': '"',
    '\u2013': '-',
    '\u2014': '-',
    '\u2212': '-',
    '\u2026': '...',
    '\u2022': _PDF_BULLET,
})


def _latin1_safe(text):
    """Return *text* with every character representable in latin-1.

    Known typographic characters are mapped to close equivalents; anything
    else that latin-1 cannot encode becomes '?', so writing the text with a
    core (non-Unicode) PDF font can no longer raise an encoding error.
    """
    return text.translate(_PDF_CHAR_MAP).encode('latin-1', 'replace').decode('latin-1')


def create_pdf_report(report_text):
    """Render a Markdown-formatted report into a PDF held in memory.

    Only a small Markdown subset is interpreted, line by line: ``#``/``##``/
    ``###`` headings, ``*``/``-`` bullet items, plain paragraphs and blank
    lines. Any other markup is written out as plain text.

    Args:
        report_text: The report as a Markdown string.

    Returns:
        A ``BytesIO`` positioned at offset 0 that contains the PDF bytes.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)

    for raw_line in report_text.split('\n'):
        # The core "Arial" font is latin-1 only; sanitize before any write so
        # characters such as curly quotes or U+2022 cannot abort the export.
        line = _latin1_safe(raw_line)

        if line.startswith('# '):
            pdf.set_font("Arial", "B", 18)
            pdf.cell(0, 10, line[2:], 0, 1)
            pdf.ln(5)
        elif line.startswith('## '):
            pdf.set_font("Arial", "B", 16)
            pdf.cell(0, 10, line[3:], 0, 1)
            pdf.ln(3)
        elif line.startswith('### '):
            pdf.set_font("Arial", "B", 14)
            pdf.cell(0, 10, line[4:], 0, 1)
            pdf.ln(3)
        elif line.startswith(('* ', '- ', _PDF_BULLET + ' ')):
            # Bullet item: fixed-width marker cell, then the wrapped text.
            pdf.set_font("Arial", "", 12)
            pdf.cell(10, 10, _PDF_BULLET, 0, 0)
            pdf.multi_cell(0, 10, line[2:])
        elif line.strip():
            pdf.set_font("Arial", "", 12)
            pdf.multi_cell(0, 10, line)
        else:
            # Blank line: vertical spacing only.
            pdf.ln(5)

    # Depending on the installed FPDF implementation, output(dest='S')
    # returns either a latin-1 string or a bytes-like object; accept both.
    rendered = pdf.output(dest='S')
    if isinstance(rendered, str):
        pdf_bytes = rendered.encode('latin-1')
    else:
        pdf_bytes = bytes(rendered)

    pdf_buffer = BytesIO()
    pdf_buffer.write(pdf_bytes)
    pdf_buffer.seek(0)

    return pdf_buffer
|
| 155 |
|
| 156 |
|
|
|
|
| 157 |
def main():
|
| 158 |
st.title("Quantitlytix AI")
|
| 159 |
st.markdown("*Bank Statement Parser & Financial Report Generator*")
|
|
|
|
| 170 |
if input_type == "Bulk Bank Statement Upload":
|
| 171 |
uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
|
| 172 |
if uploaded_files:
|
| 173 |
+
total_files = len(uploaded_files)
|
| 174 |
+
st.write(f"{total_files} PDF file(s) uploaded.")
|
| 175 |
try:
|
| 176 |
model = configure_gemini(api_key)
|
| 177 |
+
|
| 178 |
+
# Create a progress bar
|
| 179 |
+
progress_bar = st.progress(0)
|
| 180 |
+
status_text = st.empty()
|
| 181 |
+
|
| 182 |
+
for index, uploaded_file in enumerate(uploaded_files):
|
| 183 |
+
# Update progress bar and status text
|
| 184 |
+
progress = (index) / total_files
|
| 185 |
+
progress_bar.progress(progress)
|
| 186 |
+
status_text.text(f"Processing file {index+1} of {total_files}: {uploaded_file.name}")
|
| 187 |
+
|
| 188 |
pdf_text = read_pdf(uploaded_file)
|
| 189 |
if not pdf_text:
|
| 190 |
st.warning(f"No text found in {uploaded_file.name}.")
|
| 191 |
continue
|
| 192 |
+
|
| 193 |
+
with st.spinner(f"Processing {uploaded_file.name}... ({index+1}/{total_files})"):
|
| 194 |
json_response = process_with_gemini(model, pdf_text)
|
| 195 |
if json_response:
|
| 196 |
start_idx = json_response.find('{')
|
|
|
|
| 206 |
all_transactions.extend(transactions)
|
| 207 |
except json.JSONDecodeError as e:
|
| 208 |
st.error(f"Error decoding JSON for {uploaded_file.name}: {e}")
|
| 209 |
+
|
| 210 |
+
# Complete the progress bar
|
| 211 |
+
progress_bar.progress(1.0)
|
| 212 |
+
status_text.text(f"Completed processing {total_files} files!")
|
| 213 |
+
|
| 214 |
except Exception as e:
|
| 215 |
st.error(f"Error processing PDF documents: {str(e)}")
|
| 216 |
st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
|
|
|
|
| 275 |
st.warning("No transactions found within the selected date range.")
|
| 276 |
else:
|
| 277 |
try:
|
| 278 |
+
model1 = configure_gemini1(api_key)
|
| 279 |
combined_json = {"transactions": filtered_transactions}
|
| 280 |
with st.spinner("Generating financial report..."):
|
| 281 |
+
report_text = generate_financial_report(model1, combined_json, start_date, end_date, statement_type)
|
| 282 |
if report_text:
|
| 283 |
st.success("Financial report generated!")
|
| 284 |
+
|
| 285 |
+
# Display the report as markdown
|
| 286 |
+
st.markdown("### Financial Report Preview")
|
| 287 |
+
st.markdown(report_text)
|
| 288 |
|
| 289 |
+
# Create PDF from markdown
|
| 290 |
pdf_buffer = create_pdf_report(report_text)
|
| 291 |
st.download_button(
|
| 292 |
label="Download Financial Report as PDF",
|