Documents-Manager

Sleeping

App Files Files Community

rairo committed on Apr 1, 2025

Commit

931320e

verified ·

1 Parent(s): 8be6607

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -23

app.py CHANGED Viewed

@@ -11,14 +11,20 @@ import google.generativeai as genai
 import pypdf
 from fpdf import FPDF
 from google.api_core import exceptions
 # Configure API key for Gemini
 api_key = os.environ.get('Gemini')
 def configure_gemini(api_key):
     genai.configure(api_key=api_key)
-    return genai.GenerativeModel('gemini-2.5-pro-exp-03-25')
 # Read PDF content from a file-like object (from Streamlit uploader)
 def read_pdf(file_obj):
     file_obj.seek(0)  # Ensure the file pointer is at the start
@@ -39,7 +45,7 @@ def process_with_gemini(model, text):
     - Type (is 'income' if 'credit amount', else 'expense')
     - Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
     - City (In address of bank statement)
-    - Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to ‘Other expenses’. If 'Type' is 'income' set Destination_of_funds to ‘income’.)
     - ignore opening or closing balances.
     Return ONLY valid JSON with this structure:
@@ -58,7 +64,7 @@ def process_with_gemini(model, text):
     }"""
     try:
         response = model.generate_content([prompt, text])
-        time.sleep(8)  # Sleep for 8 seconds to work around rate limit
         return response.text
     except exceptions.ServiceUnavailable as e:
         if e.response.status_code == 504:
@@ -81,7 +87,10 @@ Consistent Formatting: Maintain consistent formatting for monetary values (e.g.,
 Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
 Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
 Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
-Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}. Return just the report and nothing else."""
     try:
         response = model.generate_content([prompt])
         time.sleep(7)  # Sleep for 7 seconds to work around rate limit
@@ -94,31 +103,57 @@ Concise Summary: Provide a concluding summary paragraph that encapsulates the ov
         else:
             raise
-# Create a PDF file from the report text
 def create_pdf_report(report_text):
     pdf = FPDF()
     pdf.add_page()
-    pdf.set_font("Arial", size=12)
-    # Split report text into lines and add them to the PDF
-    for line in report_text.split('\n'):
-        pdf.multi_cell(0, 10, line)
     # Create BytesIO object
     pdf_buffer = BytesIO()
-    # The problem is here - FPDF needs a filepath string or bytestream parameter
-    # Fix: Use the dest parameter to write to bytes
-    pdf.output(dest='S').encode('latin-1')  # Get PDF as string and encode to bytes
-    # Write the encoded bytes to our BytesIO buffer
     pdf_buffer.write(pdf.output(dest='S').encode('latin-1'))
     pdf_buffer.seek(0)
     return pdf_buffer
 def main():
     st.title("Quantitlytix AI")
     st.markdown("*Bank Statement Parser & Financial Report Generator*")
@@ -135,15 +170,27 @@ def main():
     if input_type == "Bulk Bank Statement Upload":
         uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
         if uploaded_files:
-            st.write(f"{len(uploaded_files)} PDF file(s) uploaded.")
             try:
                 model = configure_gemini(api_key)
-                for uploaded_file in uploaded_files:
                     pdf_text = read_pdf(uploaded_file)
                     if not pdf_text:
                         st.warning(f"No text found in {uploaded_file.name}.")
                         continue
-                    with st.spinner(f"Processing {uploaded_file.name}..."):
                         json_response = process_with_gemini(model, pdf_text)
                         if json_response:
                             start_idx = json_response.find('{')
@@ -159,6 +206,11 @@ def main():
                                 all_transactions.extend(transactions)
                             except json.JSONDecodeError as e:
                                 st.error(f"Error decoding JSON for {uploaded_file.name}: {e}")
             except Exception as e:
                 st.error(f"Error processing PDF documents: {str(e)}")
                 st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
@@ -223,15 +275,18 @@ def main():
                 st.warning("No transactions found within the selected date range.")
             else:
                 try:
-                    model = configure_gemini(api_key)
                     combined_json = {"transactions": filtered_transactions}
                     with st.spinner("Generating financial report..."):
-                        report_text = generate_financial_report(model, combined_json, start_date, end_date, statement_type)
                         if report_text:
                             st.success("Financial report generated!")
-                            st.text_area("Financial Report", report_text, height=300)
                             pdf_buffer = create_pdf_report(report_text)
                             st.download_button(
                                 label="Download Financial Report as PDF",

 import pypdf
 from fpdf import FPDF
 from google.api_core import exceptions
+import markdown
+from markdown.extensions.tables import TableExtension
 # Configure API key for Gemini
 api_key = os.environ.get('Gemini')
 def configure_gemini(api_key):
     """Configure the Gemini SDK with *api_key* and return a model handle.

     The returned object is a ``genai.GenerativeModel`` bound to
     'gemini-2.0-flash-thinking-exp'.
     """
     genai.configure(api_key=api_key)
     flash_model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp')
     return flash_model
def configure_gemini1(api_key):
    """Configure the Gemini SDK with *api_key* and return a model handle.

    The returned object is a ``genai.GenerativeModel`` bound to
    'gemini-2.5-pro-exp-03-25'.
    """
    genai.configure(api_key=api_key)
    pro_model = genai.GenerativeModel('gemini-2.5-pro-exp-03-25')
    return pro_model
 # Read PDF content from a file-like object (from Streamlit uploader)
 def read_pdf(file_obj):
     file_obj.seek(0)  # Ensure the file pointer is at the start
     - Type (is 'income' if 'credit amount', else 'expense')
     - Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
     - City (In address of bank statement)
+    - Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to 'Other expenses'. If 'Type' is 'income' set Destination_of_funds to 'income'.)
     - ignore opening or closing balances.
     Return ONLY valid JSON with this structure:
     }"""
     try:
         response = model.generate_content([prompt, text])
+        time.sleep(6)  # Sleep for 6 seconds to work around rate limit
         return response.text
     except exceptions.ServiceUnavailable as e:
         if e.response.status_code == 504:
 Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
 Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
 Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
+Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}.
+Format the report in Markdown for better visual structure.
+Do not name the company if name is not there and return just the report and nothing else."""
     try:
         response = model.generate_content([prompt])
         time.sleep(7)  # Sleep for 7 seconds to work around rate limit
         else:
             raise
+# Create a PDF file from the markdown report text
 def create_pdf_report(report_text):
     """Render a Markdown-formatted report as a simple PDF held in memory.

     Only line-level Markdown is interpreted: ``# ``/``## ``/``### ``
     headings, ``* ``/``- `` bullets, plain text lines and blank lines.
     Inline markup and tables are written out verbatim.

     Args:
         report_text: The report body as a string.

     Returns:
         A ``BytesIO`` positioned at offset 0 that contains the PDF bytes.
     """
     # (markdown prefix, font size, extra vertical gap after the heading)
     heading_styles = (("# ", 18, 5), ("## ", 16, 3), ("### ", 14, 3))

     pdf = FPDF()
     pdf.add_page()
     pdf.set_font("Arial", "B", 16)

     # splitlines() also copes with CRLF input, so no stray '\r' reaches the PDF.
     for raw_line in report_text.splitlines():
         # Core fonts such as Arial can only encode Latin-1; sanitise first so
         # model output with curly quotes, dashes, etc. cannot crash the export.
         line = _latin1_safe(raw_line)
         heading = next(
             (style for style in heading_styles if line.startswith(style[0])),
             None,
         )
         if heading is not None:
             prefix, size, gap = heading
             pdf.set_font("Arial", "B", size)
             pdf.cell(0, 10, line[len(prefix):], 0, 1)
             pdf.ln(gap)
         elif line.startswith(('* ', '- ')):
             pdf.set_font("Arial", "", 12)
             # '\x95' is the bullet in the single-byte encoding used by the
             # core fonts; the Unicode bullet U+2022 is not Latin-1 encodable.
             pdf.cell(10, 10, "\x95", 0, 0)
             pdf.multi_cell(0, 10, line[2:])
         elif line.strip():
             pdf.set_font("Arial", "", 12)
             pdf.multi_cell(0, 10, line)
         else:
             # Blank line: just add vertical space.
             pdf.ln(5)

     rendered = pdf.output(dest='S')
     # Classic PyFPDF returns a Latin-1 ``str`` here, while fpdf2 returns a
     # ``bytearray``; accept both so the export works with either package.
     if isinstance(rendered, str):
         pdf_bytes = rendered.encode('latin-1')
     else:
         pdf_bytes = bytes(rendered)
     return BytesIO(pdf_bytes)


 # Common typographic characters mapped to Latin-1-safe equivalents.
 _LATIN1_FALLBACKS = str.maketrans({
     "\u2018": "'",
     "\u2019": "'",
     "\u201c": '"',
     "\u201d": '"',
     "\u2013": "-",
     "\u2014": "-",
     "\u2022": "\x95",
     "\u2026": "...",
 })


 def _latin1_safe(text):
     """Return *text* with every character representable in Latin-1.

     Known typographic characters are replaced by close equivalents; anything
     else outside Latin-1 becomes '?'.
     """
     translated = text.translate(_LATIN1_FALLBACKS)
     return translated.encode('latin-1', 'replace').decode('latin-1')
 def main():
     st.title("Quantitlytix AI")
     st.markdown("*Bank Statement Parser & Financial Report Generator*")
     if input_type == "Bulk Bank Statement Upload":
         uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
         if uploaded_files:
+            total_files = len(uploaded_files)
+            st.write(f"{total_files} PDF file(s) uploaded.")
             try:
                 model = configure_gemini(api_key)
+                # Create a progress bar
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+                for index, uploaded_file in enumerate(uploaded_files):
+                    # Update progress bar and status text
+                    progress = (index) / total_files
+                    progress_bar.progress(progress)
+                    status_text.text(f"Processing file {index+1} of {total_files}: {uploaded_file.name}")
                     pdf_text = read_pdf(uploaded_file)
                     if not pdf_text:
                         st.warning(f"No text found in {uploaded_file.name}.")
                         continue
+                    with st.spinner(f"Processing {uploaded_file.name}... ({index+1}/{total_files})"):
                         json_response = process_with_gemini(model, pdf_text)
                         if json_response:
                             start_idx = json_response.find('{')
                                 all_transactions.extend(transactions)
                             except json.JSONDecodeError as e:
                                 st.error(f"Error decoding JSON for {uploaded_file.name}: {e}")
+                # Complete the progress bar
+                progress_bar.progress(1.0)
+                status_text.text(f"Completed processing {total_files} files!")
             except Exception as e:
                 st.error(f"Error processing PDF documents: {str(e)}")
                 st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
                 st.warning("No transactions found within the selected date range.")
             else:
                 try:
+                    model1 = configure_gemini1(api_key)
                     combined_json = {"transactions": filtered_transactions}
                     with st.spinner("Generating financial report..."):
+                        report_text = generate_financial_report(model1, combined_json, start_date, end_date, statement_type)
                         if report_text:
                             st.success("Financial report generated!")
+                            # Display the report as markdown
+                            st.markdown("### Financial Report Preview")
+                            st.markdown(report_text)
+                            # Create PDF from markdown
                             pdf_buffer = create_pdf_report(report_text)
                             st.download_button(
                                 label="Download Financial Report as PDF",