Documents-Manager

Sleeping

App Files Files Community

rairo committed on Apr 1, 2025

Commit

0f9a906

verified ·

1 Parent(s): 7a28b23

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -68

app.py CHANGED Viewed

@@ -1,101 +1,152 @@
 import re
 import pandas as pd
 import streamlit as st
 import google.generativeai as genai
 import pypdf
-import json
-from datetime import datetime
-import os
 api_key = os.environ['Gemini']
def configure_gemini(api_key):
    """Configure the Gemini SDK and return the model used by this app.

    Args:
        api_key: API key handed to ``genai.configure``.

    Returns:
        A ``genai.GenerativeModel`` for ``gemini-2.0-flash-exp``.
    """
    genai.configure(api_key=api_key)
    return genai.GenerativeModel('gemini-2.0-flash-exp')
def read_pdf(file_path):
    """Return the extractable text of the PDF stored at *file_path*.

    Args:
        file_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        The text of every page joined with newlines. Pages for which
        ``extract_text()`` returns nothing are skipped.
    """
    with open(file_path, 'rb') as file:
        pdf_reader = pypdf.PdfReader(file)
        # Extract while the file is still open: the reader pulls page data
        # from the underlying handle.
        page_texts = (page.extract_text() for page in pdf_reader.pages)
        return "\n".join(text for text in page_texts if text)
def process_with_gemini(model, text):
    """Ask the model to extract transactions from bank-statement text.

    Args:
        model: Object exposing ``generate_content(parts)`` whose result has a
            ``text`` attribute (the value returned by ``configure_gemini``).
        text: Plain text of the bank statement.

    Returns:
        The raw text of the model's reply. The prompt requests JSON only, but
        the reply is returned unparsed; callers are responsible for cleaning
        and decoding it.
    """
    prompt = """Analyze this bank statement and extract transactions in JSON format with these fields:
    - Date (format DD/MM/YYYY)
    - Description
    - Amount (just the integer value)
    - Type (is 'income' if 'credit amount', else 'expense')
    - Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
    - City (In address of bank statement)
    Return ONLY valid JSON with this structure:
    {
        "transactions": [
            {
                "Date": "string",
                "Description": "string",
                "Customer_name": "string",
                "City": "string",
                "Amount": number,
                "Type": "string"
            }
        ]
    }"""
    # The instructions and the statement are sent as two separate parts.
    response = model.generate_content([prompt, text])
    return response.text
def main():
    """Render the Streamlit page: upload one PDF statement and list its transactions.

    The uploaded PDF is written to ``temp.pdf``, its text is sent to Gemini,
    the JSON reply is decoded into a DataFrame, and the formatted table is
    displayed. Any exception raised along the way is reported with
    ``st.error`` instead of propagating.
    """
    st.title("Bank Statement Parser with Gemini AI")
    uploaded_file = st.file_uploader("Upload a PDF bank statement", type="pdf")
    if not uploaded_file:
        return

    try:
        model = configure_gemini(api_key)

        # NOTE(review): "temp.pdf" is a fixed path in the working directory and
        # is never removed; concurrent sessions would overwrite each other.
        with open("temp.pdf", "wb") as f:
            f.write(uploaded_file.getbuffer())
        pdf_text = read_pdf("temp.pdf")

        with st.spinner("Analyzing statement with Gemini AI..."):
            json_response = process_with_gemini(model, pdf_text)

            # Keep only the outermost {...} span, then drop any Markdown code
            # fences that survived inside it.
            json_str = json_response[json_response.find('{'):json_response.rfind('}')+1]
            json_str = json_str.replace('```json', '').replace('```', '')

            data = json.loads(json_str)
            transactions = data.get('transactions', [])

            df = pd.DataFrame(transactions)

            if not df.empty:
                # Negative amounts are shown in parentheses, accounting style.
                df['Amount'] = df['Amount'].apply(lambda x: f"R {x:,.2f}" if x >= 0 else f"R ({abs(x):,.2f})")
                df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y').dt.strftime('%d/%m/%Y')

        st.success("Analysis complete!")
        st.write("### Extracted Transactions")
        st.dataframe(df)
    except Exception as e:
        st.error(f"Error processing document: {str(e)}")
        st.error("Please ensure you're using a valid bank statement PDF and API key")


if __name__ == "__main__":
    main()

 import re
+import json
+import os
+from datetime import datetime
+from io import BytesIO
 import pandas as pd
 import streamlit as st
 import google.generativeai as genai
 import pypdf
+from fpdf import FPDF
+# Configure API key for Gemini
 api_key = os.environ['Gemini']
 def configure_gemini(api_key):
     genai.configure(api_key=api_key)
+    return genai.GenerativeModel('gemini-2.0-flash-thinking-exp')
def read_pdf(file_obj):
    """Return the extractable text of a PDF supplied as a file-like object.

    Args:
        file_obj: Object passed straight to ``pypdf.PdfReader``; ``main``
            passes the upload returned by the Streamlit file uploader.

    Returns:
        The text of every page joined with newlines. Pages for which
        ``extract_text()`` returns nothing are skipped.
    """
    pdf_reader = pypdf.PdfReader(file_obj)
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    return "\n".join(text for text in page_texts if text)
def process_with_gemini(model, text):
    """Ask the model to extract transactions from bank-statement text.

    Args:
        model: Object exposing ``generate_content(parts)`` whose result has a
            ``text`` attribute (the value returned by ``configure_gemini``).
        text: Plain text of the bank statement.

    Returns:
        The raw text of the model's reply. The prompt requests JSON only, but
        the reply is returned unparsed; callers are responsible for cleaning
        and decoding it.
    """
    prompt = """Analyze this bank statement and extract transactions in JSON format with these fields:
- Date (format DD/MM/YYYY)
- Description
- Amount (just the integer value)
- Type (is 'income' if 'credit amount', else 'expense')
- Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
- City (In address of bank statement)
Return ONLY valid JSON with this structure:
{
    "transactions": [
        {
            "Date": "string",
            "Description": "string",
            "Customer_name": "string",
            "City": "string",
            "Amount": number,
            "Type": "string"
        }
    ]
}"""
    # The instructions and the statement are sent as two separate parts.
    response = model.generate_content([prompt, text])
    return response.text
def generate_financial_report(model, json_data, report_types):
    """Ask the model for a plain-text financial report over the transactions.

    Args:
        model: Object exposing ``generate_content(parts)`` whose result has a
            ``text`` attribute (the value returned by ``configure_gemini``).
        json_data: JSON-serialisable transactions payload; it is embedded in
            the prompt via ``json.dumps``.
        report_types: Iterable of section names, joined with ", " in the
            prompt.

    Returns:
        The raw text of the model's reply.
    """
    prompt = f"""Based on the following transactions JSON data:
{json.dumps(json_data)}
Generate a detailed financial report that includes the following sections: {', '.join(report_types)}.
Ensure that each section is clearly formatted with headings and includes insights and summaries.
Return the complete report as plain text."""
    response = model.generate_content([prompt])
    return response.text
def create_pdf_report(report_text):
    """Render *report_text* into a PDF and return it as an in-memory buffer.

    Args:
        report_text: Report as plain text; each newline-separated line is
            written as its own wrapped paragraph.

    Returns:
        A ``BytesIO`` holding the PDF, rewound to position 0 so it can be
        handed directly to a download widget.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for line in report_text.split('\n'):
        # The built-in (non-embedded) fonts only cover Latin-1. Model-written
        # text routinely contains bullets, dashes or curly quotes outside that
        # range, which would abort rendering, so they are replaced with "?".
        printable = line.encode('latin-1', 'replace').decode('latin-1')
        # Start every paragraph at the left margin. Newer fpdf2 releases leave
        # the cursor at the right edge after multi_cell, where a following
        # full-width cell has no room left. The reset is harmless elsewhere.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)

    pdf_buffer = BytesIO()
    pdf.output(pdf_buffer)
    pdf_buffer.seek(0)
    return pdf_buffer
def _format_rand(amount):
    """Format a numeric amount as Rand text: negatives in parentheses, NaN as "N/A"."""
    if pd.isna(amount):
        # Values that could not be coerced to a number would otherwise fall
        # through the ``>= 0`` test and be shown as "R (nan)".
        return "N/A"
    if amount >= 0:
        return f"R {amount:,.2f}"
    return f"R ({abs(amount):,.2f})"


def _parse_transactions(json_response):
    """Decode a raw model reply and return its ``transactions`` list ([] if absent)."""
    # Keep only the outermost {...} span, then drop any Markdown code fences
    # that survived inside it.
    json_str = json_response[json_response.find('{'):json_response.rfind('}')+1]
    json_str = json_str.replace('```json', '').replace('```', '')
    return json.loads(json_str).get('transactions', [])


def main():
    """Render the Streamlit page: parse uploaded statements and build a report.

    Each uploaded PDF is read, sent to Gemini for transaction extraction, and
    the results from all files are combined into one table. The user can then
    pick report sections, generate a text report from the combined
    transactions, and download it as a PDF. Any exception raised along the way
    is reported with ``st.error`` instead of propagating.
    """
    st.title("Quantitlytix AI ")
    st.markdown("*Bank Statement Parser & Financial Report Generator*")

    uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
    if not uploaded_files:
        return

    try:
        model = configure_gemini(api_key)

        # NOTE(review): Streamlit re-executes this function on widget
        # interaction, so every upload is presumably re-sent to Gemini each
        # time; consider caching the extraction per file.
        all_transactions = []
        for uploaded_file in uploaded_files:
            pdf_text = read_pdf(uploaded_file)
            with st.spinner(f"Processing {uploaded_file.name}..."):
                json_response = process_with_gemini(model, pdf_text)
                all_transactions.extend(_parse_transactions(json_response))

        # The report prompt receives the same {"transactions": [...]} shape
        # that the extraction prompt asks the model to produce.
        combined_json = {"transactions": all_transactions}

        if all_transactions:
            df = pd.DataFrame(all_transactions)
            # The model may omit a field, so only format columns that exist.
            if 'Amount' in df.columns:
                df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce').apply(_format_rand)
            if 'Date' in df.columns:
                df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y', errors='coerce').dt.strftime('%d/%m/%Y')
            st.success("Extraction complete!")
            st.write("### Extracted Transactions")
            st.dataframe(df)
        else:
            st.warning("No transactions were extracted from the uploaded files.")

        st.write("### Generate Financial Report")
        report_options = st.multiselect(
            "Select financial report sections to include",
            ["By Date", "Income Statement", "Cashflow Statement", "Balance Sheet"],
            default=["By Date", "Income Statement", "Cashflow Statement", "Balance Sheet"]
        )

        if st.button("Generate Financial Report"):
            if not report_options:
                # An empty selection would ask the model for a report with no
                # sections at all.
                st.warning("Select at least one report section.")
                return

            with st.spinner("Generating financial report..."):
                report_text = generate_financial_report(model, combined_json, report_options)
            st.success("Financial report generated!")
            st.text_area("Financial Report", report_text, height=300)

            pdf_buffer = create_pdf_report(report_text)
            st.download_button(
                label="Download Financial Report as PDF",
                data=pdf_buffer,
                file_name=f"financial_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
                mime="application/pdf"
            )
    except Exception as e:
        st.error(f"Error processing documents: {str(e)}")
        st.error("Please ensure you're using valid bank statement PDFs and a valid API key")


if __name__ == "__main__":
    main()