rairo committed on
Commit
0f9a906
·
verified ·
1 Parent(s): 7a28b23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -68
app.py CHANGED
@@ -1,101 +1,152 @@
1
  import re
 
 
 
 
 
2
  import pandas as pd
3
  import streamlit as st
4
  import google.generativeai as genai
5
  import pypdf
6
- import json
7
- from datetime import datetime
8
- import os
9
-
10
 
 
11
  api_key = os.environ['Gemini']
12
- # Configure Gemini
13
  def configure_gemini(api_key):
14
  genai.configure(api_key=api_key)
15
- return genai.GenerativeModel('gemini-2.0-flash-exp')
16
 
17
- # Read PDF content
18
- def read_pdf(file_path):
19
  text_content = []
20
- with open(file_path, 'rb') as file:
21
- pdf_reader = pypdf.PdfReader(file)
22
- for page in pdf_reader.pages:
23
- text = page.extract_text()
24
- if text:
25
- text_content.append(text)
26
  return "\n".join(text_content)
27
 
28
- # Process text with Gemini
29
  def process_with_gemini(model, text):
30
  prompt = """Analyze this bank statement and extract transactions in JSON format with these fields:
31
- - Date (format DD/MM/YYYY)
32
- - Description
33
- - Amount (just the integer value)
34
- - Type (is 'income' if 'credit amount', else 'expense')
35
- - Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
36
- - City (In address of bank statement)
37
-
38
- Return ONLY valid JSON with this structure:
39
- {
40
- "transactions": [
41
- {
42
- "Date": "string",
43
- "Description": "string",
44
- "Customer_name": "string",
45
- "City": "string",
46
- "Amount": number,
47
- "Type": "string"
48
- }
49
- ]
50
- }"""
51
-
52
  response = model.generate_content([prompt, text])
53
  return response.text
54
 
55
- # Main Streamlit app
56
- def main():
57
- st.title("Bank Statement Parser with Gemini AI")
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
 
 
 
59
 
60
- uploaded_file = st.file_uploader("Upload a PDF bank statement", type="pdf")
 
 
 
 
 
 
 
 
 
61
 
62
- if uploaded_file:
63
  try:
64
- # Configure Gemini
65
  model = configure_gemini(api_key)
66
 
67
- # Save and read PDF
68
- with open("temp.pdf", "wb") as f:
69
- f.write(uploaded_file.getbuffer())
 
 
 
 
 
 
 
 
 
70
 
71
- pdf_text = read_pdf("temp.pdf")
 
72
 
73
- # Process with Gemini
74
- with st.spinner("Analyzing statement with Gemini AI..."):
75
- json_response = process_with_gemini(model, pdf_text)
76
-
77
- # Clean JSON response
78
- json_str = json_response[json_response.find('{'):json_response.rfind('}')+1]
79
- json_str = json_str.replace('```json', '').replace('```', '')
80
-
81
- data = json.loads(json_str)
82
- transactions = data.get('transactions', [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- # Create DataFrame
85
- df = pd.DataFrame(transactions)
86
 
87
- # Format amounts
88
- if not df.empty:
89
- df['Amount'] = df['Amount'].apply(lambda x: f"R {x:,.2f}" if x >= 0 else f"R ({abs(x):,.2f})")
90
- df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y').dt.strftime('%d/%m/%Y')
91
-
92
- st.success("Analysis complete!")
93
- st.write("### Extracted Transactions")
94
- st.dataframe(df)
95
 
96
  except Exception as e:
97
- st.error(f"Error processing document: {str(e)}")
98
- st.error("Please ensure you're using a valid bank statement PDF and API key")
99
 
100
  if __name__ == "__main__":
101
  main()
 
1
  import re
2
+ import json
3
+ import os
4
+ from datetime import datetime
5
+ from io import BytesIO
6
+
7
  import pandas as pd
8
  import streamlit as st
9
  import google.generativeai as genai
10
  import pypdf
11
+ from fpdf import FPDF
 
 
 
12
 
13
# The Gemini API key is read once, at import time, from the "Gemini"
# environment variable. A missing variable raises KeyError here, before any
# Streamlit UI is rendered.
api_key = os.environ['Gemini']


def configure_gemini(api_key, model_name='gemini-2.0-flash-thinking-exp'):
    """Configure the Gemini client and return a generative model handle.

    Args:
        api_key: API key passed to ``genai.configure``.
        model_name: Identifier of the model to instantiate. Defaults to the
            model this app has always used, so existing one-argument callers
            behave exactly as before.

    Returns:
        The ``genai.GenerativeModel`` instance for ``model_name``.
    """
    genai.configure(api_key=api_key)
    return genai.GenerativeModel(model_name)
19
 
20
# Read PDF content from a file-like object (from Streamlit uploader)
def read_pdf(file_obj):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file_obj: Object handed straight to ``pypdf.PdfReader``.

    Returns:
        A single string with one entry per page; pages whose extracted text
        is empty (or ``None``) are left out.
    """
    reader = pypdf.PdfReader(file_obj)
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(chunk for chunk in page_texts if chunk)
29
 
30
# Process PDF text with Gemini to extract transactions as JSON
def process_with_gemini(model, text):
    """Ask the model to extract bank-statement transactions from ``text``.

    Args:
        model: Object exposing ``generate_content(parts)`` whose result has a
            ``.text`` attribute.
        text: Plain text of the statement.

    Returns:
        The model's raw reply as a string. It is requested to be JSON with a
        top-level ``"transactions"`` list but is not validated here. When
        ``text`` is empty or whitespace-only, a JSON string holding an empty
        ``"transactions"`` list is returned and the model is not called.
    """
    # A statement with no extractable text (e.g. a scanned image) gives the
    # model nothing to work from; skip the call rather than risk invented rows.
    if not text or not text.strip():
        return json.dumps({"transactions": []})

    prompt = """Analyze this bank statement and extract transactions in JSON format with these fields:
- Date (format DD/MM/YYYY)
- Description
- Amount (just the integer value)
- Type (is 'income' if 'credit amount', else 'expense')
- Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
- City (In address of bank statement)

Return ONLY valid JSON with this structure:
{
"transactions": [
{
"Date": "string",
"Description": "string",
"Customer_name": "string",
"City": "string",
"Amount": number,
"Type": "string"
}
]
}"""

    response = model.generate_content([prompt, text])
    return response.text
55
 
56
# Generate financial report from aggregated JSON transactions and chosen sections
def generate_financial_report(model, json_data, report_types):
    """Ask the model for a plain-text financial report over ``json_data``.

    Args:
        model: Object exposing ``generate_content(parts)`` whose result has a
            ``.text`` attribute.
        json_data: JSON-serialisable transactions payload embedded verbatim
            in the prompt.
        report_types: Iterable of section names (strings) to request.

    Returns:
        The model's reply text.

    Raises:
        ValueError: If ``report_types`` is empty. The prompt would otherwise
            ask for "the following sections: ." and spend an API call on it.
    """
    sections = list(report_types)
    if not sections:
        raise ValueError("At least one report section must be selected")

    prompt = f"""Based on the following transactions JSON data:
{json.dumps(json_data)}

Generate a detailed financial report that includes the following sections: {', '.join(sections)}.
Ensure that each section is clearly formatted with headings and includes insights and summaries.
Return the complete report as plain text."""
    response = model.generate_content([prompt])
    return response.text
66
+
67
# Create a PDF file from the report text
def create_pdf_report(report_text):
    """Render ``report_text`` line by line into a PDF held in memory.

    Args:
        report_text: Report body; split on ``'\\n'`` and written one
            ``multi_cell`` per line in 12pt Arial.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the PDF bytes.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for line in report_text.split('\n'):
        # The built-in core fonts only cover Latin-1. Model output routinely
        # contains bullets, dashes and smart quotes outside that range, which
        # would raise during rendering, so degrade them to '?'.
        printable = line.encode('latin-1', 'replace').decode('latin-1')
        # fpdf2 leaves the cursor at the right edge after multi_cell, so a
        # following full-width cell has no room; restart at the left margin.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)

    # dest='S' returns the document instead of writing to a path: legacy
    # PyFPDF yields a Latin-1 str, fpdf2 a bytearray. Normalise both to bytes
    # so the function does not depend on which fork provides `fpdf`.
    raw = pdf.output(dest='S')
    if isinstance(raw, str):
        raw = raw.encode('latin-1')
    pdf_buffer = BytesIO(bytes(raw))
    return pdf_buffer
81
+
82
def _parse_transactions(json_response):
    """Return the ``transactions`` list embedded in a raw model reply."""
    start = json_response.find('{')
    end = json_response.rfind('}')
    if start == -1 or end < start:
        raise ValueError("No JSON object found in the model response")
    json_str = json_response[start:end + 1]
    json_str = json_str.replace('```json', '').replace('```', '')
    transactions = json.loads(json_str).get('transactions', [])
    if not isinstance(transactions, list):
        raise ValueError("'transactions' in the model response is not a list")
    return transactions


def _format_amount(value):
    """Format a numeric amount as Rand, with negatives in parentheses."""
    if pd.isna(value):
        # Unparseable amounts were coerced to NaN; show a blank, not "R (nan)".
        return ""
    if value >= 0:
        return f"R {value:,.2f}"
    return f"R ({abs(value):,.2f})"


def main():
    """Streamlit page: upload statements, show transactions, build a report.

    Extraction results and the generated report are kept in
    ``st.session_state``. Streamlit re-executes this function on every widget
    interaction, so without that each click would re-send every PDF to Gemini
    and the report would vanish on the rerun that follows a download click.
    """
    st.title("Quantitlytix AI ")
    st.markdown("*Bank Statement Parser & Financial Report Generator*")
    # Allow multiple PDF uploads
    uploaded_files = st.file_uploader(
        "Upload PDF bank statements", type="pdf", accept_multiple_files=True
    )

    if not uploaded_files:
        return

    try:
        # Initialize the Gemini model
        model = configure_gemini(api_key)

        # Per-file extraction cache, keyed by (name, size) of the upload.
        if "extracted_transactions" not in st.session_state:
            st.session_state["extracted_transactions"] = {}
        extracted = st.session_state["extracted_transactions"]

        all_transactions = []
        processed_keys = []
        for uploaded_file in uploaded_files:
            file_key = (uploaded_file.name, uploaded_file.size)
            if file_key not in extracted:
                try:
                    with st.spinner(f"Processing {uploaded_file.name}..."):
                        pdf_text = read_pdf(uploaded_file)
                        json_response = process_with_gemini(model, pdf_text)
                    extracted[file_key] = _parse_transactions(json_response)
                except Exception as exc:
                    # One unreadable statement must not discard the others.
                    st.warning(f"Skipped {uploaded_file.name}: {exc}")
                    continue
            processed_keys.append(file_key)
            all_transactions.extend(extracted[file_key])

        # Combine transactions into one JSON object
        combined_json = {"transactions": all_transactions}

        if not all_transactions:
            st.warning("No transactions were extracted from the uploaded files.")
            return

        # Display extracted transactions in a DataFrame
        df = pd.DataFrame(all_transactions)
        if 'Amount' in df.columns:
            df['Amount'] = pd.to_numeric(df['Amount'], errors='coerce').apply(_format_amount)
        if 'Date' in df.columns:
            df['Date'] = pd.to_datetime(
                df['Date'], format='%d/%m/%Y', errors='coerce'
            ).dt.strftime('%d/%m/%Y')
        st.success("Extraction complete!")
        st.write("### Extracted Transactions")
        st.dataframe(df)

        # Allow user to select financial report sections
        st.write("### Generate Financial Report")
        report_options = st.multiselect(
            "Select financial report sections to include",
            ["By Date", "Income Statement", "Cashflow Statement", "Balance Sheet"],
            default=["By Date", "Income Statement", "Cashflow Statement", "Balance Sheet"],
        )

        # Ties a stored report to the exact set of statements it was built from.
        report_key = tuple(processed_keys)

        if st.button("Generate Financial Report"):
            if not report_options:
                st.warning("Select at least one report section.")
            else:
                with st.spinner("Generating financial report..."):
                    report_text = generate_financial_report(model, combined_json, report_options)
                st.session_state["financial_report"] = (report_key, report_text)
                st.success("Financial report generated!")

        saved_key, report_text = st.session_state.get("financial_report", (None, None))
        if report_text and saved_key == report_key:
            st.text_area("Financial Report", report_text, height=300)

            # Create PDF from the report text
            pdf_buffer = create_pdf_report(report_text)

            # Provide a download button for the PDF report
            st.download_button(
                label="Download Financial Report as PDF",
                data=pdf_buffer,
                file_name=f"financial_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
                mime="application/pdf",
            )

    except Exception as e:
        st.error(f"Error processing documents: {str(e)}")
        st.error("Please ensure you're using valid bank statement PDFs and a valid API key")


if __name__ == "__main__":
    main()