rairo committed on
Commit
931320e
·
verified ·
1 Parent(s): 8be6607

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -23
app.py CHANGED
@@ -11,14 +11,20 @@ import google.generativeai as genai
11
  import pypdf
12
  from fpdf import FPDF
13
  from google.api_core import exceptions
 
 
14
 
15
  # Configure API key for Gemini
16
  api_key = os.environ.get('Gemini')
17
 
18
  def configure_gemini(api_key):
19
  genai.configure(api_key=api_key)
20
- return genai.GenerativeModel('gemini-2.5-pro-exp-03-25')
21
 
 
 
 
 
22
  # Read PDF content from a file-like object (from Streamlit uploader)
23
  def read_pdf(file_obj):
24
  file_obj.seek(0) # Ensure the file pointer is at the start
@@ -39,7 +45,7 @@ def process_with_gemini(model, text):
39
  - Type (is 'income' if 'credit amount', else 'expense')
40
  - Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
41
  - City (In address of bank statement)
42
- - Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to Other expenses’. If 'Type' is 'income' set Destination_of_funds to income’.)
43
  - ignore opening or closing balances.
44
 
45
  Return ONLY valid JSON with this structure:
@@ -58,7 +64,7 @@ def process_with_gemini(model, text):
58
  }"""
59
  try:
60
  response = model.generate_content([prompt, text])
61
- time.sleep(8) # Sleep for 8 seconds to work around rate limit
62
  return response.text
63
  except exceptions.ServiceUnavailable as e:
64
  if e.response.status_code == 504:
@@ -81,7 +87,10 @@ Consistent Formatting: Maintain consistent formatting for monetary values (e.g.,
81
  Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
82
  Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
83
  Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
84
- Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}. Return just the report and nothing else."""
 
 
 
85
  try:
86
  response = model.generate_content([prompt])
87
  time.sleep(7) # Sleep for 7 seconds to work around rate limit
@@ -94,31 +103,57 @@ Concise Summary: Provide a concluding summary paragraph that encapsulates the ov
94
  else:
95
  raise
96
 
97
- # Create a PDF file from the report text
98
  def create_pdf_report(report_text):
99
  pdf = FPDF()
100
  pdf.add_page()
101
- pdf.set_font("Arial", size=12)
102
-
103
- # Split report text into lines and add them to the PDF
104
- for line in report_text.split('\n'):
105
- pdf.multi_cell(0, 10, line)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  # Create BytesIO object
108
  pdf_buffer = BytesIO()
109
 
110
- # The problem is here - FPDF needs a filepath string or bytestream parameter
111
- # Fix: Use the dest parameter to write to bytes
112
- pdf.output(dest='S').encode('latin-1') # Get PDF as string and encode to bytes
113
-
114
- # Write the encoded bytes to our BytesIO buffer
115
  pdf_buffer.write(pdf.output(dest='S').encode('latin-1'))
116
  pdf_buffer.seek(0)
117
 
118
  return pdf_buffer
119
 
120
 
121
-
122
  def main():
123
  st.title("Quantitlytix AI")
124
  st.markdown("*Bank Statement Parser & Financial Report Generator*")
@@ -135,15 +170,27 @@ def main():
135
  if input_type == "Bulk Bank Statement Upload":
136
  uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
137
  if uploaded_files:
138
- st.write(f"{len(uploaded_files)} PDF file(s) uploaded.")
 
139
  try:
140
  model = configure_gemini(api_key)
141
- for uploaded_file in uploaded_files:
 
 
 
 
 
 
 
 
 
 
142
  pdf_text = read_pdf(uploaded_file)
143
  if not pdf_text:
144
  st.warning(f"No text found in {uploaded_file.name}.")
145
  continue
146
- with st.spinner(f"Processing {uploaded_file.name}..."):
 
147
  json_response = process_with_gemini(model, pdf_text)
148
  if json_response:
149
  start_idx = json_response.find('{')
@@ -159,6 +206,11 @@ def main():
159
  all_transactions.extend(transactions)
160
  except json.JSONDecodeError as e:
161
  st.error(f"Error decoding JSON for {uploaded_file.name}: {e}")
 
 
 
 
 
162
  except Exception as e:
163
  st.error(f"Error processing PDF documents: {str(e)}")
164
  st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
@@ -223,15 +275,18 @@ def main():
223
  st.warning("No transactions found within the selected date range.")
224
  else:
225
  try:
226
- model = configure_gemini(api_key)
227
  combined_json = {"transactions": filtered_transactions}
228
  with st.spinner("Generating financial report..."):
229
- report_text = generate_financial_report(model, combined_json, start_date, end_date, statement_type)
230
  if report_text:
231
  st.success("Financial report generated!")
232
- st.text_area("Financial Report", report_text, height=300)
233
-
 
 
234
 
 
235
  pdf_buffer = create_pdf_report(report_text)
236
  st.download_button(
237
  label="Download Financial Report as PDF",
 
11
  import pypdf
12
  from fpdf import FPDF
13
  from google.api_core import exceptions
14
+ import markdown
15
+ from markdown.extensions.tables import TableExtension
16
 
17
  # Configure API key for Gemini
18
  api_key = os.environ.get('Gemini')
19
 
20
  def configure_gemini(api_key):
21
  genai.configure(api_key=api_key)
22
+ return genai.GenerativeModel('gemini-2.0-flash-thinking-exp')
23
 
24
+ def configure_gemini1(api_key):
25
+ genai.configure(api_key=api_key)
26
+ return genai.GenerativeModel('gemini-2.5-pro-exp-03-25')
27
+
28
  # Read PDF content from a file-like object (from Streamlit uploader)
29
  def read_pdf(file_obj):
30
  file_obj.seek(0) # Ensure the file pointer is at the start
 
45
  - Type (is 'income' if 'credit amount', else 'expense')
46
  - Customer Name (Only If Type is 'income' and if no name is extracted write 'general income' and if type is not 'income' write 'expense')
47
  - City (In address of bank statement)
48
+ - Category_of_expense (a string, if transaction 'Type' is 'expense' categorize it based on description into: Water and electricity, Salaries and wages, Repairs & Maintenance, Motor vehicle expenses, Projects Expenses, Hardware expenses, Refunds, Accounting fees, Loan interest, Bank charges, Insurance, SARS PAYE UIF, Advertising & Marketing, Logistics and distribution, Fuel, Website hosting fees, Rentals, Subscriptions, Computer internet and Telephone, Staff training, Travel and accommodation, Depreciation, Other expenses. If no category matches, default to 'Other expenses'. If 'Type' is 'income' set Destination_of_funds to 'income'.)
49
  - ignore opening or closing balances.
50
 
51
  Return ONLY valid JSON with this structure:
 
64
  }"""
65
  try:
66
  response = model.generate_content([prompt, text])
67
+ time.sleep(6) # Sleep for 6 seconds to work around rate limit
68
  return response.text
69
  except exceptions.ServiceUnavailable as e:
70
  if e.response.status_code == 504:
 
87
  Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
88
  Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
89
  Key Insights: Include a brief section (e.g., "Key Highlights" or "Summary") that identifies significant trends, notable figures, or key performance indicators derived from the data within the statement. This should be written in plain, understandable English, potentially highlighting aspects particularly relevant to the economic context of Zimbabwe if discernible from the data.
90
+ Concise Summary: Provide a concluding summary paragraph that encapsulates the overall financial picture presented in the {statement_type}.
91
+
92
+ Format the report in Markdown for better visual structure.
93
+ Do not name the company if name is not there and return just the report and nothing else."""
94
  try:
95
  response = model.generate_content([prompt])
96
  time.sleep(7) # Sleep for 7 seconds to work around rate limit
 
103
  else:
104
  raise
105
 
106
+ # Create a PDF file from the markdown report text
107
  def create_pdf_report(report_text):
108
  pdf = FPDF()
109
  pdf.add_page()
110
+ pdf.set_font("Arial", "B", 16)
111
+
112
+ # Convert markdown to HTML
113
+ html = markdown.markdown(report_text, extensions=[TableExtension()])
114
+
115
+ # Process the HTML to extract content with some basic formatting
116
+ # This is a simplified approach - for complete markdown to PDF, consider using other libraries
117
+
118
+ # Handle headers
119
+ lines = report_text.split('\n')
120
+ for line in lines:
121
+ # Handle headers
122
+ if line.startswith('# '):
123
+ pdf.set_font("Arial", "B", 18)
124
+ pdf.cell(0, 10, line[2:], 0, 1)
125
+ pdf.ln(5)
126
+ elif line.startswith('## '):
127
+ pdf.set_font("Arial", "B", 16)
128
+ pdf.cell(0, 10, line[3:], 0, 1)
129
+ pdf.ln(3)
130
+ elif line.startswith('### '):
131
+ pdf.set_font("Arial", "B", 14)
132
+ pdf.cell(0, 10, line[4:], 0, 1)
133
+ pdf.ln(3)
134
+ # Handle bullet points
135
+ elif line.startswith('* ') or line.startswith('- '):
136
+ pdf.set_font("Arial", "", 12)
137
+ pdf.cell(10, 10, "•", 0, 0)
138
+ pdf.multi_cell(0, 10, line[2:])
139
+ # Handle normal text
140
+ elif line.strip():
141
+ pdf.set_font("Arial", "", 12)
142
+ pdf.multi_cell(0, 10, line)
143
+ # Handle empty lines
144
+ else:
145
+ pdf.ln(5)
146
 
147
  # Create BytesIO object
148
  pdf_buffer = BytesIO()
149
 
150
+ # Write the PDF to the buffer
 
 
 
 
151
  pdf_buffer.write(pdf.output(dest='S').encode('latin-1'))
152
  pdf_buffer.seek(0)
153
 
154
  return pdf_buffer
155
 
156
 
 
157
  def main():
158
  st.title("Quantitlytix AI")
159
  st.markdown("*Bank Statement Parser & Financial Report Generator*")
 
170
  if input_type == "Bulk Bank Statement Upload":
171
  uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
172
  if uploaded_files:
173
+ total_files = len(uploaded_files)
174
+ st.write(f"{total_files} PDF file(s) uploaded.")
175
  try:
176
  model = configure_gemini(api_key)
177
+
178
+ # Create a progress bar
179
+ progress_bar = st.progress(0)
180
+ status_text = st.empty()
181
+
182
+ for index, uploaded_file in enumerate(uploaded_files):
183
+ # Update progress bar and status text
184
+ progress = (index) / total_files
185
+ progress_bar.progress(progress)
186
+ status_text.text(f"Processing file {index+1} of {total_files}: {uploaded_file.name}")
187
+
188
  pdf_text = read_pdf(uploaded_file)
189
  if not pdf_text:
190
  st.warning(f"No text found in {uploaded_file.name}.")
191
  continue
192
+
193
+ with st.spinner(f"Processing {uploaded_file.name}... ({index+1}/{total_files})"):
194
  json_response = process_with_gemini(model, pdf_text)
195
  if json_response:
196
  start_idx = json_response.find('{')
 
206
  all_transactions.extend(transactions)
207
  except json.JSONDecodeError as e:
208
  st.error(f"Error decoding JSON for {uploaded_file.name}: {e}")
209
+
210
+ # Complete the progress bar
211
+ progress_bar.progress(1.0)
212
+ status_text.text(f"Completed processing {total_files} files!")
213
+
214
  except Exception as e:
215
  st.error(f"Error processing PDF documents: {str(e)}")
216
  st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
 
275
  st.warning("No transactions found within the selected date range.")
276
  else:
277
  try:
278
+ model1 = configure_gemini1(api_key)
279
  combined_json = {"transactions": filtered_transactions}
280
  with st.spinner("Generating financial report..."):
281
+ report_text = generate_financial_report(model1, combined_json, start_date, end_date, statement_type)
282
  if report_text:
283
  st.success("Financial report generated!")
284
+
285
+ # Display the report as markdown
286
+ st.markdown("### Financial Report Preview")
287
+ st.markdown(report_text)
288
 
289
+ # Create PDF from markdown
290
  pdf_buffer = create_pdf_report(report_text)
291
  st.download_button(
292
  label="Download Financial Report as PDF",