rairo committed on
Commit
261be6e
·
verified ·
1 Parent(s): ea691b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -53
app.py CHANGED
@@ -10,20 +10,20 @@ import streamlit as st
10
  import google.generativeai as genai
11
  import pypdf
12
  from fpdf import FPDF
13
- from google.api_core import exceptions
14
  import markdown
15
- from markdown.extensions.tables import TableExtension
16
 
17
  # Configure API key for Gemini
18
- api_key = os.environ.get('Gemini')
19
 
20
  def configure_gemini(api_key):
21
  genai.configure(api_key=api_key)
22
- return genai.GenerativeModel('gemini-2.0-flash-thinking-exp')
23
 
24
  def configure_gemini1(api_key):
25
  genai.configure(api_key=api_key)
26
- return genai.GenerativeModel('gemini-2.5-pro-exp-03-25')
27
 
28
  # Read PDF content from a file-like object (from Streamlit uploader)
29
  def read_pdf(file_obj):
@@ -64,7 +64,7 @@ def process_with_gemini(model, text):
64
  }"""
65
  try:
66
  response = model.generate_content([prompt, text])
67
- time.sleep(6) # Sleep for 8 seconds to work around rate limit
68
  return response.text
69
  except exceptions.ServiceUnavailable as e:
70
  if e.response.status_code == 504:
@@ -77,7 +77,7 @@ def process_with_gemini(model, text):
77
  def generate_financial_report(model, json_data, start_date, end_date, statement_type):
78
  prompt = f"""Based on the following transactions JSON data:
79
  {json.dumps(json_data)}
80
- Generate a detailed {statement_type} report for the period from {start_date.strftime('%d/%m/%Y')} to {end_date.strftime('%d/%m/%Y')}. Present the report in a standard accounting format relevant to Zimbabwe, but with improved readability and visual appeal.
81
 
82
  Specific Formatting and Content Requirements:
83
 
@@ -103,57 +103,94 @@ Do not name the company if name is not there and return just the report and noth
103
  else:
104
  raise
105
 
106
- # Create a PDF file from the markdown report text
107
  def create_pdf_report(report_text):
 
 
 
 
 
108
  pdf = FPDF()
109
  pdf.add_page()
110
- pdf.set_font("Arial", "B", 16)
111
 
112
- # Convert markdown to HTML
113
- html = markdown.markdown(report_text, extensions=[TableExtension()])
 
 
 
 
 
 
114
 
115
- # Process the HTML to extract content with some basic formatting
116
- # This is a simplified approach - for complete markdown to PDF, consider using other libraries
 
 
 
 
 
 
 
117
 
118
- # Handle headers
119
- lines = report_text.split('\n')
120
- for line in lines:
 
 
121
  # Handle headers
122
- if line.startswith('# '):
123
- pdf.set_font("Arial", "B", 18)
124
- pdf.cell(0, 10, line[2:], 0, 1)
 
125
  pdf.ln(5)
126
- elif line.startswith('## '):
127
- pdf.set_font("Arial", "B", 16)
128
- pdf.cell(0, 10, line[3:], 0, 1)
 
129
  pdf.ln(3)
130
- elif line.startswith('### '):
131
- pdf.set_font("Arial", "B", 14)
132
- pdf.cell(0, 10, line[4:], 0, 1)
133
- pdf.ln(3)
134
- # Handle bullet points
135
- elif line.startswith('* ') or line.startswith('- '):
136
- pdf.set_font("Arial", "", 12)
137
- pdf.cell(10, 10, "•", 0, 0)
138
- pdf.multi_cell(0, 10, line[2:])
139
- # Handle normal text
140
- elif line.strip():
141
- pdf.set_font("Arial", "", 12)
142
- pdf.multi_cell(0, 10, line)
143
- # Handle empty lines
144
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  pdf.ln(5)
146
 
147
- # Create BytesIO object
148
  pdf_buffer = BytesIO()
149
-
150
- # Write the PDF to the buffer
151
- pdf_buffer.write(pdf.output(dest='S').encode('latin-1'))
152
  pdf_buffer.seek(0)
153
 
154
  return pdf_buffer
155
 
156
-
157
  def main():
158
  st.title("Quantitlytix AI")
159
  st.markdown("*Bank Statement Parser & Financial Report Generator*")
@@ -181,7 +218,7 @@ def main():
181
 
182
  for index, uploaded_file in enumerate(uploaded_files):
183
  # Update progress bar and status text
184
- progress = (index) / total_files
185
  progress_bar.progress(progress)
186
  status_text.text(f"Processing file {index+1} of {total_files}: {uploaded_file.name}")
187
 
@@ -195,7 +232,7 @@ def main():
195
  if json_response:
196
  start_idx = json_response.find('{')
197
  end_idx = json_response.rfind('}') + 1
198
- if start_idx == -1 or end_idx == -1:
199
  st.warning(f"Invalid JSON response for {uploaded_file.name}.")
200
  continue
201
  json_str = json_response[start_idx:end_idx]
@@ -287,13 +324,17 @@ def main():
287
  st.markdown(report_text)
288
 
289
  # Create PDF from markdown
290
- pdf_buffer = create_pdf_report(report_text)
291
- st.download_button(
292
- label="Download Financial Report as PDF",
293
- data=pdf_buffer.getvalue(), # Use getvalue() to get bytes from BytesIO
294
- file_name=f"{statement_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
295
- mime="application/pdf"
296
- )
 
 
 
 
297
  except exceptions.ServiceUnavailable as e:
298
  if e.response.status_code == 504:
299
  st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
@@ -303,8 +344,8 @@ def main():
303
  st.error(f"Error generating financial report: {str(e)}")
304
  if "504" in str(e):
305
  st.info("The Gemini API might be overloaded. Consider reducing the time period for the report.")
306
- elif len(filtered_transactions) > 500: # Example threshold, adjust as needed
307
- st.info("For very large datasets, consider generating reports for smaller time periods and combining them manually if a single comprehensive report fails.")
308
 
309
  if __name__ == "__main__":
310
  main()
 
10
  import google.generativeai as genai
11
  import pypdf
12
  from fpdf import FPDF
13
+ from fpdf.enums import XPos, YPos
14
  import markdown
15
+ from google.api_core import exceptions
16
 
17
  # Configure API key for Gemini
18
+ api_key = os.environ.get('GEMINI_API_KEY')
19
 
20
  def configure_gemini(api_key):
21
  genai.configure(api_key=api_key)
22
+ return genai.GenerativeModel('gemini-1.5-pro-latest')
23
 
24
  def configure_gemini1(api_key):
25
  genai.configure(api_key=api_key)
26
+ return genai.GenerativeModel('gemini-1.5-pro-latest')
27
 
28
  # Read PDF content from a file-like object (from Streamlit uploader)
29
  def read_pdf(file_obj):
 
64
  }"""
65
  try:
66
  response = model.generate_content([prompt, text])
67
+ time.sleep(6) # Sleep for 6 seconds to work around rate limit
68
  return response.text
69
  except exceptions.ServiceUnavailable as e:
70
  if e.response.status_code == 504:
 
77
  def generate_financial_report(model, json_data, start_date, end_date, statement_type):
78
  prompt = f"""Based on the following transactions JSON data:
79
  {json.dumps(json_data)}
80
+ Generate a detailed {statement_type} report for the period from {start_date.strftime('%d/%m/%Y')} to {end_date.strftime('%d/%m/%Y')}. Present the report in a standard accounting format relevant to South Africa, but with improved readability and visual appeal.
81
 
82
  Specific Formatting and Content Requirements:
83
 
 
103
  else:
104
  raise
105
 
 
106
  def create_pdf_report(report_text):
107
+ """Create PDF from markdown text with proper Unicode support"""
108
+ # Convert markdown to HTML
109
+ html_content = markdown.markdown(report_text, extensions=['tables'])
110
+
111
+ # Create PDF with better UTF-8 support
112
  pdf = FPDF()
113
  pdf.add_page()
 
114
 
115
+ # Add Noto Sans fonts (must be available in the same directory)
116
+ try:
117
+ pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
118
+ pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
119
+ pdf.set_font("NotoSans", size=12)
120
+ except:
121
+ # Fallback to built-in fonts if Noto Sans not available
122
+ pdf.set_font("Arial", size=12)
123
 
124
+ # Basic styling
125
+ styles = {
126
+ 'h1': {'size': 24, 'color': (25, 25, 112)}, # MidnightBlue
127
+ 'h2': {'size': 20, 'color': (25, 25, 112)},
128
+ 'h3': {'size': 16, 'color': (25, 25, 112)},
129
+ 'table': {'cell_width': 40, 'header_color': (245, 245, 245)},
130
+ 'th': {'border': 1, 'align': 'L', 'fill': True},
131
+ 'td': {'border': 1, 'align': 'L'}
132
+ }
133
 
134
+ # Parse HTML content
135
+ in_table = False
136
+ for line in html_content.split('\n'):
137
+ line = line.strip()
138
+
139
  # Handle headers
140
+ if line.startswith('<h1>'):
141
+ pdf.set_font(style="B", size=styles['h1']['size'])
142
+ pdf.set_text_color(*styles['h1']['color'])
143
+ pdf.cell(0, 10, line[4:-5], new_x=XPos.LMARGIN, new_y=YPos.NEXT)
144
  pdf.ln(5)
145
+ elif line.startswith('<h2>'):
146
+ pdf.set_font(style="B", size=styles['h2']['size'])
147
+ pdf.set_text_color(*styles['h2']['color'])
148
+ pdf.cell(0, 10, line[4:-5], new_x=XPos.LMARGIN, new_y=YPos.NEXT)
149
  pdf.ln(3)
150
+ elif line.startswith('<h3>'):
151
+ pdf.set_font(style="B", size=styles['h3']['size'])
152
+ pdf.set_text_color(*styles['h3']['color'])
153
+ pdf.cell(0, 10, line[4:-5], new_x=XPos.LMARGIN, new_y=YPos.NEXT)
154
+ pdf.ln(2)
155
+
156
+ # Handle tables
157
+ elif line.startswith('<table>'):
158
+ in_table = True
159
+ col_count = line.count('<th>') # Simple column count
160
+ elif line.startswith('</table>'):
161
+ in_table = False
162
+ pdf.ln(10)
163
+ elif in_table:
164
+ if line.startswith('<tr>'):
165
+ pdf.set_font(style="B" if '<th>' in line else "")
166
+ cells = line.replace('<tr>','').replace('</tr>','').split('</td>')[:-1]
167
+ for cell in cells:
168
+ content = cell.replace('<td>','').replace('<th>','').strip()
169
+ pdf.cell(styles['table']['cell_width'], 10, content,
170
+ border=styles['td']['border'], align=styles['td']['align'])
171
+ pdf.ln()
172
+
173
+ # Handle list items
174
+ elif line.startswith('<li>'):
175
+ pdf.set_font(style="")
176
+ pdf.cell(10, 10, '•', border=0)
177
+ pdf.multi_cell(0, 10, line[4:-5].strip())
178
+
179
+ # Handle regular text
180
+ elif line.startswith('<p>'):
181
+ pdf.set_font(style="")
182
+ pdf.set_text_color(0, 0, 0)
183
+ pdf.multi_cell(0, 10, line[3:-4].strip())
184
  pdf.ln(5)
185
 
186
+ # Create BytesIO buffer with UTF-8 encoding
187
  pdf_buffer = BytesIO()
188
+ pdf_output = pdf.output(dest='S').encode('utf-8', errors='replace')
189
+ pdf_buffer.write(pdf_output)
 
190
  pdf_buffer.seek(0)
191
 
192
  return pdf_buffer
193
 
 
194
  def main():
195
  st.title("Quantitlytix AI")
196
  st.markdown("*Bank Statement Parser & Financial Report Generator*")
 
218
 
219
  for index, uploaded_file in enumerate(uploaded_files):
220
  # Update progress bar and status text
221
+ progress = (index + 1) / total_files
222
  progress_bar.progress(progress)
223
  status_text.text(f"Processing file {index+1} of {total_files}: {uploaded_file.name}")
224
 
 
232
  if json_response:
233
  start_idx = json_response.find('{')
234
  end_idx = json_response.rfind('}') + 1
235
+ if start_idx == -1 or end_idx == 0:
236
  st.warning(f"Invalid JSON response for {uploaded_file.name}.")
237
  continue
238
  json_str = json_response[start_idx:end_idx]
 
324
  st.markdown(report_text)
325
 
326
  # Create PDF from markdown
327
+ try:
328
+ pdf_buffer = create_pdf_report(report_text)
329
+ st.download_button(
330
+ label="Download Financial Report as PDF",
331
+ data=pdf_buffer.getvalue(),
332
+ file_name=f"{statement_type.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d')}.pdf",
333
+ mime="application/pdf"
334
+ )
335
+ except Exception as e:
336
+ st.error(f"Error generating PDF: {str(e)}")
337
+ st.info("For better PDF generation, please ensure NotoSans fonts are installed in the same directory.")
338
  except exceptions.ServiceUnavailable as e:
339
  if e.response.status_code == 504:
340
  st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
 
344
  st.error(f"Error generating financial report: {str(e)}")
345
  if "504" in str(e):
346
  st.info("The Gemini API might be overloaded. Consider reducing the time period for the report.")
347
+ elif len(filtered_transactions) > 500:
348
+ st.info("For large datasets, consider generating reports for smaller time periods.")
349
 
350
  if __name__ == "__main__":
351
  main()