Documents-Manager

Sleeping

App Files Files Community

rairo committed on Apr 1, 2025

Commit

261be6e

verified ·

1 Parent(s): ea691b1

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -53

app.py CHANGED Viewed

@@ -10,20 +10,20 @@ import streamlit as st
 import google.generativeai as genai
 import pypdf
 from fpdf import FPDF
-from google.api_core import exceptions
 import markdown
-from markdown.extensions.tables import TableExtension
 # Configure API key for Gemini
-api_key = os.environ.get('Gemini')
 def configure_gemini(api_key):
     genai.configure(api_key=api_key)
-    return genai.GenerativeModel('gemini-2.0-flash-thinking-exp')
 def configure_gemini1(api_key):
     genai.configure(api_key=api_key)
-    return genai.GenerativeModel('gemini-2.5-pro-exp-03-25')
 # Read PDF content from a file-like object (from Streamlit uploader)
 def read_pdf(file_obj):
@@ -64,7 +64,7 @@ def process_with_gemini(model, text):
     }"""
     try:
         response = model.generate_content([prompt, text])
-        time.sleep(6)  # Sleep for 8 seconds to work around rate limit
         return response.text
     except exceptions.ServiceUnavailable as e:
         if e.response.status_code == 504:
@@ -77,7 +77,7 @@ def process_with_gemini(model, text):
 def generate_financial_report(model, json_data, start_date, end_date, statement_type):
     prompt = f"""Based on the following transactions JSON data:
 {json.dumps(json_data)}
-Generate a detailed {statement_type} report for the period from {start_date.strftime('%d/%m/%Y')} to {end_date.strftime('%d/%m/%Y')}. Present the report in a standard accounting format relevant to Zimbabwe, but with improved readability and visual appeal.
 Specific Formatting and Content Requirements:
@@ -103,57 +103,94 @@ Do not name the company if name is not there and return just the report and noth
         else:
             raise
-# Create a PDF file from the markdown report text
 def create_pdf_report(report_text):
     pdf = FPDF()
     pdf.add_page()
-    pdf.set_font("Arial", "B", 16)
-    # Convert markdown to HTML
-    html = markdown.markdown(report_text, extensions=[TableExtension()])
-    # Process the HTML to extract content with some basic formatting
-    # This is a simplified approach - for complete markdown to PDF, consider using other libraries
-    # Handle headers
-    lines = report_text.split('\n')
-    for line in lines:
         # Handle headers
-        if line.startswith('# '):
-            pdf.set_font("Arial", "B", 18)
-            pdf.cell(0, 10, line[2:], 0, 1)
             pdf.ln(5)
-        elif line.startswith('## '):
-            pdf.set_font("Arial", "B", 16)
-            pdf.cell(0, 10, line[3:], 0, 1)
             pdf.ln(3)
-        elif line.startswith('### '):
-            pdf.set_font("Arial", "B", 14)
-            pdf.cell(0, 10, line[4:], 0, 1)
-            pdf.ln(3)
-        # Handle bullet points
-        elif line.startswith('* ') or line.startswith('- '):
-            pdf.set_font("Arial", "", 12)
-            pdf.cell(10, 10, "•", 0, 0)
-            pdf.multi_cell(0, 10, line[2:])
-        # Handle normal text
-        elif line.strip():
-            pdf.set_font("Arial", "", 12)
-            pdf.multi_cell(0, 10, line)
-        # Handle empty lines
-        else:
             pdf.ln(5)
-    # Create BytesIO object
     pdf_buffer = BytesIO()
-    # Write the PDF to the buffer
-    pdf_buffer.write(pdf.output(dest='S').encode('latin-1'))
     pdf_buffer.seek(0)
     return pdf_buffer
 def main():
     st.title("Quantitlytix AI")
     st.markdown("*Bank Statement Parser & Financial Report Generator*")
@@ -181,7 +218,7 @@ def main():
                 for index, uploaded_file in enumerate(uploaded_files):
                     # Update progress bar and status text
-                    progress = (index) / total_files
                     progress_bar.progress(progress)
                     status_text.text(f"Processing file {index+1} of {total_files}: {uploaded_file.name}")
@@ -195,7 +232,7 @@ def main():
                         if json_response:
                             start_idx = json_response.find('{')
                             end_idx = json_response.rfind('}') + 1
-                            if start_idx == -1 or end_idx == -1:
                                 st.warning(f"Invalid JSON response for {uploaded_file.name}.")
                                 continue
                             json_str = json_response[start_idx:end_idx]
@@ -287,13 +324,17 @@ def main():
                             st.markdown(report_text)
                             # Create PDF from markdown
-                            pdf_buffer = create_pdf_report(report_text)
-                            st.download_button(
-                                label="Download Financial Report as PDF",
-                                data=pdf_buffer.getvalue(),  # Use getvalue() to get bytes from BytesIO
-                                file_name=f"{statement_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
-                                mime="application/pdf"
-                            )
                 except exceptions.ServiceUnavailable as e:
                     if e.response.status_code == 504:
                         st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
@@ -303,8 +344,8 @@ def main():
                     st.error(f"Error generating financial report: {str(e)}")
                     if "504" in str(e):
                         st.info("The Gemini API might be overloaded. Consider reducing the time period for the report.")
-                    elif len(filtered_transactions) > 500: # Example threshold, adjust as needed
-                        st.info("For very large datasets, consider generating reports for smaller time periods and combining them manually if a single comprehensive report fails.")
 if __name__ == "__main__":
     main()

 import google.generativeai as genai
 import pypdf
 from fpdf import FPDF
+from fpdf.enums import XPos, YPos
 import markdown
+from google.api_core import exceptions
 # Read the Gemini API key from the GEMINI_API_KEY environment variable
 # (os.environ.get yields None when the variable is not set).
+api_key = os.environ.get('GEMINI_API_KEY')
 def configure_gemini(api_key):
     """Register *api_key* with the Gemini SDK and return a model handle.

     The returned object is a ``genai.GenerativeModel`` bound to
     'gemini-1.5-pro-latest'.
     """
     genai.configure(api_key=api_key)
     model = genai.GenerativeModel('gemini-1.5-pro-latest')
     return model
 def configure_gemini1(api_key):
     """Register *api_key* with the Gemini SDK and return a model handle.

     The returned object is a ``genai.GenerativeModel`` bound to
     'gemini-1.5-pro-latest'.
     """
     genai.configure(api_key=api_key)
     model = genai.GenerativeModel('gemini-1.5-pro-latest')
     return model
 # Read PDF content from a file-like object (from Streamlit uploader)
 def read_pdf(file_obj):
     }"""
     try:
         response = model.generate_content([prompt, text])
+        time.sleep(6)  # Sleep for 6 seconds to work around rate limit
         return response.text
     except exceptions.ServiceUnavailable as e:
         if e.response.status_code == 504:
 def generate_financial_report(model, json_data, start_date, end_date, statement_type):
     prompt = f"""Based on the following transactions JSON data:
 {json.dumps(json_data)}
+Generate a detailed {statement_type} report for the period from {start_date.strftime('%d/%m/%Y')} to {end_date.strftime('%d/%m/%Y')}. Present the report in a standard accounting format relevant to South Africa, but with improved readability and visual appeal.
 Specific Formatting and Content Requirements:
         else:
             raise
 def _parse_report_html(html_content):
     """Flatten Markdown-generated HTML into an ordered list of render blocks.

     Each block is a tuple of one of these shapes:

     * ``('h1' | 'h2' | 'h3' | 'p' | 'li', text)`` -- a run of text
       (``h4``-``h6`` are reported as ``'h3'``);
     * ``('row', is_header, [cell_text, ...])`` -- one non-empty table row;
     * ``('table_end',)`` -- emitted when a ``</table>`` tag is seen.

     Inline markup (``<strong>``, ``<em>``, ``<a>``, ...) is dropped while its
     text is kept, character references are decoded, and runs of whitespace
     are collapsed to a single space.
     """
     # Function-scope import so the file's import block does not need to change.
     from html.parser import HTMLParser

     text_tags = {
         'h1': 'h1', 'h2': 'h2', 'h3': 'h3',
         'h4': 'h3', 'h5': 'h3', 'h6': 'h3',
         'p': 'p', 'li': 'li',
     }

     class _Collector(HTMLParser):
         def __init__(self):
             super().__init__(convert_charrefs=True)
             self.blocks = []
             self._open_tag = None      # HTML tag of the text block being collected
             self._kind = None          # block kind that tag maps to
             self._buf = []             # text fragments of the current block/cell
             self._row = None           # cells of the current <tr>, if any
             self._row_is_header = False
             self._in_cell = False

         def _take_text(self):
             text = ' '.join(''.join(self._buf).split())
             self._buf = []
             return text

         def _flush(self):
             text = self._take_text()
             if self._kind is not None and text:
                 self.blocks.append((self._kind, text))
             self._open_tag = None
             self._kind = None

         def handle_starttag(self, tag, attrs):
             if tag == 'tr':
                 self._row = []
                 self._row_is_header = False
             elif tag in ('th', 'td'):
                 self._in_cell = True
                 self._buf = []
                 if tag == 'th':
                     self._row_is_header = True
             elif tag == 'br':
                 self._buf.append(' ')
             elif tag in text_tags and not self._in_cell:
                 # A nested <li> closes the outer item's text; any other nested
                 # block tag (e.g. <p> inside <li>) just contributes its text.
                 nested_item = tag == 'li' and self._open_tag == 'li'
                 if self._open_tag is None or nested_item:
                     self._flush()
                     self._open_tag = tag
                     self._kind = text_tags[tag]

         def handle_endtag(self, tag):
             if tag in ('th', 'td'):
                 if self._in_cell:
                     cell_text = self._take_text()
                     if self._row is not None:
                         self._row.append(cell_text)
                     self._in_cell = False
             elif tag == 'tr':
                 if self._row:
                     self.blocks.append(('row', self._row_is_header, self._row))
                 self._row = None
             elif tag == 'table':
                 self.blocks.append(('table_end',))
             elif tag == self._open_tag:
                 self._flush()

         def handle_data(self, data):
             if self._in_cell or self._open_tag is not None:
                 self._buf.append(data)

     collector = _Collector()
     collector.feed(html_content)
     collector.close()
     collector._flush()  # keep text of a block that was never closed
     return collector.blocks


 def create_pdf_report(report_text):
     """Render a Markdown report to PDF and return it as an in-memory buffer.

     The Markdown is converted to HTML (with the ``tables`` extension), the
     HTML is parsed into headings, paragraphs, list items and table rows, and
     each of those is drawn with FPDF.

     Args:
         report_text: The report as Markdown text.

     Returns:
         A ``BytesIO`` positioned at offset 0 containing the PDF bytes.

     Notes:
         NotoSans-Regular.ttf / NotoSans-Bold.ttf are loaded when available.
         Otherwise the built-in "Arial" font is used and characters outside
         Latin-1 are replaced with '?' so rendering does not fail.
     """
     html_content = markdown.markdown(report_text, extensions=['tables'])

     pdf = FPDF()
     pdf.add_page()

     # Add Noto Sans fonts (must be available in the same directory).
     try:
         pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
         pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
         pdf.set_font("NotoSans", size=12)
         unicode_font = True
     except Exception:
         # Best-effort fallback to a built-in font; unlike a bare ``except``
         # this does not swallow KeyboardInterrupt/SystemExit.
         pdf.set_font("Arial", size=12)
         unicode_font = False

     body_size = 12
     body_color = (0, 0, 0)
     styles = {
         'h1': {'size': 24, 'color': (25, 25, 112), 'gap': 5},  # MidnightBlue
         'h2': {'size': 20, 'color': (25, 25, 112), 'gap': 3},
         'h3': {'size': 16, 'color': (25, 25, 112), 'gap': 2},
         'table': {'cell_width': 40, 'header_color': (245, 245, 245)},
         'td': {'border': 1, 'align': 'L'},
     }

     def printable(text):
         # Built-in fonts can only encode Latin-1; substitute anything else.
         if unicode_font:
             return text
         return text.encode('latin-1', 'replace').decode('latin-1')

     bullet = '•' if unicode_font else '-'
     usable_width = pdf.w - pdf.l_margin - pdf.r_margin

     def use_body_font(bold=False):
         # Size and colour are set explicitly: set_font(style=...) alone keeps
         # whatever size/colour the previous heading left behind.
         pdf.set_font(style="B" if bold else "", size=body_size)
         pdf.set_text_color(*body_color)

     for block in _parse_report_html(html_content):
         kind = block[0]

         if kind in ('h1', 'h2', 'h3'):
             heading = styles[kind]
             pdf.set_font(style="B", size=heading['size'])
             pdf.set_text_color(*heading['color'])
             # multi_cell so that long headings wrap instead of running off the page.
             pdf.multi_cell(0, 10, printable(block[1]),
                            new_x=XPos.LMARGIN, new_y=YPos.NEXT)
             pdf.ln(heading['gap'])

         elif kind == 'row':
             _, is_header, cells = block
             use_body_font(bold=is_header)
             pdf.set_fill_color(*styles['table']['header_color'])
             # Shrink columns when the fixed width would overflow the page.
             width = min(styles['table']['cell_width'], usable_width / len(cells))
             for cell_text in cells:
                 pdf.cell(width, 10, printable(cell_text),
                          border=styles['td']['border'],
                          align=styles['td']['align'],
                          fill=is_header)
             pdf.ln()

         elif kind == 'table_end':
             pdf.ln(10)

         elif kind == 'li':
             use_body_font()
             pdf.cell(10, 10, bullet, border=0)
             # Return to the left margin so the next item starts on a fresh line.
             pdf.multi_cell(0, 10, printable(block[1]),
                            new_x=XPos.LMARGIN, new_y=YPos.NEXT)

         elif kind == 'p':
             use_body_font()
             pdf.multi_cell(0, 10, printable(block[1]),
                            new_x=XPos.LMARGIN, new_y=YPos.NEXT)
             pdf.ln(5)

     # fpdf2's output() already returns the binary PDF (a bytearray); it must
     # not be text-encoded, which would either fail or corrupt the document.
     return BytesIO(bytes(pdf.output()))
 def main():
     st.title("Quantitlytix AI")
     st.markdown("*Bank Statement Parser & Financial Report Generator*")
                 for index, uploaded_file in enumerate(uploaded_files):
                     # Update progress bar and status text
+                    progress = (index + 1) / total_files
                     progress_bar.progress(progress)
                     status_text.text(f"Processing file {index+1} of {total_files}: {uploaded_file.name}")
                         if json_response:
                             start_idx = json_response.find('{')
                             end_idx = json_response.rfind('}') + 1
+                            if start_idx == -1 or end_idx == 0:
                                 st.warning(f"Invalid JSON response for {uploaded_file.name}.")
                                 continue
                             json_str = json_response[start_idx:end_idx]
                             st.markdown(report_text)
                             # Create PDF from markdown
+                            try:
+                                pdf_buffer = create_pdf_report(report_text)
+                                st.download_button(
+                                    label="Download Financial Report as PDF",
+                                    data=pdf_buffer.getvalue(),
+                                    file_name=f"{statement_type.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d')}.pdf",
+                                    mime="application/pdf"
+                                )
+                            except Exception as e:
+                                st.error(f"Error generating PDF: {str(e)}")
+                                st.info("For better PDF generation, please ensure NotoSans fonts are installed in the same directory.")
                 except exceptions.ServiceUnavailable as e:
                     if e.response.status_code == 504:
                         st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
                     st.error(f"Error generating financial report: {str(e)}")
                     if "504" in str(e):
                         st.info("The Gemini API might be overloaded. Consider reducing the time period for the report.")
+                    elif len(filtered_transactions) > 500:
+                        st.info("For large datasets, consider generating reports for smaller time periods.")
 if __name__ == "__main__":
     main()