Documents-Manager

Sleeping

App Files Files Community

rairo committed on Jul 20, 2025

Commit

91c8199

verified ·

1 Parent(s): 4ed1df1

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -38

app.py CHANGED Viewed

@@ -19,24 +19,31 @@ from html_to_markdown import convert_to_markdown
 # Gemini API key, read once at import time from the 'Gemini' environment
 # variable; os.getenv yields None when the variable is not set.
 api_key = os.getenv('Gemini')
 def configure_gemini(api_key):
     """Register ``api_key`` with the Gemini client and build a model handle.

     Args:
         api_key: Key forwarded to ``genai.configure``.

     Returns:
         A ``genai.GenerativeModel`` for 'gemini-2.0-flash-thinking-exp'.
     """
     genai.configure(api_key=api_key)
     model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp')
     return model
 def configure_gemini1(api_key):
     """Register ``api_key`` with the Gemini client and build a model handle.

     Args:
         api_key: Key forwarded to ``genai.configure``.

     Returns:
         A ``genai.GenerativeModel`` for 'gemini-2.5-flash'.
     """
     genai.configure(api_key=api_key)
     model = genai.GenerativeModel('gemini-2.5-flash')
     return model
 # Read PDF content page by page from a file-like object
 def read_pdf_pages(file_obj):
     """Open a PDF from a file-like object and count its pages.

     Args:
         file_obj: Seekable file-like object holding the PDF data.

     Returns:
         Tuple ``(pdf_reader, total_pages)``: the ``pypdf.PdfReader`` built
         from ``file_obj`` and the length of its ``pages`` sequence.
     """
     # Rewind so parsing starts at the beginning even if the object was read before.
     file_obj.seek(0)
     reader = pypdf.PdfReader(file_obj)
     return reader, len(reader.pages)
 # Extract text from a specific page
 def extract_page_text(pdf_reader, page_num):
     """Return the extracted text of one page, or "" when there is none.

     Args:
         pdf_reader: Object exposing a ``pages`` sequence whose items provide
             ``extract_text()``.
         page_num: Zero-based page index.

     Returns:
         The page text; an empty string when ``page_num`` is at or beyond the
         page count, or when extraction yields a falsy value.
     """
     pages = pdf_reader.pages
     if page_num >= len(pages):
         return ""
     # A falsy extraction result (None or "") is normalised to "".
     return pages[page_num].extract_text() or ""
@@ -67,19 +74,26 @@ def process_with_gemini(model, text):
         ]
     }"""
     try:
         response = model.generate_content([prompt, text])
         time.sleep(6)  # Sleep for 6 seconds to work around rate limit
         return response.text
     except exceptions.ServiceUnavailable as e:
         if e.response.status_code == 504:
             st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
             return None
         else:
             raise
 # Process PDF page by page to handle large files
 def process_pdf_pages(model, pdf_reader, total_pages, progress_callback=None):
     all_transactions = []
     # Process pages individually or in small chunks
     for page_num in range(total_pages):
@@ -91,9 +105,11 @@ def process_pdf_pages(model, pdf_reader, total_pages, progress_callback=None):
         page_text = extract_page_text(pdf_reader, page_num)
         if not page_text.strip():
             continue  # Skip empty pages
         # Process the page with Gemini
         json_response = process_with_gemini(model, page_text)
         if json_response:
@@ -102,6 +118,7 @@ def process_pdf_pages(model, pdf_reader, total_pages, progress_callback=None):
             end_idx = json_response.rfind('}') + 1
             if start_idx == -1 or end_idx == 0:
                 continue  # Skip invalid JSON
             json_str = json_response[start_idx:end_idx]
@@ -110,16 +127,23 @@ def process_pdf_pages(model, pdf_reader, total_pages, progress_callback=None):
             try:
                 data = json.loads(json_str)
                 transactions = data.get('transactions', [])
-                # Add transactions to the overall list
-                all_transactions.extend(transactions)
             except json.JSONDecodeError:
                 continue  # Skip invalid JSON
     return all_transactions
 # Generate financial report from aggregated JSON transactions and chosen parameters
 def generate_financial_report(model, json_data, start_date, end_date, statement_type):
     prompt = f"""Based on the following transactions JSON data:
 {json.dumps(json_data)}
 Generate a detailed {statement_type} report for the period from {start_date.strftime('%d/%m/%Y')} to {end_date.strftime('%d/%m/%Y')}. Present the report in a standard accounting format relevant to South Africa, but with improved readability and visual appeal.
@@ -137,8 +161,10 @@ Concise Summary: Provide a concluding summary paragraph that encapsulates the ov
 Format the report in Markdown for better visual structure.
 Do not name the company if name is not there and return just the report and nothing else."""
     try:
         response = model.generate_content([prompt])
         time.sleep(7)  # Sleep for 7 seconds to work around rate limit
         return response.text
     except exceptions.ServiceUnavailable as e:
         if e.response.status_code == 504:
@@ -146,7 +172,11 @@ Do not name the company if name is not there and return just the report and noth
             st.session_state['last_error'] = "504" # Store the error in session state
             return None
         else:
             raise
 # Install required libraries:
@@ -312,34 +342,38 @@ def create_pdf_report(report_text):
         Exception: If PDF generation fails.
     """
     if not report_text:
         raise ValueError("Input report_text cannot be empty.")
     try:
         # 1. Clean Markdown
         cleaned_md = re.sub(r'^```markdown\s*', '', report_text, flags=re.MULTILINE)
         cleaned_md = re.sub(r'\s*```$', '', cleaned_md, flags=re.MULTILINE)
         cleaned_md = cleaned_md.strip()
         # 2. Convert Markdown to HTML
         html_content = markdown.markdown(cleaned_md, extensions=['tables', 'fenced_code', 'sane_lists'])
         if not html_content:
             raise ValueError("Markdown parsing resulted in empty HTML.")
         # 3. Parse HTML with BeautifulSoup
         soup = BeautifulSoup(html_content, 'html.parser')
         # 4. Generate PDF using FPDF
         pdf = PDF_Generator()
         pdf.add_page()
         pdf.set_font('helvetica', '', 10) # Default font
         # Iterate through top-level tags in the HTML body
         for element in soup.find_all(recursive=False):
-             # Track basic nested styles like bold/italic
-             # This is very basic and might not handle complex nesting well
              styles = set()
              def traverse(tag, current_styles):
-                 # Check for styling tags
                  local_style_added = None
                  if tag.name in ['b', 'strong']:
                      current_styles.add('b')
@@ -348,32 +382,29 @@ def create_pdf_report(report_text):
                      current_styles.add('i')
                      local_style_added = 'i'
-                 # If it's a text node, process it with current styles (handled within add_html_element)
-                 # If it's a known block element, process it
                  if tag.name in ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol', 'table', 'br', 'hr']:
-                      pdf.add_html_element(tag, current_styles.copy()) # Pass styles down
                  else:
-                     # Recursively process children for other tags (like span, div, or styling tags)
                      if hasattr(tag, 'contents'):
                          for child in tag.contents:
-                             if isinstance(child, str): # Handle text nodes directly if needed (often handled by parent)
-                                 pass # Usually text is grabbed by parent's get_text()
-                             elif hasattr(child, 'name'): # Check if it's a tag
-                                 traverse(child, current_styles.copy()) # Pass styles down
-                 # Remove local style after processing the tag and its children
                  if local_style_added and local_style_added in current_styles:
                      current_styles.remove(local_style_added)
              traverse(element, styles)
         # 5. Output PDF to BytesIO buffer
         pdf_output = pdf.output(dest='S') # Output as bytes string
         if isinstance(pdf_output, str):
              # If output is string (older fpdf versions?), encode it
              pdf_output = pdf_output.encode('latin-1')
         return BytesIO(pdf_output)
     except ImportError:
@@ -382,17 +413,13 @@ def create_pdf_report(report_text):
     except Exception as e:
         st.error(f"Failed to generate PDF locally using FPDF: {type(e).__name__}: {e}")
         st.exception(e) # Show traceback in streamlit logs
-        # Log intermediate steps if possible
-        # print("--- Cleaned Markdown ---")
-        # print(cleaned_md)
-        # print("--- Generated HTML ---")
-        # print(html_content)
         raise Exception(f"Local FPDF PDF generation failed: {e}") from e
 def main():
     st.title("Quantitlytix AI")
     st.markdown("*Bank Statement Parser & Financial Report Generator*")
     # Initialize session state for last error
     if 'last_error' not in st.session_state:
@@ -406,12 +433,14 @@ def main():
     # Sidebar: Select input type: Bulk PDF or CSV Upload
     input_type = st.sidebar.radio("Select Input Type", ("Bulk Bank Statement Upload", "CSV Upload"))
     all_transactions = []
     if input_type == "Bulk Bank Statement Upload":
         uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
         if uploaded_files:
             total_files = len(uploaded_files)
             st.write(f"{total_files} PDF file(s) uploaded.")
             try:
@@ -423,9 +452,10 @@ def main():
                 file_progress = 0
                 for file_index, uploaded_file in enumerate(uploaded_files):
                     # Update file progress
                     file_progress = (file_index) / total_files
-                    progress_bar.progress(file_progress)
                     status_text.text(f"Processing file {file_index+1} of {total_files}: {uploaded_file.name}")
                     # Get PDF reader and page count
@@ -433,7 +463,7 @@ def main():
                         pdf_reader, total_pages = read_pdf_pages(uploaded_file)
                         if total_pages == 0:
-                            st.warning(f"No pages found in {uploaded_file.name}.")
                             continue
                         with st.spinner(f"Processing {uploaded_file.name} ({total_pages} pages)..."):
@@ -445,6 +475,7 @@ def main():
                                 status_text.text(f"File {file_index+1}/{total_files}: {message}")
                             # Process the PDF page by page
                             file_transactions = process_pdf_pages(
                                 model,
                                 pdf_reader,
@@ -454,21 +485,26 @@ def main():
                             # Add transactions from this file to overall list
                             all_transactions.extend(file_transactions)
                     except Exception as e:
-                        st.error(f"Error processing {uploaded_file.name}: {str(e)}")
                         continue
                 # Complete the progress bar
                 progress_bar.progress(1.0)
                 status_text.text(f"Completed processing {total_files} files!")
             except Exception as e:
-                st.error(f"Error processing PDF documents: {str(e)}")
                 st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
     elif input_type == "CSV Upload":
         uploaded_csv = st.file_uploader("Upload CSV of transactions", type="csv")
         if uploaded_csv:
             try:
                 df = pd.read_csv(uploaded_csv)
                 # Drop 'Unnamed:' columns from the uploaded CSV
@@ -478,19 +514,21 @@ def main():
                 # Convert dataframe to list of transaction dictionaries
                 transactions = df.to_dict(orient='records')
                 all_transactions.extend(transactions)
             except Exception as e:
-                st.error(f"Error processing CSV file: {str(e)}")
     # If transactions are loaded, show DataFrame and update date ranges
     if all_transactions:
         df = pd.DataFrame(all_transactions)
         # Drop 'Unnamed:' columns from the final DataFrame
         if not df.empty:
             df = df.loc[:, ~df.columns.str.startswith('Unnamed:')]
             try:
                 # Process dates and extract min/max dates for date range inputs
                 df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y', errors='coerce')
                 # Get min and max dates from transactions
@@ -501,12 +539,15 @@ def main():
                     # Update session state with actual transaction date range
                     st.session_state['min_date'] = min_date
                     st.session_state['max_date'] = max_date
                 # Format dates for display
                 df['Date'] = df['Date'].dt.strftime('%d/%m/%Y')
             except Exception as e:
-                st.warning("Some data could not be formatted correctly.")
                 st.exception(e)
             st.success("Transactions loaded successfully!")
@@ -515,7 +556,7 @@ def main():
         else:
             st.warning("No valid transactions could be extracted from the documents.")
     else:
-        st.info("No transactions loaded yet.")
     # Financial report generation parameters
     st.write("### Generate Financial Report")
@@ -530,10 +571,12 @@ def main():
     statement_type = st.selectbox("Select Financial Statement", ["Income Statement", "Cashflow Statement", "Balance Sheet"])
     if st.button("Generate Financial Report"):
         if not all_transactions:
-            st.error("No transactions available to generate report.")
         else:
             # Filter transactions by date
             filtered_transactions = []
             for transaction in all_transactions:
                 try:
@@ -541,19 +584,20 @@ def main():
                     if start_date <= transaction_date <= end_date:
                         filtered_transactions.append(transaction)
                 except (ValueError, TypeError):
-                    st.warning(f"Could not parse date for transaction: {transaction}")
                     continue
             if not filtered_transactions:
-                st.warning("No transactions found within the selected date range.")
             else:
                 try:
                     model1 = configure_gemini1(api_key)
                     combined_json = {"transactions": filtered_transactions}
                     with st.spinner("Generating financial report..."):
                         report_text = generate_financial_report(model1, combined_json, start_date, end_date, statement_type)
                         if report_text:
-                            st.success("Financial report generated!")
                             # Display the report as markdown
                             st.markdown("### Financial Report Preview")
@@ -561,6 +605,7 @@ def main():
                             # Create PDF from markdown
                             try:
                                 pdf_buffer = create_pdf_report(report_text)
                                 st.download_button(
                                     label="Download Financial Report as PDF",
@@ -568,20 +613,24 @@ def main():
                                     file_name=f"{statement_type.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d')}.pdf",
                                     mime="application/pdf"
                                 )
                             except Exception as e:
-                                st.error(f"Error generating PDF: {str(e)}")
                                 st.info("For better PDF generation, please ensure NotoSans fonts are installed in the same directory.")
                 except exceptions.ServiceUnavailable as e:
                     if e.response.status_code == 504:
                         st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
                     else:
-                        st.error(f"Error generating financial report: {str(e)}")
                 except Exception as e:
-                    st.error(f"Error generating financial report: {str(e)}")
                     if "504" in str(e):
                         st.info("The Gemini API might be overloaded. Consider generating reports for smaller time periods.")
                     elif len(filtered_transactions) > 500:
                         st.info("For large datasets, consider generating reports for smaller time periods.")
 # Script entry point: call main() only when this file is executed directly,
 # not when it is imported as a module.
 if __name__ == "__main__":
     main()

 # Gemini API key, read once at import time from the 'Gemini' environment
 # variable; os.getenv yields None when the variable is not set.
 api_key = os.getenv('Gemini')
 # Configure the Gemini client with the given API key and return a
 # GenerativeModel for 'gemini-2.0-flash-thinking-exp'. Emits a Streamlit
 # info message before configuring.
 def configure_gemini(api_key):
+    st.info("Configuring Gemini API for transaction extraction...") # Log
     genai.configure(api_key=api_key)
     return genai.GenerativeModel('gemini-2.0-flash-thinking-exp')
 # Configure the Gemini client with the given API key and return a
 # GenerativeModel for 'gemini-2.5-flash'. Emits a Streamlit info message
 # before configuring.
 def configure_gemini1(api_key):
+    st.info("Configuring Gemini API for report generation...") # Log
     genai.configure(api_key=api_key)
     return genai.GenerativeModel('gemini-2.5-flash')
 # Read PDF content page by page from a file-like object
 def read_pdf_pages(file_obj):
     """Open a PDF from a file-like object and count its pages.

     Progress is reported through Streamlit info messages.

     Args:
         file_obj: Seekable file-like object holding the PDF data. A ``name``
             attribute is used for logging when present but is not required.

     Returns:
         Tuple ``(pdf_reader, total_pages)``: the ``pypdf.PdfReader`` built
         from ``file_obj`` and the length of its ``pages`` sequence.
     """
     # Generic file-like objects (e.g. io.BytesIO) have no ``name``; logging
     # must not turn that into an AttributeError.
     source_name = getattr(file_obj, 'name', 'uploaded file')
     st.info(f"Reading PDF pages from {source_name}...") # Log
     file_obj.seek(0)  # Ensure the file pointer is at the start
     pdf_reader = pypdf.PdfReader(file_obj)
     total_pages = len(pdf_reader.pages)
     st.info(f"Found {total_pages} pages in PDF.") # Log
     return pdf_reader, total_pages
 # Extract text from a specific page
 def extract_page_text(pdf_reader, page_num):
     """Return the extracted text of one page, or "" when there is none.

     A Streamlit warning is shown when the page yields no non-whitespace text.

     Args:
         pdf_reader: Object exposing a ``pages`` sequence whose items provide
             ``extract_text()``.
         page_num: Zero-based page index.

     Returns:
         The page text; an empty string when ``page_num`` is at or beyond the
         page count, or when extraction yields a falsy value.
     """
     if page_num >= len(pdf_reader.pages):
         return ""
     # Normalise a falsy result (None or "") to "" before calling str methods,
     # so a None from extract_text() cannot raise AttributeError on .strip().
     text = pdf_reader.pages[page_num].extract_text() or ""
     if not text.strip():
         st.warning(f"Page {page_num + 1} appears to be empty or contains no extractable text.") # Log empty pages
     return text
         ]
     }"""
     try:
+        # st.debug("Sending text chunk to Gemini for transaction extraction...") # Too verbose
         response = model.generate_content([prompt, text])
         time.sleep(6)  # Sleep for 6 seconds to work around rate limit
+        # st.debug("Received response from Gemini for transaction extraction.") # Too verbose
         return response.text
     except exceptions.ServiceUnavailable as e:
         if e.response.status_code == 504:
             st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
             return None
         else:
+            st.error(f"Gemini API error during transaction extraction: {e}") # Log other API errors
             raise
+    except Exception as e:
+        st.error(f"An unexpected error occurred during Gemini transaction extraction: {e}") # Catch other potential errors
+        return None
 # Process PDF page by page to handle large files
 def process_pdf_pages(model, pdf_reader, total_pages, progress_callback=None):
     all_transactions = []
+    st.info(f"Starting page-by-page PDF processing for {total_pages} pages...") # Log
     # Process pages individually or in small chunks
     for page_num in range(total_pages):
         page_text = extract_page_text(pdf_reader, page_num)
         if not page_text.strip():
+            st.warning(f"Skipping empty or unreadable page {page_num + 1}.") # Log skipped pages
             continue  # Skip empty pages
         # Process the page with Gemini
+        st.info(f"Sending page {page_num + 1} text to Gemini for transaction extraction...") # Log
         json_response = process_with_gemini(model, page_text)
         if json_response:
             end_idx = json_response.rfind('}') + 1
             if start_idx == -1 or end_idx == 0:
+                st.warning(f"No valid JSON found in Gemini response for page {page_num + 1}. Raw response: {json_response[:200]}...") # Log invalid JSON structure
                 continue  # Skip invalid JSON
             json_str = json_response[start_idx:end_idx]
             try:
                 data = json.loads(json_str)
                 transactions = data.get('transactions', [])
+                if transactions:
+                    st.info(f"Successfully extracted {len(transactions)} transactions from page {page_num + 1}.") # Log successful extraction
+                    all_transactions.extend(transactions)
+                else:
+                    st.info(f"No transactions found on page {page_num + 1} based on Gemini's analysis.") # Log no transactions found on page
             except json.JSONDecodeError:
+                st.error(f"Failed to decode JSON from Gemini response for page {page_num + 1}. Check response format. Raw JSON snippet: {json_str[:200]}...") # Log JSON decode errors
                 continue  # Skip invalid JSON
+        else:
+            st.warning(f"Gemini returned no response for page {page_num + 1}. This page's transactions might be missing.") # Log no response from Gemini
+    st.info(f"Finished processing all pages. Total transactions extracted: {len(all_transactions)}.") # Final log for extraction
     return all_transactions
 # Generate financial report from aggregated JSON transactions and chosen parameters
 def generate_financial_report(model, json_data, start_date, end_date, statement_type):
+    st.info(f"Preparing prompt for Gemini to generate {statement_type} report from {start_date} to {end_date}...") # Log
     prompt = f"""Based on the following transactions JSON data:
 {json.dumps(json_data)}
 Generate a detailed {statement_type} report for the period from {start_date.strftime('%d/%m/%Y')} to {end_date.strftime('%d/%m/%Y')}. Present the report in a standard accounting format relevant to South Africa, but with improved readability and visual appeal.
 Format the report in Markdown for better visual structure.
 Do not name the company if name is not there and return just the report and nothing else."""
     try:
+        st.info("Sending request to Gemini for financial report generation...") # Log
         response = model.generate_content([prompt])
         time.sleep(7)  # Sleep for 7 seconds to work around rate limit
+        st.success("Successfully received financial report from Gemini.") # Log success
         return response.text
     except exceptions.ServiceUnavailable as e:
         if e.response.status_code == 504:
             st.session_state['last_error'] = "504" # Store the error in session state
             return None
         else:
+            st.error(f"Gemini API error during report generation: {e}") # Log other API errors
             raise
+    except Exception as e:
+        st.error(f"An unexpected error occurred during Gemini report generation: {e}") # Catch other potential errors
+        return None
 # Install required libraries:
         Exception: If PDF generation fails.
     """
     if not report_text:
+        st.warning("Report text is empty, skipping PDF generation.") # Log
         raise ValueError("Input report_text cannot be empty.")
     try:
+        st.info("Starting PDF generation from markdown report...") # Log
         # 1. Clean Markdown
         cleaned_md = re.sub(r'^```markdown\s*', '', report_text, flags=re.MULTILINE)
         cleaned_md = re.sub(r'\s*```$', '', cleaned_md, flags=re.MULTILINE)
         cleaned_md = cleaned_md.strip()
+        # st.debug("Markdown cleaned.") # Too verbose
         # 2. Convert Markdown to HTML
         html_content = markdown.markdown(cleaned_md, extensions=['tables', 'fenced_code', 'sane_lists'])
         if not html_content:
+            st.error("Markdown parsing resulted in empty HTML.") # Log
             raise ValueError("Markdown parsing resulted in empty HTML.")
+        # st.debug("Markdown converted to HTML.") # Too verbose
         # 3. Parse HTML with BeautifulSoup
         soup = BeautifulSoup(html_content, 'html.parser')
+        # st.debug("HTML parsed with BeautifulSoup.") # Too verbose
         # 4. Generate PDF using FPDF
         pdf = PDF_Generator()
         pdf.add_page()
         pdf.set_font('helvetica', '', 10) # Default font
+        st.info("PDF document initialized, adding content...") # Log
         # Iterate through top-level tags in the HTML body
         for element in soup.find_all(recursive=False):
              styles = set()
              def traverse(tag, current_styles):
                  local_style_added = None
                  if tag.name in ['b', 'strong']:
                      current_styles.add('b')
                      current_styles.add('i')
                      local_style_added = 'i'
                  if tag.name in ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol', 'table', 'br', 'hr']:
+                      pdf.add_html_element(tag, current_styles.copy())
                  else:
                      if hasattr(tag, 'contents'):
                          for child in tag.contents:
+                             if isinstance(child, str):
+                                 pass
+                             elif hasattr(child, 'name'):
+                                 traverse(child, current_styles.copy())
                  if local_style_added and local_style_added in current_styles:
                      current_styles.remove(local_style_added)
              traverse(element, styles)
+        st.info("Content added to PDF. Outputting PDF to buffer...") # Log
         # 5. Output PDF to BytesIO buffer
         pdf_output = pdf.output(dest='S') # Output as bytes string
         if isinstance(pdf_output, str):
              # If output is string (older fpdf versions?), encode it
              pdf_output = pdf_output.encode('latin-1')
+        st.success("PDF report generated successfully.") # Log success
         return BytesIO(pdf_output)
     except ImportError:
     except Exception as e:
         st.error(f"Failed to generate PDF locally using FPDF: {type(e).__name__}: {e}")
         st.exception(e) # Show traceback in streamlit logs
         raise Exception(f"Local FPDF PDF generation failed: {e}") from e
 def main():
     st.title("Quantitlytix AI")
     st.markdown("*Bank Statement Parser & Financial Report Generator*")
+    st.info("Application started. Ready for user input.") # Log app start
     # Initialize session state for last error
     if 'last_error' not in st.session_state:
     # Sidebar: Select input type: Bulk PDF or CSV Upload
     input_type = st.sidebar.radio("Select Input Type", ("Bulk Bank Statement Upload", "CSV Upload"))
+    st.info(f"Input type selected: {input_type}") # Log input type
     all_transactions = []
     if input_type == "Bulk Bank Statement Upload":
         uploaded_files = st.file_uploader("Upload PDF bank statements", type="pdf", accept_multiple_files=True)
         if uploaded_files:
+            st.info(f"User uploaded {len(uploaded_files)} PDF file(s).") # Log file upload
             total_files = len(uploaded_files)
             st.write(f"{total_files} PDF file(s) uploaded.")
             try:
                 file_progress = 0
                 for file_index, uploaded_file in enumerate(uploaded_files):
+                    st.info(f"Starting processing for file {file_index+1}/{total_files}: {uploaded_file.name}") # Log individual file start
                     # Update file progress
                     file_progress = (file_index) / total_files
+                    progress_bar.progress(overall_progress) # Corrected variable name
                     status_text.text(f"Processing file {file_index+1} of {total_files}: {uploaded_file.name}")
                     # Get PDF reader and page count
                         pdf_reader, total_pages = read_pdf_pages(uploaded_file)
                         if total_pages == 0:
+                            st.warning(f"No pages found in {uploaded_file.name}. Skipping file.") # Log
                             continue
                         with st.spinner(f"Processing {uploaded_file.name} ({total_pages} pages)..."):
                                 status_text.text(f"File {file_index+1}/{total_files}: {message}")
                             # Process the PDF page by page
+                            st.info(f"Calling process_pdf_pages for {uploaded_file.name}...") # Log
                             file_transactions = process_pdf_pages(
                                 model,
                                 pdf_reader,
                             # Add transactions from this file to overall list
                             all_transactions.extend(file_transactions)
+                            st.info(f"Finished processing {uploaded_file.name}. Extracted {len(file_transactions)} transactions.") # Log file completion
                     except Exception as e:
+                        st.error(f"Error processing {uploaded_file.name}: {str(e)}") # Log specific file error
+                        st.exception(e) # Show traceback
                         continue
                 # Complete the progress bar
                 progress_bar.progress(1.0)
                 status_text.text(f"Completed processing {total_files} files!")
+                st.success(f"All PDF files processed. Total transactions collected: {len(all_transactions)}.") # Log overall completion
             except Exception as e:
+                st.error(f"Overall error during PDF document processing: {str(e)}") # Log general error during PDF handling
                 st.error("Please ensure you're using valid bank statement PDFs and a valid API key")
+                st.exception(e) # Show traceback
     elif input_type == "CSV Upload":
         uploaded_csv = st.file_uploader("Upload CSV of transactions", type="csv")
         if uploaded_csv:
+            st.info(f"User uploaded CSV file: {uploaded_csv.name}.") # Log
             try:
                 df = pd.read_csv(uploaded_csv)
                 # Drop 'Unnamed:' columns from the uploaded CSV
                 # Convert dataframe to list of transaction dictionaries
                 transactions = df.to_dict(orient='records')
                 all_transactions.extend(transactions)
+                st.success(f"Successfully loaded {len(transactions)} transactions from CSV.") # Log
             except Exception as e:
+                st.error(f"Error processing CSV file: {str(e)}") # Log CSV error
+                st.exception(e)
     # If transactions are loaded, show DataFrame and update date ranges
     if all_transactions:
+        st.info("Consolidating and displaying all extracted transactions.") # Log
         df = pd.DataFrame(all_transactions)
         # Drop 'Unnamed:' columns from the final DataFrame
         if not df.empty:
             df = df.loc[:, ~df.columns.str.startswith('Unnamed:')]
             try:
                 # Process dates and extract min/max dates for date range inputs
+                st.info("Parsing transaction dates and determining date range.") # Log
                 df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y', errors='coerce')
                 # Get min and max dates from transactions
                     # Update session state with actual transaction date range
                     st.session_state['min_date'] = min_date
                     st.session_state['max_date'] = max_date
+                    st.info(f"Determined transaction date range: {min_date} to {max_date}.") # Log
+                else:
+                    st.warning("Could not determine valid date range from transactions. Using default dates.") # Log
                 # Format dates for display
                 df['Date'] = df['Date'].dt.strftime('%d/%m/%Y')
             except Exception as e:
+                st.warning("Some transaction dates could not be formatted correctly.")
                 st.exception(e)
             st.success("Transactions loaded successfully!")
         else:
             st.warning("No valid transactions could be extracted from the documents.")
     else:
+        st.info("No transactions loaded yet. Upload files to begin.") # Initial state log
     # Financial report generation parameters
     st.write("### Generate Financial Report")
     statement_type = st.selectbox("Select Financial Statement", ["Income Statement", "Cashflow Statement", "Balance Sheet"])
     if st.button("Generate Financial Report"):
+        st.info(f"User clicked 'Generate Financial Report' for {statement_type} from {start_date} to {end_date}.") # Log button click
         if not all_transactions:
+            st.error("No transactions available to generate report. Please upload files first.") # Log
         else:
             # Filter transactions by date
+            st.info(f"Filtering {len(all_transactions)} transactions for the period {start_date} to {end_date}...") # Log filtering
             filtered_transactions = []
             for transaction in all_transactions:
                 try:
                     if start_date <= transaction_date <= end_date:
                         filtered_transactions.append(transaction)
                 except (ValueError, TypeError):
+                    st.warning(f"Could not parse date for transaction, skipping: {transaction}") # Log problematic transactions
                     continue
             if not filtered_transactions:
+                st.warning("No transactions found within the selected date range. Please adjust dates or upload relevant files.") # Log
             else:
+                st.info(f"Found {len(filtered_transactions)} transactions within the selected date range.") # Log filtered count
                 try:
                     model1 = configure_gemini1(api_key)
                     combined_json = {"transactions": filtered_transactions}
                     with st.spinner("Generating financial report..."):
                         report_text = generate_financial_report(model1, combined_json, start_date, end_date, statement_type)
                         if report_text:
+                            st.success("Financial report generated successfully by Gemini!") # Log report text ready
                             # Display the report as markdown
                             st.markdown("### Financial Report Preview")
                             # Create PDF from markdown
                             try:
+                                st.info("Attempting to generate PDF from the report markdown.") # Log PDF start
                                 pdf_buffer = create_pdf_report(report_text)
                                 st.download_button(
                                     label="Download Financial Report as PDF",
                                     file_name=f"{statement_type.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d')}.pdf",
                                     mime="application/pdf"
                                 )
+                                st.success("PDF download button enabled.") # Log
                             except Exception as e:
+                                st.error(f"Error generating PDF for download: {str(e)}") # Log PDF error
                                 st.info("For better PDF generation, please ensure NotoSans fonts are installed in the same directory.")
+                                st.exception(e) # Show traceback
                 except exceptions.ServiceUnavailable as e:
                     if e.response.status_code == 504:
                         st.error("Error generating report: Gemini API timed out (504). Please try reducing the time period for the report.")
                     else:
+                        st.error(f"Error generating financial report due to Gemini API issue: {str(e)}") # Log API error
+                        st.exception(e) # Show traceback
                 except Exception as e:
+                    st.error(f"An unexpected error occurred while generating the financial report: {str(e)}") # Log general error
                     if "504" in str(e):
                         st.info("The Gemini API might be overloaded. Consider generating reports for smaller time periods.")
                     elif len(filtered_transactions) > 500:
                         st.info("For large datasets, consider generating reports for smaller time periods.")
+                    st.exception(e) # Show traceback
 # Script entry point: call main() only when this file is executed directly,
 # not when it is imported as a module.
 if __name__ == "__main__":
     main()