Documents-Manager

Sleeping

App Files Community

rairo committed on Apr 1, 2025

Commit

0b914c1

verified ·

1 Parent(s): 5e521b9

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -52

app.py CHANGED Viewed

@@ -14,6 +14,7 @@ from fpdf.enums import XPos, YPos
 import markdown
 from google.api_core import exceptions
 # Configure API key for Gemini
 api_key = os.getenv('Gemini')
@@ -81,8 +82,8 @@ Generate a detailed {statement_type} report for the period from {start_date.strf
 Specific Formatting and Content Requirements:
-Standard Accounting Structure (South Africa Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
-Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
 Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "R" for South African Rand if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
 Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
 Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
@@ -103,8 +104,10 @@ Do not name the company if name is not there and return just the report and noth
         else:
             raise
 def create_pdf_report(report_text):
-    """Create PDF from markdown text with proper Unicode support and table handling
     Args:
         report_text (str): Markdown formatted report text
@@ -115,80 +118,71 @@ def create_pdf_report(report_text):
     # Convert markdown to HTML with table support
     html_content = markdown.markdown(report_text, extensions=['tables'])
-    # Create PDF with proper configuration
     pdf = FPDF()
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
-    # Configure fonts with fallbacks
     try:
-        # Try loading Noto Sans (must be in same directory)
         pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
         pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
         base_font = "NotoSans"
     except RuntimeError:
-        # Fallback to Arial if Noto Sans not available
         base_font = "Arial"
         if base_font not in pdf.fonts:
             pdf.add_font("Arial", style="", fname="arial.ttf", uni=True)
             pdf.add_font("Arial", style="B", fname="arialbd.ttf", uni=True)
-    # Set default styles
     styles = {
-        'h1': {'size': 16, 'color': (25, 25, 112)},  # MidnightBlue
         'h2': {'size': 14, 'color': (25, 25, 112)},
         'h3': {'size': 12, 'color': (25, 25, 112)},
         'body': {'size': 10},
         'table': {
             'cell_margin': 2,
-            'header_color': (245, 245, 245),  # Light gray
             'row_height': 8,
             'border': 1
         }
     }
-    # Calculate available page width (considering margins)
     effective_page_width = pdf.w - 2 * pdf.l_margin
     def render_table_row(row_data, is_header=False):
-        """Helper to render a single table row with auto-sizing
-        Args:
-            row_data (list): List of cell contents
-            is_header (bool): Whether this is a header row
         """
         col_count = len(row_data)
-        col_width = effective_page_width / max(col_count, 1)  # Avoid division by zero
-        # Set font style for header vs body
         pdf.set_font(base_font, 'B' if is_header else '', styles['body']['size'])
-        # Track starting position
         start_y = pdf.y
-        # Find maximum number of lines needed for any cell in this row
         max_lines = 1
         for cell in row_data:
             lines = pdf.multi_cell(
                 w=col_width,
                 h=styles['table']['row_height'],
                 txt=cell.strip(),
-                border=0,  # We'll draw borders manually
                 align='L',
                 fill=False,
-                split_only=True
             )
             max_lines = max(max_lines, len(lines))
-        # Calculate total row height needed
         row_height = styles['table']['row_height'] * max_lines
-        # Draw each cell
         for i, cell in enumerate(row_data):
-            # Position cursor for this cell
             pdf.set_xy(pdf.l_margin + i * col_width, start_y)
-            # Draw cell with border and fill
             pdf.multi_cell(
                 w=col_width,
                 h=styles['table']['row_height'],
@@ -196,46 +190,41 @@ def create_pdf_report(report_text):
                 border=styles['table']['border'],
                 align='L',
                 fill=is_header,
-                max_line_height=styles['table']['row_height']
             )
-        # Move to next line position
         pdf.set_xy(pdf.l_margin, start_y + row_height)
-    # Parse HTML content
     current_table = []
     in_table = False
     for line in html_content.split('\n'):
         line = line.strip()
-        # Handle tables
         if line.startswith('<table>'):
             in_table = True
             current_table = []
         elif line.startswith('</table>'):
             in_table = False
             if current_table:
-                # Process header row first if exists
                 header = current_table[0] if any('<th>' in row for row in current_table[:1]) else []
                 if header:
                     render_table_row(header, is_header=True)
-                    current_table = current_table[1:]  # Remove header from body rows
-                # Process body rows
                 for row in current_table:
                     render_table_row(row)
-                pdf.ln(5)  # Add space after table
             current_table = []
         elif in_table and line.startswith('<tr>'):
-            # Clean and split cells
             cells = []
-            for cell in line[4:-5].split('</td>')[:-1]:  # Split and remove empty last element
                 clean_cell = cell.replace('<td>', '').replace('<th>', '').strip()
                 cells.append(clean_cell)
             current_table.append(cells)
-        # Handle headers
         elif line.startswith('<h1>'):
             pdf.set_font(base_font, 'B', styles['h1']['size'])
             pdf.set_text_color(*styles['h1']['color'])
@@ -251,22 +240,18 @@ def create_pdf_report(report_text):
             pdf.set_text_color(*styles['h3']['color'])
             pdf.cell(0, 10, line[4:-5], ln=1)
             pdf.ln(2)
-        # Handle list items
         elif line.startswith('<li>'):
             pdf.set_font(base_font, '', styles['body']['size'])
             pdf.set_text_color(0, 0, 0)
             pdf.cell(10, 6, '•')
-            pdf.multi_cell(0, 6, line[4:-5].strip())
-        # Handle paragraphs
         elif line.startswith('<p>'):
             pdf.set_font(base_font, '', styles['body']['size'])
             pdf.set_text_color(0, 0, 0)
-            pdf.multi_cell(0, 6, line[3:-4].strip())
             pdf.ln(4)
-    # Create output buffer
     pdf_buffer = BytesIO()
     try:
         pdf_output = pdf.output(dest='S').encode('utf-8')
@@ -275,7 +260,6 @@ def create_pdf_report(report_text):
     pdf_buffer.write(pdf_output)
     pdf_buffer.seek(0)
     return pdf_buffer
 def main():

 import markdown
 from google.api_core import exceptions
 # Configure API key for Gemini
 api_key = os.getenv('Gemini')
 Specific Formatting and Content Requirements:
+Standard Accounting Structure (South Africa Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, in nice tables considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
+Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
 Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "R" for South African Rand if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
 Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
 Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
         else:
             raise
 def create_pdf_report(report_text):
+    """
+    Create a PDF from markdown text with proper Unicode support and table handling.
     Args:
         report_text (str): Markdown formatted report text
     # Convert markdown to HTML with table support
     html_content = markdown.markdown(report_text, extensions=['tables'])
+    # Create PDF and add first page
     pdf = FPDF()
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
+    # Configure fonts with fallback: try NotoSans, otherwise use Arial.
     try:
         pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
         pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
         base_font = "NotoSans"
     except RuntimeError:
         base_font = "Arial"
         if base_font not in pdf.fonts:
             pdf.add_font("Arial", style="", fname="arial.ttf", uni=True)
             pdf.add_font("Arial", style="B", fname="arialbd.ttf", uni=True)
+    # Define default styles
     styles = {
+        'h1': {'size': 16, 'color': (25, 25, 112)},
         'h2': {'size': 14, 'color': (25, 25, 112)},
         'h3': {'size': 12, 'color': (25, 25, 112)},
         'body': {'size': 10},
         'table': {
             'cell_margin': 2,
+            'header_color': (245, 245, 245),
             'row_height': 8,
             'border': 1
         }
     }
+    # Calculate available page width
     effective_page_width = pdf.w - 2 * pdf.l_margin
     def render_table_row(row_data, is_header=False):
+        """
+        Render a single table row, auto-sizing each cell.
         """
         col_count = len(row_data)
+        col_width = effective_page_width / max(col_count, 1)
+        # Set font: bold for header rows, normal otherwise.
         pdf.set_font(base_font, 'B' if is_header else '', styles['body']['size'])
         start_y = pdf.y
+        # First pass: compute maximum number of lines needed for any cell
         max_lines = 1
         for cell in row_data:
+            # We use split_only=True so that multi_cell returns the lines without printing.
             lines = pdf.multi_cell(
                 w=col_width,
                 h=styles['table']['row_height'],
                 txt=cell.strip(),
+                border=0,
                 align='L',
                 fill=False,
+                split_only=True,
+                new_x=XPos.LEFT
             )
             max_lines = max(max_lines, len(lines))
         row_height = styles['table']['row_height'] * max_lines
+        # Second pass: output each cell, resetting x to the left margin for each cell.
         for i, cell in enumerate(row_data):
             pdf.set_xy(pdf.l_margin + i * col_width, start_y)
             pdf.multi_cell(
                 w=col_width,
                 h=styles['table']['row_height'],
                 border=styles['table']['border'],
                 align='L',
                 fill=is_header,
+                max_line_height=styles['table']['row_height'],
+                new_x=XPos.LEFT
             )
+        # Move the cursor to the beginning of the next line
         pdf.set_xy(pdf.l_margin, start_y + row_height)
+    # Parse the HTML content line by line
     current_table = []
     in_table = False
     for line in html_content.split('\n'):
         line = line.strip()
         if line.startswith('<table>'):
             in_table = True
             current_table = []
         elif line.startswith('</table>'):
             in_table = False
             if current_table:
+                # Check if first row contains header cells
                 header = current_table[0] if any('<th>' in row for row in current_table[:1]) else []
                 if header:
                     render_table_row(header, is_header=True)
+                    current_table = current_table[1:]
                 for row in current_table:
                     render_table_row(row)
+                pdf.ln(5)
             current_table = []
         elif in_table and line.startswith('<tr>'):
             cells = []
+            # Remove the <tr> and </tr> tags and split cells on </td>
+            for cell in line[4:-5].split('</td>')[:-1]:
                 clean_cell = cell.replace('<td>', '').replace('<th>', '').strip()
                 cells.append(clean_cell)
             current_table.append(cells)
         elif line.startswith('<h1>'):
             pdf.set_font(base_font, 'B', styles['h1']['size'])
             pdf.set_text_color(*styles['h1']['color'])
             pdf.set_text_color(*styles['h3']['color'])
             pdf.cell(0, 10, line[4:-5], ln=1)
             pdf.ln(2)
         elif line.startswith('<li>'):
             pdf.set_font(base_font, '', styles['body']['size'])
             pdf.set_text_color(0, 0, 0)
             pdf.cell(10, 6, '•')
+            pdf.multi_cell(0, 6, line[4:-5].strip(), new_x=XPos.LEFT)
         elif line.startswith('<p>'):
             pdf.set_font(base_font, '', styles['body']['size'])
             pdf.set_text_color(0, 0, 0)
+            pdf.multi_cell(0, 6, line[3:-4].strip(), new_x=XPos.LEFT)
             pdf.ln(4)
+    # Output PDF to a bytes buffer
     pdf_buffer = BytesIO()
     try:
         pdf_output = pdf.output(dest='S').encode('utf-8')
     pdf_buffer.write(pdf_output)
     pdf_buffer.seek(0)
     return pdf_buffer
 def main():