Documents-Manager

Sleeping

App Files Community

rairo committed on Apr 1, 2025

Commit

5e521b9

verified ·

1 Parent(s): 1de63e3

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -49

app.py CHANGED Viewed

@@ -104,88 +104,175 @@ Do not name the company if name is not there and return just the report and noth
             raise
 def create_pdf_report(report_text):
-    """Create PDF from markdown text with proper Unicode support"""
-    # Convert markdown to HTML
     html_content = markdown.markdown(report_text, extensions=['tables'])
-    # Create PDF with better UTF-8 support
     pdf = FPDF()
     pdf.add_page()
-    # Add Noto Sans fonts (must be available in the same directory)
     try:
         pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
         pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
-        pdf.set_font("NotoSans", size=12)
-    except:
-        # Fallback to built-in fonts if Noto Sans not available
-        pdf.set_font("Arial", size=12)
-    # Basic styling
     styles = {
-        'h1': {'size': 24, 'color': (25, 25, 112)},  # MidnightBlue
-        'h2': {'size': 20, 'color': (25, 25, 112)},
-        'h3': {'size': 16, 'color': (25, 25, 112)},
-        'table': {'cell_width': 40, 'header_color': (245, 245, 245)},
-        'th': {'border': 1, 'align': 'L', 'fill': True},
-        'td': {'border': 1, 'align': 'L'}
     }
     # Parse HTML content
     in_table = False
     for line in html_content.split('\n'):
         line = line.strip()
         # Handle headers
-        if line.startswith('<h1>'):
-            pdf.set_font(style="B", size=styles['h1']['size'])
             pdf.set_text_color(*styles['h1']['color'])
-            pdf.cell(0, 10, line[4:-5], new_x=XPos.LMARGIN, new_y=YPos.NEXT)
             pdf.ln(5)
         elif line.startswith('<h2>'):
-            pdf.set_font(style="B", size=styles['h2']['size'])
             pdf.set_text_color(*styles['h2']['color'])
-            pdf.cell(0, 10, line[4:-5], new_x=XPos.LMARGIN, new_y=YPos.NEXT)
             pdf.ln(3)
         elif line.startswith('<h3>'):
-            pdf.set_font(style="B", size=styles['h3']['size'])
             pdf.set_text_color(*styles['h3']['color'])
-            pdf.cell(0, 10, line[4:-5], new_x=XPos.LMARGIN, new_y=YPos.NEXT)
             pdf.ln(2)
-        # Handle tables
-        elif line.startswith('<table>'):
-            in_table = True
-            col_count = line.count('<th>')  # Simple column count
-        elif line.startswith('</table>'):
-            in_table = False
-            pdf.ln(10)
-        elif in_table:
-            if line.startswith('<tr>'):
-                pdf.set_font(style="B" if '<th>' in line else "")
-                cells = line.replace('<tr>','').replace('</tr>','').split('</td>')[:-1]
-                for cell in cells:
-                    content = cell.replace('<td>','').replace('<th>','').strip()
-                    pdf.cell(styles['table']['cell_width'], 10, content,
-                            border=styles['td']['border'], align=styles['td']['align'])
-                pdf.ln()
         # Handle list items
         elif line.startswith('<li>'):
-            pdf.set_font(style="")
-            pdf.cell(10, 10, '•', border=0)
-            pdf.multi_cell(0, 10, line[4:-5].strip())
-        # Handle regular text
         elif line.startswith('<p>'):
-            pdf.set_font(style="")
             pdf.set_text_color(0, 0, 0)
-            pdf.multi_cell(0, 10, line[3:-4].strip())
-            pdf.ln(5)
-    # Create BytesIO buffer with UTF-8 encoding
     pdf_buffer = BytesIO()
-    pdf_output = pdf.output(dest='S').encode('utf-8', errors='replace')
     pdf_buffer.write(pdf_output)
     pdf_buffer.seek(0)

             raise
 def create_pdf_report(report_text):
+    """Create PDF from markdown text with proper Unicode support and table handling
+    Args:
+        report_text (str): Markdown formatted report text
+    Returns:
+        BytesIO: PDF file in memory buffer
+    """
+    # Convert markdown to HTML with table support
     html_content = markdown.markdown(report_text, extensions=['tables'])
+    # Create PDF with proper configuration
     pdf = FPDF()
     pdf.add_page()
+    pdf.set_auto_page_break(auto=True, margin=15)
+    # Configure fonts with fallbacks
     try:
+        # Try loading Noto Sans (must be in same directory)
         pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
         pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
+        base_font = "NotoSans"
+    except RuntimeError:
+        # Fallback to Arial if Noto Sans not available
+        base_font = "Arial"
+        if base_font not in pdf.fonts:
+            pdf.add_font("Arial", style="", fname="arial.ttf", uni=True)
+            pdf.add_font("Arial", style="B", fname="arialbd.ttf", uni=True)
+    # Set default styles
     styles = {
+        'h1': {'size': 16, 'color': (25, 25, 112)},  # MidnightBlue
+        'h2': {'size': 14, 'color': (25, 25, 112)},
+        'h3': {'size': 12, 'color': (25, 25, 112)},
+        'body': {'size': 10},
+        'table': {
+            'cell_margin': 2,
+            'header_color': (245, 245, 245),  # Light gray
+            'row_height': 8,
+            'border': 1
+        }
     }
+    # Calculate available page width (considering margins)
+    effective_page_width = pdf.w - 2 * pdf.l_margin
+    def render_table_row(row_data, is_header=False):
+        """Helper to render a single table row with auto-sizing
+        Args:
+            row_data (list): List of cell contents
+            is_header (bool): Whether this is a header row
+        """
+        col_count = len(row_data)
+        col_width = effective_page_width / max(col_count, 1)  # Avoid division by zero
+        # Set font style for header vs body
+        pdf.set_font(base_font, 'B' if is_header else '', styles['body']['size'])
+        # Track starting position
+        start_y = pdf.y
+        # Find maximum number of lines needed for any cell in this row
+        max_lines = 1
+        for cell in row_data:
+            lines = pdf.multi_cell(
+                w=col_width,
+                h=styles['table']['row_height'],
+                txt=cell.strip(),
+                border=0,  # We'll draw borders manually
+                align='L',
+                fill=False,
+                split_only=True
+            )
+            max_lines = max(max_lines, len(lines))
+        # Calculate total row height needed
+        row_height = styles['table']['row_height'] * max_lines
+        # Draw each cell
+        for i, cell in enumerate(row_data):
+            # Position cursor for this cell
+            pdf.set_xy(pdf.l_margin + i * col_width, start_y)
+            # Draw cell with border and fill
+            pdf.multi_cell(
+                w=col_width,
+                h=styles['table']['row_height'],
+                txt=cell.strip(),
+                border=styles['table']['border'],
+                align='L',
+                fill=is_header,
+                max_line_height=styles['table']['row_height']
+            )
+        # Move to next line position
+        pdf.set_xy(pdf.l_margin, start_y + row_height)
     # Parse HTML content
+    current_table = []
     in_table = False
     for line in html_content.split('\n'):
         line = line.strip()
+        # Handle tables
+        if line.startswith('<table>'):
+            in_table = True
+            current_table = []
+        elif line.startswith('</table>'):
+            in_table = False
+            if current_table:
+                # Process header row first if exists
+                header = current_table[0] if any('<th>' in row for row in current_table[:1]) else []
+                if header:
+                    render_table_row(header, is_header=True)
+                    current_table = current_table[1:]  # Remove header from body rows
+                # Process body rows
+                for row in current_table:
+                    render_table_row(row)
+                pdf.ln(5)  # Add space after table
+            current_table = []
+        elif in_table and line.startswith('<tr>'):
+            # Clean and split cells
+            cells = []
+            for cell in line[4:-5].split('</td>')[:-1]:  # Split and remove empty last element
+                clean_cell = cell.replace('<td>', '').replace('<th>', '').strip()
+                cells.append(clean_cell)
+            current_table.append(cells)
         # Handle headers
+        elif line.startswith('<h1>'):
+            pdf.set_font(base_font, 'B', styles['h1']['size'])
             pdf.set_text_color(*styles['h1']['color'])
+            pdf.cell(0, 10, line[4:-5], ln=1)
             pdf.ln(5)
         elif line.startswith('<h2>'):
+            pdf.set_font(base_font, 'B', styles['h2']['size'])
             pdf.set_text_color(*styles['h2']['color'])
+            pdf.cell(0, 10, line[4:-5], ln=1)
             pdf.ln(3)
         elif line.startswith('<h3>'):
+            pdf.set_font(base_font, 'B', styles['h3']['size'])
             pdf.set_text_color(*styles['h3']['color'])
+            pdf.cell(0, 10, line[4:-5], ln=1)
             pdf.ln(2)
         # Handle list items
         elif line.startswith('<li>'):
+            pdf.set_font(base_font, '', styles['body']['size'])
+            pdf.set_text_color(0, 0, 0)
+            pdf.cell(10, 6, '•')
+            pdf.multi_cell(0, 6, line[4:-5].strip())
+        # Handle paragraphs
         elif line.startswith('<p>'):
+            pdf.set_font(base_font, '', styles['body']['size'])
             pdf.set_text_color(0, 0, 0)
+            pdf.multi_cell(0, 6, line[3:-4].strip())
+            pdf.ln(4)
+    # Create output buffer
     pdf_buffer = BytesIO()
+    try:
+        pdf_output = pdf.output(dest='S').encode('utf-8')
+    except UnicodeEncodeError:
+        pdf_output = pdf.output(dest='S').encode('utf-8', errors='replace')
     pdf_buffer.write(pdf_output)
     pdf_buffer.seek(0)