Documents-Manager

Sleeping

App Files Files Community

rairo committed on Apr 1, 2025

Commit

1315a14

verified ·

1 Parent(s): 0b914c1

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -144

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import os
 import time
 from datetime import datetime, date, timedelta
 from io import BytesIO
 import pandas as pd
 import streamlit as st
 import google.generativeai as genai
@@ -105,9 +105,11 @@ Do not name the company if name is not there and return just the report and noth
             raise
 def create_pdf_report(report_text):
     """
-    Create a PDF from markdown text with proper Unicode support and table handling.
     Args:
         report_text (str): Markdown formatted report text
@@ -115,152 +117,39 @@ def create_pdf_report(report_text):
     Returns:
         BytesIO: PDF file in memory buffer
     """
-    # Convert markdown to HTML with table support
-    html_content = markdown.markdown(report_text, extensions=['tables'])
-    # Create PDF and add first page
-    pdf = FPDF()
-    pdf.add_page()
-    pdf.set_auto_page_break(auto=True, margin=15)
-    # Configure fonts with fallback: try NotoSans, otherwise use Arial.
-    try:
-        pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
-        pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
-        base_font = "NotoSans"
-    except RuntimeError:
-        base_font = "Arial"
-        if base_font not in pdf.fonts:
-            pdf.add_font("Arial", style="", fname="arial.ttf", uni=True)
-            pdf.add_font("Arial", style="B", fname="arialbd.ttf", uni=True)
-    # Define default styles
-    styles = {
-        'h1': {'size': 16, 'color': (25, 25, 112)},
-        'h2': {'size': 14, 'color': (25, 25, 112)},
-        'h3': {'size': 12, 'color': (25, 25, 112)},
-        'body': {'size': 10},
-        'table': {
-            'cell_margin': 2,
-            'header_color': (245, 245, 245),
-            'row_height': 8,
-            'border': 1
-        }
-    }
-    # Calculate available page width
-    effective_page_width = pdf.w - 2 * pdf.l_margin
-    def render_table_row(row_data, is_header=False):
-        """
-        Render a single table row, auto-sizing each cell.
-        """
-        col_count = len(row_data)
-        col_width = effective_page_width / max(col_count, 1)
-        # Set font: bold for header rows, normal otherwise.
-        pdf.set_font(base_font, 'B' if is_header else '', styles['body']['size'])
-        start_y = pdf.y
-        # First pass: compute maximum number of lines needed for any cell
-        max_lines = 1
-        for cell in row_data:
-            # We use split_only=True so that multi_cell returns the lines without printing.
-            lines = pdf.multi_cell(
-                w=col_width,
-                h=styles['table']['row_height'],
-                txt=cell.strip(),
-                border=0,
-                align='L',
-                fill=False,
-                split_only=True,
-                new_x=XPos.LEFT
-            )
-            max_lines = max(max_lines, len(lines))
-        row_height = styles['table']['row_height'] * max_lines
-        # Second pass: output each cell, resetting x to the left margin for each cell.
-        for i, cell in enumerate(row_data):
-            pdf.set_xy(pdf.l_margin + i * col_width, start_y)
-            pdf.multi_cell(
-                w=col_width,
-                h=styles['table']['row_height'],
-                txt=cell.strip(),
-                border=styles['table']['border'],
-                align='L',
-                fill=is_header,
-                max_line_height=styles['table']['row_height'],
-                new_x=XPos.LEFT
-            )
-        # Move the cursor to the beginning of the next line
-        pdf.set_xy(pdf.l_margin, start_y + row_height)
-    # Parse the HTML content line by line
-    current_table = []
-    in_table = False
-    for line in html_content.split('\n'):
-        line = line.strip()
-        if line.startswith('<table>'):
-            in_table = True
-            current_table = []
-        elif line.startswith('</table>'):
-            in_table = False
-            if current_table:
-                # Check if first row contains header cells
-                header = current_table[0] if any('<th>' in row for row in current_table[:1]) else []
-                if header:
-                    render_table_row(header, is_header=True)
-                    current_table = current_table[1:]
-                for row in current_table:
-                    render_table_row(row)
-                pdf.ln(5)
-            current_table = []
-        elif in_table and line.startswith('<tr>'):
-            cells = []
-            # Remove the <tr> and </tr> tags and split cells on </td>
-            for cell in line[4:-5].split('</td>')[:-1]:
-                clean_cell = cell.replace('<td>', '').replace('<th>', '').strip()
-                cells.append(clean_cell)
-            current_table.append(cells)
-        elif line.startswith('<h1>'):
-            pdf.set_font(base_font, 'B', styles['h1']['size'])
-            pdf.set_text_color(*styles['h1']['color'])
-            pdf.cell(0, 10, line[4:-5], ln=1)
-            pdf.ln(5)
-        elif line.startswith('<h2>'):
-            pdf.set_font(base_font, 'B', styles['h2']['size'])
-            pdf.set_text_color(*styles['h2']['color'])
-            pdf.cell(0, 10, line[4:-5], ln=1)
-            pdf.ln(3)
-        elif line.startswith('<h3>'):
-            pdf.set_font(base_font, 'B', styles['h3']['size'])
-            pdf.set_text_color(*styles['h3']['color'])
-            pdf.cell(0, 10, line[4:-5], ln=1)
-            pdf.ln(2)
-        elif line.startswith('<li>'):
-            pdf.set_font(base_font, '', styles['body']['size'])
-            pdf.set_text_color(0, 0, 0)
-            pdf.cell(10, 6, '•')
-            pdf.multi_cell(0, 6, line[4:-5].strip(), new_x=XPos.LEFT)
-        elif line.startswith('<p>'):
-            pdf.set_font(base_font, '', styles['body']['size'])
-            pdf.set_text_color(0, 0, 0)
-            pdf.multi_cell(0, 6, line[3:-4].strip(), new_x=XPos.LEFT)
-            pdf.ln(4)
-    # Output PDF to a bytes buffer
-    pdf_buffer = BytesIO()
-    try:
-        pdf_output = pdf.output(dest='S').encode('utf-8')
-    except UnicodeEncodeError:
-        pdf_output = pdf.output(dest='S').encode('utf-8', errors='replace')
-    pdf_buffer.write(pdf_output)
-    pdf_buffer.seek(0)
-    return pdf_buffer
 def main():
     st.title("Quantitlytix AI")

 import time
 from datetime import datetime, date, timedelta
 from io import BytesIO
+import requests
 import pandas as pd
 import streamlit as st
 import google.generativeai as genai
             raise
 def create_pdf_report(report_text):
     """
+    Create a PDF from markdown text using the md-to-pdf API.
     Args:
         report_text (str): Markdown formatted report text
     Returns:
         BytesIO: PDF file in memory buffer
     """
+    api_url = "https://md-to-pdf.fly.dev"
+    css = """
+h1, h2 {
+    color: MidnightBlue;
+}
+table {
+  border-collapse: collapse;
+}
+table, th, td {
+  border: 1px solid DimGray;
+}
+th, td {
+  text-align: left;
+  padding: 1em;
+}
+"""
+    payload = {
+        'markdown': report_text,
+        'engine': 'weasyprint',
+        'css': css
+    }
+    response = requests.post(api_url, data=payload)
+    if response.status_code == 200:
+        # Return the PDF in a BytesIO buffer
+        from io import BytesIO
+        return BytesIO(response.content)
+    else:
+        raise Exception(f"Failed to generate PDF: {response.status_code} - {response.text}")
 def main():
     st.title("Quantitlytix AI")