rairo committed on
Commit
1315a14
·
verified ·
1 Parent(s): 0b914c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -144
app.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  import time
5
  from datetime import datetime, date, timedelta
6
  from io import BytesIO
7
-
8
  import pandas as pd
9
  import streamlit as st
10
  import google.generativeai as genai
@@ -105,9 +105,11 @@ Do not name the company if name is not there and return just the report and noth
105
  raise
106
 
107
 
 
108
  def create_pdf_report(report_text):
 
109
  """
110
- Create a PDF from markdown text with proper Unicode support and table handling.
111
 
112
  Args:
113
  report_text (str): Markdown formatted report text
@@ -115,152 +117,39 @@ def create_pdf_report(report_text):
115
  Returns:
116
  BytesIO: PDF file in memory buffer
117
  """
118
- # Convert markdown to HTML with table support
119
- html_content = markdown.markdown(report_text, extensions=['tables'])
120
-
121
- # Create PDF and add first page
122
- pdf = FPDF()
123
- pdf.add_page()
124
- pdf.set_auto_page_break(auto=True, margin=15)
125
-
126
- # Configure fonts with fallback: try NotoSans, otherwise use Arial.
127
- try:
128
- pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
129
- pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
130
- base_font = "NotoSans"
131
- except RuntimeError:
132
- base_font = "Arial"
133
- if base_font not in pdf.fonts:
134
- pdf.add_font("Arial", style="", fname="arial.ttf", uni=True)
135
- pdf.add_font("Arial", style="B", fname="arialbd.ttf", uni=True)
136
-
137
- # Define default styles
138
- styles = {
139
- 'h1': {'size': 16, 'color': (25, 25, 112)},
140
- 'h2': {'size': 14, 'color': (25, 25, 112)},
141
- 'h3': {'size': 12, 'color': (25, 25, 112)},
142
- 'body': {'size': 10},
143
- 'table': {
144
- 'cell_margin': 2,
145
- 'header_color': (245, 245, 245),
146
- 'row_height': 8,
147
- 'border': 1
148
- }
149
- }
150
 
151
- # Calculate available page width
152
- effective_page_width = pdf.w - 2 * pdf.l_margin
 
 
 
153
 
154
- def render_table_row(row_data, is_header=False):
155
- """
156
- Render a single table row, auto-sizing each cell.
157
- """
158
- col_count = len(row_data)
159
- col_width = effective_page_width / max(col_count, 1)
160
-
161
- # Set font: bold for header rows, normal otherwise.
162
- pdf.set_font(base_font, 'B' if is_header else '', styles['body']['size'])
163
- start_y = pdf.y
164
-
165
- # First pass: compute maximum number of lines needed for any cell
166
- max_lines = 1
167
- for cell in row_data:
168
- # We use split_only=True so that multi_cell returns the lines without printing.
169
- lines = pdf.multi_cell(
170
- w=col_width,
171
- h=styles['table']['row_height'],
172
- txt=cell.strip(),
173
- border=0,
174
- align='L',
175
- fill=False,
176
- split_only=True,
177
- new_x=XPos.LEFT
178
- )
179
- max_lines = max(max_lines, len(lines))
180
-
181
- row_height = styles['table']['row_height'] * max_lines
182
-
183
- # Second pass: output each cell, resetting x to the left margin for each cell.
184
- for i, cell in enumerate(row_data):
185
- pdf.set_xy(pdf.l_margin + i * col_width, start_y)
186
- pdf.multi_cell(
187
- w=col_width,
188
- h=styles['table']['row_height'],
189
- txt=cell.strip(),
190
- border=styles['table']['border'],
191
- align='L',
192
- fill=is_header,
193
- max_line_height=styles['table']['row_height'],
194
- new_x=XPos.LEFT
195
- )
196
-
197
- # Move the cursor to the beginning of the next line
198
- pdf.set_xy(pdf.l_margin, start_y + row_height)
199
 
200
- # Parse the HTML content line by line
201
- current_table = []
202
- in_table = False
203
-
204
- for line in html_content.split('\n'):
205
- line = line.strip()
206
- if line.startswith('<table>'):
207
- in_table = True
208
- current_table = []
209
- elif line.startswith('</table>'):
210
- in_table = False
211
- if current_table:
212
- # Check if first row contains header cells
213
- header = current_table[0] if any('<th>' in row for row in current_table[:1]) else []
214
- if header:
215
- render_table_row(header, is_header=True)
216
- current_table = current_table[1:]
217
- for row in current_table:
218
- render_table_row(row)
219
- pdf.ln(5)
220
- current_table = []
221
- elif in_table and line.startswith('<tr>'):
222
- cells = []
223
- # Remove the <tr> and </tr> tags and split cells on </td>
224
- for cell in line[4:-5].split('</td>')[:-1]:
225
- clean_cell = cell.replace('<td>', '').replace('<th>', '').strip()
226
- cells.append(clean_cell)
227
- current_table.append(cells)
228
- elif line.startswith('<h1>'):
229
- pdf.set_font(base_font, 'B', styles['h1']['size'])
230
- pdf.set_text_color(*styles['h1']['color'])
231
- pdf.cell(0, 10, line[4:-5], ln=1)
232
- pdf.ln(5)
233
- elif line.startswith('<h2>'):
234
- pdf.set_font(base_font, 'B', styles['h2']['size'])
235
- pdf.set_text_color(*styles['h2']['color'])
236
- pdf.cell(0, 10, line[4:-5], ln=1)
237
- pdf.ln(3)
238
- elif line.startswith('<h3>'):
239
- pdf.set_font(base_font, 'B', styles['h3']['size'])
240
- pdf.set_text_color(*styles['h3']['color'])
241
- pdf.cell(0, 10, line[4:-5], ln=1)
242
- pdf.ln(2)
243
- elif line.startswith('<li>'):
244
- pdf.set_font(base_font, '', styles['body']['size'])
245
- pdf.set_text_color(0, 0, 0)
246
- pdf.cell(10, 6, '•')
247
- pdf.multi_cell(0, 6, line[4:-5].strip(), new_x=XPos.LEFT)
248
- elif line.startswith('<p>'):
249
- pdf.set_font(base_font, '', styles['body']['size'])
250
- pdf.set_text_color(0, 0, 0)
251
- pdf.multi_cell(0, 6, line[3:-4].strip(), new_x=XPos.LEFT)
252
- pdf.ln(4)
253
-
254
- # Output PDF to a bytes buffer
255
- pdf_buffer = BytesIO()
256
- try:
257
- pdf_output = pdf.output(dest='S').encode('utf-8')
258
- except UnicodeEncodeError:
259
- pdf_output = pdf.output(dest='S').encode('utf-8', errors='replace')
260
 
261
- pdf_buffer.write(pdf_output)
262
- pdf_buffer.seek(0)
263
- return pdf_buffer
 
 
 
 
264
 
265
  def main():
266
  st.title("Quantitlytix AI")
 
4
  import time
5
  from datetime import datetime, date, timedelta
6
  from io import BytesIO
7
+ import requests
8
  import pandas as pd
9
  import streamlit as st
10
  import google.generativeai as genai
 
105
  raise
106
 
107
 
108
+
109
  def create_pdf_report(report_text):
110
+
111
  """
112
+ Create a PDF from markdown text using the md-to-pdf API.
113
 
114
  Args:
115
  report_text (str): Markdown formatted report text
 
117
  Returns:
118
  BytesIO: PDF file in memory buffer
119
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
+ api_url = "https://md-to-pdf.fly.dev"
122
+ css = """
123
+ h1, h2 {
124
+ color: MidnightBlue;
125
+ }
126
 
127
+ table {
128
+ border-collapse: collapse;
129
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
+ table, th, td {
132
+ border: 1px solid DimGray;
133
+ }
134
+
135
+ th, td {
136
+ text-align: left;
137
+ padding: 1em;
138
+ }
139
+ """
140
+ payload = {
141
+ 'markdown': report_text,
142
+ 'engine': 'weasyprint',
143
+ 'css': css
144
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
+ response = requests.post(api_url, data=payload)
147
+ if response.status_code == 200:
148
+ # Return the PDF in a BytesIO buffer
149
+ from io import BytesIO
150
+ return BytesIO(response.content)
151
+ else:
152
+ raise Exception(f"Failed to generate PDF: {response.status_code} - {response.text}")
153
 
154
  def main():
155
  st.title("Quantitlytix AI")