rairo committed on
Commit
5e521b9
·
verified ·
1 Parent(s): 1de63e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -49
app.py CHANGED
@@ -104,88 +104,175 @@ Do not name the company if name is not there and return just the report and noth
104
  raise
105
 
106
  def create_pdf_report(report_text):
107
- """Create PDF from markdown text with proper Unicode support"""
108
- # Convert markdown to HTML
 
 
 
 
 
 
 
109
  html_content = markdown.markdown(report_text, extensions=['tables'])
110
 
111
- # Create PDF with better UTF-8 support
112
  pdf = FPDF()
113
  pdf.add_page()
 
114
 
115
- # Add Noto Sans fonts (must be available in the same directory)
116
  try:
 
117
  pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
118
  pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
119
- pdf.set_font("NotoSans", size=12)
120
- except:
121
- # Fallback to built-in fonts if Noto Sans not available
122
- pdf.set_font("Arial", size=12)
 
 
 
123
 
124
- # Basic styling
125
  styles = {
126
- 'h1': {'size': 24, 'color': (25, 25, 112)}, # MidnightBlue
127
- 'h2': {'size': 20, 'color': (25, 25, 112)},
128
- 'h3': {'size': 16, 'color': (25, 25, 112)},
129
- 'table': {'cell_width': 40, 'header_color': (245, 245, 245)},
130
- 'th': {'border': 1, 'align': 'L', 'fill': True},
131
- 'td': {'border': 1, 'align': 'L'}
 
 
 
 
132
  }
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  # Parse HTML content
 
135
  in_table = False
 
136
  for line in html_content.split('\n'):
137
  line = line.strip()
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  # Handle headers
140
- if line.startswith('<h1>'):
141
- pdf.set_font(style="B", size=styles['h1']['size'])
142
  pdf.set_text_color(*styles['h1']['color'])
143
- pdf.cell(0, 10, line[4:-5], new_x=XPos.LMARGIN, new_y=YPos.NEXT)
144
  pdf.ln(5)
145
  elif line.startswith('<h2>'):
146
- pdf.set_font(style="B", size=styles['h2']['size'])
147
  pdf.set_text_color(*styles['h2']['color'])
148
- pdf.cell(0, 10, line[4:-5], new_x=XPos.LMARGIN, new_y=YPos.NEXT)
149
  pdf.ln(3)
150
  elif line.startswith('<h3>'):
151
- pdf.set_font(style="B", size=styles['h3']['size'])
152
  pdf.set_text_color(*styles['h3']['color'])
153
- pdf.cell(0, 10, line[4:-5], new_x=XPos.LMARGIN, new_y=YPos.NEXT)
154
  pdf.ln(2)
155
 
156
- # Handle tables
157
- elif line.startswith('<table>'):
158
- in_table = True
159
- col_count = line.count('<th>') # Simple column count
160
- elif line.startswith('</table>'):
161
- in_table = False
162
- pdf.ln(10)
163
- elif in_table:
164
- if line.startswith('<tr>'):
165
- pdf.set_font(style="B" if '<th>' in line else "")
166
- cells = line.replace('<tr>','').replace('</tr>','').split('</td>')[:-1]
167
- for cell in cells:
168
- content = cell.replace('<td>','').replace('<th>','').strip()
169
- pdf.cell(styles['table']['cell_width'], 10, content,
170
- border=styles['td']['border'], align=styles['td']['align'])
171
- pdf.ln()
172
-
173
  # Handle list items
174
  elif line.startswith('<li>'):
175
- pdf.set_font(style="")
176
- pdf.cell(10, 10, '•', border=0)
177
- pdf.multi_cell(0, 10, line[4:-5].strip())
 
178
 
179
- # Handle regular text
180
  elif line.startswith('<p>'):
181
- pdf.set_font(style="")
182
  pdf.set_text_color(0, 0, 0)
183
- pdf.multi_cell(0, 10, line[3:-4].strip())
184
- pdf.ln(5)
185
-
186
- # Create BytesIO buffer with UTF-8 encoding
187
  pdf_buffer = BytesIO()
188
- pdf_output = pdf.output(dest='S').encode('utf-8', errors='replace')
 
 
 
 
189
  pdf_buffer.write(pdf_output)
190
  pdf_buffer.seek(0)
191
 
 
104
  raise
105
 
106
  def create_pdf_report(report_text):
107
+ """Create PDF from markdown text with proper Unicode support and table handling
108
+
109
+ Args:
110
+ report_text (str): Markdown formatted report text
111
+
112
+ Returns:
113
+ BytesIO: PDF file in memory buffer
114
+ """
115
+ # Convert markdown to HTML with table support
116
  html_content = markdown.markdown(report_text, extensions=['tables'])
117
 
118
+ # Create PDF with proper configuration
119
  pdf = FPDF()
120
  pdf.add_page()
121
+ pdf.set_auto_page_break(auto=True, margin=15)
122
 
123
+ # Configure fonts with fallbacks
124
  try:
125
+ # Try loading Noto Sans (must be in same directory)
126
  pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
127
  pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
128
+ base_font = "NotoSans"
129
+ except RuntimeError:
130
+ # Fallback to Arial if Noto Sans not available
131
+ base_font = "Arial"
132
+ if base_font not in pdf.fonts:
133
+ pdf.add_font("Arial", style="", fname="arial.ttf", uni=True)
134
+ pdf.add_font("Arial", style="B", fname="arialbd.ttf", uni=True)
135
 
136
+ # Set default styles
137
  styles = {
138
+ 'h1': {'size': 16, 'color': (25, 25, 112)}, # MidnightBlue
139
+ 'h2': {'size': 14, 'color': (25, 25, 112)},
140
+ 'h3': {'size': 12, 'color': (25, 25, 112)},
141
+ 'body': {'size': 10},
142
+ 'table': {
143
+ 'cell_margin': 2,
144
+ 'header_color': (245, 245, 245), # Light gray
145
+ 'row_height': 8,
146
+ 'border': 1
147
+ }
148
  }
149
 
150
+ # Calculate available page width (considering margins)
151
+ effective_page_width = pdf.w - 2 * pdf.l_margin
152
+
153
+ def render_table_row(row_data, is_header=False):
154
+ """Helper to render a single table row with auto-sizing
155
+
156
+ Args:
157
+ row_data (list): List of cell contents
158
+ is_header (bool): Whether this is a header row
159
+ """
160
+ col_count = len(row_data)
161
+ col_width = effective_page_width / max(col_count, 1) # Avoid division by zero
162
+
163
+ # Set font style for header vs body
164
+ pdf.set_font(base_font, 'B' if is_header else '', styles['body']['size'])
165
+
166
+ # Track starting position
167
+ start_y = pdf.y
168
+
169
+ # Find maximum number of lines needed for any cell in this row
170
+ max_lines = 1
171
+ for cell in row_data:
172
+ lines = pdf.multi_cell(
173
+ w=col_width,
174
+ h=styles['table']['row_height'],
175
+ txt=cell.strip(),
176
+ border=0, # We'll draw borders manually
177
+ align='L',
178
+ fill=False,
179
+ split_only=True
180
+ )
181
+ max_lines = max(max_lines, len(lines))
182
+
183
+ # Calculate total row height needed
184
+ row_height = styles['table']['row_height'] * max_lines
185
+
186
+ # Draw each cell
187
+ for i, cell in enumerate(row_data):
188
+ # Position cursor for this cell
189
+ pdf.set_xy(pdf.l_margin + i * col_width, start_y)
190
+
191
+ # Draw cell with border and fill
192
+ pdf.multi_cell(
193
+ w=col_width,
194
+ h=styles['table']['row_height'],
195
+ txt=cell.strip(),
196
+ border=styles['table']['border'],
197
+ align='L',
198
+ fill=is_header,
199
+ max_line_height=styles['table']['row_height']
200
+ )
201
+
202
+ # Move to next line position
203
+ pdf.set_xy(pdf.l_margin, start_y + row_height)
204
+
205
  # Parse HTML content
206
+ current_table = []
207
  in_table = False
208
+
209
  for line in html_content.split('\n'):
210
  line = line.strip()
211
 
212
+ # Handle tables
213
+ if line.startswith('<table>'):
214
+ in_table = True
215
+ current_table = []
216
+ elif line.startswith('</table>'):
217
+ in_table = False
218
+ if current_table:
219
+ # Process header row first if exists
220
+ header = current_table[0] if any('<th>' in row for row in current_table[:1]) else []
221
+ if header:
222
+ render_table_row(header, is_header=True)
223
+ current_table = current_table[1:] # Remove header from body rows
224
+
225
+ # Process body rows
226
+ for row in current_table:
227
+ render_table_row(row)
228
+ pdf.ln(5) # Add space after table
229
+ current_table = []
230
+ elif in_table and line.startswith('<tr>'):
231
+ # Clean and split cells
232
+ cells = []
233
+ for cell in line[4:-5].split('</td>')[:-1]: # Split and remove empty last element
234
+ clean_cell = cell.replace('<td>', '').replace('<th>', '').strip()
235
+ cells.append(clean_cell)
236
+ current_table.append(cells)
237
+
238
  # Handle headers
239
+ elif line.startswith('<h1>'):
240
+ pdf.set_font(base_font, 'B', styles['h1']['size'])
241
  pdf.set_text_color(*styles['h1']['color'])
242
+ pdf.cell(0, 10, line[4:-5], ln=1)
243
  pdf.ln(5)
244
  elif line.startswith('<h2>'):
245
+ pdf.set_font(base_font, 'B', styles['h2']['size'])
246
  pdf.set_text_color(*styles['h2']['color'])
247
+ pdf.cell(0, 10, line[4:-5], ln=1)
248
  pdf.ln(3)
249
  elif line.startswith('<h3>'):
250
+ pdf.set_font(base_font, 'B', styles['h3']['size'])
251
  pdf.set_text_color(*styles['h3']['color'])
252
+ pdf.cell(0, 10, line[4:-5], ln=1)
253
  pdf.ln(2)
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  # Handle list items
256
  elif line.startswith('<li>'):
257
+ pdf.set_font(base_font, '', styles['body']['size'])
258
+ pdf.set_text_color(0, 0, 0)
259
+ pdf.cell(10, 6, '•')
260
+ pdf.multi_cell(0, 6, line[4:-5].strip())
261
 
262
+ # Handle paragraphs
263
  elif line.startswith('<p>'):
264
+ pdf.set_font(base_font, '', styles['body']['size'])
265
  pdf.set_text_color(0, 0, 0)
266
+ pdf.multi_cell(0, 6, line[3:-4].strip())
267
+ pdf.ln(4)
268
+
269
+ # Create output buffer
270
  pdf_buffer = BytesIO()
271
+ try:
272
+ pdf_output = pdf.output(dest='S').encode('utf-8')
273
+ except UnicodeEncodeError:
274
+ pdf_output = pdf.output(dest='S').encode('utf-8', errors='replace')
275
+
276
  pdf_buffer.write(pdf_output)
277
  pdf_buffer.seek(0)
278