rairo committed on
Commit
0b914c1
·
verified ·
1 Parent(s): 5e521b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -52
app.py CHANGED
@@ -14,6 +14,7 @@ from fpdf.enums import XPos, YPos
14
  import markdown
15
  from google.api_core import exceptions
16
 
 
17
  # Configure API key for Gemini
18
  api_key = os.getenv('Gemini')
19
 
@@ -81,8 +82,8 @@ Generate a detailed {statement_type} report for the period from {start_date.strf
81
 
82
  Specific Formatting and Content Requirements:
83
 
84
- Standard Accounting Structure (South Africa Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
85
- Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
86
  Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "R" for South African Rand if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
87
  Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
88
  Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
@@ -103,8 +104,10 @@ Do not name the company if name is not there and return just the report and noth
103
  else:
104
  raise
105
 
 
106
  def create_pdf_report(report_text):
107
- """Create PDF from markdown text with proper Unicode support and table handling
 
108
 
109
  Args:
110
  report_text (str): Markdown formatted report text
@@ -115,80 +118,71 @@ def create_pdf_report(report_text):
115
  # Convert markdown to HTML with table support
116
  html_content = markdown.markdown(report_text, extensions=['tables'])
117
 
118
- # Create PDF with proper configuration
119
  pdf = FPDF()
120
  pdf.add_page()
121
  pdf.set_auto_page_break(auto=True, margin=15)
122
 
123
- # Configure fonts with fallbacks
124
  try:
125
- # Try loading Noto Sans (must be in same directory)
126
  pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
127
  pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
128
  base_font = "NotoSans"
129
  except RuntimeError:
130
- # Fallback to Arial if Noto Sans not available
131
  base_font = "Arial"
132
  if base_font not in pdf.fonts:
133
  pdf.add_font("Arial", style="", fname="arial.ttf", uni=True)
134
  pdf.add_font("Arial", style="B", fname="arialbd.ttf", uni=True)
135
 
136
- # Set default styles
137
  styles = {
138
- 'h1': {'size': 16, 'color': (25, 25, 112)}, # MidnightBlue
139
  'h2': {'size': 14, 'color': (25, 25, 112)},
140
  'h3': {'size': 12, 'color': (25, 25, 112)},
141
  'body': {'size': 10},
142
  'table': {
143
  'cell_margin': 2,
144
- 'header_color': (245, 245, 245), # Light gray
145
  'row_height': 8,
146
  'border': 1
147
  }
148
  }
149
 
150
- # Calculate available page width (considering margins)
151
  effective_page_width = pdf.w - 2 * pdf.l_margin
152
-
153
  def render_table_row(row_data, is_header=False):
154
- """Helper to render a single table row with auto-sizing
155
-
156
- Args:
157
- row_data (list): List of cell contents
158
- is_header (bool): Whether this is a header row
159
  """
160
  col_count = len(row_data)
161
- col_width = effective_page_width / max(col_count, 1) # Avoid division by zero
162
 
163
- # Set font style for header vs body
164
  pdf.set_font(base_font, 'B' if is_header else '', styles['body']['size'])
165
-
166
- # Track starting position
167
  start_y = pdf.y
168
 
169
- # Find maximum number of lines needed for any cell in this row
170
  max_lines = 1
171
  for cell in row_data:
 
172
  lines = pdf.multi_cell(
173
  w=col_width,
174
  h=styles['table']['row_height'],
175
  txt=cell.strip(),
176
- border=0, # We'll draw borders manually
177
  align='L',
178
  fill=False,
179
- split_only=True
 
180
  )
181
  max_lines = max(max_lines, len(lines))
182
 
183
- # Calculate total row height needed
184
  row_height = styles['table']['row_height'] * max_lines
185
 
186
- # Draw each cell
187
  for i, cell in enumerate(row_data):
188
- # Position cursor for this cell
189
  pdf.set_xy(pdf.l_margin + i * col_width, start_y)
190
-
191
- # Draw cell with border and fill
192
  pdf.multi_cell(
193
  w=col_width,
194
  h=styles['table']['row_height'],
@@ -196,46 +190,41 @@ def create_pdf_report(report_text):
196
  border=styles['table']['border'],
197
  align='L',
198
  fill=is_header,
199
- max_line_height=styles['table']['row_height']
 
200
  )
201
 
202
- # Move to next line position
203
  pdf.set_xy(pdf.l_margin, start_y + row_height)
204
-
205
- # Parse HTML content
206
  current_table = []
207
  in_table = False
208
 
209
  for line in html_content.split('\n'):
210
  line = line.strip()
211
-
212
- # Handle tables
213
  if line.startswith('<table>'):
214
  in_table = True
215
  current_table = []
216
  elif line.startswith('</table>'):
217
  in_table = False
218
  if current_table:
219
- # Process header row first if exists
220
  header = current_table[0] if any('<th>' in row for row in current_table[:1]) else []
221
  if header:
222
  render_table_row(header, is_header=True)
223
- current_table = current_table[1:] # Remove header from body rows
224
-
225
- # Process body rows
226
  for row in current_table:
227
  render_table_row(row)
228
- pdf.ln(5) # Add space after table
229
  current_table = []
230
  elif in_table and line.startswith('<tr>'):
231
- # Clean and split cells
232
  cells = []
233
- for cell in line[4:-5].split('</td>')[:-1]: # Split and remove empty last element
 
234
  clean_cell = cell.replace('<td>', '').replace('<th>', '').strip()
235
  cells.append(clean_cell)
236
  current_table.append(cells)
237
-
238
- # Handle headers
239
  elif line.startswith('<h1>'):
240
  pdf.set_font(base_font, 'B', styles['h1']['size'])
241
  pdf.set_text_color(*styles['h1']['color'])
@@ -251,22 +240,18 @@ def create_pdf_report(report_text):
251
  pdf.set_text_color(*styles['h3']['color'])
252
  pdf.cell(0, 10, line[4:-5], ln=1)
253
  pdf.ln(2)
254
-
255
- # Handle list items
256
  elif line.startswith('<li>'):
257
  pdf.set_font(base_font, '', styles['body']['size'])
258
  pdf.set_text_color(0, 0, 0)
259
  pdf.cell(10, 6, '•')
260
- pdf.multi_cell(0, 6, line[4:-5].strip())
261
-
262
- # Handle paragraphs
263
  elif line.startswith('<p>'):
264
  pdf.set_font(base_font, '', styles['body']['size'])
265
  pdf.set_text_color(0, 0, 0)
266
- pdf.multi_cell(0, 6, line[3:-4].strip())
267
  pdf.ln(4)
268
 
269
- # Create output buffer
270
  pdf_buffer = BytesIO()
271
  try:
272
  pdf_output = pdf.output(dest='S').encode('utf-8')
@@ -275,7 +260,6 @@ def create_pdf_report(report_text):
275
 
276
  pdf_buffer.write(pdf_output)
277
  pdf_buffer.seek(0)
278
-
279
  return pdf_buffer
280
 
281
  def main():
 
14
  import markdown
15
  from google.api_core import exceptions
16
 
17
+
18
  # Configure API key for Gemini
19
  api_key = os.getenv('Gemini')
20
 
 
82
 
83
  Specific Formatting and Content Requirements:
84
 
85
+ Standard Accounting Structure (South Africa Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, in nice tables considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
86
+ Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
87
  Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "R" for South African Rand if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
88
  Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
89
  Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
 
104
  else:
105
  raise
106
 
107
+
108
  def create_pdf_report(report_text):
109
+ """
110
+ Create a PDF from markdown text with proper Unicode support and table handling.
111
 
112
  Args:
113
  report_text (str): Markdown formatted report text
 
118
  # Convert markdown to HTML with table support
119
  html_content = markdown.markdown(report_text, extensions=['tables'])
120
 
121
+ # Create PDF and add first page
122
  pdf = FPDF()
123
  pdf.add_page()
124
  pdf.set_auto_page_break(auto=True, margin=15)
125
 
126
+ # Configure fonts with fallback: try NotoSans, otherwise use Arial.
127
  try:
 
128
  pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
129
  pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
130
  base_font = "NotoSans"
131
  except RuntimeError:
 
132
  base_font = "Arial"
133
  if base_font not in pdf.fonts:
134
  pdf.add_font("Arial", style="", fname="arial.ttf", uni=True)
135
  pdf.add_font("Arial", style="B", fname="arialbd.ttf", uni=True)
136
 
137
+ # Define default styles
138
  styles = {
139
+ 'h1': {'size': 16, 'color': (25, 25, 112)},
140
  'h2': {'size': 14, 'color': (25, 25, 112)},
141
  'h3': {'size': 12, 'color': (25, 25, 112)},
142
  'body': {'size': 10},
143
  'table': {
144
  'cell_margin': 2,
145
+ 'header_color': (245, 245, 245),
146
  'row_height': 8,
147
  'border': 1
148
  }
149
  }
150
 
151
+ # Calculate available page width
152
  effective_page_width = pdf.w - 2 * pdf.l_margin
153
+
154
  def render_table_row(row_data, is_header=False):
155
+ """
156
+ Render a single table row, auto-sizing each cell.
 
 
 
157
  """
158
  col_count = len(row_data)
159
+ col_width = effective_page_width / max(col_count, 1)
160
 
161
+ # Set font: bold for header rows, normal otherwise.
162
  pdf.set_font(base_font, 'B' if is_header else '', styles['body']['size'])
 
 
163
  start_y = pdf.y
164
 
165
+ # First pass: compute maximum number of lines needed for any cell
166
  max_lines = 1
167
  for cell in row_data:
168
+ # We use split_only=True so that multi_cell returns the lines without printing.
169
  lines = pdf.multi_cell(
170
  w=col_width,
171
  h=styles['table']['row_height'],
172
  txt=cell.strip(),
173
+ border=0,
174
  align='L',
175
  fill=False,
176
+ split_only=True,
177
+ new_x=XPos.LEFT
178
  )
179
  max_lines = max(max_lines, len(lines))
180
 
 
181
  row_height = styles['table']['row_height'] * max_lines
182
 
183
+ # Second pass: output each cell, resetting x to the left margin for each cell.
184
  for i, cell in enumerate(row_data):
 
185
  pdf.set_xy(pdf.l_margin + i * col_width, start_y)
 
 
186
  pdf.multi_cell(
187
  w=col_width,
188
  h=styles['table']['row_height'],
 
190
  border=styles['table']['border'],
191
  align='L',
192
  fill=is_header,
193
+ max_line_height=styles['table']['row_height'],
194
+ new_x=XPos.LEFT
195
  )
196
 
197
+ # Move the cursor to the beginning of the next line
198
  pdf.set_xy(pdf.l_margin, start_y + row_height)
199
+
200
+ # Parse the HTML content line by line
201
  current_table = []
202
  in_table = False
203
 
204
  for line in html_content.split('\n'):
205
  line = line.strip()
 
 
206
  if line.startswith('<table>'):
207
  in_table = True
208
  current_table = []
209
  elif line.startswith('</table>'):
210
  in_table = False
211
  if current_table:
212
+ # Check if first row contains header cells
213
  header = current_table[0] if any('<th>' in row for row in current_table[:1]) else []
214
  if header:
215
  render_table_row(header, is_header=True)
216
+ current_table = current_table[1:]
 
 
217
  for row in current_table:
218
  render_table_row(row)
219
+ pdf.ln(5)
220
  current_table = []
221
  elif in_table and line.startswith('<tr>'):
 
222
  cells = []
223
+ # Remove the <tr> and </tr> tags and split cells on </td>
224
+ for cell in line[4:-5].split('</td>')[:-1]:
225
  clean_cell = cell.replace('<td>', '').replace('<th>', '').strip()
226
  cells.append(clean_cell)
227
  current_table.append(cells)
 
 
228
  elif line.startswith('<h1>'):
229
  pdf.set_font(base_font, 'B', styles['h1']['size'])
230
  pdf.set_text_color(*styles['h1']['color'])
 
240
  pdf.set_text_color(*styles['h3']['color'])
241
  pdf.cell(0, 10, line[4:-5], ln=1)
242
  pdf.ln(2)
 
 
243
  elif line.startswith('<li>'):
244
  pdf.set_font(base_font, '', styles['body']['size'])
245
  pdf.set_text_color(0, 0, 0)
246
  pdf.cell(10, 6, '•')
247
+ pdf.multi_cell(0, 6, line[4:-5].strip(), new_x=XPos.LEFT)
 
 
248
  elif line.startswith('<p>'):
249
  pdf.set_font(base_font, '', styles['body']['size'])
250
  pdf.set_text_color(0, 0, 0)
251
+ pdf.multi_cell(0, 6, line[3:-4].strip(), new_x=XPos.LEFT)
252
  pdf.ln(4)
253
 
254
+ # Output PDF to a bytes buffer
255
  pdf_buffer = BytesIO()
256
  try:
257
  pdf_output = pdf.output(dest='S').encode('utf-8')
 
260
 
261
  pdf_buffer.write(pdf_output)
262
  pdf_buffer.seek(0)
 
263
  return pdf_buffer
264
 
265
  def main():