Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,6 +14,7 @@ from fpdf.enums import XPos, YPos
|
|
| 14 |
import markdown
|
| 15 |
from google.api_core import exceptions
|
| 16 |
|
|
|
|
| 17 |
# Configure API key for Gemini
|
| 18 |
api_key = os.getenv('Gemini')
|
| 19 |
|
|
@@ -81,8 +82,8 @@ Generate a detailed {statement_type} report for the period from {start_date.strf
|
|
| 81 |
|
| 82 |
Specific Formatting and Content Requirements:
|
| 83 |
|
| 84 |
-
Standard Accounting Structure (South Africa Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
|
| 85 |
-
Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
|
| 86 |
Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "R" for South African Rand if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
|
| 87 |
Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
|
| 88 |
Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
|
|
@@ -103,8 +104,10 @@ Do not name the company if name is not there and return just the report and noth
|
|
| 103 |
else:
|
| 104 |
raise
|
| 105 |
|
|
|
|
| 106 |
def create_pdf_report(report_text):
|
| 107 |
-
"""
|
|
|
|
| 108 |
|
| 109 |
Args:
|
| 110 |
report_text (str): Markdown formatted report text
|
|
@@ -115,80 +118,71 @@ def create_pdf_report(report_text):
|
|
| 115 |
# Convert markdown to HTML with table support
|
| 116 |
html_content = markdown.markdown(report_text, extensions=['tables'])
|
| 117 |
|
| 118 |
-
# Create PDF
|
| 119 |
pdf = FPDF()
|
| 120 |
pdf.add_page()
|
| 121 |
pdf.set_auto_page_break(auto=True, margin=15)
|
| 122 |
|
| 123 |
-
# Configure fonts with
|
| 124 |
try:
|
| 125 |
-
# Try loading Noto Sans (must be in same directory)
|
| 126 |
pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
|
| 127 |
pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
|
| 128 |
base_font = "NotoSans"
|
| 129 |
except RuntimeError:
|
| 130 |
-
# Fallback to Arial if Noto Sans not available
|
| 131 |
base_font = "Arial"
|
| 132 |
if base_font not in pdf.fonts:
|
| 133 |
pdf.add_font("Arial", style="", fname="arial.ttf", uni=True)
|
| 134 |
pdf.add_font("Arial", style="B", fname="arialbd.ttf", uni=True)
|
| 135 |
|
| 136 |
-
#
|
| 137 |
styles = {
|
| 138 |
-
'h1': {'size': 16, 'color': (25, 25, 112)},
|
| 139 |
'h2': {'size': 14, 'color': (25, 25, 112)},
|
| 140 |
'h3': {'size': 12, 'color': (25, 25, 112)},
|
| 141 |
'body': {'size': 10},
|
| 142 |
'table': {
|
| 143 |
'cell_margin': 2,
|
| 144 |
-
'header_color': (245, 245, 245),
|
| 145 |
'row_height': 8,
|
| 146 |
'border': 1
|
| 147 |
}
|
| 148 |
}
|
| 149 |
|
| 150 |
-
# Calculate available page width
|
| 151 |
effective_page_width = pdf.w - 2 * pdf.l_margin
|
| 152 |
-
|
| 153 |
def render_table_row(row_data, is_header=False):
|
| 154 |
-
"""
|
| 155 |
-
|
| 156 |
-
Args:
|
| 157 |
-
row_data (list): List of cell contents
|
| 158 |
-
is_header (bool): Whether this is a header row
|
| 159 |
"""
|
| 160 |
col_count = len(row_data)
|
| 161 |
-
col_width = effective_page_width / max(col_count, 1)
|
| 162 |
|
| 163 |
-
# Set font
|
| 164 |
pdf.set_font(base_font, 'B' if is_header else '', styles['body']['size'])
|
| 165 |
-
|
| 166 |
-
# Track starting position
|
| 167 |
start_y = pdf.y
|
| 168 |
|
| 169 |
-
#
|
| 170 |
max_lines = 1
|
| 171 |
for cell in row_data:
|
|
|
|
| 172 |
lines = pdf.multi_cell(
|
| 173 |
w=col_width,
|
| 174 |
h=styles['table']['row_height'],
|
| 175 |
txt=cell.strip(),
|
| 176 |
-
border=0,
|
| 177 |
align='L',
|
| 178 |
fill=False,
|
| 179 |
-
split_only=True
|
|
|
|
| 180 |
)
|
| 181 |
max_lines = max(max_lines, len(lines))
|
| 182 |
|
| 183 |
-
# Calculate total row height needed
|
| 184 |
row_height = styles['table']['row_height'] * max_lines
|
| 185 |
|
| 186 |
-
#
|
| 187 |
for i, cell in enumerate(row_data):
|
| 188 |
-
# Position cursor for this cell
|
| 189 |
pdf.set_xy(pdf.l_margin + i * col_width, start_y)
|
| 190 |
-
|
| 191 |
-
# Draw cell with border and fill
|
| 192 |
pdf.multi_cell(
|
| 193 |
w=col_width,
|
| 194 |
h=styles['table']['row_height'],
|
|
@@ -196,46 +190,41 @@ def create_pdf_report(report_text):
|
|
| 196 |
border=styles['table']['border'],
|
| 197 |
align='L',
|
| 198 |
fill=is_header,
|
| 199 |
-
max_line_height=styles['table']['row_height']
|
|
|
|
| 200 |
)
|
| 201 |
|
| 202 |
-
# Move to next line
|
| 203 |
pdf.set_xy(pdf.l_margin, start_y + row_height)
|
| 204 |
-
|
| 205 |
-
# Parse HTML content
|
| 206 |
current_table = []
|
| 207 |
in_table = False
|
| 208 |
|
| 209 |
for line in html_content.split('\n'):
|
| 210 |
line = line.strip()
|
| 211 |
-
|
| 212 |
-
# Handle tables
|
| 213 |
if line.startswith('<table>'):
|
| 214 |
in_table = True
|
| 215 |
current_table = []
|
| 216 |
elif line.startswith('</table>'):
|
| 217 |
in_table = False
|
| 218 |
if current_table:
|
| 219 |
-
#
|
| 220 |
header = current_table[0] if any('<th>' in row for row in current_table[:1]) else []
|
| 221 |
if header:
|
| 222 |
render_table_row(header, is_header=True)
|
| 223 |
-
current_table = current_table[1:]
|
| 224 |
-
|
| 225 |
-
# Process body rows
|
| 226 |
for row in current_table:
|
| 227 |
render_table_row(row)
|
| 228 |
-
pdf.ln(5)
|
| 229 |
current_table = []
|
| 230 |
elif in_table and line.startswith('<tr>'):
|
| 231 |
-
# Clean and split cells
|
| 232 |
cells = []
|
| 233 |
-
|
|
|
|
| 234 |
clean_cell = cell.replace('<td>', '').replace('<th>', '').strip()
|
| 235 |
cells.append(clean_cell)
|
| 236 |
current_table.append(cells)
|
| 237 |
-
|
| 238 |
-
# Handle headers
|
| 239 |
elif line.startswith('<h1>'):
|
| 240 |
pdf.set_font(base_font, 'B', styles['h1']['size'])
|
| 241 |
pdf.set_text_color(*styles['h1']['color'])
|
|
@@ -251,22 +240,18 @@ def create_pdf_report(report_text):
|
|
| 251 |
pdf.set_text_color(*styles['h3']['color'])
|
| 252 |
pdf.cell(0, 10, line[4:-5], ln=1)
|
| 253 |
pdf.ln(2)
|
| 254 |
-
|
| 255 |
-
# Handle list items
|
| 256 |
elif line.startswith('<li>'):
|
| 257 |
pdf.set_font(base_font, '', styles['body']['size'])
|
| 258 |
pdf.set_text_color(0, 0, 0)
|
| 259 |
pdf.cell(10, 6, '•')
|
| 260 |
-
pdf.multi_cell(0, 6, line[4:-5].strip())
|
| 261 |
-
|
| 262 |
-
# Handle paragraphs
|
| 263 |
elif line.startswith('<p>'):
|
| 264 |
pdf.set_font(base_font, '', styles['body']['size'])
|
| 265 |
pdf.set_text_color(0, 0, 0)
|
| 266 |
-
pdf.multi_cell(0, 6, line[3:-4].strip())
|
| 267 |
pdf.ln(4)
|
| 268 |
|
| 269 |
-
#
|
| 270 |
pdf_buffer = BytesIO()
|
| 271 |
try:
|
| 272 |
pdf_output = pdf.output(dest='S').encode('utf-8')
|
|
@@ -275,7 +260,6 @@ def create_pdf_report(report_text):
|
|
| 275 |
|
| 276 |
pdf_buffer.write(pdf_output)
|
| 277 |
pdf_buffer.seek(0)
|
| 278 |
-
|
| 279 |
return pdf_buffer
|
| 280 |
|
| 281 |
def main():
|
|
|
|
| 14 |
import markdown
|
| 15 |
from google.api_core import exceptions
|
| 16 |
|
| 17 |
+
|
| 18 |
# Configure API key for Gemini
|
| 19 |
api_key = os.getenv('Gemini')
|
| 20 |
|
|
|
|
| 82 |
|
| 83 |
Specific Formatting and Content Requirements:
|
| 84 |
|
| 85 |
+
Standard Accounting Structure (South Africa Focus): Organize the {statement_type} according to typical accounting practices followed in South Africa (e.g., for an Income Statement, clearly separate Revenue, Cost of Goods Sold, Gross Profit, Operating Expenses, and Net Income, in nice tables considering local terminology where applicable). If unsure of specific local variations, adhere to widely accepted international accounting structures.
|
| 86 |
+
Clear Headings and Subheadings: Use distinct and informative headings and subheadings in English to delineate different sections of the report. Ensure these are visually prominent.
|
| 87 |
Consistent Formatting: Maintain consistent formatting for monetary values (e.g., using "R" for South African Rand if applicable and discernible from the data, comma separators for thousands), dates, and alignment.
|
| 88 |
Totals and Subtotals: Clearly display totals for relevant categories and subtotals where appropriate to provide a clear understanding of the financial performance or position.
|
| 89 |
Descriptive Line Items: Use clear and concise descriptions for each transaction or aggregated account based on the provided JSON data.
|
|
|
|
| 104 |
else:
|
| 105 |
raise
|
| 106 |
|
| 107 |
+
|
| 108 |
def create_pdf_report(report_text):
|
| 109 |
+
"""
|
| 110 |
+
Create a PDF from markdown text with proper Unicode support and table handling.
|
| 111 |
|
| 112 |
Args:
|
| 113 |
report_text (str): Markdown formatted report text
|
|
|
|
| 118 |
# Convert markdown to HTML with table support
|
| 119 |
html_content = markdown.markdown(report_text, extensions=['tables'])
|
| 120 |
|
| 121 |
+
# Create PDF and add first page
|
| 122 |
pdf = FPDF()
|
| 123 |
pdf.add_page()
|
| 124 |
pdf.set_auto_page_break(auto=True, margin=15)
|
| 125 |
|
| 126 |
+
# Configure fonts with fallback: try NotoSans, otherwise use Arial.
|
| 127 |
try:
|
|
|
|
| 128 |
pdf.add_font("NotoSans", style="", fname="NotoSans-Regular.ttf", uni=True)
|
| 129 |
pdf.add_font("NotoSans", style="B", fname="NotoSans-Bold.ttf", uni=True)
|
| 130 |
base_font = "NotoSans"
|
| 131 |
except RuntimeError:
|
|
|
|
| 132 |
base_font = "Arial"
|
| 133 |
if base_font not in pdf.fonts:
|
| 134 |
pdf.add_font("Arial", style="", fname="arial.ttf", uni=True)
|
| 135 |
pdf.add_font("Arial", style="B", fname="arialbd.ttf", uni=True)
|
| 136 |
|
| 137 |
+
# Define default styles
|
| 138 |
styles = {
|
| 139 |
+
'h1': {'size': 16, 'color': (25, 25, 112)},
|
| 140 |
'h2': {'size': 14, 'color': (25, 25, 112)},
|
| 141 |
'h3': {'size': 12, 'color': (25, 25, 112)},
|
| 142 |
'body': {'size': 10},
|
| 143 |
'table': {
|
| 144 |
'cell_margin': 2,
|
| 145 |
+
'header_color': (245, 245, 245),
|
| 146 |
'row_height': 8,
|
| 147 |
'border': 1
|
| 148 |
}
|
| 149 |
}
|
| 150 |
|
| 151 |
+
# Calculate available page width
|
| 152 |
effective_page_width = pdf.w - 2 * pdf.l_margin
|
| 153 |
+
|
| 154 |
def render_table_row(row_data, is_header=False):
|
| 155 |
+
"""
|
| 156 |
+
Render a single table row, auto-sizing each cell.
|
|
|
|
|
|
|
|
|
|
| 157 |
"""
|
| 158 |
col_count = len(row_data)
|
| 159 |
+
col_width = effective_page_width / max(col_count, 1)
|
| 160 |
|
| 161 |
+
# Set font: bold for header rows, normal otherwise.
|
| 162 |
pdf.set_font(base_font, 'B' if is_header else '', styles['body']['size'])
|
|
|
|
|
|
|
| 163 |
start_y = pdf.y
|
| 164 |
|
| 165 |
+
# First pass: compute maximum number of lines needed for any cell
|
| 166 |
max_lines = 1
|
| 167 |
for cell in row_data:
|
| 168 |
+
# We use split_only=True so that multi_cell returns the lines without printing.
|
| 169 |
lines = pdf.multi_cell(
|
| 170 |
w=col_width,
|
| 171 |
h=styles['table']['row_height'],
|
| 172 |
txt=cell.strip(),
|
| 173 |
+
border=0,
|
| 174 |
align='L',
|
| 175 |
fill=False,
|
| 176 |
+
split_only=True,
|
| 177 |
+
new_x=XPos.LEFT
|
| 178 |
)
|
| 179 |
max_lines = max(max_lines, len(lines))
|
| 180 |
|
|
|
|
| 181 |
row_height = styles['table']['row_height'] * max_lines
|
| 182 |
|
| 183 |
+
# Second pass: output each cell, resetting x to the left margin for each cell.
|
| 184 |
for i, cell in enumerate(row_data):
|
|
|
|
| 185 |
pdf.set_xy(pdf.l_margin + i * col_width, start_y)
|
|
|
|
|
|
|
| 186 |
pdf.multi_cell(
|
| 187 |
w=col_width,
|
| 188 |
h=styles['table']['row_height'],
|
|
|
|
| 190 |
border=styles['table']['border'],
|
| 191 |
align='L',
|
| 192 |
fill=is_header,
|
| 193 |
+
max_line_height=styles['table']['row_height'],
|
| 194 |
+
new_x=XPos.LEFT
|
| 195 |
)
|
| 196 |
|
| 197 |
+
# Move the cursor to the beginning of the next line
|
| 198 |
pdf.set_xy(pdf.l_margin, start_y + row_height)
|
| 199 |
+
|
| 200 |
+
# Parse the HTML content line by line
|
| 201 |
current_table = []
|
| 202 |
in_table = False
|
| 203 |
|
| 204 |
for line in html_content.split('\n'):
|
| 205 |
line = line.strip()
|
|
|
|
|
|
|
| 206 |
if line.startswith('<table>'):
|
| 207 |
in_table = True
|
| 208 |
current_table = []
|
| 209 |
elif line.startswith('</table>'):
|
| 210 |
in_table = False
|
| 211 |
if current_table:
|
| 212 |
+
# Check if first row contains header cells
|
| 213 |
header = current_table[0] if any('<th>' in row for row in current_table[:1]) else []
|
| 214 |
if header:
|
| 215 |
render_table_row(header, is_header=True)
|
| 216 |
+
current_table = current_table[1:]
|
|
|
|
|
|
|
| 217 |
for row in current_table:
|
| 218 |
render_table_row(row)
|
| 219 |
+
pdf.ln(5)
|
| 220 |
current_table = []
|
| 221 |
elif in_table and line.startswith('<tr>'):
|
|
|
|
| 222 |
cells = []
|
| 223 |
+
# Remove the <tr> and </tr> tags and split cells on </td>
|
| 224 |
+
for cell in line[4:-5].split('</td>')[:-1]:
|
| 225 |
clean_cell = cell.replace('<td>', '').replace('<th>', '').strip()
|
| 226 |
cells.append(clean_cell)
|
| 227 |
current_table.append(cells)
|
|
|
|
|
|
|
| 228 |
elif line.startswith('<h1>'):
|
| 229 |
pdf.set_font(base_font, 'B', styles['h1']['size'])
|
| 230 |
pdf.set_text_color(*styles['h1']['color'])
|
|
|
|
| 240 |
pdf.set_text_color(*styles['h3']['color'])
|
| 241 |
pdf.cell(0, 10, line[4:-5], ln=1)
|
| 242 |
pdf.ln(2)
|
|
|
|
|
|
|
| 243 |
elif line.startswith('<li>'):
|
| 244 |
pdf.set_font(base_font, '', styles['body']['size'])
|
| 245 |
pdf.set_text_color(0, 0, 0)
|
| 246 |
pdf.cell(10, 6, '•')
|
| 247 |
+
pdf.multi_cell(0, 6, line[4:-5].strip(), new_x=XPos.LEFT)
|
|
|
|
|
|
|
| 248 |
elif line.startswith('<p>'):
|
| 249 |
pdf.set_font(base_font, '', styles['body']['size'])
|
| 250 |
pdf.set_text_color(0, 0, 0)
|
| 251 |
+
pdf.multi_cell(0, 6, line[3:-4].strip(), new_x=XPos.LEFT)
|
| 252 |
pdf.ln(4)
|
| 253 |
|
| 254 |
+
# Output PDF to a bytes buffer
|
| 255 |
pdf_buffer = BytesIO()
|
| 256 |
try:
|
| 257 |
pdf_output = pdf.output(dest='S').encode('utf-8')
|
|
|
|
| 260 |
|
| 261 |
pdf_buffer.write(pdf_output)
|
| 262 |
pdf_buffer.seek(0)
|
|
|
|
| 263 |
return pdf_buffer
|
| 264 |
|
| 265 |
def main():
|