from __future__ import annotations

import re
from html import escape
from io import BytesIO

# Characters a Markdown thematic break (horizontal rule) may consist of.
_RULE_CHARS = frozenset("-_*")

# Heading prefixes and the line kind ``_classify_line`` reports for each.
_HEADING_PREFIXES = (
    ("# ", "title"),
    ("## ", "heading1"),
    ("### ", "heading2"),
)


def markdown_to_pdf_bytes(report_markdown: str) -> bytes:
    """Render a Markdown report to PDF and return the PDF file contents.

    The supported subset is line based: ``#``/``##``/``###`` headings,
    ``- ``/``* `` bullet items, pipe tables with a separator row, thematic
    breaks and plain paragraphs. Blank lines become small vertical gaps.

    Args:
        report_markdown: The Markdown source of the report.

    Returns:
        The bytes of the generated A4 PDF document.

    Raises:
        RuntimeError: If the input yields no content at all (e.g. ``""``).
    """
    # ReportLab is imported lazily so importing this module does not require it.
    from reportlab.lib import colors
    from reportlab.lib.enums import TA_LEFT
    from reportlab.lib.pagesizes import A4
    from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
    from reportlab.lib.units import inch
    from reportlab.platypus import (
        ListFlowable,
        ListItem,
        Paragraph,
        SimpleDocTemplate,
        Spacer,
        Table,
        TableStyle,
    )

    buffer = BytesIO()
    styles = getSampleStyleSheet()
    body_style = ParagraphStyle(
        "ReportBody",
        parent=styles["BodyText"],
        fontName="Helvetica",
        fontSize=10,
        leading=14,
        alignment=TA_LEFT,
        spaceAfter=5,
    )
    title_style = ParagraphStyle(
        "ReportTitle",
        parent=styles["Title"],
        fontName="Helvetica-Bold",
        fontSize=18,
        leading=22,
        alignment=TA_LEFT,
        spaceAfter=10,
    )
    heading1_style = ParagraphStyle(
        "ReportHeading1",
        parent=styles["Heading1"],
        fontName="Helvetica-Bold",
        fontSize=15,
        leading=18,
        alignment=TA_LEFT,
        spaceBefore=10,
        spaceAfter=6,
    )
    heading2_style = ParagraphStyle(
        "ReportHeading2",
        parent=styles["Heading2"],
        fontName="Helvetica-Bold",
        fontSize=12,
        leading=15,
        alignment=TA_LEFT,
        spaceBefore=7,
        spaceAfter=4,
    )
    table_header_style = ParagraphStyle(
        "ReportTableHeader",
        parent=body_style,
        fontName="Helvetica-Bold",
        fontSize=8.5,
        leading=11,
    )
    table_cell_style = ParagraphStyle(
        "ReportTableCell",
        parent=body_style,
        fontSize=8.5,
        leading=11,
    )
    heading_styles = {
        "title": title_style,
        "heading1": heading1_style,
        "heading2": heading2_style,
    }

    doc = SimpleDocTemplate(
        buffer,
        pagesize=A4,
        rightMargin=0.65 * inch,
        leftMargin=0.65 * inch,
        topMargin=0.65 * inch,
        bottomMargin=0.65 * inch,
    )

    story = []
    list_items = []
    list_type = "bullet"
    table_rows = []

    def flush_list() -> None:
        """Emit the buffered list items as one ListFlowable and reset the buffer."""
        nonlocal list_items, list_type
        if list_items:
            story.append(
                ListFlowable(
                    [ListItem(Paragraph(item, body_style)) for item in list_items],
                    bulletType=list_type,
                    bulletFontName="Helvetica",
                    bulletFontSize=7 if list_type == "bullet" else 9,
                    leftIndent=16,
                    bulletIndent=4,
                    itemSpace=3,
                )
            )
            story.append(Spacer(1, 6))
        list_items = []
        list_type = "bullet"

    def flush_table() -> None:
        """Emit the buffered pipe rows as a table, or as paragraphs, and reset."""
        nonlocal table_rows
        if not table_rows:
            return

        if len(table_rows) >= 2 and _is_markdown_table_separator(table_rows[1]):
            # Header row plus body rows; the separator row itself is not rendered.
            rows = [table_rows[0], *table_rows[2:]]
            data = []
            for row_index, row in enumerate(rows):
                style = table_header_style if row_index == 0 else table_cell_style
                data.append(
                    [
                        Paragraph(_markdown_to_reportlab_text(cell), style)
                        for cell in _split_table_row(row)
                    ]
                )

            # Pad ragged rows so every row has the same number of cells.
            col_count = max(len(row) for row in data)
            for row in data:
                row.extend(
                    Paragraph("", table_cell_style)
                    for _ in range(col_count - len(row))
                )

            table = Table(data, repeatRows=1)
            table.setStyle(
                TableStyle(
                    [
                        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#f1f5f9")),
                        ("TEXTCOLOR", (0, 0), (-1, 0), colors.HexColor("#111827")),
                        ("GRID", (0, 0), (-1, -1), 0.35, colors.HexColor("#cbd5e1")),
                        ("VALIGN", (0, 0), (-1, -1), "TOP"),
                        ("LEFTPADDING", (0, 0), (-1, -1), 5),
                        ("RIGHTPADDING", (0, 0), (-1, -1), 5),
                        ("TOPPADDING", (0, 0), (-1, -1), 4),
                        ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
                    ]
                )
            )
            story.append(table)
            story.append(Spacer(1, 8))
        else:
            # No separator row: not a real table, so keep the lines as text.
            for row in table_rows:
                story.append(Paragraph(_markdown_to_reportlab_text(row), body_style))

        table_rows = []

    def append_list_item(item: str, item_type: str) -> None:
        """Buffer one list item, starting a new list if the list type changes."""
        nonlocal list_items, list_type
        if list_items and list_type != item_type:
            flush_list()
        list_type = item_type
        list_items.append(item)

    for raw_line in report_markdown.splitlines():
        stripped_line = raw_line.strip()

        if _is_markdown_table_line(stripped_line):
            flush_list()
            table_rows.append(stripped_line)
            continue
        flush_table()

        # Classify the RAW line first. Converting inline markup before looking
        # for the marker let the italic pattern swallow a leading "* " bullet
        # (e.g. "* item with *emphasis*") and turned the item into a paragraph.
        kind, text = _classify_line(stripped_line)

        if kind == "bullet":
            append_list_item(_markdown_to_reportlab_text(text), "bullet")
            continue

        flush_list()
        if kind == "blank":
            story.append(Spacer(1, 5))
        elif kind == "rule":
            story.append(Spacer(1, 8))
        elif kind in heading_styles:
            story.append(
                Paragraph(_markdown_to_reportlab_text(text), heading_styles[kind])
            )
        else:
            story.append(Paragraph(_markdown_to_reportlab_text(text), body_style))

    flush_table()
    flush_list()

    if not story:
        raise RuntimeError("No report content available for PDF generation.")

    doc.build(story)
    return buffer.getvalue()


def _classify_line(stripped_line: str) -> tuple[str, str]:
    """Classify one whitespace-stripped Markdown line by its block marker.

    Args:
        stripped_line: A single source line with surrounding whitespace removed.

    Returns:
        A ``(kind, text)`` pair. ``kind`` is one of ``"blank"``, ``"rule"``,
        ``"bullet"``, ``"title"``, ``"heading1"``, ``"heading2"`` or
        ``"paragraph"``; ``text`` is the raw (unconverted) content with the
        block marker removed, or ``""`` for blank lines and rules.
    """
    if not stripped_line:
        return "blank", ""

    # A rule is either a run of rule characters ("---", "***") or three or more
    # of one rule character separated by spaces ("* * *", "- - -"). The spaced
    # form must be checked before bullets because it also starts with "* "/"- ".
    compact = stripped_line.replace(" ", "")
    is_spaced_rule = (
        len(compact) >= 3 and len(set(compact)) == 1 and compact[0] in _RULE_CHARS
    )
    if set(stripped_line) <= _RULE_CHARS or is_spaced_rule:
        return "rule", ""

    if stripped_line.startswith(("- ", "* ")):
        return "bullet", stripped_line[2:].strip()

    for prefix, kind in _HEADING_PREFIXES:
        if stripped_line.startswith(prefix):
            return kind, stripped_line[len(prefix):].strip()

    return "paragraph", stripped_line


def _is_markdown_table_line(value: str) -> bool:
    """Return True if *value* starts and ends with ``|`` and has at least two."""
    return value.startswith("|") and value.endswith("|") and value.count("|") >= 2


def _is_markdown_table_separator(value: str) -> bool:
    """Return True if every cell of *value* is a ``---`` style separator.

    Each cell must be three or more dashes, optionally wrapped in alignment
    colons (``:---``, ``---:``, ``:---:``).
    """
    cells = _split_table_row(value)
    return bool(cells) and all(
        re.fullmatch(r":?-{3,}:?", cell.strip()) for cell in cells
    )


def _split_table_row(value: str) -> list[str]:
    """Split a pipe-delimited table row into whitespace-trimmed cell strings.

    Only ONE leading and ONE trailing pipe are treated as row delimiters, so an
    empty edge cell (``"|| x |"`` -> ``["", "x"]``) is preserved instead of
    shifting the remaining cells into the wrong column.
    """
    row = value.strip()
    if row.startswith("|"):
        row = row[1:]
    if row.endswith("|"):
        row = row[:-1]
    return [cell.strip() for cell in row.split("|")]


def _markdown_to_reportlab_text(value: str) -> str:
    """Convert inline Markdown in *value* to text safe for a Paragraph.

    The value is HTML-escaped first so ``<``, ``>``, ``&`` and quotes cannot be
    read as markup. After that: whitespace following a leading ``N.`` is
    collapsed to one space, the markers of inline code, bold and italic spans
    are removed (their inner text is kept), and ``[label](target)`` links are
    replaced by their label.
    """
    value = escape(value)
    value = re.sub(r"^(\d+)\.\s+", r"\1. ", value)
    value = re.sub(r"`([^`]+)`", r"\1", value)
    # Bold must be handled before italic so "**x**" is not read as two "*" spans.
    value = re.sub(r"\*\*([^*]+)\*\*", r"\1", value)
    value = re.sub(r"\*([^*]+)\*", r"\1", value)
    return re.sub(
        r"\[([^\]]+)\]\(([^)]+)\)",
        r'\1',
        value,
    )