# SmokeScan / pipeline / pdf_generator.py
# KinetoLabs's picture
# Initial commit: FDAM AI Pipeline v4.0.1
# 88bdcff
"""PDF Generator using WeasyPrint.
Converts Markdown SOW documents to professional PDF format.
Uses markdown → HTML → PDF pipeline with WeasyPrint.
"""
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import markdown
@dataclass
class PDFResult:
    """Result of PDF generation.

    Attributes:
        success: True when the PDF was written, False otherwise.
        pdf_path: Path of the generated PDF, or None when generation failed.
        error_message: Description of the failure, or None on success.
        file_size_bytes: Size of the generated PDF in bytes; 0 when no file
            was produced.
    """

    success: bool
    # Defaults to None (like error_message) so a failure result does not have
    # to spell out ``pdf_path=None``; positional construction is unchanged.
    pdf_path: Optional[str] = None
    error_message: Optional[str] = None
    file_size_bytes: int = 0
# Professional CSS styling for SOW documents.
# PDFGenerator falls back to this stylesheet when no custom CSS is supplied and
# embeds it in the <style> element of the generated HTML document.
# The @page rule selects letter-size pages with 0.75in margins, a running
# "FDAM Assessment Report" header and a "Page X of Y" footer; the remaining
# rules style body text, headings, tables, lists, inline code, horizontal
# rules, emphasis and the .disclaimer box.
SOW_CSS = """
@page {
size: letter;
margin: 0.75in;
@top-center {
content: "FDAM Assessment Report";
font-size: 9pt;
color: #666;
}
@bottom-center {
content: "Page " counter(page) " of " counter(pages);
font-size: 9pt;
color: #666;
}
}
body {
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 11pt;
line-height: 1.5;
color: #333;
}
h1 {
font-size: 20pt;
color: #1a1a1a;
border-bottom: 2px solid #0066cc;
padding-bottom: 8px;
margin-top: 0;
}
h2 {
font-size: 14pt;
color: #0066cc;
margin-top: 20px;
border-bottom: 1px solid #ddd;
padding-bottom: 4px;
}
h3 {
font-size: 12pt;
color: #333;
margin-top: 15px;
}
table {
width: 100%;
border-collapse: collapse;
margin: 15px 0;
font-size: 10pt;
}
th {
background-color: #0066cc;
color: white;
padding: 8px 10px;
text-align: left;
font-weight: bold;
}
td {
padding: 6px 10px;
border-bottom: 1px solid #ddd;
}
tr:nth-child(even) {
background-color: #f8f9fa;
}
tr:hover {
background-color: #e9ecef;
}
ul, ol {
margin: 10px 0;
padding-left: 25px;
}
li {
margin: 4px 0;
}
strong {
color: #1a1a1a;
}
code {
background-color: #f4f4f4;
padding: 2px 5px;
border-radius: 3px;
font-size: 10pt;
}
hr {
border: none;
border-top: 1px solid #ddd;
margin: 20px 0;
}
.disclaimer {
background-color: #fff3cd;
border: 1px solid #ffc107;
padding: 12px;
border-radius: 4px;
font-size: 10pt;
margin-top: 20px;
}
em {
color: #666;
}
"""
class PDFGenerator:
    """Generates PDF documents from Markdown using WeasyPrint.

    The Markdown text is rendered to a complete HTML document carrying an
    embedded stylesheet, and WeasyPrint turns that HTML into a PDF.
    WeasyPrint is imported lazily, so the class can be constructed and the
    HTML fallback (``generate_html``) used even where WeasyPrint cannot be
    loaded.
    """

    def __init__(self, custom_css: Optional[str] = None):
        """Initialize PDF generator.

        Args:
            custom_css: Optional custom CSS to override default styling.
                ``None`` or an empty string selects the default ``SOW_CSS``.
        """
        self.css = custom_css or SOW_CSS
        # Tri-state cache: None = not probed yet, otherwise the probe result.
        self._weasyprint_available: Optional[bool] = None

    @property
    def weasyprint_available(self) -> bool:
        """Check if WeasyPrint is available (probed once, then cached)."""
        if self._weasyprint_available is None:
            try:
                from weasyprint import HTML  # noqa: F401 -- import probe only
            except (ImportError, OSError):
                # ImportError: the package is not installed.
                # OSError: the package is installed but its native libraries
                # could not be loaded; treating that as "unavailable" keeps
                # generate_pdf() returning a PDFResult instead of raising.
                self._weasyprint_available = False
            else:
                self._weasyprint_available = True
        return self._weasyprint_available

    @staticmethod
    def _discard_temp_file(path: Optional[str]) -> None:
        """Best-effort removal of a temporary file created by this class."""
        if path is None:
            return
        try:
            Path(path).unlink(missing_ok=True)
        except OSError:
            # Cleanup must never mask the failure that is being reported.
            pass

    def markdown_to_html(self, markdown_content: str) -> str:
        """Convert Markdown to HTML with styling.

        Args:
            markdown_content: Markdown text

        Returns:
            Complete HTML document with the configured CSS embedded in a
            ``<style>`` element.
        """
        # Convert markdown to HTML
        md = markdown.Markdown(
            extensions=[
                "tables",
                "fenced_code",
                "toc",
            ]
        )
        html_body = md.convert(markdown_content)

        # Wrap in complete HTML document with CSS
        html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
{self.css}
</style>
</head>
<body>
{html_body}
</body>
</html>"""
        return html

    def generate_pdf(
        self,
        markdown_content: str,
        output_path: Optional[str] = None,
    ) -> PDFResult:
        """Generate PDF from Markdown content.

        Never raises: every failure is reported through the returned
        ``PDFResult``.

        Args:
            markdown_content: Markdown text to convert
            output_path: Optional output file path. If None, a temporary
                ``SOW_*.pdf`` file is created; it is kept on success (the
                caller owns it) and removed again if generation fails.

        Returns:
            PDFResult with success status and file path
        """
        if not self.weasyprint_available:
            # Also reached when WeasyPrint is installed but failed to load.
            return PDFResult(
                success=False,
                pdf_path=None,
                error_message="WeasyPrint is not installed. Run: pip install weasyprint",
            )

        # Set only when this call created the output file itself, so that a
        # caller-supplied path is never deleted on failure.
        temp_path: Optional[str] = None
        try:
            from weasyprint import HTML

            # Convert markdown to styled HTML
            html_content = self.markdown_to_html(markdown_content)

            # Determine output path
            if output_path is None:
                # delete=False: the file must outlive the handle so WeasyPrint
                # can write to it by name and the caller can read it later.
                with tempfile.NamedTemporaryFile(
                    suffix=".pdf",
                    delete=False,
                    prefix="SOW_",
                ) as output_file:
                    temp_path = output_file.name
                output_path = temp_path

            # Generate PDF
            HTML(string=html_content).write_pdf(output_path)

            # Verify file was created
            pdf_path = Path(output_path)
            if not pdf_path.exists():
                return PDFResult(
                    success=False,
                    pdf_path=None,
                    error_message="PDF file was not created",
                )

            return PDFResult(
                success=True,
                pdf_path=str(pdf_path),
                file_size_bytes=pdf_path.stat().st_size,
            )
        except Exception as e:
            # Do not leave an empty/partial temporary PDF behind.
            self._discard_temp_file(temp_path)
            return PDFResult(
                success=False,
                pdf_path=None,
                error_message=f"PDF generation failed: {str(e)}",
            )

    def generate_html(
        self,
        markdown_content: str,
        output_path: Optional[str] = None,
    ) -> tuple[bool, Optional[str], Optional[str]]:
        """Generate HTML from Markdown (fallback if PDF fails).

        Never raises: failures are reported through the returned tuple.

        Args:
            markdown_content: Markdown text
            output_path: Optional output path. If None, a temporary
                ``SOW_*.html`` file is created; it is removed again if
                writing fails.

        Returns:
            Tuple of (success, file_path, error_message)
        """
        # Set only when this call created the output file itself.
        temp_path: Optional[str] = None
        try:
            html_content = self.markdown_to_html(markdown_content)

            if output_path is None:
                # The context manager closes the handle even if write() fails;
                # delete=False keeps the file on disk for the caller.
                with tempfile.NamedTemporaryFile(
                    mode="w",
                    suffix=".html",
                    delete=False,
                    prefix="SOW_",
                    encoding="utf-8",
                ) as output_file:
                    temp_path = output_file.name
                    output_file.write(html_content)
                output_path = temp_path
            else:
                with open(output_path, "w", encoding="utf-8") as f:
                    f.write(html_content)

            return True, output_path, None
        except Exception as e:
            # Do not leave a partially written temporary file behind.
            self._discard_temp_file(temp_path)
            return False, None, str(e)
def generate_sow_pdf(
    markdown_content: str,
    project_name: str,
    output_path: Optional[str] = None,
) -> PDFResult:
    """Convenience function to generate SOW PDF.

    Never raises: every failure is reported through the returned
    ``PDFResult``.

    Args:
        markdown_content: SOW markdown content
        project_name: Project name for filename. Used only when
            ``output_path`` is None: a filesystem-safe form of the name is
            embedded in the temporary file name (``SOW_<name>_*.pdf``).
        output_path: Optional output path. When given it is used as-is and
            ``project_name`` does not affect the file name.

    Returns:
        PDFResult with success status
    """
    created_temp = False
    if output_path is None:
        # Keep letters, digits, '-' and '_'; replace anything else (path
        # separators, spaces, ...) so the name cannot escape the temp dir.
        # Capped at 50 characters to keep the file name short.
        safe_name = "".join(
            ch if ch.isalnum() or ch in "-_" else "_"
            for ch in str(project_name or "")
        ).strip("_")[:50]
        prefix = f"SOW_{safe_name}_" if safe_name else "SOW_"
        try:
            # delete=False: the PDF must outlive this handle for the caller.
            with tempfile.NamedTemporaryFile(
                suffix=".pdf",
                delete=False,
                prefix=prefix,
            ) as output_file:
                output_path = output_file.name
        except OSError as e:
            return PDFResult(
                success=False,
                pdf_path=None,
                error_message=f"PDF generation failed: {str(e)}",
            )
        created_temp = True

    generator = PDFGenerator()
    result = generator.generate_pdf(
        markdown_content=markdown_content,
        output_path=output_path,
    )

    if created_temp and not result.success:
        # Remove the placeholder file created above; a caller-supplied path
        # is never deleted.
        try:
            Path(output_path).unlink(missing_ok=True)
        except OSError:
            pass
    return result