import pdfkit
import tempfile
import os
import html
from pygments import highlight
from pygments.lexers import get_lexer_by_name, guess_lexer
from pygments.formatters import HtmlFormatter
from src.utils import strip_html
import logging
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def syntax_highlight_code(code: str, language: str = "python") -> str:
    """Return *code* rendered as Pygments-highlighted HTML.

    The lexer is looked up by *language*. If that lookup fails, the lexer is
    guessed from the code itself; if guessing fails as well, the ``"text"``
    lexer is used so that highlighting never aborts the caller.

    Args:
        code: Source text to highlight.
        language: Pygments lexer alias to try first. Defaults to ``"python"``.

    Returns:
        The HTML produced by ``HtmlFormatter`` using the ``"friendly"`` style
        and the ``codehilite`` CSS class.
    """
    try:
        lexer = get_lexer_by_name(language)
    except Exception:
        # `except Exception` (not a bare `except`) keeps the best-effort
        # fallback while letting KeyboardInterrupt/SystemExit propagate.
        try:
            lexer = guess_lexer(code)
        except Exception:
            lexer = get_lexer_by_name("text")

    formatter = HtmlFormatter(style="friendly", cssclass="codehilite")
    return highlight(code, lexer, formatter)
def render_chat_to_html(chat_history) -> str:
# Build one HTML string for a chat session and return it.
#
# chat_history is iterated as (role, content) pairs. A header chunk is created
# first, each message is passed through strip_html and a series of re.sub
# rewrites, and all collected chunks are concatenated with "".join at the end.
#
# NOTE(review): the string literals and regex patterns in this function look
# like their HTML markup was lost (several single-quoted literals are broken
# across lines and the appended f-strings are empty) -- restore them from the
# original file before relying on this code.

# Pygments stylesheet for the '.codehilite' class. It is not referenced again
# in the visible code -- presumably it was interpolated into the header
# markup; TODO confirm.
css = HtmlFormatter(style="friendly").get_style_defs('.codehilite')
# First chunk: session header with the export timestamp (local time from
# datetime.now(), formatted as YYYY-MM-DD HH:MM:SS).
html_lines = [f"""
FINESE SCHOOL: Data Science Mentor Session
FINESE SCHOOL: Expert Data Science Session
Session exported on {__import__('datetime').datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
"""]
for role, content in chat_history:
# A role containing the substring "You" is classified as "user"; any other
# role is classified as "assistant". `cls` is not used in the visible code --
# presumably it was the CSS class of the message wrapper; TODO confirm.
cls = "user" if "You" in role else "assistant"
# strip_html comes from src.utils; its exact behavior is not visible here.
clean_content = strip_html(content)
# Handle special content blocks
# NOTE(review): `re` is imported on every loop iteration; a file-level import
# would be the conventional place for it.
import re
# Process diagnosis blocks
clean_content = re.sub(r'(.*?)
', r'\1
', clean_content, flags=re.DOTALL)
# Process tip blocks
clean_content = re.sub(r'(.*?)
', r'\1
', clean_content, flags=re.DOTALL)
# Process reference blocks
clean_content = re.sub(r'(.*?)
', r'\1
', clean_content, flags=re.DOTALL)
# Process code blocks
# Replacement callback for the code-block pattern: reads capture group 1 into
# `code`. The returned f-string is empty as shown, so `code` is unused here --
# presumably the original wrapped it in markup; TODO confirm.
def replace_code_block(match):
code = match.group(1)
return f''
clean_content = re.sub(r'(.*?)
', replace_code_block, clean_content, flags=re.DOTALL)
# Process on-topic warnings
clean_content = re.sub(r'(.*?)
', r'\1
', clean_content, flags=re.DOTALL)
# One chunk is appended per message. The f-string is empty as shown, so
# neither `cls` nor `clean_content` reaches the output in the visible code.
html_lines.append(f'')
# Trailing chunk appended before joining (empty as shown).
html_lines.append("")
# Concatenate all chunks without a separator.
return "".join(html_lines)
def export_chat_to_pdf(chat_history) -> bytes:
    """Render *chat_history* to HTML and convert it to a PDF via wkhtmltopdf.

    The HTML from ``render_chat_to_html`` is written to a temporary ``.html``
    file, converted with ``pdfkit.from_file`` into a sibling ``.pdf`` file,
    and the PDF bytes are read back. Both temporary files are removed
    afterwards, whether or not the conversion succeeded.

    Args:
        chat_history: Passed unchanged to ``render_chat_to_html``.

    Returns:
        The raw bytes of the generated PDF.

    Raises:
        RuntimeError: If any step fails. The original exception is logged
            and attached as ``__cause__``.
    """
    try:
        # Prefer the explicit binary path; fall back to pdfkit's own lookup
        # when that configuration cannot be created. `except Exception`
        # (not a bare `except`) lets KeyboardInterrupt/SystemExit through.
        try:
            config = pdfkit.configuration(wkhtmltopdf="/usr/bin/wkhtmltopdf")
        except Exception:
            config = None

        html_content = render_chat_to_html(chat_history)

        options = {
            'page-size': 'A4',
            'margin-top': '0.75in',
            'margin-right': '0.75in',
            'margin-bottom': '0.75in',
            'margin-left': '0.75in',
            'encoding': "UTF-8",
            'no-outline': None,
            'enable-local-file-access': None,
            'quiet': '',
        }

        temp_html = None
        pdf_path = None
        try:
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".html", delete=False, encoding="utf-8"
            ) as f:
                # Record the name before writing so a failed write still
                # gets cleaned up in the `finally` below.
                temp_html = f.name
                f.write(html_content)

            # Swap only the extension; str.replace would also rewrite any
            # ".html" occurring earlier in the temp directory path.
            pdf_path = os.path.splitext(temp_html)[0] + ".pdf"

            if config:
                pdfkit.from_file(temp_html, pdf_path, configuration=config, options=options)
            else:
                pdfkit.from_file(temp_html, pdf_path, options=options)

            with open(pdf_path, "rb") as f:
                return f.read()
        finally:
            for path in (temp_html, pdf_path):
                if path and os.path.exists(path):
                    try:
                        os.remove(path)
                    except OSError as cleanup_error:
                        # A leftover temp file must not turn a successful
                        # export into a failure or mask the real error.
                        logger.warning(
                            "Could not remove temporary file %s: %s",
                            path,
                            cleanup_error,
                        )
    except Exception as e:
        logger.exception("PDF export failed: %s", e)
        raise RuntimeError(f"Failed to export PDF: {e}") from e