"""Export a mentor chat session as a standalone HTML page or a PDF.

NOTE(review): the copy of this module that was reviewed had lost its line
structure and every HTML tag inside its string literals (only the text
between tags survived). All markup below -- the page skeleton, the block
patterns and the per-message wrapper -- is RECONSTRUCTED from the remaining
text and comments. Confirm tag and class names against the original file
before merging. The page-level CSS rules could not be recovered at all; only
the Pygments rules are embedded.
"""

import html
import logging
import os
import re
import tempfile
from datetime import datetime

import pdfkit
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name, guess_lexer

from src.utils import strip_html

logger = logging.getLogger(__name__)

# Location probed first for the wkhtmltopdf binary; when it is missing,
# pdfkit's own lookup is used instead.
_WKHTMLTOPDF_PATH = "/usr/bin/wkhtmltopdf"


def syntax_highlight_code(code: str, language: str = "python") -> str:
    """Return *code* rendered as Pygments HTML inside a ``codehilite`` wrapper.

    The lexer is chosen by *language*; if that lookup fails the lexer is
    guessed from the code itself, and if guessing fails too the plain-text
    lexer is used.
    """
    try:
        lexer = get_lexer_by_name(language)
    except Exception:  # best-effort fallback, but never swallow Ctrl-C/SystemExit
        try:
            lexer = guess_lexer(code)
        except Exception:
            lexer = get_lexer_by_name("text")
    formatter = HtmlFormatter(style="friendly", cssclass="codehilite")
    return highlight(code, lexer, formatter)


def _escape_code_block(match: "re.Match[str]") -> str:
    """Re-emit a matched code block with its contents HTML-escaped."""
    # NOTE(review): wrapper tags reconstructed -- confirm.
    return f"<pre><code>{html.escape(match.group(1))}</code></pre>"


# Rewrites applied, in this order, to every message after strip_html().
# Compiled once at import time instead of once per message.
# NOTE(review): every pattern/replacement literal here is reconstructed; the
# reviewed copy only showed "(.*?)" -> "\1" with the surrounding tags removed.
# As reconstructed, the four class-preserving entries leave a matching block
# unchanged -- verify what the original replacements emitted.
_BLOCK_REWRITES = (
    (
        re.compile(r'<div class="diagnosis">(.*?)</div>', re.DOTALL),
        r'<div class="diagnosis">\1</div>',
    ),
    (
        re.compile(r'<div class="tip">(.*?)</div>', re.DOTALL),
        r'<div class="tip">\1</div>',
    ),
    (
        re.compile(r'<div class="reference">(.*?)</div>', re.DOTALL),
        r'<div class="reference">\1</div>',
    ),
    (
        re.compile(r"<pre><code>(.*?)</code></pre>", re.DOTALL),
        _escape_code_block,
    ),
    (
        re.compile(r'<div class="on-topic-warning">(.*?)</div>', re.DOTALL),
        r'<div class="on-topic-warning">\1</div>',
    ),
)


def render_chat_to_html(chat_history) -> str:
    """Render *chat_history* as a complete HTML document.

    Args:
        chat_history: iterable of ``(role, content)`` pairs. A role containing
            the substring ``"You"`` is styled as the user; any other role is
            styled as the assistant.

    Returns:
        The HTML document as a single string. An empty history yields a page
        with only the header.
    """
    css = HtmlFormatter(style="friendly").get_style_defs(".codehilite")
    exported_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # NOTE(review): page skeleton reconstructed around the surviving title,
    # heading and timestamp text; the original page-level styles are missing.
    parts = [
        f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>FINESE SCHOOL: Data Science Mentor Session</title>
<style>
{css}
</style>
</head>
<body>
<h1>FINESE SCHOOL: Expert Data Science Session</h1>
<p>Session exported on {exported_at}</p>
"""
    ]

    for role, content in chat_history:
        cls = "user" if "You" in role else "assistant"
        clean_content = strip_html(content)
        for pattern, replacement in _BLOCK_REWRITES:
            clean_content = pattern.sub(replacement, clean_content)
        # NOTE(review): wrapper markup reconstructed -- confirm class names.
        parts.append(
            f'<div class="message {cls}">'
            f'<div class="role">{role}</div>'
            f'<div class="content">{clean_content}</div>'
            f"</div>"
        )

    parts.append("</body></html>")
    return "".join(parts)


def export_chat_to_pdf(chat_history) -> bytes:
    """Render *chat_history* to PDF with wkhtmltopdf and return the PDF bytes.

    The HTML and PDF are written inside a private temporary directory that is
    removed on every exit path, including failures while writing the HTML.

    Raises:
        RuntimeError: if any step of the export fails; the original exception
            is attached as ``__cause__``.
    """
    try:
        # Prefer the well-known binary location. pdfkit raises an OSError
        # when nothing is found there; configuration=None then lets pdfkit
        # run its own default lookup.
        try:
            config = pdfkit.configuration(wkhtmltopdf=_WKHTMLTOPDF_PATH)
        except OSError:
            config = None

        html_content = render_chat_to_html(chat_history)

        options = {
            "page-size": "A4",
            "margin-top": "0.75in",
            "margin-right": "0.75in",
            "margin-bottom": "0.75in",
            "margin-left": "0.75in",
            "encoding": "UTF-8",
            "no-outline": None,
            "enable-local-file-access": None,
            "quiet": "",
        }

        with tempfile.TemporaryDirectory() as workdir:
            # Build both paths explicitly rather than string-replacing
            # ".html" in a temp-file name, which would also rewrite a
            # ".html" occurring elsewhere in the path.
            html_path = os.path.join(workdir, "session.html")
            pdf_path = os.path.join(workdir, "session.pdf")

            with open(html_path, "w", encoding="utf-8") as html_file:
                html_file.write(html_content)

            pdfkit.from_file(
                html_path, pdf_path, configuration=config, options=options
            )

            with open(pdf_path, "rb") as pdf_file:
                return pdf_file.read()
    except Exception as e:
        logger.exception("PDF export failed: %s", e)
        raise RuntimeError(f"Failed to export PDF: {e}") from e