"""Streamlit app that converts Markdown (with optional Mermaid diagrams) to a styled PDF.

The pipeline is: Markdown text -> ``process_mermaid_blocks`` -> python-markdown
-> HTML document with an embedded stylesheet -> WeasyPrint PDF.
"""

import base64
import html
import os
import re
import zlib
from datetime import datetime
from io import BytesIO

import markdown
import requests
import streamlit as st
from PIL import Image
from weasyprint import HTML, CSS

# Page config
st.set_page_config(page_title="Markdown to PDF", layout="wide", page_icon="📄")

# Font options: display name -> Google Fonts "family" query value
FONTS = {
    "Open Sans": "Open+Sans:wght@400;600;700",
    "Montserrat": "Montserrat:wght@400;600;700",
    "DM Mono": "DM+Mono:wght@400;500",
    "Anonymous Pro": "Anonymous+Pro:wght@400;700",
    "Inconsolata": "Inconsolata:wght@400;700"
}

MONOSPACE_FONTS = ["DM Mono", "Anonymous Pro", "Inconsolata"]


def get_css_template(font_name, spacing="normal", font_size=11):
    """Generate the PDF stylesheet for the selected font, spacing and size.

    Args:
        font_name: Key of ``FONTS``; used for body text, headings and the
            page-margin boxes.
        spacing: ``"spacious"`` enlarges line height and margins; any other
            value uses the normal density.
        font_size: Base body font size in points; heading and code sizes are
            derived from it.

    Returns:
        The CSS text as a string.

    Raises:
        KeyError: If ``font_name`` is not a key of ``FONTS``.
    """
    is_mono = font_name in MONOSPACE_FONTS
    # A monospace body font doubles as the code font; otherwise use DM Mono.
    code_font = font_name if is_mono else "DM Mono"

    google_fonts_url = f"https://fonts.googleapis.com/css2?family={FONTS[font_name]}"
    if not is_mono:
        google_fonts_url += f"&family={FONTS[code_font]}"

    # Custom line-height per font type
    if is_mono:
        line_height = 1.6 if spacing == "spacious" else 1.4
    else:
        line_height = 1.8 if spacing == "spacious" else 1.6

    margin_multiplier = 1.2 if spacing == "spacious" else 1.0

    return f"""
@import url('{google_fonts_url}');

@page {{
    size: A4;
    margin: {2.5 * margin_multiplier}cm {2 * margin_multiplier}cm;

    @top-left {{
        content: element(header-left);
        vertical-align: middle;
    }}

    @top-right {{
        content: element(header-right);
        vertical-align: middle;
        text-align: right;
    }}

    @bottom-center {{
        content: "Page " counter(page) " of " counter(pages);
        font-family: '{font_name}', sans-serif;
        font-size: 9pt;
        color: #666;
    }}

    @bottom-right {{
        content: "{datetime.now().strftime('%B %d, %Y')}";
        font-family: '{font_name}', sans-serif;
        font-size: 9pt;
        color: #666;
    }}
}}

/* Add extra space after header on pages 2+ */
@page :not(:first) {{
    margin-top: {3.5 * margin_multiplier}cm;
}}

@page :first {{
    margin-top: {2.5 * margin_multiplier}cm;
}}

.header-left {{
    position: running(header-left);
}}

.header-right {{
    position: running(header-right);
}}

.logo-container {{
    max-width: 120px;
    max-height: 40px;
    display: inline-block;
}}

.logo-container img {{
    max-width: 120px;
    max-height: 40px;
    width: auto;
    height: auto;
    display: block;
}}

.title-header {{
    font-family: '{font_name}', sans-serif;
    font-size: 14pt;
    font-weight: 600;
    color: #2c3e50;
    margin: 0;
    padding: 0;
}}

body {{
    font-family: '{font_name}', sans-serif;
    font-size: {font_size}pt;
    line-height: {line_height};
    color: #333;
    margin: 0;
    padding: 0;
}}

h1, h2, h3, h4, h5, h6 {{
    font-family: '{font_name}', sans-serif;
    color: #2c3e50;
    margin-top: {1.5 * margin_multiplier}em;
    margin-bottom: {0.5 * margin_multiplier}em;
    page-break-after: avoid;
    font-weight: 600;
}}

h1 {{
    font-size: {font_size * 2}pt;
    border-bottom: 2px solid #e0e0e0;
    padding-bottom: 0.3em;
}}

h2 {{
    font-size: {font_size * 1.6}pt;
    border-bottom: 1px solid #e0e0e0;
    padding-bottom: 0.2em;
}}

h3 {{ font-size: {font_size * 1.3}pt; }}
h4 {{ font-size: {font_size * 1.1}pt; }}
h5 {{ font-size: {font_size}pt; }}
h6 {{ font-size: {font_size * 0.9}pt; color: #666; }}

p {{
    margin: {0.8 * margin_multiplier}em 0;
}}

/* List item atomic page breaking */
ul, ol {{
    margin: {1 * margin_multiplier}em 0;
    padding-left: 2em;
}}

li {{
    margin: {0.5 * margin_multiplier}em 0;
    page-break-inside: avoid;
    break-inside: avoid;
}}

/* Prevent orphaned list items */
ul, ol {{
    orphans: 3;
    widows: 3;
}}

blockquote {{
    border-left: 4px solid #3498db;
    margin: {1.2 * margin_multiplier}em 0;
    padding: {0.5 * margin_multiplier}em 0 {0.5 * margin_multiplier}em 1em;
    background: #f8f9fa;
    font-style: italic;
    color: #555;
    page-break-inside: avoid;
}}

code {{
    font-family: '{code_font}', monospace;
    font-size: {font_size * 0.9}pt;
    background: #f4f4f4;
    padding: 0.1em 0.3em;
    border-radius: 3px;
    color: #c7254e;
}}

pre {{
    font-family: '{code_font}', monospace;
    font-size: {font_size * 0.85}pt;
    background: #f8f8f8;
    border: 1px solid #ddd;
    border-radius: 4px;
    padding: {1 * margin_multiplier}em;
    overflow-x: auto;
    line-height: 1.4;
    page-break-inside: avoid;
    margin: {1 * margin_multiplier}em 0;
}}

pre code {{
    background: none;
    padding: 0;
    color: #333;
}}

table {{
    border-collapse: collapse;
    width: 100%;
    margin: {1.2 * margin_multiplier}em 0;
    page-break-inside: avoid;
}}

th, td {{
    border: 1px solid #ddd;
    padding: {0.6 * margin_multiplier}em {0.8 * margin_multiplier}em;
    text-align: left;
}}

th {{
    background: #f5f5f5;
    font-weight: 600;
    color: #2c3e50;
}}

tr:nth-child(even) {{
    background: #fafafa;
}}

img {{
    max-width: 100%;
    height: auto;
    display: block;
    margin: {1.2 * margin_multiplier}em 0;
    page-break-inside: avoid;
}}

hr {{
    border: none;
    border-top: 1px solid #ddd;
    margin: {2 * margin_multiplier}em 0;
}}

a {{
    color: #3498db;
    text-decoration: none;
}}

a:hover {{
    text-decoration: underline;
}}

/* Mermaid diagram container - fits within one page */
.mermaid-container {{
    max-height: 600px;
    width: 100%;
    page-break-inside: avoid;
    break-inside: avoid;
    margin: {1.5 * margin_multiplier}em 0;
    text-align: center;
    background: #fafafa;
    border: 1px solid #e0e0e0;
    border-radius: 8px;
    padding: 1em;
    box-sizing: border-box;
}}

.mermaid-container img {{
    max-width: 100%;
    max-height: 550px;
    width: auto;
    height: auto;
    object-fit: contain;
    display: block;
    margin: 0 auto;
}}

.mermaid-error {{
    padding: 1em;
    background: #fff3cd;
    border: 1px solid #ffc107;
    border-radius: 4px;
    color: #856404;
    margin: 1em 0;
}}

/* Syntax highlighting for code blocks - Complete Pygments style */
.codehilite .hll {{ background-color: #ffffcc }}
.codehilite .c {{ color: #008000; font-style: italic }} /* Comment */
.codehilite .err {{ border: 1px solid #FF0000 }} /* Error */
.codehilite .k {{ color: #0000ff; font-weight: bold }} /* Keyword */
.codehilite .o {{ color: #666666 }} /* Operator */
.codehilite .ch {{ color: #008000; font-style: italic }} /* Comment.Hashbang */
.codehilite .cm {{ color: #008000; font-style: italic }} /* Comment.Multiline */
.codehilite .cp {{ color: #0000ff }} /* Comment.Preproc */
.codehilite .cpf {{ color: #008000; font-style: italic }} /* Comment.PreprocFile */
.codehilite .c1 {{ color: #008000; font-style: italic }} /* Comment.Single */
.codehilite .cs {{ color: #008000; font-style: italic }} /* Comment.Special */
.codehilite .gd {{ color: #A00000 }} /* Generic.Deleted */
.codehilite .ge {{ font-style: italic }} /* Generic.Emph */
.codehilite .gr {{ color: #FF0000 }} /* Generic.Error */
.codehilite .gh {{ color: #000080; font-weight: bold }} /* Generic.Heading */
.codehilite .gi {{ color: #00A000 }} /* Generic.Inserted */
.codehilite .go {{ color: #888888 }} /* Generic.Output */
.codehilite .gp {{ color: #000080; font-weight: bold }} /* Generic.Prompt */
.codehilite .gs {{ font-weight: bold }} /* Generic.Strong */
.codehilite .gu {{ color: #800080; font-weight: bold }} /* Generic.Subheading */
.codehilite .gt {{ color: #0044DD }} /* Generic.Traceback */
.codehilite .kc {{ color: #0000ff; font-weight: bold }} /* Keyword.Constant */
.codehilite .kd {{ color: #0000ff; font-weight: bold }} /* Keyword.Declaration */
.codehilite .kn {{ color: #0000ff; font-weight: bold }} /* Keyword.Namespace */
.codehilite .kp {{ color: #0000ff }} /* Keyword.Pseudo */
.codehilite .kr {{ color: #0000ff; font-weight: bold }} /* Keyword.Reserved */
.codehilite .kt {{ color: #2b91af }} /* Keyword.Type */
.codehilite .m {{ color: #009999 }} /* Literal.Number */
.codehilite .s {{ color: #a31515 }} /* Literal.String */
.codehilite .na {{ color: #FF0000 }} /* Name.Attribute */
.codehilite .nb {{ color: #0086B3 }} /* Name.Builtin */
.codehilite .nc {{ color: #2b91af; font-weight: bold }} /* Name.Class */
.codehilite .no {{ color: #008080 }} /* Name.Constant */
.codehilite .nd {{ color: #AA22FF }} /* Name.Decorator */
.codehilite .ni {{ color: #999999; font-weight: bold }} /* Name.Entity */
.codehilite .ne {{ color: #D2413A; font-weight: bold }} /* Name.Exception */
.codehilite .nf {{ color: #000000; font-weight: bold }} /* Name.Function */
.codehilite .nl {{ color: #A0A000 }} /* Name.Label */
.codehilite .nn {{ color: #0000FF; font-weight: bold }} /* Name.Namespace */
.codehilite .nt {{ color: #0000ff }} /* Name.Tag */
.codehilite .nv {{ color: #008080 }} /* Name.Variable */
.codehilite .ow {{ color: #0000ff; font-weight: bold }} /* Operator.Word */
.codehilite .w {{ color: #bbbbbb }} /* Text.Whitespace */
.codehilite .mb {{ color: #009999 }} /* Literal.Number.Bin */
.codehilite .mf {{ color: #009999 }} /* Literal.Number.Float */
.codehilite .mh {{ color: #009999 }} /* Literal.Number.Hex */
.codehilite .mi {{ color: #009999 }} /* Literal.Number.Integer */
.codehilite .mo {{ color: #009999 }} /* Literal.Number.Oct */
.codehilite .sa {{ color: #a31515 }} /* Literal.String.Affix */
.codehilite .sb {{ color: #a31515 }} /* Literal.String.Backtick */
.codehilite .sc {{ color: #a31515 }} /* Literal.String.Char */
.codehilite .dl {{ color: #a31515 }} /* Literal.String.Delimiter */
.codehilite .sd {{ color: #a31515; font-style: italic }} /* Literal.String.Doc */
.codehilite .s2 {{ color: #a31515 }} /* Literal.String.Double */
.codehilite .se {{ color: #a31515; font-weight: bold }} /* Literal.String.Escape */
.codehilite .sh {{ color: #a31515 }} /* Literal.String.Heredoc */
.codehilite .si {{ color: #a31515 }} /* Literal.String.Interpol */
.codehilite .sx {{ color: #a31515 }} /* Literal.String.Other */
.codehilite .sr {{ color: #a31515 }} /* Literal.String.Regex */
.codehilite .s1 {{ color: #a31515 }} /* Literal.String.Single */
.codehilite .ss {{ color: #a31515 }} /* Literal.String.Symbol */
.codehilite .bp {{ color: #0086B3 }} /* Name.Builtin.Pseudo */
.codehilite .fm {{ color: #000000; font-weight: bold }} /* Name.Function.Magic */
.codehilite .vc {{ color: #008080 }} /* Name.Variable.Class */
.codehilite .vg {{ color: #008080 }} /* Name.Variable.Global */
.codehilite .vi {{ color: #008080 }} /* Name.Variable.Instance */
.codehilite .vm {{ color: #008080 }} /* Name.Variable.Magic */
.codehilite .il {{ color: #009999 }} /* Literal.Number.Integer.Long */
"""


def generate_html(markdown_text, title, font_name, spacing="normal", font_size=11, logo_data=None):
    """Convert Markdown to a complete, styled HTML document.

    Args:
        markdown_text: Markdown source; may contain ``::mermaid`` blocks.
        title: Document title shown in the running page header. It is
            HTML-escaped before insertion because it is free-form user input.
        font_name: Key of ``FONTS`` passed through to ``get_css_template``.
        spacing: Spacing preset passed through to ``get_css_template``.
        font_size: Base font size in points.
        logo_data: Optional image data URL placed in the header.

    Returns:
        The HTML document as a string.
    """
    # Process Mermaid blocks first (renders diagrams and keeps code blocks)
    processed_text = process_mermaid_blocks(markdown_text)

    # Convert Markdown to HTML with extensions
    md = markdown.Markdown(
        extensions=[
            'extra',
            'codehilite',
            'tables',
            'fenced_code',
            'nl2br',
            'sane_lists'
        ],
        extension_configs={
            'codehilite': {
                'linenums': False,
                'guess_lang': True,
                'css_class': 'codehilite',
                'use_pygments': True
            }
        }
    )
    body_html = md.convert(processed_text)

    # NOTE(review): the markup below is reconstructed from the class names the
    # stylesheet defines (.logo-container, .header-left, .header-right,
    # .title-header) — confirm against the intended layout.

    # Handle logo
    logo_html = ""
    if logo_data:
        logo_html = (
            '<div class="logo-container">'
            f'<img src="{logo_data}" alt="Logo">'
            '</div>'
        )

    css = get_css_template(font_name, spacing, font_size)
    safe_title = html.escape(title)

    # The running header elements come first in <body> so they are available
    # to the page-margin boxes from the first page on.
    document = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
{css}
</style>
</head>
<body>
<div class="header-left">
{logo_html}
</div>
<div class="header-right">
<span class="title-header">{safe_title}</span>
</div>
{body_html}
</body>
</html>
"""
    return document


def generate_pdf(html_content):
    """Render an HTML string to PDF with WeasyPrint.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the PDF bytes.
    """
    pdf_buffer = BytesIO()
    HTML(string=html_content).write_pdf(pdf_buffer)
    pdf_buffer.seek(0)
    return pdf_buffer


def image_to_base64(image_file):
    """Convert an uploaded image to a PNG base64 data URL.

    Images with an alpha channel or a palette are flattened onto a white
    background before being re-encoded as PNG.

    Returns:
        A ``data:image/png;base64,...`` string, or ``None`` if the image
        could not be processed (the error is shown via ``st.error``).
    """
    try:
        # Open image with PIL to validate and get format
        img = Image.open(image_file)

        # Convert to RGB if necessary (for PNG with transparency)
        if img.mode in ('RGBA', 'LA', 'P'):
            background = Image.new('RGB', img.size, (255, 255, 255))
            if img.mode == 'P':
                img = img.convert('RGBA')
            # The last band is the alpha channel, used as the paste mask.
            background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
            img = background

        # Save to buffer
        buffer = BytesIO()
        img.save(buffer, format='PNG')
        buffer.seek(0)

        # Encode to base64
        img_base64 = base64.b64encode(buffer.read()).decode()
        return f"data:image/png;base64,{img_base64}"
    except Exception as e:
        st.error(f"Error processing image: {str(e)}")
        return None


def encode_mermaid_for_ink(mermaid_code):
    """Encode Mermaid code for the mermaid.ink API (deflate + URL-safe base64).

    Returns:
        The URL-safe base64 text of the raw-deflate-compressed UTF-8 code.
    """
    # NOTE(review): mermaid.ink "pako:" payloads are normally produced by
    # pako.deflate (zlib-wrapped) over a JSON state object — confirm that the
    # service accepts a raw-deflate stream of bare diagram text.
    compressed = zlib.compress(mermaid_code.encode('utf-8'), level=9)

    # Remove zlib header (first 2 bytes) and checksum (last 4 bytes) for raw deflate
    raw_deflate = compressed[2:-4]

    # Base64 encode with URL-safe characters
    encoded = base64.urlsafe_b64encode(raw_deflate).decode('utf-8')
    return encoded


def render_mermaid_to_svg(mermaid_code):
    """Render Mermaid code to SVG using the mermaid.ink API.

    Returns:
        SVG content as a string, or ``None`` when the request fails or the
        service answers with a non-200 status.
    """
    encoded = encode_mermaid_for_ink(mermaid_code)
    url = f"https://mermaid.ink/svg/pako:{encoded}"

    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        # Best effort: a failed render degrades to an inline warning box.
        print(f"Mermaid rendering error: {e}")
        return None

    if response.status_code == 200:
        return response.text
    return None


def process_mermaid_blocks(markdown_text):
    """Replace ``::mermaid ... ::`` blocks with a rendered diagram plus code block.

    Each block becomes an embedded SVG image (or a warning box when rendering
    fails) followed by a fenced ``mermaid`` code block holding the source.

    Returns:
        Modified Markdown/HTML hybrid text with embedded diagrams.
    """
    # Pattern to match ::mermaid\n...\n::
    pattern = r'::mermaid\n(.*?)\n::'

    def replace_mermaid(match):
        mermaid_code = match.group(1).strip()

        # Render to SVG
        svg_content = render_mermaid_to_svg(mermaid_code)

        # NOTE(review): markup reconstructed from the .mermaid-container and
        # .mermaid-error stylesheet rules — confirm against the intended layout.
        if svg_content:
            # Embed SVG as base64 data URL
            svg_base64 = base64.b64encode(svg_content.encode('utf-8')).decode('utf-8')
            img_tag = (
                '<div class="mermaid-container">'
                f'<img src="data:image/svg+xml;base64,{svg_base64}" alt="Mermaid Diagram">'
                '</div>'
            )
        else:
            img_tag = '<div class="mermaid-error">⚠️ Failed to render Mermaid diagram</div>'

        # Create code block for raw mermaid code (will be processed by markdown later)
        code_block = f'\n```mermaid\n{mermaid_code}\n```\n'

        # Return both: image first, then code block
        return f'\n{img_tag}\n{code_block}'

    # Process all mermaid blocks
    result = re.sub(pattern, replace_mermaid, markdown_text, flags=re.DOTALL)
    return result


# Streamlit UI
st.title("📄 Markdown to PDF Converter")
st.markdown("Convert your Markdown documents into beautifully formatted PDFs with syntax highlighting")

# Sidebar for settings
with st.sidebar:
    st.header("⚙️ Settings")

    # Logo upload
    st.subheader("🖼️ Logo")
    logo_file = st.file_uploader(
        "Upload Logo (PNG, JPG, JPEG)",
        type=['png', 'jpg', 'jpeg'],
        help="Recommended size: 240x80px. Will be scaled to fit 120x40px in header."
    )

    # Show logo preview if uploaded
    if logo_file:
        st.image(logo_file, caption="Logo Preview", width=150)
    else:
        # Check if logo.png exists in directory
        if os.path.exists('./logo.png'):
            st.info("📌 Using default logo.png from directory")
            with open('./logo.png', 'rb') as f:
                st.image(f, caption="Default Logo", width=150)

    st.divider()

    st.subheader("🎨 Typography")
    selected_font = st.selectbox(
        "Font Family",
        options=list(FONTS.keys()),
        index=0,
        help="Choose from 5 curated Google Fonts"
    )

    font_size = st.slider(
        "Font Size (pt)",
        min_value=9,
        max_value=14,
        value=11,
        step=1,
        help="Base font size for body text"
    )

    spacing = st.radio(
        "Spacing",
        options=["normal", "spacious"],
        index=0,
        help="Line height and margin density"
    )

    st.divider()
    st.caption("💡 Tip: Use ``` with language for syntax highlighting")

# Main content area
col1, col2 = st.columns([1, 1])

with col1:
    st.subheader("📝 Input")

    title = st.text_input(
        "Document Title",
        value="My Document",
        max_chars=100,
        help="Appears in the header of each page"
    )

    # Markdown input tabs
    input_tab1, input_tab2 = st.tabs(["✍️ Write Markdown", "📁 Upload File"])

    with input_tab1:
        markdown_text = st.text_area(
            "Markdown Content",
            value="""# Welcome to Markdown PDF

This is a **sample** document with **syntax highlighting**.

## Features

- Beautiful typography
- Professional layout
- Code highlighting with Pygments
- Atomic list item page breaks

## Code Example

```python
def fibonacci(n):
    \"\"\"Calculate Fibonacci number\"\"\"
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)

print(fibonacci(10))
```

## JavaScript Example

```javascript
const greeting = "Hello, World!";
console.log(greeting);
```

Enjoy! 🎉""",
            height=400,
            help="Supports GitHub-Flavored Markdown"
        )

    with input_tab2:
        uploaded_file = st.file_uploader(
            "Upload Markdown File",
            type=['md', 'markdown', 'txt'],
            help="Upload a .md or .txt file"
        )

        # An uploaded file takes precedence over the text area content.
        if uploaded_file is not None:
            markdown_text = uploaded_file.read().decode('utf-8')
            st.success(f"✅ Loaded {uploaded_file.name}")

with col2:
    st.subheader("👁️ Preview")

    # Generate HTML preview
    try:
        # Simple preview without full styling
        preview_md = markdown.Markdown(extensions=['extra', 'tables', 'fenced_code', 'codehilite'])
        preview_html = preview_md.convert(markdown_text)

        # Display preview with basic styling
        # NOTE(review): wrapper styling is reconstructed — confirm the intended look.
        st.markdown(
            f'''<div style="border: 1px solid #ddd; border-radius: 4px; padding: 1em; max-height: 500px; overflow-y: auto; background: #fff; color: #333;">
{preview_html}
</div>''',
            unsafe_allow_html=True
        )
    except Exception as e:
        st.error(f"Preview error: {str(e)}")

# Generate PDF button
st.divider()

col_btn1, col_btn2, col_btn3 = st.columns([1, 1, 1])

with col_btn2:
    if st.button("🎨 Generate PDF", type="primary", use_container_width=True):
        with st.spinner("Generating PDF with syntax highlighting..."):
            try:
                # Process logo: an uploaded file wins over ./logo.png
                logo_data = None
                if logo_file:
                    logo_data = image_to_base64(logo_file)
                elif os.path.exists('./logo.png'):
                    with open('./logo.png', 'rb') as f:
                        img_bytes = f.read()
                    img_base64 = base64.b64encode(img_bytes).decode()
                    # Detect format
                    ext = 'png'
                    logo_data = f"data:image/{ext};base64,{img_base64}"

                # Generate HTML and PDF
                html_content = generate_html(
                    markdown_text,
                    title,
                    selected_font,
                    spacing,
                    font_size,
                    logo_data
                )
                pdf_buffer = generate_pdf(html_content)

                st.success("✅ PDF generated successfully!")

                # Download button
                filename = f"{title.replace(' ', '_').lower()}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
                st.download_button(
                    label="⬇️ Download PDF",
                    data=pdf_buffer,
                    file_name=filename,
                    mime="application/pdf",
                    use_container_width=True
                )

                st.info(f"📄 Filename: `{filename}`")

            except Exception as e:
                st.error(f"❌ Error generating PDF: {str(e)}")
                with st.expander("Show error details"):
                    st.exception(e)

# Footer
st.divider()
col_footer1, col_footer2, col_footer3 = st.columns(3)
with col_footer1:
    st.caption("🚀 Built with Streamlit")
with col_footer2:
    st.caption("📚 Supports GitHub-Flavored Markdown")
with col_footer3:
    st.caption("🎨 Professional PDF output with syntax highlighting")