MarkdownToPDF / src /streamlit_app.py
syedkhalid076's picture
Changed the background of the preview section to be the same dark color as the other elements.
e5f07d8 verified
import streamlit as st
import markdown
from weasyprint import HTML, CSS
from datetime import datetime
import base64
import os
import re
import zlib
import requests
from io import BytesIO
from PIL import Image
# Page config: browser tab title, wide layout, and a page icon.
# The icon literal was mis-encoded (UTF-8 bytes shown as "πŸ“„"); restored to the intended emoji.
st.set_page_config(page_title="Markdown to PDF", layout="wide", page_icon="📄")
# Font options.
# Maps the display name offered in the UI to the value placed after "family="
# in the Google Fonts css2 URL (family name plus the weights to load).
FONTS = {
    "Open Sans": "Open+Sans:wght@400;600;700",
    "Montserrat": "Montserrat:wght@400;600;700",
    "DM Mono": "DM+Mono:wght@400;500",
    "Anonymous Pro": "Anonymous+Pro:wght@400;700",
    "Inconsolata": "Inconsolata:wght@400;700",
}

# Names from FONTS that are monospaced; these double as the code font and
# get a tighter line height in the generated stylesheet.
MONOSPACE_FONTS = [
    "DM Mono",
    "Anonymous Pro",
    "Inconsolata",
]
def get_css_template(font_name, spacing="normal", font_size=11):
    """Generate the print stylesheet for the selected font, spacing and size.

    Args:
        font_name: Key of ``FONTS``; used for body text, headings and page
            footers. When it is listed in ``MONOSPACE_FONTS`` it is also used
            for code, otherwise code is set in "DM Mono".
        spacing: ``"spacious"`` raises the line height and scales margins by
            1.2; any other value produces the normal layout.
        font_size: Base body size in points; heading and code sizes are
            multiples of it.

    Returns:
        The CSS text (including the Google Fonts ``@import``) as a string.

    Raises:
        KeyError: If ``font_name`` is not a key of ``FONTS``.
    """
    is_mono = font_name in MONOSPACE_FONTS
    code_font = font_name if is_mono else "DM Mono"

    google_fonts_url = f"https://fonts.googleapis.com/css2?family={FONTS[font_name]}"
    if not is_mono:
        # A proportional body font still needs a monospace face for code.
        google_fonts_url += f"&family={FONTS[code_font]}"

    spacious = spacing == "spacious"
    # Custom line-height per font type (monospace faces read better tighter).
    if is_mono:
        line_height = 1.6 if spacious else 1.4
    else:
        line_height = 1.8 if spacious else 1.6
    margin_multiplier = 1.2 if spacious else 1.0

    def num(value):
        """Format a number for CSS without binary-float noise (1.7999... -> 1.8)."""
        return f"{value:.3f}".rstrip("0").rstrip(".")

    def sp(base):
        """Scale a base length by the spacing multiplier."""
        return num(base * margin_multiplier)

    def pt(factor):
        """Scale the base font size by ``factor``."""
        return num(font_size * factor)

    # Date stamped into the bottom-right margin box of every page.
    today = datetime.now().strftime('%B %d, %Y')

    return f"""
@import url('{google_fonts_url}');
/*
 * Pages 2+ get a taller top margin so content clears the running header.
 * This is expressed as "larger default, smaller on :first" because
 * "@page :not(:first)" is not a valid page selector and was being ignored.
 */
@page {{
    size: A4;
    margin: {sp(3.5)}cm {sp(2)}cm {sp(2.5)}cm;
    @top-left {{
        content: element(header-left);
        vertical-align: middle;
    }}
    @top-right {{
        content: element(header-right);
        vertical-align: middle;
        text-align: right;
    }}
    @bottom-center {{
        content: "Page " counter(page) " of " counter(pages);
        font-family: '{font_name}', sans-serif;
        font-size: 9pt;
        color: #666;
    }}
    @bottom-right {{
        content: "{today}";
        font-family: '{font_name}', sans-serif;
        font-size: 9pt;
        color: #666;
    }}
}}
@page :first {{
    margin-top: {sp(2.5)}cm;
}}
.header-left {{
    position: running(header-left);
}}
.header-right {{
    position: running(header-right);
}}
.logo-container {{
    max-width: 120px;
    max-height: 40px;
    display: inline-block;
}}
.logo-container img {{
    max-width: 120px;
    max-height: 40px;
    width: auto;
    height: auto;
    display: block;
}}
.title-header {{
    font-family: '{font_name}', sans-serif;
    font-size: 14pt;
    font-weight: 600;
    color: #2c3e50;
    margin: 0;
    padding: 0;
}}
body {{
    font-family: '{font_name}', sans-serif;
    font-size: {num(font_size)}pt;
    line-height: {num(line_height)};
    color: #333;
    margin: 0;
    padding: 0;
}}
h1, h2, h3, h4, h5, h6 {{
    font-family: '{font_name}', sans-serif;
    color: #2c3e50;
    margin-top: {sp(1.5)}em;
    margin-bottom: {sp(0.5)}em;
    page-break-after: avoid;
    font-weight: 600;
}}
h1 {{ font-size: {pt(2)}pt; border-bottom: 2px solid #e0e0e0; padding-bottom: 0.3em; }}
h2 {{ font-size: {pt(1.6)}pt; border-bottom: 1px solid #e0e0e0; padding-bottom: 0.2em; }}
h3 {{ font-size: {pt(1.3)}pt; }}
h4 {{ font-size: {pt(1.1)}pt; }}
h5 {{ font-size: {pt(1)}pt; }}
h6 {{ font-size: {pt(0.9)}pt; color: #666; }}
p {{
    margin: {sp(0.8)}em 0;
}}
/* List item atomic page breaking */
ul, ol {{
    margin: {sp(1)}em 0;
    padding-left: 2em;
}}
li {{
    margin: {sp(0.5)}em 0;
    page-break-inside: avoid;
    break-inside: avoid;
}}
/* Prevent orphaned list items */
ul, ol {{
    orphans: 3;
    widows: 3;
}}
blockquote {{
    border-left: 4px solid #3498db;
    margin: {sp(1.2)}em 0;
    padding: {sp(0.5)}em 0 {sp(0.5)}em 1em;
    background: #f8f9fa;
    font-style: italic;
    color: #555;
    page-break-inside: avoid;
}}
code {{
    font-family: '{code_font}', monospace;
    font-size: {pt(0.9)}pt;
    background: #f4f4f4;
    padding: 0.1em 0.3em;
    border-radius: 3px;
    color: #c7254e;
}}
pre {{
    font-family: '{code_font}', monospace;
    font-size: {pt(0.85)}pt;
    background: #f8f8f8;
    border: 1px solid #ddd;
    border-radius: 4px;
    padding: {sp(1)}em;
    overflow-x: auto;
    line-height: 1.4;
    page-break-inside: avoid;
    margin: {sp(1)}em 0;
}}
pre code {{
    background: none;
    padding: 0;
    color: #333;
}}
table {{
    border-collapse: collapse;
    width: 100%;
    margin: {sp(1.2)}em 0;
    page-break-inside: avoid;
}}
th, td {{
    border: 1px solid #ddd;
    padding: {sp(0.6)}em {sp(0.8)}em;
    text-align: left;
}}
th {{
    background: #f5f5f5;
    font-weight: 600;
    color: #2c3e50;
}}
tr:nth-child(even) {{
    background: #fafafa;
}}
img {{
    max-width: 100%;
    height: auto;
    display: block;
    margin: {sp(1.2)}em 0;
    page-break-inside: avoid;
}}
hr {{
    border: none;
    border-top: 1px solid #ddd;
    margin: {sp(2)}em 0;
}}
a {{
    color: #3498db;
    text-decoration: none;
}}
a:hover {{
    text-decoration: underline;
}}
/* Mermaid diagram container - fits within one page */
.mermaid-container {{
    max-height: 600px;
    width: 100%;
    page-break-inside: avoid;
    break-inside: avoid;
    margin: {sp(1.5)}em 0;
    text-align: center;
    background: #fafafa;
    border: 1px solid #e0e0e0;
    border-radius: 8px;
    padding: 1em;
    box-sizing: border-box;
}}
.mermaid-container img {{
    max-width: 100%;
    max-height: 550px;
    width: auto;
    height: auto;
    object-fit: contain;
    display: block;
    margin: 0 auto;
}}
.mermaid-error {{
    padding: 1em;
    background: #fff3cd;
    border: 1px solid #ffc107;
    border-radius: 4px;
    color: #856404;
    margin: 1em 0;
}}
/* Syntax highlighting for code blocks - Complete Pygments style */
.codehilite .hll {{ background-color: #ffffcc }}
.codehilite .c {{ color: #008000; font-style: italic }} /* Comment */
.codehilite .err {{ border: 1px solid #FF0000 }} /* Error */
.codehilite .k {{ color: #0000ff; font-weight: bold }} /* Keyword */
.codehilite .o {{ color: #666666 }} /* Operator */
.codehilite .ch {{ color: #008000; font-style: italic }} /* Comment.Hashbang */
.codehilite .cm {{ color: #008000; font-style: italic }} /* Comment.Multiline */
.codehilite .cp {{ color: #0000ff }} /* Comment.Preproc */
.codehilite .cpf {{ color: #008000; font-style: italic }} /* Comment.PreprocFile */
.codehilite .c1 {{ color: #008000; font-style: italic }} /* Comment.Single */
.codehilite .cs {{ color: #008000; font-style: italic }} /* Comment.Special */
.codehilite .gd {{ color: #A00000 }} /* Generic.Deleted */
.codehilite .ge {{ font-style: italic }} /* Generic.Emph */
.codehilite .gr {{ color: #FF0000 }} /* Generic.Error */
.codehilite .gh {{ color: #000080; font-weight: bold }} /* Generic.Heading */
.codehilite .gi {{ color: #00A000 }} /* Generic.Inserted */
.codehilite .go {{ color: #888888 }} /* Generic.Output */
.codehilite .gp {{ color: #000080; font-weight: bold }} /* Generic.Prompt */
.codehilite .gs {{ font-weight: bold }} /* Generic.Strong */
.codehilite .gu {{ color: #800080; font-weight: bold }} /* Generic.Subheading */
.codehilite .gt {{ color: #0044DD }} /* Generic.Traceback */
.codehilite .kc {{ color: #0000ff; font-weight: bold }} /* Keyword.Constant */
.codehilite .kd {{ color: #0000ff; font-weight: bold }} /* Keyword.Declaration */
.codehilite .kn {{ color: #0000ff; font-weight: bold }} /* Keyword.Namespace */
.codehilite .kp {{ color: #0000ff }} /* Keyword.Pseudo */
.codehilite .kr {{ color: #0000ff; font-weight: bold }} /* Keyword.Reserved */
.codehilite .kt {{ color: #2b91af }} /* Keyword.Type */
.codehilite .m {{ color: #009999 }} /* Literal.Number */
.codehilite .s {{ color: #a31515 }} /* Literal.String */
.codehilite .na {{ color: #FF0000 }} /* Name.Attribute */
.codehilite .nb {{ color: #0086B3 }} /* Name.Builtin */
.codehilite .nc {{ color: #2b91af; font-weight: bold }} /* Name.Class */
.codehilite .no {{ color: #008080 }} /* Name.Constant */
.codehilite .nd {{ color: #AA22FF }} /* Name.Decorator */
.codehilite .ni {{ color: #999999; font-weight: bold }} /* Name.Entity */
.codehilite .ne {{ color: #D2413A; font-weight: bold }} /* Name.Exception */
.codehilite .nf {{ color: #000000; font-weight: bold }} /* Name.Function */
.codehilite .nl {{ color: #A0A000 }} /* Name.Label */
.codehilite .nn {{ color: #0000FF; font-weight: bold }} /* Name.Namespace */
.codehilite .nt {{ color: #0000ff }} /* Name.Tag */
.codehilite .nv {{ color: #008080 }} /* Name.Variable */
.codehilite .ow {{ color: #0000ff; font-weight: bold }} /* Operator.Word */
.codehilite .w {{ color: #bbbbbb }} /* Text.Whitespace */
.codehilite .mb {{ color: #009999 }} /* Literal.Number.Bin */
.codehilite .mf {{ color: #009999 }} /* Literal.Number.Float */
.codehilite .mh {{ color: #009999 }} /* Literal.Number.Hex */
.codehilite .mi {{ color: #009999 }} /* Literal.Number.Integer */
.codehilite .mo {{ color: #009999 }} /* Literal.Number.Oct */
.codehilite .sa {{ color: #a31515 }} /* Literal.String.Affix */
.codehilite .sb {{ color: #a31515 }} /* Literal.String.Backtick */
.codehilite .sc {{ color: #a31515 }} /* Literal.String.Char */
.codehilite .dl {{ color: #a31515 }} /* Literal.String.Delimiter */
.codehilite .sd {{ color: #a31515; font-style: italic }} /* Literal.String.Doc */
.codehilite .s2 {{ color: #a31515 }} /* Literal.String.Double */
.codehilite .se {{ color: #a31515; font-weight: bold }} /* Literal.String.Escape */
.codehilite .sh {{ color: #a31515 }} /* Literal.String.Heredoc */
.codehilite .si {{ color: #a31515 }} /* Literal.String.Interpol */
.codehilite .sx {{ color: #a31515 }} /* Literal.String.Other */
.codehilite .sr {{ color: #a31515 }} /* Literal.String.Regex */
.codehilite .s1 {{ color: #a31515 }} /* Literal.String.Single */
.codehilite .ss {{ color: #a31515 }} /* Literal.String.Symbol */
.codehilite .bp {{ color: #0086B3 }} /* Name.Builtin.Pseudo */
.codehilite .fm {{ color: #000000; font-weight: bold }} /* Name.Function.Magic */
.codehilite .vc {{ color: #008080 }} /* Name.Variable.Class */
.codehilite .vg {{ color: #008080 }} /* Name.Variable.Global */
.codehilite .vi {{ color: #008080 }} /* Name.Variable.Instance */
.codehilite .vm {{ color: #008080 }} /* Name.Variable.Magic */
.codehilite .il {{ color: #009999 }} /* Literal.Number.Integer.Long */
"""
def generate_html(markdown_text, title, font_name, spacing="normal", font_size=11, logo_data=None):
    """Convert Markdown to a complete, styled HTML document.

    Args:
        markdown_text: Markdown source; ``::mermaid`` blocks are expanded by
            ``process_mermaid_blocks`` before conversion.
        title: Document title placed in the running page header. It is
            HTML-escaped, so characters such as ``<`` and ``&`` show literally.
        font_name, spacing, font_size: Passed through to ``get_css_template``.
        logo_data: Optional image URL (a ``data:`` URL in this app) for the
            header logo; omitted when falsy.

    Returns:
        The HTML document as a string, with the stylesheet inlined.
    """
    # Function-scope import; the local name avoids clashing with the
    # weasyprint ``HTML`` class imported at file level.
    from html import escape

    # Process Mermaid blocks first (renders diagrams and keeps code blocks)
    processed_text = process_mermaid_blocks(markdown_text)

    # Convert Markdown to HTML with extensions
    md = markdown.Markdown(
        extensions=[
            'extra',
            'codehilite',
            'tables',
            'fenced_code',
            'nl2br',
            'sane_lists',
        ],
        extension_configs={
            'codehilite': {
                'linenums': False,
                'guess_lang': True,
                'css_class': 'codehilite',
                'use_pygments': True,
            }
        },
    )
    body_html = md.convert(processed_text)

    # Handle logo: the URL goes inside a double-quoted attribute, so quotes
    # must be escaped as well.
    logo_html = ""
    if logo_data:
        safe_src = escape(str(logo_data), quote=True)
        logo_html = f'<div class="logo-container"><img src="{safe_src}" alt="Logo" /></div>'

    # The title is free text typed by the user; escape it so markup characters
    # cannot break or inject into the header.
    safe_title = escape(str(title))

    css = get_css_template(font_name, spacing, font_size)
    return f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>{css}</style>
</head>
<body>
<div class="header-left">
{logo_html}
</div>
<div class="header-right">
<div class="title-header">{safe_title}</div>
</div>
{body_html}
</body>
</html>
"""
def generate_pdf(html_content):
    """Render an HTML string to PDF with WeasyPrint.

    Returns an in-memory ``BytesIO`` holding the PDF, rewound to the start so
    it can be read or handed to a download widget directly.
    """
    document = HTML(string=html_content)
    output = BytesIO()
    document.write_pdf(output)
    # write_pdf leaves the cursor at the end; rewind for the reader.
    output.seek(0)
    return output
def image_to_base64(image_file):
    """Convert an uploaded image to a base64 PNG data URL.

    Transparent images (RGBA, LA, or palette images) are flattened onto a
    white background. Images in modes the PNG writer cannot store (for
    example CMYK JPEGs) are converted to RGB instead of failing.

    Args:
        image_file: A path or binary file-like object accepted by
            ``PIL.Image.open``.

    Returns:
        A ``data:image/png;base64,...`` string, or ``None`` if the image could
        not be processed (the error is reported via ``st.error``).
    """
    try:
        # Open image with PIL to validate and get format
        img = Image.open(image_file)

        # Palette images may carry transparency; expand so the alpha channel
        # is explicit before flattening.
        if img.mode == 'P':
            img = img.convert('RGBA')

        if img.mode in ('RGBA', 'LA'):
            # Flatten onto white using the alpha channel as the paste mask.
            background = Image.new('RGB', img.size, (255, 255, 255))
            background.paste(img, mask=img.split()[-1])
            img = background
        elif img.mode not in ('RGB', 'L', '1', 'I', 'I;16'):
            # e.g. CMYK / YCbCr: not writable as PNG, so normalise to RGB.
            img = img.convert('RGB')

        # Save to buffer
        buffer = BytesIO()
        img.save(buffer, format='PNG')

        # Encode to base64
        img_base64 = base64.b64encode(buffer.getvalue()).decode()
        return f"data:image/png;base64,{img_base64}"
    except Exception as e:
        # UI boundary: report the problem and let the caller continue without a logo.
        st.error(f"Error processing image: {str(e)}")
        return None
def encode_mermaid_for_ink(mermaid_code):
    """Encode Mermaid code as a ``pako:`` payload for the mermaid.ink API.

    The payload is the same shape that mermaid.live share links use: a JSON
    state object (``{"code": ..., "mermaid": {"theme": "default"}}``),
    compressed as a zlib stream (what pako's ``deflate`` produces), then
    URL-safe base64 encoded. The zlib header and checksum are kept: pako's
    ``inflate`` expects a wrapped stream, not raw deflate.

    Args:
        mermaid_code: Mermaid diagram source text.

    Returns:
        The URL-safe base64 string to append after ``pako:``.
    """
    import json  # function-scope import keeps this block self-contained

    state = json.dumps({"code": mermaid_code, "mermaid": {"theme": "default"}})
    compressed = zlib.compress(state.encode('utf-8'), level=9)
    return base64.urlsafe_b64encode(compressed).decode('ascii')
def render_mermaid_to_svg(mermaid_code):
    """
    Fetch an SVG rendering of Mermaid code from the mermaid.ink API.

    Returns: the response body (SVG text) on HTTP 200; None for any other
    status or if encoding/requesting raises (the error is printed).
    """
    try:
        payload = encode_mermaid_for_ink(mermaid_code)
        reply = requests.get(f"https://mermaid.ink/svg/pako:{payload}", timeout=30)
        # Anything other than 200 is treated as "could not render".
        return reply.text if reply.status_code == 200 else None
    except Exception as e:
        # Best effort: a failed diagram must not abort PDF generation.
        print(f"Mermaid rendering error: {e}")
        return None
# Matches "::mermaid", a line break, the diagram source, a line break, "::".
# "\r?\n" accepts both LF and CRLF so uploaded Windows files are recognised.
MERMAID_BLOCK_RE = re.compile(r'::mermaid\r?\n(.*?)\r?\n::', re.DOTALL)


def process_mermaid_blocks(markdown_text):
    """
    Parse ::mermaid...:: blocks and replace each with a rendered diagram
    followed by a fenced ``mermaid`` code block holding the source.

    The diagram is embedded as a base64 SVG data URL; when rendering fails a
    warning box is emitted in its place. Text without ::mermaid blocks is
    returned unchanged.

    Returns: modified markdown/HTML hybrid with embedded diagrams
    """
    def replace_mermaid(match):
        # Normalise CRLF inside the captured source before rendering/echoing it.
        mermaid_code = match.group(1).replace('\r\n', '\n').strip()

        # Render to SVG
        svg_content = render_mermaid_to_svg(mermaid_code)
        if svg_content:
            # Embed SVG as base64 data URL
            svg_base64 = base64.b64encode(svg_content.encode('utf-8')).decode('utf-8')
            img_tag = f'<div class="mermaid-container"><img src="data:image/svg+xml;base64,{svg_base64}" alt="Mermaid Diagram" /></div>'
        else:
            img_tag = '<div class="mermaid-error">⚠️ Failed to render Mermaid diagram</div>'

        # Create code block for raw mermaid code (will be processed by markdown later)
        code_block = f'\n```mermaid\n{mermaid_code}\n```\n'

        # Return both: image first, then code block. The surrounding newlines
        # keep the raw HTML separated from adjacent markdown.
        return f'\n{img_tag}\n{code_block}'

    # Process all mermaid blocks
    return MERMAID_BLOCK_RE.sub(replace_mermaid, markdown_text)
# Streamlit UI
# NOTE: several emoji literals in this section were mis-encoded (e.g. "πŸ“„");
# they are restored to the intended characters.
st.title("📄 Markdown to PDF Converter")
st.markdown("Convert your Markdown documents into beautifully formatted PDFs with syntax highlighting")

# Sidebar for settings
with st.sidebar:
    st.header("⚙️ Settings")

    # Logo upload
    st.subheader("🖼️ Logo")
    logo_file = st.file_uploader(
        "Upload Logo (PNG, JPG, JPEG)",
        type=['png', 'jpg', 'jpeg'],
        help="Recommended size: 240x80px. Will be scaled to fit 120x40px in header."
    )

    # Show logo preview if uploaded, otherwise fall back to ./logo.png if present
    if logo_file:
        st.image(logo_file, caption="Logo Preview", width=150)
    elif os.path.exists('./logo.png'):
        st.info("📌 Using default logo.png from directory")
        # Pass the path rather than an open file handle: a path string is a
        # documented st.image input.
        st.image('./logo.png', caption="Default Logo", width=150)

    st.divider()

    st.subheader("🎨 Typography")
    selected_font = st.selectbox(
        "Font Family",
        options=list(FONTS.keys()),
        index=0,
        help="Choose from 5 curated Google Fonts"
    )
    font_size = st.slider(
        "Font Size (pt)",
        min_value=9,
        max_value=14,
        value=11,
        step=1,
        help="Base font size for body text"
    )
    spacing = st.radio(
        "Spacing",
        options=["normal", "spacious"],
        index=0,
        help="Line height and margin density"
    )

    st.divider()
    st.caption("💡 Tip: Use ``` with language for syntax highlighting")
# Main content area: input on the left, preview on the right
col1, col2 = st.columns([1, 1])

with col1:
    st.subheader("📝 Input")
    title = st.text_input(
        "Document Title",
        value="My Document",
        max_chars=100,
        help="Appears in the header of each page"
    )

    # Markdown input tabs
    input_tab1, input_tab2 = st.tabs(["✍️ Write Markdown", "📁 Upload File"])

    with input_tab1:
        # The sample below is runtime text; the Python snippet's indentation
        # is restored so the demo shows valid code.
        markdown_text = st.text_area(
            "Markdown Content",
            value="""# Welcome to Markdown PDF
This is a **sample** document with **syntax highlighting**.
## Features
- Beautiful typography
- Professional layout
- Code highlighting with Pygments
- Atomic list item page breaks
## Code Example
```python
def fibonacci(n):
    \"\"\"Calculate Fibonacci number\"\"\"
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)
print(fibonacci(10))
```
## JavaScript Example
```javascript
const greeting = "Hello, World!";
console.log(greeting);
```
Enjoy! 🎉""",
            height=400,
            help="Supports GitHub-Flavored Markdown"
        )

    with input_tab2:
        uploaded_file = st.file_uploader(
            "Upload Markdown File",
            type=['md', 'markdown', 'txt'],
            help="Upload a .md or .txt file"
        )
        if uploaded_file is not None:
            # An uploaded file replaces whatever is in the text area.
            # getvalue() is independent of the read cursor; "utf-8-sig" also
            # strips a leading BOM, which would otherwise sit before the first heading.
            try:
                markdown_text = uploaded_file.getvalue().decode('utf-8-sig')
            except UnicodeDecodeError:
                st.error(f"❌ Could not read {uploaded_file.name}: the file is not valid UTF-8 text")
            else:
                st.success(f"✅ Loaded {uploaded_file.name}")
with col2:
    st.subheader("👁️ Preview")
    # Generate HTML preview
    try:
        # Simple preview without the PDF stylesheet. The extension list
        # mirrors the PDF conversion (including nl2br and sane_lists) so line
        # breaks and lists appear the same way in both.
        preview_md = markdown.Markdown(
            extensions=['extra', 'tables', 'fenced_code', 'codehilite', 'nl2br', 'sane_lists']
        )
        preview_html = preview_md.convert(markdown_text)

        # Display preview with basic styling. The HTML is kept flush-left on
        # purpose: indented lines would be treated as a code block by st.markdown.
        st.markdown(
            f'''<div style="border: 1px solid #181818; padding: 20px; background: #262631;
max-height: 500px; overflow-y: auto; border-radius: 5px;">
{preview_html}
</div>''',
            unsafe_allow_html=True
        )
    except Exception as e:
        # UI boundary: show the problem instead of crashing the page.
        st.error(f"Preview error: {str(e)}")
# Generate PDF button (centered in the middle of three equal columns)
st.divider()
col_btn1, col_btn2, col_btn3 = st.columns([1, 1, 1])
with col_btn2:
    if st.button("🎨 Generate PDF", type="primary", use_container_width=True):
        with st.spinner("Generating PDF with syntax highlighting..."):
            try:
                # Process logo: an uploaded file wins, otherwise fall back to ./logo.png
                logo_data = None
                if logo_file:
                    logo_data = image_to_base64(logo_file)
                elif os.path.exists('./logo.png'):
                    with open('./logo.png', 'rb') as f:
                        img_base64 = base64.b64encode(f.read()).decode()
                    # The default logo is always a PNG file.
                    logo_data = f"data:image/png;base64,{img_base64}"

                # Generate HTML and PDF
                html_content = generate_html(
                    markdown_text,
                    title,
                    selected_font,
                    spacing,
                    font_size,
                    logo_data
                )
                pdf_buffer = generate_pdf(html_content)
                st.success("✅ PDF generated successfully!")

                # Download button. The title is free text, so collapse anything
                # that is not a word character, dot or dash into "_" to keep
                # the filename valid; fall back to "document" when nothing is left.
                safe_title = re.sub(r'[^\w.-]+', '_', title).strip('._').lower() or 'document'
                filename = f"{safe_title}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
                st.download_button(
                    label="⬇️ Download PDF",
                    data=pdf_buffer,
                    file_name=filename,
                    mime="application/pdf",
                    use_container_width=True
                )
                st.info(f"📄 Filename: `{filename}`")
            except Exception as e:
                # UI boundary: surface the failure with expandable details.
                st.error(f"❌ Error generating PDF: {str(e)}")
                with st.expander("Show error details"):
                    st.exception(e)
# Footer: three short captions in equal columns.
# The first two emoji were mis-encoded in the literals and are restored here.
st.divider()
col_footer1, col_footer2, col_footer3 = st.columns(3)
with col_footer1:
    st.caption("🚀 Built with Streamlit")
with col_footer2:
    st.caption("📚 Supports GitHub-Flavored Markdown")
with col_footer3:
    st.caption("🎨 Professional PDF output with syntax highlighting")