markdown2png / app.py
broadfield-dev's picture
Update app.py
58d9915 verified
from flask import Flask, request, render_template_string, send_file, jsonify
import markdown
import imgkit
import os
import traceback
from io import BytesIO
import re
import base64
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.styles import get_all_styles, get_style_by_name
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)
# Scratch directory, created at import time if it does not already exist.
# NOTE(review): nothing in the visible part of this module reads TEMP_DIR —
# confirm whether it is still needed.
TEMP_DIR = "/tmp/markdown_temp"
os.makedirs(TEMP_DIR, exist_ok=True)
# --- 1. PARSING LOGIC ---
def parse_repo2markdown(text):
    """Split a Repo2Markdown dump into a structure listing and per-file sections.

    Args:
        text: Markdown containing an optional ``## File Structure`` section
            followed by any number of ``### File: <name>`` sections.

    Returns:
        A list of component dicts with the keys ``type``, ``filename`` and
        ``content``. The structure listing (if present) comes first with type
        ``'structure'``; every file section follows with type ``'file'``.
        File content is kept verbatim (including its code fences) apart from
        surrounding whitespace being stripped.
    """
    components = []

    # The header line is matched WITHOUT consuming its trailing newline, so the
    # "\n### File:" terminator can still be seen when the section is empty.
    # Optional trailing blanks / "\r" keep CRLF input working.
    struct_match = re.search(
        r'## File Structure[ \t]*\r?(?=\n|\Z)([\s\S]*?)(?=\n### File:|\Z)',
        text,
    )
    if struct_match:
        components.append({
            'type': 'structure',
            'filename': 'File Structure',
            'content': struct_match.group(1).strip(),
        })

    # Same idea for files: "(.*)" stops at the end of the header line and the
    # lazy body starts at the newline, so an empty body no longer swallows the
    # header of the following file.
    file_pattern = re.compile(r'### File: (.*)([\s\S]*?)(?=\n### File:|\Z)')
    for match in file_pattern.finditer(text):
        components.append({
            'type': 'file',
            'filename': match.group(1).strip(),
            # The wrapping code fences are deliberately kept so the Markdown
            # renderer still sees them.
            'content': match.group(2).strip(),
        })
    return components
def parse_standard_readme(text):
    """Split a README into an intro plus one component per ``## `` heading.

    Args:
        text: Markdown source.

    Returns:
        A list of component dicts (``type``, ``filename``, ``content``). Any
        non-blank text before the first ``## `` heading becomes an ``'intro'``
        component named ``'Header/Intro'``; each ``## `` heading becomes a
        ``'section'`` component named after the heading text.
    """
    components = []
    # With one capture group re.split returns
    # [before, heading1, body1, heading2, body2, ...] — always an odd length.
    parts = re.split(r'^(## .*?)$', text, flags=re.MULTILINE)

    intro = parts[0].strip()
    if intro:
        components.append({'type': 'intro', 'filename': 'Header/Intro', 'content': intro})

    for heading, body in zip(parts[1::2], parts[2::2]):
        components.append({
            'type': 'section',
            # Drop only the leading "##" marker; a "##" that appears later in
            # the title is part of the title and must survive.
            'filename': heading[2:].strip(),
            'content': body.strip(),
        })
    return components
def parse_changelog(text):
    """Split a changelog into a header plus one component per version heading.

    A version heading is a line of the form ``## [X.Y.Z...] ...``.

    Args:
        text: Markdown source of the changelog.

    Returns:
        A list of component dicts (``type``, ``filename``, ``content``). Any
        non-blank text before the first version heading becomes an ``'intro'``
        component named ``'Header'``; each version heading becomes a
        ``'version'`` component named after the heading text.
    """
    components = []
    # One capture group => [before, heading1, body1, heading2, body2, ...].
    parts = re.split(r'^(## \[\d+\.\d+\.\d+.*?\].*?)$', text, flags=re.MULTILINE)

    header = parts[0].strip()
    if header:
        components.append({'type': 'intro', 'filename': 'Header', 'content': header})

    for heading, body in zip(parts[1::2], parts[2::2]):
        components.append({
            'type': 'version',
            # Strip only the leading "##" marker, not every "##" in the line.
            'filename': heading[2:].strip(),
            'content': body.strip(),
        })
    return components
def parse_agent_action(text):
    """Extract agent commands and file sections from an agent-action document.

    Args:
        text: Markdown containing ``### HF_ACTION: <command>`` lines and
            ``### File: <name>`` sections.

    Returns:
        A list of component dicts (``type``, ``filename``, ``content``): first
        one ``'action'`` component per ``### HF_ACTION:`` line (all named
        ``'Agent Command'``), then one ``'file'`` component per file section.
    """
    components = []

    action_pattern = re.compile(r'^### HF_ACTION: (.*)$', re.MULTILINE)
    for match in action_pattern.finditer(text):
        components.append({
            'type': 'action',
            'filename': 'Agent Command',
            'content': match.group(1).strip(),
        })

    # The header line is matched without consuming its newline so that the
    # "\n..." terminators below still fire when a file section is empty;
    # otherwise the empty section would swallow the next header.
    file_pattern = re.compile(
        r'### File: (.*)([\s\S]*?)'
        r'(?=\n### File:|\n## File Structure|\n### HF_ACTION:|\Z)'
    )
    for match in file_pattern.finditer(text):
        components.append({
            'type': 'file',
            'filename': match.group(1).strip(),
            'content': match.group(2).strip(),
        })
    return components
# --- 2. HTML GENERATION & STYLING ---
def build_full_html(markdown_text, styles, for_image=False):
    """Render Markdown into a complete, self-styled HTML document.

    Args:
        markdown_text: Markdown source to render.
        styles: Mapping of style options. Keys read here: ``font_family``,
            ``font_size``, ``line_height``, ``text_color``,
            ``background_color``, ``page_padding``, ``code_padding``,
            ``highlight_theme`` and ``custom_css``. Missing keys fall back to
            the defaults visible in the body.
        for_image: When true, no Google Fonts ``<link>`` is emitted so that
            image generation does not depend on a network fetch.

    Returns:
        The HTML document as a string; the rendered Markdown is wrapped in
        ``<div id='output-wrapper'>`` and all CSS is scoped to that wrapper
        (apart from the ``body`` background rule).
    """
    from urllib.parse import quote_plus

    wrapper_id = "#output-wrapper"
    font_family = styles.get('font_family', "sans-serif")

    # 1. Google Fonts (only for the web preview, to avoid network errors in
    #    image generation). Stacks that already name a sans-serif/monospace
    #    fallback are left alone, as before. In addition, a bare CSS generic
    #    keyword (e.g. "serif") or an empty name is never requested from
    #    Google Fonts, and the name is URL-encoded before it goes into href.
    generic_families = {
        'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy',
        'system-ui', 'ui-serif', 'ui-sans-serif', 'ui-monospace',
        'ui-rounded', 'math', 'emoji', 'fangsong',
    }
    google_font_link = ""
    if not for_image and "sans-serif" not in font_family and "monospace" not in font_family:
        clean_font_name = font_family.split(',')[0].strip().strip("'\"").strip()
        if clean_font_name and clean_font_name.lower() not in generic_families:
            google_font_link = (
                '<link href="https://fonts.googleapis.com/css2?family='
                f'{quote_plus(clean_font_name)}:wght@400;700&display=swap" rel="stylesheet">'
            )

    # 2. Syntax highlighting.
    highlight_theme = styles.get('highlight_theme', 'monokai')

    # Background colour comes from the Pygments theme; unknown themes
    # (including 'none') fall back to a dark or light gray.
    try:
        style_obj = get_style_by_name(highlight_theme)
        bg_color = style_obj.background_color
    except Exception:
        bg_color = "#272822" if highlight_theme == 'monokai' else "#f6f8fa"

    # Pygments token colour rules, scoped to the wrapper to avoid global
    # pollution. Best effort: an unknown theme simply yields no rules.
    pygments_css = ""
    if highlight_theme != 'none':
        try:
            formatter = HtmlFormatter(style=highlight_theme)
            pygments_css = formatter.get_style_defs(f'{wrapper_id} .codehilite')
        except Exception:
            pygments_css = ""

    # 3. CSS construction.
    # NOTE(review): style values and 'custom_css' are interpolated verbatim;
    # a value containing "</style>" would terminate the style element.
    scoped_css = f"""
        body {{ background-color: {styles.get('background_color', '#ffffff')}; margin: 0; padding: 0; }}
        {wrapper_id} {{
            font-family: {font_family};
            font-size: {styles.get('font_size', '16')}px;
            line-height: {styles.get('line_height', '1.6')};
            color: {styles.get('text_color', '#333')};
            background-color: {styles.get('background_color', '#fff')};
            padding: {styles.get('page_padding', '40')}px;
        }}
        /* Table Styling */
        {wrapper_id} table {{ border-collapse: collapse; width: 100%; margin-bottom: 1em; }}
        {wrapper_id} th, {wrapper_id} td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
        {wrapper_id} th {{ background-color: #f8f8f8; font-weight: bold; }}
        /* Headers */
        {wrapper_id} h1, {wrapper_id} h2, {wrapper_id} h3 {{ border-bottom: 1px solid #eee; padding-bottom: 5px; margin-top: 1.5em; }}
        /* --- CODE BOX STYLING (FIXED) --- */
        /* Apply box styling to ALL pre tags (plain or highlighted) */
        {wrapper_id} pre {{
            background-color: {bg_color}; /* Theme bg or default */
            color: {styles.get('text_color', '#333')};
            padding: {styles.get('code_padding', '15')}px;
            border-radius: 6px;
            border: 1px solid rgba(0,0,0,0.1);
            overflow-x: auto;
            margin: 1em 0;
            line-height: 1.45;
        }}
        /* Ensure code inside pre doesn't double-pad */
        {wrapper_id} pre code {{
            background-color: transparent;
            padding: 0;
            border: none;
            font-family: 'Fira Code', 'Consolas', 'Monaco', monospace;
            font-size: 0.9em;
        }}
        /* Specific override for Pygments container if it exists */
        {wrapper_id} .codehilite {{
            background-color: {bg_color};
            border-radius: 6px;
            margin: 1em 0;
        }}
        {wrapper_id} .codehilite pre {{
            margin: 0;
            border: none; /* Let container handle border if needed */
        }}
        /* Syntax Highlighting Colors */
        {pygments_css}
        /* Custom User Overrides */
        {styles.get('custom_css', '')}
    """

    # 4. Render Markdown.
    # 'fenced_code' catches ``` blocks and 'codehilite' colours them;
    # 'guess_lang' is off so unlabelled blocks are not auto-detected.
    html_content = markdown.markdown(
        markdown_text,
        extensions=['fenced_code', 'tables', 'codehilite', 'nl2br'],
        extension_configs={
            'codehilite': {
                'css_class': 'codehilite',
                'guess_lang': False,
                'noclasses': False,
            }
        },
    )

    return f"""
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset='UTF-8'>
    {google_font_link}
    <style>{scoped_css}</style>
    </head>
    <body>
    <div id='output-wrapper'>{html_content}</div>
    </body>
    </html>
    """
# --- 3. FLASK ROUTES ---
@app.route('/parse', methods=['POST'])
def parse_endpoint():
    """Detect the format of the posted Markdown and return its components.

    Reads the ``markdown_text`` form field, picks the first matching format
    (Repo2Markdown, Agent Action, Changelog, Standard README, otherwise Plain
    Markdown) and responds with JSON ``{'format': ..., 'components': [...]}``.
    Any exception is printed and reported as ``{'error': ...}`` with HTTP 500.
    """
    text = request.form.get('markdown_text', '')

    def line_matches(pattern):
        # True when some line of the submitted text matches `pattern`.
        return re.search(pattern, text, flags=re.MULTILINE) is not None

    try:
        # Choose a label and parser first; the parser is called once below.
        if "## File Structure" in text and "### File:" in text:
            label, parser = "Repo2Markdown", parse_repo2markdown
        elif line_matches(r'^### HF_ACTION:'):
            label, parser = "Agent Action", parse_agent_action
        elif line_matches(r'^## \[\d+\.\d+\.\d+.*?\].*?$'):
            label, parser = "Changelog", parse_changelog
        elif line_matches(r'^# ') and line_matches(r'^## '):
            label, parser = "Standard README", parse_standard_readme
        else:
            label, parser = "Plain Markdown", None

        if parser is None:
            # No recognised structure: hand the whole text back as one piece.
            parts = [{'type': 'text', 'filename': 'Full Content', 'content': text}]
        else:
            parts = parser(text)
        return jsonify({'format': label, 'components': parts})
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
def _coerce_img_width(raw, default=1024):
    """Return *raw* as a positive integer pixel width, or *default* if unusable."""
    try:
        width = int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return default
    return width if width > 0 else default


@app.route('/convert', methods=['POST'])
def convert_endpoint():
    """Render the posted Markdown to an HTML preview and/or a PNG image.

    Expects a JSON object with:
        markdown_text: Markdown source to render.
        current_html: HTML currently shown in the client's preview pane.
        download: truthy to return a file attachment instead of JSON.
        download_type: ``'html'`` for an HTML attachment; anything else
            yields a PNG attachment.
        styles: style options passed to ``build_full_html`` plus
            ``img_width`` (image width in pixels, default 1024).

    Returns:
        * download + ``'html'``: ``output.html`` attachment.
        * download otherwise: ``output.png`` attachment.
        * no download: JSON with ``preview_html`` and ``preview_png_base64``
          (``None`` plus a ``warning`` when image generation fails).
        * JSON ``{'error': ...}`` with 400 for a malformed payload, 500 for
          any other failure.
    """
    data = request.json
    # A JSON body that is not an object (null, list, ...) used to surface as
    # an AttributeError -> 500; report it as a client error instead.
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    try:
        styles = data.get('styles') or {}
        if not isinstance(styles, dict):
            return jsonify({'error': "'styles' must be a JSON object."}), 400

        markdown_text = data.get('markdown_text') or ''
        current_html = data.get('current_html')
        if not isinstance(current_html, str):
            current_html = ''
        is_download = bool(data.get('download', False))

        # HTML download never needs an image, so answer it first.
        if is_download and data.get('download_type') == 'html':
            preview_html = build_full_html(markdown_text, styles, for_image=False)
            return send_file(
                BytesIO(preview_html.encode("utf-8")),
                as_attachment=True,
                download_name="output.html",
                mimetype="text/html",
            )

        # 1. Determine the HTML for the image.
        # The client snapshot (current_html) is what the user is looking at,
        # so it is used for downloads, and for previews only when no Markdown
        # was sent. For a preview WITH Markdown the snapshot is the *previous*
        # render; using it produced a PNG that lagged one step behind the
        # HTML preview, so the image is rebuilt from the Markdown instead.
        use_snapshot = bool(current_html.strip()) and (
            is_download or not markdown_text.strip()
        )
        if use_snapshot:
            image_html = (
                "<!DOCTYPE html><html><head><meta charset='UTF-8'></head>"
                f"<body>{current_html}</body></html>"
            )
        else:
            image_html = build_full_html(markdown_text, styles, for_image=True)

        # 2. wkhtmltoimage options.
        # NOTE(review): "enable-local-file-access" is combined with
        # caller-supplied HTML here, which may let a request embed local
        # files (file:// URLs) in the rendered image — confirm it is needed.
        options = {
            "quiet": "",
            "encoding": "UTF-8",
            # Caller-supplied width, coerced to a positive int (default 1024).
            "width": _coerce_img_width(styles.get('img_width', 1024)),
            "disable-smart-width": "",
            "enable-local-file-access": "",
            "disable-javascript": "",
            "load-error-handling": "ignore",
            "load-media-error-handling": "ignore",
        }

        if is_download:
            png_bytes = imgkit.from_string(image_html, False, options=options)
            return send_file(
                BytesIO(png_bytes),
                as_attachment=True,
                download_name="output.png",
                mimetype="image/png",
            )

        preview_html = build_full_html(markdown_text, styles, for_image=False)
        try:
            png_bytes = imgkit.from_string(image_html, False, options=options)
            b64_img = base64.b64encode(png_bytes).decode('utf-8')
        except Exception:
            # Best effort: still return the HTML preview, but log the cause
            # instead of discarding it silently.
            traceback.print_exc()
            return jsonify({
                'preview_html': preview_html,
                'preview_png_base64': None,
                'warning': 'Image generation failed.',
            })
        return jsonify({'preview_html': preview_html, 'preview_png_base64': b64_img})
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
@app.route('/')
def index():
    """Serve the single-page UI.

    The page is an inline Jinja template. Its script posts the input to
    ``/parse`` to list components and to ``/convert`` for previews and
    downloads. The only template variable is ``styles``, the sorted list of
    Pygments style names used to fill the syntax-theme dropdown.
    """
    return render_template_string("""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8"><title>Intelligent Markdown Converter</title>
    <style>
        :root { --bg: #f4f7f6; --text: #333; --card: #fff; --border: #ddd; --primary: #5a32a3; }
        body.dark-mode { --bg: #1a1a1a; --text: #eee; --card: #252525; --border: #444; }
        body { font-family: 'Inter', sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; background: var(--bg); color: var(--text); transition: background 0.3s; }
        fieldset { border: 1px solid var(--border); background: var(--card); padding: 20px; margin-bottom: 25px; border-radius: 12px; }
        legend { font-weight: bold; padding: 0 10px; color: var(--primary); }
        textarea { width: 100%; border-radius: 6px; padding: 12px; border: 1px solid var(--border); background: var(--card); color: var(--text); font-family: 'Fira Code', monospace; box-sizing: border-box; }
        .format-banner { background: var(--primary); color: white; padding: 6px 16px; border-radius: 20px; font-size: 13px; display: inline-block; margin-bottom: 15px; font-weight: bold; }
        .style-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 15px; }
        .comp-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); gap: 15px; }
        .comp-card { background: var(--card); border: 1px solid var(--border); padding: 12px; border-radius: 8px; }
        .comp-card textarea { height: 60px; font-size: 11px; margin-top: 8px; opacity: 0.8; pointer-events: none; resize: none; }
        .action-bar { display: flex; gap: 15px; margin-top: 20px; }
        button { padding: 12px 24px; cursor: pointer; border: none; border-radius: 6px; font-weight: bold; }
        .btn-primary { background: var(--primary); color: #fff; }
        .btn-secondary { background: #333; color: #fff; border: 1px solid #555; }
        .btn-download { background: #28a745; color: white; font-size: 12px; padding: 5px 10px; margin-left: 10px;}
        .preview-section { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-top: 30px; }
        .preview-box { background: #fff; border: 1px solid #ddd; border-radius: 8px; height: 600px; display: flex; flex-direction: column; overflow: hidden; }
        .preview-header { background: #eee; padding: 10px; display: flex; justify-content: space-between; align-items: center; color: #333; font-weight: bold; border-bottom: 1px solid #ddd; }
        .preview-content { flex: 1; overflow: auto; padding: 15px; }
        img { max-width: 100%; height: auto; border: 1px solid #eee; }
    </style>
</head>
<body>
    <div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:20px;">
        <h1>Markdown Tool</h1>
        <button onclick="document.body.classList.toggle('dark-mode')" class="btn-secondary">🌓 Dark Mode</button>
    </div>
    <fieldset>
        <legend>1. Input</legend>
        <textarea id="md-input" rows="8"></textarea>
        <div class="action-bar"><button onclick="analyze()" class="btn-primary" id="load-btn">Analyze Content</button></div>
    </fieldset>
    <div id="comp-section" style="display:none;">
        <div id="detected-format" class="format-banner"></div>
        <fieldset>
            <legend>2. Components</legend>
            <div class="comp-grid" id="comp-list"></div>
        </fieldset>
    </div>
    <fieldset>
        <legend>3. Styles</legend>
        <div class="style-grid">
            <div><label>Font</label><select id="f_family">
                <option value="sans-serif">Sans-Serif</option>
                <option value="'Inter', sans-serif">Inter</option>
                <option value="monospace">Monospace</option>
                <option value="serif">Serif</option>
            </select></div>
            <div><label>Size</label><input type="number" id="f_size" value="16"></div>
            <div><label>Height</label><input type="number" id="l_height" value="1.6" step="0.1"></div>
            <div><label>Image Width (px)</label><input type="number" id="img_width" value="1024" min="400" max="3000"></div>
            <div><label>Text</label><input type="color" id="t_color" value="#333333"></div>
            <div><label>BG</label><input type="color" id="b_color" value="#ffffff"></div>
            <div><label>Syntax</label><select id="h_theme">
                {% for s in styles %}<option value="{{s}}" {% if s == 'monokai' %}selected{% endif %}>{{s}}</option>{% endfor %}
            </select></div>
        </div>
        <textarea id="c_css" rows="2" style="margin-top:15px;" placeholder="Custom CSS..."></textarea>
    </fieldset>
    <button onclick="process('preview')" class="btn-primary" id="gen-btn" style="width:100%; height:50px; font-size:18px;">GENERATE PREVIEW</button>
    <div id="preview-area" style="display:none;">
        <div class="preview-section">
            <div class="preview-box">
                <div class="preview-header">
                    HTML Preview <button class="btn-download" onclick="process('download', 'html')">Download</button>
                </div>
                <div id="html-prev" class="preview-content"></div>
            </div>
            <div class="preview-box">
                <div class="preview-header">
                    PNG Preview <button class="btn-download" onclick="process('download', 'png')">Download</button>
                </div>
                <div id="png-prev" class="preview-content" style="background:#f0f0f0; align-items:center; justify-content:center; text-align:center;"></div>
            </div>
        </div>
    </div>
    <script>
        async function analyze() {
            const btn = document.getElementById('load-btn');
            const text = document.getElementById('md-input').value;
            if(!text) return alert("Please enter text.");
            btn.innerText = "Analyzing...";
            const fd = new FormData();
            fd.append('markdown_text', text);
            try {
                const res = await fetch('/parse', {method:'POST', body:fd});
                const data = await res.json();
                if(data.error) { alert(data.error); return; }
                document.getElementById('detected-format').innerText = "Detected: " + data.format;
                const list = document.getElementById('comp-list');
                list.innerHTML = '';
                data.components.forEach(c => {
                    // Build each card with DOM APIs so file names and content are
                    // always treated as text, never parsed as markup.
                    const card = document.createElement('div');
                    card.className = 'comp-card';

                    const label = document.createElement('label');
                    label.style.cursor = 'pointer';
                    label.style.display = 'block';

                    const check = document.createElement('input');
                    check.type = 'checkbox';
                    check.checked = true;
                    check.className = 'c-check';
                    check.dataset.name = c.filename;
                    check.dataset.content = btoa(unescape(encodeURIComponent(c.content)));

                    const title = document.createElement('b');
                    title.textContent = c.filename;
                    label.append(check, ' ', title);

                    const snippet = document.createElement('textarea');
                    snippet.readOnly = true;
                    snippet.value = c.content.substring(0,80) + '...';

                    card.append(label, snippet);
                    list.appendChild(card);
                });
                document.getElementById('comp-section').style.display = 'block';
            } catch(e) { alert(e); }
            finally { btn.innerText = "Analyze Content"; }
        }
        async function process(action, type = null) {
            const btn = document.getElementById('gen-btn');
            let md = "";
            const checks = document.querySelectorAll('.c-check');
            let hasSelection = false;
            checks.forEach(c => {
                if(c.checked) {
                    hasSelection = true;
                    const content = decodeURIComponent(escape(atob(c.dataset.content)));
                    if(!c.dataset.name.includes("Intro") && !c.dataset.name.includes("Structure")) {
                        md += "### File: " + c.dataset.name + "\\n";
                    }
                    md += content + "\\n\\n";
                }
            });
            if(!hasSelection) md = document.getElementById('md-input').value;
            const imgWidth = document.getElementById('img_width').value || 1024;
            const previewContent = document.getElementById('html-prev').innerHTML;
            const payload = {
                markdown_text: md,
                current_html: previewContent, // Send the current preview HTML
                download: action === 'download',
                download_type: type,
                styles: {
                    font_family: document.getElementById('f_family').value,
                    font_size: document.getElementById('f_size').value,
                    line_height: document.getElementById('l_height').value,
                    text_color: document.getElementById('t_color').value,
                    background_color: document.getElementById('b_color').value,
                    highlight_theme: document.getElementById('h_theme').value,
                    custom_css: document.getElementById('c_css').value,
                    img_width: imgWidth, // Include the custom width
                    page_padding: 40,
                    code_padding: 15
                }
            };
            if(action === 'preview') btn.innerText = "Processing...";
            try {
                const res = await fetch('/convert', {
                    method:'POST',
                    headers:{'Content-Type':'application/json'},
                    body:JSON.stringify(payload)
                });
                if(action === 'download') {
                    if(!res.ok) throw new Error("Download failed");
                    const blob = await res.blob();
                    const a = document.createElement('a');
                    a.href = URL.createObjectURL(blob);
                    a.download = `export.${type}`;
                    a.click();
                } else {
                    const data = await res.json();
                    if(data.error) throw new Error(data.error);
                    document.getElementById('preview-area').style.display = 'block';
                    document.getElementById('html-prev').innerHTML = data.preview_html;
                    const pngContainer = document.getElementById('png-prev');
                    if(data.preview_png_base64) {
                        pngContainer.innerHTML = `<img src="data:image/png;base64,${data.preview_png_base64}">`;
                    } else {
                        pngContainer.innerHTML = `<p style="color:orange">${data.warning || "Image Error"}</p>`;
                    }
                    document.getElementById('preview-area').scrollIntoView({behavior: 'smooth'});
                }
            } catch(e) { alert("Error: " + e.message); }
            finally { if(action === 'preview') btn.innerText = "GENERATE PREVIEW"; }
        }
    </script>
</body></html>
""", styles=sorted(get_all_styles()))
# When executed as a script, start Flask's built-in server on all interfaces
# (0.0.0.0) at port 7860.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)