Spaces:

broadfield-dev
/

markdown2png

Paused

App Files Files Community

markdown2png / app.py

broadfield-dev

Update app.py

58d9915 verified about 1 month ago

raw

history blame contribute delete

22.5 kB

	from flask import Flask, request, render_template_string, send_file, jsonify
	import markdown
	import imgkit
	import os
	import traceback
	from io import BytesIO
	import re
	import base64
	from pygments import highlight
	from pygments.lexers import get_lexer_by_name
	from pygments.formatters import HtmlFormatter
	from pygments.styles import get_all_styles, get_style_by_name

	# Flask application object; the route decorators further down register against it.
	app = Flask(__name__)
	# Scratch directory, created at import time (exist_ok avoids failing when it already exists).
	# NOTE(review): no visible code in this file reads or writes TEMP_DIR - confirm it is still needed.
	TEMP_DIR = "/tmp/markdown_temp"
	os.makedirs(TEMP_DIR, exist_ok=True)

	# --- 1. PARSING LOGIC ---

	def parse_repo2markdown(text):
	    """Split a Repo2Markdown dump into a file-structure part and per-file parts.

	    The document is expected to contain a ``## File Structure`` section
	    followed by one ``### File: <name>`` section per file.

	    Args:
	        text: The raw markdown document.

	    Returns:
	        A list of dicts with ``type``, ``filename`` and ``content`` keys. If a
	        structure section is present it comes first with type ``'structure'``;
	        each file section follows with type ``'file'``. File content is kept
	        verbatim (only stripped), including any wrapping code fence, so the
	        markdown renderer still sees the backticks.
	    """
	    components = []

	    # The structure block runs lazily up to the first file header, or to the
	    # end of the input when no file header follows.
	    struct_match = re.search(r'## File Structure\n([\s\S]*?)(?=\n### File:|\Z)', text)
	    if struct_match:
	        components.append({
	            'type': 'structure',
	            'filename': 'File Structure',
	            'content': struct_match.group(1).strip(),
	        })

	    # Group 1 is the rest of the header line (the filename); group 2 is the
	    # body, which ends right before the next file header or at end of input.
	    pattern = re.compile(r'### File: (.*?)\n([\s\S]*?)(?=\n### File:|\Z)')
	    for match in pattern.finditer(text):
	        components.append({
	            'type': 'file',
	            'filename': match.group(1).strip(),
	            'content': match.group(2).strip(),
	        })
	    return components

	def parse_standard_readme(text):
	    """Break a README into an optional intro plus one component per ``## `` heading.

	    Args:
	        text: The raw markdown document.

	    Returns:
	        A list of dicts with ``type``, ``filename`` and ``content`` keys. Text
	        before the first level-2 heading becomes an ``'intro'`` component (only
	        when it is non-blank); every level-2 heading becomes a ``'section'``
	        whose ``filename`` is the heading text with ``##`` removed.
	    """
	    # Splitting on a captured group yields [intro, heading, body, heading, body, ...].
	    pieces = re.split(r'^(## .*?)$', text, flags=re.MULTILINE)

	    sections = []
	    intro = pieces[0].strip()
	    if intro:
	        sections.append({'type': 'intro', 'filename': 'Header/Intro', 'content': intro})

	    # Walk headings and their bodies in lockstep.
	    for heading, body in zip(pieces[1::2], pieces[2::2]):
	        sections.append({
	            'type': 'section',
	            'filename': heading.replace('##', '').strip(),
	            'content': body.strip(),
	        })
	    return sections

	def parse_changelog(text):
	    """Split a changelog into an optional header plus one component per version.

	    A version heading is a line of the form ``## [X.Y.Z...]...``, for example
	    ``## [1.2.0] - 2024-01-01`` or ``## [1.1.0-beta]``.

	    Args:
	        text: The raw markdown document.

	    Returns:
	        A list of dicts with ``type``, ``filename`` and ``content`` keys. Text
	        before the first version heading becomes an ``'intro'`` component (only
	        when it is non-blank); every version heading becomes a ``'version'``
	        whose ``filename`` is the heading text with ``##`` removed.
	    """
	    components = []

	    # ``.*?\]`` allows pre-release suffixes inside the brackets and the final
	    # ``.*`` takes the rest of the heading line (date, link, ...).
	    parts = re.split(r'^(## \[\d+\.\d+\.\d+.*?\].*)$', text, flags=re.MULTILINE)

	    if parts[0].strip():
	        components.append({'type': 'intro', 'filename': 'Header', 'content': parts[0].strip()})

	    # With one capture group the split result alternates heading, body.
	    for heading, body in zip(parts[1::2], parts[2::2]):
	        components.append({
	            'type': 'version',
	            'filename': heading.replace('##', '').strip(),
	            'content': body.strip(),
	        })
	    return components

	def parse_agent_action(text):
	    """Extract agent commands and file sections from an agent-action document.

	    Args:
	        text: The raw markdown document.

	    Returns:
	        A list of dicts with ``type``, ``filename`` and ``content`` keys. All
	        ``### HF_ACTION:`` lines come first (type ``'action'``, filename
	        ``'Agent Command'``), followed by all ``### File: <name>`` sections
	        (type ``'file'``), each in document order.
	    """
	    components = []

	    action_pattern = re.compile(r'^### HF_ACTION: (.*)$', re.MULTILINE)
	    for match in action_pattern.finditer(text):
	        components.append({
	            'type': 'action',
	            'filename': 'Agent Command',
	            'content': match.group(1).strip(),
	        })

	    # A file body ends right before the next file header, a file-structure
	    # header, the next agent command, or at end of input.
	    file_pattern = re.compile(
	        r'### File: (.*?)\n([\s\S]*?)'
	        r'(?=\n### File:|\n## File Structure|\n### HF_ACTION:|\Z)'
	    )
	    for match in file_pattern.finditer(text):
	        components.append({
	            'type': 'file',
	            'filename': match.group(1).strip(),
	            'content': match.group(2).strip(),
	        })
	    return components

	# --- 2. HTML GENERATION & STYLING ---

	def build_full_html(markdown_text, styles, for_image=False):
	    """Render markdown to a standalone, styled HTML document.

	    Args:
	        markdown_text: Markdown source to render.
	        styles: Dict of style settings. Keys read here: ``font_family``,
	            ``highlight_theme``, ``background_color``, ``font_size``,
	            ``line_height``, ``text_color``, ``page_padding``,
	            ``code_padding`` and ``custom_css``. Missing keys use defaults.
	        for_image: When true, no Google Fonts ``<link>`` is emitted so image
	            generation does not depend on network access.

	    Returns:
	        The complete HTML document as a string. The rendered markdown lives
	        inside ``<div id='output-wrapper'>`` and all CSS is scoped to it.
	    """
	    wrapper_id = "#output-wrapper"
	    font_family = styles.get('font_family', "sans-serif")

	    # 1. Google Fonts (only for the web preview, to avoid network errors in image gen).
	    # Decide from the FIRST family in the list: a generic CSS keyword such as
	    # "serif" is not a downloadable font, while "'Inter', sans-serif" is a named
	    # font that merely carries a generic fallback.
	    generic_families = {
	        'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'system-ui',
	        'ui-serif', 'ui-sans-serif', 'ui-monospace', 'ui-rounded',
	        'math', 'emoji', 'fangsong',
	    }
	    google_font_link = ""
	    clean_font_name = font_family.split(',')[0].strip().strip("'\"")
	    if not for_image and clean_font_name and clean_font_name.lower() not in generic_families:
	        google_font_link = f'<link href="https://fonts.googleapis.com/css2?family={clean_font_name.replace(" ", "+")}:wght@400;700&display=swap" rel="stylesheet">'

	    # 2. Syntax highlighting
	    highlight_theme = styles.get('highlight_theme', 'monokai')

	    # Take the code background from the theme; fall back to a dark or light
	    # gray when the theme name is unknown to Pygments (e.g. 'none').
	    try:
	        style_obj = get_style_by_name(highlight_theme)
	        bg_color = style_obj.background_color
	    except Exception:
	        bg_color = "#272822" if highlight_theme == 'monokai' else "#f6f8fa"

	    # Generate the Pygments CSS definitions, scoped to our wrapper to avoid
	    # global pollution. An unknown theme simply yields no highlighting CSS.
	    pygments_css = ""
	    if highlight_theme != 'none':
	        try:
	            formatter = HtmlFormatter(style=highlight_theme)
	            pygments_css = formatter.get_style_defs(f'{wrapper_id} .codehilite')
	        except Exception:
	            pygments_css = ""

	    # 3. CSS construction
	    scoped_css = f"""
	body {{ background-color: {styles.get('background_color', '#ffffff')}; margin: 0; padding: 0; }}

	{wrapper_id} {{
	font-family: {font_family};
	font-size: {styles.get('font_size', '16')}px;
	line-height: {styles.get('line_height', '1.6')};
	color: {styles.get('text_color', '#333')};
	background-color: {styles.get('background_color', '#fff')};
	padding: {styles.get('page_padding', '40')}px;
	}}

	/* Table Styling */
	{wrapper_id} table {{ border-collapse: collapse; width: 100%; margin-bottom: 1em; }}
	{wrapper_id} th, {wrapper_id} td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
	{wrapper_id} th {{ background-color: #f8f8f8; font-weight: bold; }}

	/* Headers */
	{wrapper_id} h1, {wrapper_id} h2, {wrapper_id} h3 {{ border-bottom: 1px solid #eee; padding-bottom: 5px; margin-top: 1.5em; }}
	/* --- CODE BOX STYLING (FIXED) --- */

	/* Apply box styling to ALL pre tags (plain or highlighted) */
	{wrapper_id} pre {{
	background-color: {bg_color}; /* Theme bg or default */
	color: {styles.get('text_color', '#333')};
	padding: {styles.get('code_padding', '15')}px;
	border-radius: 6px;
	border: 1px solid rgba(0,0,0,0.1);
	overflow-x: auto;
	margin: 1em 0;
	line-height: 1.45;
	}}
	/* Ensure code inside pre doesn't double-pad */
	{wrapper_id} pre code {{
	background-color: transparent;
	padding: 0;
	border: none;
	font-family: 'Fira Code', 'Consolas', 'Monaco', monospace;
	font-size: 0.9em;
	}}
	/* Specific override for Pygments container if it exists */
	{wrapper_id} .codehilite {{
	background-color: {bg_color};
	border-radius: 6px;
	margin: 1em 0;
	}}

	{wrapper_id} .codehilite pre {{
	margin: 0;
	border: none; /* Let container handle border if needed */
	}}
	/* Syntax Highlighting Colors */
	{pygments_css}

	/* Custom User Overrides */
	{styles.get('custom_css', '')}
	"""

	    # 4. Render markdown
	    # 'fenced_code' picks up ``` blocks and 'codehilite' colors them.
	    # 'guess_lang' is disabled so blocks without a language tag are not
	    # run through Pygments' language guessing.
	    html_content = markdown.markdown(
	        markdown_text,
	        extensions=['fenced_code', 'tables', 'codehilite', 'nl2br'],
	        extension_configs={
	            'codehilite': {
	                'css_class': 'codehilite',
	                'guess_lang': False,
	                'noclasses': False
	            }
	        }
	    )

	    return f"""
	<!DOCTYPE html>
	<html>
	<head>
	<meta charset='UTF-8'>
	{google_font_link}
	<style>{scoped_css}</style>
	</head>
	<body>
	<div id='output-wrapper'>{html_content}</div>
	</body>
	</html>
	"""

	# --- 3. FLASK ROUTES ---

	@app.route('/parse', methods=['POST'])
	def parse_endpoint():
	    """Detect the format of the posted markdown and split it into components.

	    Reads the ``markdown_text`` form field and tries the formats in order:
	    Repo2Markdown, Agent Action, Changelog, Standard README; anything else is
	    returned as a single "Plain Markdown" component.

	    Returns:
	        JSON ``{'format': <name>, 'components': [...]}`` on success, or
	        ``{'error': <message>}`` with HTTP status 500 if parsing raises.
	    """
	    text = request.form.get('markdown_text', '')
	    try:
	        if "## File Structure" in text and "### File:" in text:
	            format_name, components = "Repo2Markdown", parse_repo2markdown(text)
	        elif re.search(r'^### HF_ACTION:', text, flags=re.MULTILINE):
	            format_name, components = "Agent Action", parse_agent_action(text)
	        # A changelog has at least one "## [X.Y.Z...]..." heading line.
	        elif re.search(r'^## \[\d+\.\d+\.\d+.*?\].*$', text, flags=re.MULTILINE):
	            format_name, components = "Changelog", parse_changelog(text)
	        elif re.search(r'^# ', text, flags=re.MULTILINE) and re.search(r'^## ', text, flags=re.MULTILINE):
	            format_name, components = "Standard README", parse_standard_readme(text)
	        else:
	            format_name = "Plain Markdown"
	            components = [{'type': 'text', 'filename': 'Full Content', 'content': text}]
	        return jsonify({'format': format_name, 'components': components})
	    except Exception as e:
	        # Top-level request boundary: log the traceback and report the error as JSON.
	        traceback.print_exc()
	        return jsonify({'error': str(e)}), 500

	@app.route('/convert', methods=['POST'])
	def convert_endpoint():
	    """Render markdown to an HTML preview and a PNG image, or serve a download.

	    Expects a JSON body with ``markdown_text``, ``styles``, and optionally
	    ``current_html``, ``download`` and ``download_type``.

	    Returns:
	        * ``download`` true and ``download_type == 'html'``: the rendered HTML
	          as the attachment ``output.html``.
	        * ``download`` true otherwise: the rendered PNG as ``output.png``.
	        * no download: JSON with ``preview_html`` and ``preview_png_base64``.
	          If image generation fails the PNG field is ``None`` and a
	          ``warning`` is included instead of failing the request.
	        * any other error: JSON ``{'error': <message>}`` with status 500.
	    """
	    data = request.json
	    try:
	        # "styles": null in the payload must behave like a missing key.
	        styles = data.get('styles') or {}
	        markdown_text = data.get('markdown_text', '')
	        is_download = bool(data.get('download', False))

	        # Dynamic width from user input, defaulting to 1024. The browser sends
	        # the value as a string, so coerce it and reject junk before it is
	        # handed to wkhtmltoimage.
	        try:
	            target_width = int(styles.get('img_width', 1024))
	        except (TypeError, ValueError):
	            target_width = 1024
	        if target_width <= 0:
	            target_width = 1024

	        preview_html = build_full_html(markdown_text, styles, for_image=False)

	        if is_download and data.get('download_type') == 'html':
	            return send_file(BytesIO(preview_html.encode("utf-8")), as_attachment=True, download_name="output.html", mimetype="text/html")

	        # 1. Determine the HTML for the image.
	        # The client's current preview is only a faithful source for a PNG
	        # *download* (export what is on screen). For a fresh preview it still
	        # holds the previous render, so using it would show a stale image.
	        current_html = data.get('current_html') or ''
	        if is_download and current_html.strip():
	            # Wrap the preview content to ensure styles are applied
	            image_html = f"<!DOCTYPE html><html><head><meta charset='UTF-8'></head><body>{current_html}</body></html>"
	        else:
	            image_html = build_full_html(markdown_text, styles, for_image=True)

	        # 2. wkhtmltoimage options
	        # NOTE(security): "enable-local-file-access" is combined with
	        # client-supplied HTML here; review before exposing this publicly.
	        options = {
	            "quiet": "",
	            "encoding": "UTF-8",
	            "width": target_width,  # apply the dynamic width here
	            "disable-smart-width": "",
	            "enable-local-file-access": "",
	            "disable-javascript": "",
	            "load-error-handling": "ignore",
	            "load-media-error-handling": "ignore"
	        }

	        if is_download:
	            png_bytes = imgkit.from_string(image_html, False, options=options)
	            return send_file(BytesIO(png_bytes), as_attachment=True, download_name="output.png", mimetype="image/png")

	        # Preview: image generation is best-effort; the HTML preview is still
	        # returned when it fails, but the failure is logged.
	        try:
	            png_bytes = imgkit.from_string(image_html, False, options=options)
	            b64_img = base64.b64encode(png_bytes).decode('utf-8')
	        except Exception:
	            traceback.print_exc()
	            return jsonify({'preview_html': preview_html, 'preview_png_base64': None, 'warning': 'Image generation failed.'})

	        return jsonify({'preview_html': preview_html, 'preview_png_base64': b64_img})

	    except Exception as e:
	        # Top-level request boundary: log the traceback and report the error as JSON.
	        traceback.print_exc()
	        return jsonify({'error': str(e)}), 500

	@app.route('/')
	def index():
	    """Serve the single-page UI.

	    The page is an inline Jinja template; the only template variable is
	    ``styles``, the sorted list of Pygments style names used to fill the
	    "Syntax" dropdown. The embedded script posts to ``/parse`` and
	    ``/convert``.
	    """
	    # Backslash sequences in this (non-raw) string are processed by Python
	    # first: "\\n" below reaches the browser as the JavaScript escape "\n".
	    return render_template_string("""
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8"><title>Intelligent Markdown Converter</title>
	<style>
	:root { --bg: #f4f7f6; --text: #333; --card: #fff; --border: #ddd; --primary: #5a32a3; }
	body.dark-mode { --bg: #1a1a1a; --text: #eee; --card: #252525; --border: #444; }
	body { font-family: 'Inter', sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; background: var(--bg); color: var(--text); transition: background 0.3s; }
	fieldset { border: 1px solid var(--border); background: var(--card); padding: 20px; margin-bottom: 25px; border-radius: 12px; }
	legend { font-weight: bold; padding: 0 10px; color: var(--primary); }
	textarea { width: 100%; border-radius: 6px; padding: 12px; border: 1px solid var(--border); background: var(--card); color: var(--text); font-family: 'Fira Code', monospace; box-sizing: border-box; }
	.format-banner { background: var(--primary); color: white; padding: 6px 16px; border-radius: 20px; font-size: 13px; display: inline-block; margin-bottom: 15px; font-weight: bold; }
	.style-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 15px; }
	.comp-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); gap: 15px; }
	.comp-card { background: var(--card); border: 1px solid var(--border); padding: 12px; border-radius: 8px; }
	.comp-card textarea { height: 60px; font-size: 11px; margin-top: 8px; opacity: 0.8; pointer-events: none; resize: none; }
	.action-bar { display: flex; gap: 15px; margin-top: 20px; }
	button { padding: 12px 24px; cursor: pointer; border: none; border-radius: 6px; font-weight: bold; }
	.btn-primary { background: var(--primary); color: #fff; }
	.btn-secondary { background: #333; color: #fff; border: 1px solid #555; }
	.btn-download { background: #28a745; color: white; font-size: 12px; padding: 5px 10px; margin-left: 10px;}
	.preview-section { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-top: 30px; }
	.preview-box { background: #fff; border: 1px solid #ddd; border-radius: 8px; height: 600px; display: flex; flex-direction: column; overflow: hidden; }
	.preview-header { background: #eee; padding: 10px; display: flex; justify-content: space-between; align-items: center; color: #333; font-weight: bold; border-bottom: 1px solid #ddd; }
	.preview-content { flex: 1; overflow: auto; padding: 15px; }
	img { max-width: 100%; height: auto; border: 1px solid #eee; }
	</style>
	</head>
	<body>
	<div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:20px;">
	<h1>Markdown Tool</h1>
	<button onclick="document.body.classList.toggle('dark-mode')" class="btn-secondary">🌓 Dark Mode</button>
	</div>
	<fieldset>
	<legend>1. Input</legend>
	<textarea id="md-input" rows="8"></textarea>
	<div class="action-bar"><button onclick="analyze()" class="btn-primary" id="load-btn">Analyze Content</button></div>
	</fieldset>
	<div id="comp-section" style="display:none;">
	<div id="detected-format" class="format-banner"></div>
	<fieldset>
	<legend>2. Components</legend>
	<div class="comp-grid" id="comp-list"></div>
	</fieldset>
	</div>
	<fieldset>
	<legend>3. Styles</legend>
	<div class="style-grid">
	<div><label>Font</label><select id="f_family">
	<option value="sans-serif">Sans-Serif</option>
	<option value="'Inter', sans-serif">Inter</option>
	<option value="monospace">Monospace</option>
	<option value="serif">Serif</option>
	</select></div>
	<div><label>Size</label><input type="number" id="f_size" value="16"></div>
	<div><label>Height</label><input type="number" id="l_height" value="1.6" step="0.1"></div>
	<div><label>Image Width (px)</label><input type="number" id="img_width" value="1024" min="400" max="3000"></div>
	<div><label>Text</label><input type="color" id="t_color" value="#333333"></div>
	<div><label>BG</label><input type="color" id="b_color" value="#ffffff"></div>
	<div><label>Syntax</label><select id="h_theme">
	{% for s in styles %}<option value="{{s}}" {% if s == 'monokai' %}selected{% endif %}>{{s}}</option>{% endfor %}
	</select></div>
	</div>
	<textarea id="c_css" rows="2" style="margin-top:15px;" placeholder="Custom CSS..."></textarea>
	</fieldset>
	<button onclick="process('preview')" class="btn-primary" id="gen-btn" style="width:100%; height:50px; font-size:18px;">GENERATE PREVIEW</button>
	<div id="preview-area" style="display:none;">
	<div class="preview-section">
	<div class="preview-box">
	<div class="preview-header">
	HTML Preview <button class="btn-download" onclick="process('download', 'html')">Download</button>
	</div>
	<div id="html-prev" class="preview-content"></div>
	</div>
	<div class="preview-box">
	<div class="preview-header">
	PNG Preview <button class="btn-download" onclick="process('download', 'png')">Download</button>
	</div>
	<div id="png-prev" class="preview-content" style="background:#f0f0f0; align-items:center; justify-content:center; text-align:center;"></div>
	</div>
	</div>
	</div>
	<script>
	async function analyze() {
	const btn = document.getElementById('load-btn');
	const text = document.getElementById('md-input').value;
	if(!text) return alert("Please enter text.");

	btn.innerText = "Analyzing...";
	const fd = new FormData();
	fd.append('markdown_text', text);

	try {
	const res = await fetch('/parse', {method:'POST', body:fd});
	const data = await res.json();
	if(data.error) { alert(data.error); return; }
	document.getElementById('detected-format').innerText = "Detected: " + data.format;
	const list = document.getElementById('comp-list');
	list.innerHTML = '';
	data.components.forEach(c => {
	const safe = btoa(unescape(encodeURIComponent(c.content)));
	list.innerHTML += `
	<div class="comp-card">
	<label style="cursor:pointer; display:block;">
	<input type="checkbox" checked class="c-check" data-name="${c.filename}" data-content="${safe}">
	<b>${c.filename}</b>
	</label>
	<textarea readonly>${c.content.substring(0,80)}...</textarea>
	</div>`;
	});
	document.getElementById('comp-section').style.display = 'block';
	} catch(e) { alert(e); }
	finally { btn.innerText = "Analyze Content"; }
	}
	async function process(action, type = null) {
	const btn = document.getElementById('gen-btn');
	const htmlPreviewContent = document.getElementById('html-prev').innerHTML
	let md = "";
	const checks = document.querySelectorAll('.c-check');
	let hasSelection = false;

	checks.forEach(c => {
	if(c.checked) {
	hasSelection = true;
	const content = decodeURIComponent(escape(atob(c.dataset.content)));
	if(!c.dataset.name.includes("Intro") && !c.dataset.name.includes("Structure")) {
	md += "### File: " + c.dataset.name + "\\n";
	}
	md += content + "\\n\\n";
	}
	});

	if(!hasSelection) md = document.getElementById('md-input').value;
	const imgWidth = document.getElementById('img_width').value || 1024;
	const previewContent = document.getElementById('html-prev').innerHTML;

	const payload = {
	markdown_text: md,
	current_html: previewContent, // Send the current preview HTML
	download: action === 'download',
	download_type: type,
	styles: {
	font_family: document.getElementById('f_family').value,
	font_size: document.getElementById('f_size').value,
	line_height: document.getElementById('l_height').value,
	text_color: document.getElementById('t_color').value,
	background_color: document.getElementById('b_color').value,
	highlight_theme: document.getElementById('h_theme').value,
	custom_css: document.getElementById('c_css').value,
	img_width: imgWidth, // Include the custom width
	page_padding: 40,
	code_padding: 15
	}
	};
	if(action === 'preview') btn.innerText = "Processing...";

	try {
	const res = await fetch('/convert', {
	method:'POST',
	headers:{'Content-Type':'application/json'},
	body:JSON.stringify(payload)
	});

	if(action === 'download') {
	if(!res.ok) throw new Error("Download failed");
	const blob = await res.blob();
	const a = document.createElement('a');
	a.href = URL.createObjectURL(blob);
	a.download = `export.${type}`;
	a.click();
	} else {
	const data = await res.json();
	if(data.error) throw new Error(data.error);
	document.getElementById('preview-area').style.display = 'block';
	document.getElementById('html-prev').innerHTML = data.preview_html;
	const pngContainer = document.getElementById('png-prev');
	if(data.preview_png_base64) {
	pngContainer.innerHTML = `<img src="data:image/png;base64,${data.preview_png_base64}">`;
	} else {
	pngContainer.innerHTML = `<p style="color:orange">${data.warning || "Image Error"}</p>`;
	}
	document.getElementById('preview-area').scrollIntoView({behavior: 'smooth'});
	}
	} catch(e) { alert("Error: " + e.message); }
	finally { if(action === 'preview') btn.innerText = "GENERATE PREVIEW"; }
	}
	</script>
	</body></html>
	""", styles=sorted(get_all_styles()))

	if __name__ == "__main__":
	    # Script entry point: start Flask's built-in server, listening on all
	    # interfaces on port 7860.
	    app.run(host="0.0.0.0", port=7860)