"""Flask app that splits Markdown into components and renders it to HTML/PNG.

Routes:
    POST /parse    -- detect the document format and return its components.
    POST /convert  -- render Markdown to styled HTML and (via imgkit) a PNG.
    GET  /         -- serve the UI template.
"""
import base64
import os
import re
import traceback
from io import BytesIO
from urllib.parse import quote_plus

import imgkit
import markdown
from flask import Flask, request, render_template_string, send_file, jsonify
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.styles import get_all_styles, get_style_by_name

app = Flask(__name__)

TEMP_DIR = "/tmp/markdown_temp"
os.makedirs(TEMP_DIR, exist_ok=True)

# Width (in pixels) used for the rendered image when the client sends none
# or sends something that is not a positive integer.
DEFAULT_IMG_WIDTH = 1024

# id of the element that wraps the rendered Markdown; all CSS is scoped to it.
WRAPPER_ELEMENT_ID = "output-wrapper"


# --- 1. PARSING LOGIC ---

# Compiled once at import time instead of on every request.
_STRUCTURE_RE = re.compile(r'## File Structure\n([\s\S]*?)(?=\n### File:|\Z)')
_REPO_FILE_RE = re.compile(r'### File: (.*?)\n([\s\S]*?)(?=\n### File:|\Z)')
_ACTION_RE = re.compile(r'^### HF_ACTION: (.*)$', re.MULTILINE)
_ACTION_MARKER_RE = re.compile(r'^### HF_ACTION:', re.MULTILINE)
_AGENT_FILE_RE = re.compile(
    r'### File: (.*?)\n([\s\S]*?)'
    r'(?=\n### File:|\n## File Structure|\n### HF_ACTION:|\Z)'
)
_README_HEADING_RE = re.compile(r'^(## .*?)$', re.MULTILINE)
_CHANGELOG_HEADING_RE = re.compile(
    r'^(## \[\d+\.\d+\.\d+.*?\].*?)$', re.MULTILINE
)
_H1_RE = re.compile(r'^# ', re.MULTILINE)
_H2_RE = re.compile(r'^## ', re.MULTILINE)


def _heading_title(heading):
    """Return the text of a ``## Heading`` line without its leading marker.

    Only the leading ``##`` is dropped, so a title that itself contains
    ``##`` is kept intact.
    """
    return heading[2:].strip()


def _split_by_heading(text, heading_re, intro_name, section_type):
    """Split *text* on headings matched by *heading_re* into components.

    *heading_re* must contain exactly one capturing group around the whole
    heading line, so ``split`` returns ``[intro, heading, body, heading,
    body, ...]``. Non-empty text before the first heading becomes an
    ``'intro'`` component named *intro_name*; every heading/body pair becomes
    a component of type *section_type*.
    """
    parts = heading_re.split(text)
    components = []
    intro = parts[0].strip()
    if intro:
        components.append(
            {'type': 'intro', 'filename': intro_name, 'content': intro}
        )
    for heading, body in zip(parts[1::2], parts[2::2]):
        components.append({
            'type': section_type,
            'filename': _heading_title(heading),
            'content': body.strip(),
        })
    return components


def parse_repo2markdown(text):
    """Parse a Repo2Markdown dump into structure and per-file components.

    Returns a list of dicts with keys ``type``, ``filename`` and ``content``.
    The ``## File Structure`` section (if present) comes first, followed by
    one ``'file'`` component per ``### File: <name>`` section.
    """
    components = []
    struct_match = _STRUCTURE_RE.search(text)
    if struct_match:
        components.append({
            'type': 'structure',
            'filename': 'File Structure',
            'content': struct_match.group(1).strip(),
        })
    # The wrapping code fences are deliberately kept in 'content' so the
    # Markdown renderer still sees the backticks.
    components.extend(
        {
            'type': 'file',
            'filename': match.group(1).strip(),
            'content': match.group(2).strip(),
        }
        for match in _REPO_FILE_RE.finditer(text)
    )
    return components


def parse_standard_readme(text):
    """Split a README into an intro plus one component per ``## `` section."""
    return _split_by_heading(
        text, _README_HEADING_RE, 'Header/Intro', 'section'
    )


def parse_changelog(text):
    """Split a changelog into a header plus one component per version.

    Version headings look like ``## [1.2.3] ...``.
    """
    return _split_by_heading(text, _CHANGELOG_HEADING_RE, 'Header', 'version')


def parse_agent_action(text):
    """Extract ``### HF_ACTION:`` commands and ``### File:`` sections.

    All action components are listed first (in document order), followed by
    all file components.
    """
    components = [
        {
            'type': 'action',
            'filename': 'Agent Command',
            'content': match.group(1).strip(),
        }
        for match in _ACTION_RE.finditer(text)
    ]
    components.extend(
        {
            'type': 'file',
            'filename': match.group(1).strip(),
            'content': match.group(2).strip(),
        }
        for match in _AGENT_FILE_RE.finditer(text)
    )
    return components


# --- 2. HTML GENERATION & STYLING ---

def _code_background(highlight_theme):
    """Return the background colour for code boxes under *highlight_theme*.

    Falls back to a dark grey for ``'monokai'`` and a light grey otherwise
    when the Pygments style cannot be loaded (best effort, never raises).
    """
    try:
        return get_style_by_name(highlight_theme).background_color
    except Exception:
        return "#272822" if highlight_theme == 'monokai' else "#f6f8fa"


def _pygments_css(highlight_theme, wrapper_id):
    """Return Pygments CSS scoped to ``<wrapper_id> .codehilite``.

    Returns an empty string when highlighting is disabled (``'none'``) or
    the style cannot be loaded.
    """
    if highlight_theme == 'none':
        return ""
    try:
        formatter = HtmlFormatter(style=highlight_theme)
        # Scoped to the wrapper to avoid polluting global styles.
        return formatter.get_style_defs(f'{wrapper_id} .codehilite')
    except Exception:
        return ""


def _google_font_link(font_family):
    """Return a Google Fonts ``<link>`` tag for the first family listed.

    Generic ``sans-serif`` / ``monospace`` stacks need no web font, so an
    empty string is returned for them (and for an empty family name).
    """
    if "sans-serif" in font_family or "monospace" in font_family:
        return ""
    clean_font_name = font_family.split(',')[0].strip().strip("'\"")
    if not clean_font_name:
        return ""
    return (
        '<link href="https://fonts.googleapis.com/css2?family='
        f'{quote_plus(clean_font_name)}&amp;display=swap" rel="stylesheet">'
    )


def build_full_html(markdown_text, styles, for_image=False):
    """Render *markdown_text* into a complete, styled HTML document.

    Args:
        markdown_text: Markdown source to render.
        styles: dict of style settings. Keys read here: ``font_family``,
            ``highlight_theme``, ``background_color``, ``font_size``,
            ``line_height``, ``text_color``, ``page_padding``,
            ``code_padding`` and ``custom_css``.
        for_image: when true, no Google Fonts link is emitted, to avoid
            network errors during image generation.

    Returns:
        The HTML document as a string. The rendered Markdown is placed in a
        ``<div id="output-wrapper">`` so the scoped CSS applies to it.
    """
    wrapper_id = f"#{WRAPPER_ELEMENT_ID}"
    font_family = styles.get('font_family') or "sans-serif"

    # 1. Google Fonts (only for web preview).
    google_font_link = "" if for_image else _google_font_link(font_family)

    # 2. Syntax highlighting.
    highlight_theme = styles.get('highlight_theme', 'monokai')
    bg_color = _code_background(highlight_theme)
    pygments_css = _pygments_css(highlight_theme, wrapper_id)

    # 3. CSS construction. The defaults contain '#', which is not allowed
    # inside an f-string replacement field before Python 3.12, so every
    # value is resolved into a local first.
    background_color = styles.get('background_color', '#ffffff')
    text_color = styles.get('text_color', '#333')
    font_size = styles.get('font_size', '16')
    line_height = styles.get('line_height', '1.6')
    page_padding = styles.get('page_padding', '40')
    code_padding = styles.get('code_padding', '15')
    custom_css = styles.get('custom_css', '')

    scoped_css = f"""
body {{
    background-color: {background_color};
    margin: 0;
    padding: 0;
}}
{wrapper_id} {{
    font-family: {font_family};
    font-size: {font_size}px;
    line-height: {line_height};
    color: {text_color};
    background-color: {background_color};
    padding: {page_padding}px;
}}
/* Table Styling */
{wrapper_id} table {{
    border-collapse: collapse;
    width: 100%;
    margin-bottom: 1em;
}}
{wrapper_id} th, {wrapper_id} td {{
    border: 1px solid #ddd;
    padding: 12px;
    text-align: left;
}}
{wrapper_id} th {{
    background-color: #f8f8f8;
    font-weight: bold;
}}
/* Headers */
{wrapper_id} h1, {wrapper_id} h2, {wrapper_id} h3 {{
    border-bottom: 1px solid #eee;
    padding-bottom: 5px;
    margin-top: 1.5em;
}}
/* Code boxes: box styling for ALL pre tags (plain or highlighted) */
{wrapper_id} pre {{
    background-color: {bg_color}; /* Theme bg or default */
    color: {text_color};
    padding: {code_padding}px;
    border-radius: 6px;
    border: 1px solid rgba(0,0,0,0.1);
    overflow-x: auto;
    margin: 1em 0;
    line-height: 1.45;
}}
/* Ensure code inside pre doesn't double-pad */
{wrapper_id} pre code {{
    background-color: transparent;
    padding: 0;
    border: none;
    font-family: 'Fira Code', 'Consolas', 'Monaco', monospace;
    font-size: 0.9em;
}}
/* Specific override for Pygments container if it exists */
{wrapper_id} .codehilite {{
    background-color: {bg_color};
    border-radius: 6px;
    margin: 1em 0;
}}
{wrapper_id} .codehilite pre {{
    margin: 0;
    border: none; /* Let container handle border if needed */
}}
/* Syntax Highlighting Colors */
{pygments_css}
/* Custom User Overrides */
{custom_css}
"""

    # 4. Render Markdown. 'fenced_code' catches ``` blocks and 'codehilite'
    # colours them; language guessing is switched off so only explicitly
    # tagged blocks are highlighted.
    html_content = markdown.markdown(
        markdown_text,
        extensions=['fenced_code', 'tables', 'codehilite', 'nl2br'],
        extension_configs={
            'codehilite': {
                'css_class': 'codehilite',
                'guess_lang': False,
                'noclasses': False,
            }
        },
    )

    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
{google_font_link}
<style>{scoped_css}</style>
</head>
<body>
<div id="{WRAPPER_ELEMENT_ID}">
{html_content}
</div>
</body>
</html>
"""


# --- 3. FLASK ROUTES ---

@app.route('/parse', methods=['POST'])
def parse_endpoint():
    """Detect the Markdown flavour of the posted text and split it up.

    Reads the ``markdown_text`` form field and responds with JSON
    ``{'format': ..., 'components': [...]}``, or ``{'error': ...}`` with
    status 500 if parsing raises.
    """
    # Normalize CRLF so the '\n'-anchored patterns also match text submitted
    # with Windows line endings.
    text = request.form.get('markdown_text', '').replace('\r\n', '\n')
    try:
        if "## File Structure" in text and "### File:" in text:
            format_name = "Repo2Markdown"
            components = parse_repo2markdown(text)
        elif _ACTION_MARKER_RE.search(text):
            format_name = "Agent Action"
            components = parse_agent_action(text)
        elif _CHANGELOG_HEADING_RE.search(text):
            format_name = "Changelog"
            components = parse_changelog(text)
        elif _H1_RE.search(text) and _H2_RE.search(text):
            format_name = "Standard README"
            components = parse_standard_readme(text)
        else:
            format_name = "Plain Markdown"
            components = [
                {'type': 'text', 'filename': 'Full Content', 'content': text}
            ]
        return jsonify({'format': format_name, 'components': components})
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500


def _coerce_width(value, default=DEFAULT_IMG_WIDTH):
    """Return *value* as a positive int, or *default* if that is impossible."""
    try:
        width = int(value)
    except (TypeError, ValueError):
        return default
    return width if width > 0 else default


@app.route('/convert', methods=['POST'])
def convert_endpoint():
    """Render the posted Markdown to HTML and a PNG image.

    Expects a JSON object with optional keys ``markdown_text``, ``styles``,
    ``current_html``, ``download`` and ``download_type``.

    * ``download`` truthy and ``download_type == 'html'``: the HTML document
      is returned as an attachment.
    * ``download`` truthy otherwise: the PNG is returned as an attachment.
    * otherwise: JSON with ``preview_html`` and ``preview_png_base64`` (the
      latter is ``None`` plus a ``warning`` if image generation fails).

    A body that is not a JSON object yields status 400; any other failure
    yields ``{'error': ...}`` with status 500.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    try:
        styles = data.get('styles')
        if not isinstance(styles, dict):
            styles = {}
        markdown_text = data.get('markdown_text') or ''

        # Dynamic width from user input, defaulting to DEFAULT_IMG_WIDTH.
        target_width = _coerce_width(styles.get('img_width'))

        # 1. Determine the HTML for the image: the client-provided markup if
        # there is any, otherwise a fresh render without web fonts.
        current_html = data.get('current_html')
        if isinstance(current_html, str) and current_html.strip():
            image_html = current_html
        else:
            image_html = build_full_html(markdown_text, styles, for_image=True)

        # 2. Renderer options handed to imgkit.
        # NOTE(review): 'enable-local-file-access' together with
        # client-supplied HTML may let a request embed files readable by the
        # server process into the image -- confirm this is acceptable for
        # the deployment.
        options = {
            "quiet": "",
            "encoding": "UTF-8",
            "width": target_width,
            "disable-smart-width": "",
            "enable-local-file-access": "",
            "disable-javascript": "",
            "load-error-handling": "ignore",
            "load-media-error-handling": "ignore",
        }

        if data.get('download', False):
            if data.get('download_type') == 'html':
                preview_html = build_full_html(
                    markdown_text, styles, for_image=False
                )
                return send_file(
                    BytesIO(preview_html.encode("utf-8")),
                    as_attachment=True,
                    download_name="output.html",
                    mimetype="text/html",
                )
            png_bytes = imgkit.from_string(image_html, False, options=options)
            return send_file(
                BytesIO(png_bytes),
                as_attachment=True,
                download_name="output.png",
                mimetype="image/png",
            )

        preview_html = build_full_html(markdown_text, styles, for_image=False)
        try:
            png_bytes = imgkit.from_string(image_html, False, options=options)
            b64_img = base64.b64encode(png_bytes).decode('utf-8')
        except Exception:
            # Best effort: the HTML preview is still useful without the
            # image, but log the cause instead of discarding it.
            traceback.print_exc()
            return jsonify({
                'preview_html': preview_html,
                'preview_png_base64': None,
                'warning': 'Image generation failed.',
            })
        return jsonify(
            {'preview_html': preview_html, 'preview_png_base64': b64_img}
        )
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500


@app.route('/')
def index():
    """Serve the UI page, passing the sorted Pygments style names to it."""
    # NOTE(review): the template below holds only bare text and never
    # references `styles`; its markup appears to be missing from this copy
    # of the file -- restore it from version control.
    return render_template_string(""" Intelligent Markdown Converter

Markdown Tool

1. Input
3. Styles
""", styles=sorted(get_all_styles()))


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)