Prathamesh Sarjerao Vaidya
completed the project
3e27995
#!/usr/bin/env python3
import re
import os
import sys
import subprocess
from pathlib import Path
def process_mermaid_diagrams(content, file_dir):
    """Replace fenced ``mermaid`` code blocks in *content* with PNG images.

    Each block is written to a ``.mmd`` file in *file_dir*, rendered to SVG
    by the ``mmdc`` command and converted to PNG by ``rsvg-convert``.  File
    names are derived from a SHA-256 digest of the diagram source, so the same
    diagram always maps to the same PNG name across runs.

    Args:
        content: Markdown text to scan for mermaid code fences.
        file_dir: Directory in which intermediate files and PNGs are created.

    Returns:
        The markdown text.  Every block that rendered successfully is replaced
        by a ``<div class="mermaid-container">`` wrapping a markdown image
        that points at the PNG (by base name).  Blocks that could not be
        rendered are kept as fenced code; no rendering failure is raised to
        the caller.
    """
    # Imported here so the function does not rely on a file-level import.
    import hashlib

    mermaid_pattern = r'```mermaid\n(.*?)\n```'

    # NOTE(review): mermaid-cli documents this option as
    # `-p/--puppeteerConfigFile <file>`; confirm that the installed mmdc
    # accepts `--puppeteerConfig` with inline JSON.  Arguments are kept
    # exactly as they were.
    inline_puppeteer_config = '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu", "--single-process"]}'
    theme_options = ['--theme', 'default', '--backgroundColor', 'white']

    def remove_quietly(*paths):
        """Delete intermediate files, ignoring ones that cannot be removed."""
        for path in paths:
            try:
                os.remove(path)
            except OSError:
                # Cleanup is best-effort: a missing or locked temp file must
                # not turn a finished conversion into a failure.
                pass

    def render_png(mmdc_options, mermaid_file, svg_file, png_file):
        """Run mmdc (mermaid -> SVG) and rsvg-convert (SVG -> PNG)."""
        subprocess.run(
            ['mmdc', '-i', mermaid_file, '-o', svg_file, *mmdc_options],
            check=True, capture_output=True, text=True, timeout=60,
        )
        # PNG is used instead of SVG for better PDF compatibility.
        subprocess.run(
            ['rsvg-convert', '-f', 'png', '-o', png_file,
             '--width', '1200', '--height', '800', svg_file],
            check=True, capture_output=True, text=True, timeout=60,
        )

    def image_markup(png_file):
        """Return the markdown/HTML snippet that embeds the rendered PNG."""
        return (
            f'\n<div class="mermaid-container">\n\n'
            f'![Architecture Diagram]({os.path.basename(png_file)})\n\n'
            f'</div>\n'
        )

    def replace_mermaid(match):
        mermaid_code = match.group(1)

        # The built-in hash() is salted per process, so it cannot give
        # stable file names; a content digest can.
        digest = hashlib.sha256(
            mermaid_code.encode('utf-8', errors='surrogatepass')
        ).hexdigest()[:16]
        mermaid_file = f"{file_dir}/mermaid_{digest}.mmd"
        svg_file = f"{file_dir}/mermaid_{digest}.svg"
        png_file = f"{file_dir}/mermaid_{digest}.png"

        try:
            with open(mermaid_file, 'w', encoding='utf-8') as f:
                f.write(mermaid_code)
        except (OSError, UnicodeError) as e:
            print(f"Error writing mermaid file: {e}")
            # This path has always emitted a plain (untagged) code fence.
            return f'\n```\n{mermaid_code}\n```\n'

        unrendered = f'\n```mermaid\n{mermaid_code}\n```\n'
        try:
            # Method 1: use the repository's puppeteer config when present,
            # otherwise pass the sandbox flags inline.
            config_file = os.path.join(
                file_dir, '..', '..', '.github', 'workflows', 'puppeteer-config.json'
            )
            if os.path.exists(config_file):
                primary_options = theme_options + [
                    '--configFile', config_file,
                    '--puppeteerConfig', config_file,
                ]
            else:
                primary_options = theme_options + [
                    '--puppeteerConfig', inline_puppeteer_config,
                ]

            try:
                render_png(primary_options, mermaid_file, svg_file, png_file)
            except subprocess.CalledProcessError as e:
                print(f"Error converting mermaid diagram: {e}")
                print(f"Command output: {e.stderr if e.stderr else 'No stderr'}")
                # Method 2: retry without theme/config options.
                print("Trying basic mmdc command...")
                try:
                    render_png(
                        ['--puppeteerConfig', inline_puppeteer_config],
                        mermaid_file, svg_file, png_file,
                    )
                except subprocess.CalledProcessError as e2:
                    print(f"Basic mmdc also failed: {e2}")
                    print("All Mermaid rendering methods failed, keeping original code")
                    return unrendered
            return image_markup(png_file)
        except Exception as e:
            # Covers timeouts and missing executables in either attempt, so
            # one bad diagram cannot abort processing of the whole document.
            print(f"Unexpected error with mermaid: {e}")
            return unrendered
        finally:
            # Intermediate files are never needed after this point.
            remove_quietly(mermaid_file, svg_file)

    return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
def clean_emojis_and_fix_images(content, file_dir):
    """Replace known emojis with text labels and normalise images to ``<img>``.

    Three passes are applied to *content*:

    1. Each emoji in the replacement table becomes a bracketed label.
    2. Existing HTML ``<img ...>`` tags without a ``style`` attribute get the
       shared responsive style appended.  Tags that already have a ``style``
       are left untouched, so no tag ends up with two ``style`` attributes.
    3. Markdown images ``![alt](path)`` become styled ``<img>`` tags.

    Args:
        content: Markdown text to process.
        file_dir: Directory that relative image paths are resolved against.

    Returns:
        The processed markdown text.
    """
    # Keys are Unicode escapes so the table survives re-encoding of this
    # source file; literal emoji here had been corrupted into mojibake.
    emoji_replacements = {
        '\U0001F3B5': '[Audio]',      # musical note
        '\U0001F3AC': '[Video]',      # clapper board
        '\U0001F4DD': '[Document]',   # memo
        '\U0001F4CA': '[Analytics]',  # bar chart
        '\U0001F9E0': '[AI]',         # brain
        '\U0001F3A5': '[Media]',      # movie camera
        '\U0001F4C4': '[File]',       # page facing up
        '\u2705': '[Success]',        # check mark
        '\u274C': '[Failed]',         # cross mark
    }
    for emoji, replacement in emoji_replacements.items():
        content = content.replace(emoji, replacement)

    img_style = 'style="max-width: 100%; height: auto; display: block; margin: 1em auto;"'

    def restyle_html_img(match):
        attrs = match.group(1)
        # A second style attribute would be invalid HTML (parsers keep only
        # the first), so already-styled tags are returned unchanged.
        if re.search(r'(?:^|\s)style\s*=', attrs, flags=re.IGNORECASE):
            return match.group(0)
        return f'<img {attrs} {img_style} />'

    # HTML tags are handled before markdown images so the tags generated
    # below are never matched a second time.
    content = re.sub(r'<img\s+([^>]*?)\s*/?>', restyle_html_img, content)

    # Pattern to match markdown images: ![alt](path)
    img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'

    def replace_image(match):
        # A double quote in the alt text would end the attribute early.
        alt_text = match.group(1).replace('"', '&quot;')
        img_path = match.group(2)
        if not img_path.startswith(('http://', 'https://', '/')):
            abs_img_path = os.path.join(file_dir, img_path)
            if os.path.exists(abs_img_path):
                # Normalise to a clean path relative to the markdown file.
                img_path = os.path.relpath(abs_img_path, file_dir)
        return f'<img src="{img_path}" alt="{alt_text}" {img_style} />'

    return re.sub(img_pattern, replace_image, content)
def main():
    """Command-line entry point: preprocess a single markdown file.

    Expects exactly one command-line argument, the path to a markdown file.
    The file is read as UTF-8, passed through ``process_mermaid_diagrams``
    and ``clean_emojis_and_fix_images``, and written next to the input as
    ``<name>_processed<ext>``.  The output path is the last line printed.

    Exits with status 1 on a usage error, a missing input file, or any
    exception raised while processing.
    """
    if len(sys.argv) != 2:
        print("Usage: python preprocess_markdown.py <markdown_file>")
        sys.exit(1)

    md_file = sys.argv[1]
    if not os.path.exists(md_file):
        print(f"Error: File {md_file} does not exist")
        sys.exit(1)

    try:
        file_dir = os.path.dirname(os.path.abspath(md_file))
        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()

        print(f"Processing file: {md_file}")
        print(f"File directory: {file_dir}")
        print(f"Content length: {len(content)} characters")

        # Process mermaid diagrams
        content = process_mermaid_diagrams(content, file_dir)
        print(f"Mermaid processing complete. Content length: {len(content)}")

        # Clean emojis and fix image paths
        content = clean_emojis_and_fix_images(content, file_dir)
        print(f"Image path fixing complete. Content length: {len(content)}")

        # Insert the suffix before the final extension only.  A plain
        # str.replace('.md', ...) would also rewrite '.md' inside directory
        # names and, when the name has no '.md', leave the path unchanged so
        # the input file itself would be overwritten.
        root, ext = os.path.splitext(md_file)
        processed_file = f"{root}_processed{ext}"
        with open(processed_file, 'w', encoding='utf-8') as f:
            f.write(content)

        print(f"Processed file saved as: {processed_file}")
        # Printed alone on the last line, as before.
        print(processed_file)
    except Exception as e:
        print(f"Error processing {md_file}: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


# Run the preprocessor only when executed as a script, not when imported.
if __name__ == "__main__":
    main()