| | #!/usr/bin/env node |
| |
|
| | import { readFileSync, writeFileSync, existsSync } from 'fs'; |
| | import { join, dirname, basename, extname } from 'path'; |
| | import { fileURLToPath } from 'url'; |
| | import { extractAndGenerateFrontmatter } from './metadata-extractor.mjs'; |
| |
|
// ES modules have no built-in __filename / __dirname, so derive both from
// this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Default conversion paths: <script dir>/output/main.md -> <script dir>/output/main.mdx
const [DEFAULT_INPUT, DEFAULT_OUTPUT] = ['main.md', 'main.mdx'].map(
  (fileName) => join(__dirname, 'output', fileName),
);
| |
|
/**
 * Parse the command line (process.argv) into a conversion config.
 *
 * Recognised forms:
 *   --input=PATH / --output=PATH   explicit paths
 *   --help, -h                     print usage and exit with status 0
 *   <input> [<output>]             positional paths, used only for values
 *                                  not already given via a flag
 *
 * Unknown dash-prefixed options are ignored.
 *
 * @returns {{input: string, output: string}} Resolved paths; each falls back
 *   to DEFAULT_INPUT / DEFAULT_OUTPUT when not supplied.
 */
function parseArgs() {
  const INPUT_FLAG = '--input=';
  const OUTPUT_FLAG = '--output=';

  const config = {
    input: DEFAULT_INPUT,
    output: DEFAULT_OUTPUT,
  };

  // The config is pre-filled with defaults, so "was it supplied?" has to be
  // tracked separately; testing `!config.input` could never be true.
  let inputGiven = false;
  let outputGiven = false;

  for (const arg of process.argv.slice(2)) {
    if (arg.startsWith(INPUT_FLAG)) {
      config.input = arg.slice(INPUT_FLAG.length);
      inputGiven = true;
    } else if (arg.startsWith(OUTPUT_FLAG)) {
      config.output = arg.slice(OUTPUT_FLAG.length);
      outputGiven = true;
    } else if (arg === '--help' || arg === '-h') {
      console.log(`
π Markdown to MDX Converter

Usage:
  node mdx-converter.mjs [options]

Options:
  --input=PATH    Input Markdown file (default: ${DEFAULT_INPUT})
  --output=PATH   Output MDX file (default: ${DEFAULT_OUTPUT})
  --help, -h      Show this help

Examples:
  # Basic conversion
  node mdx-converter.mjs

  # Custom paths
  node mdx-converter.mjs --input=article.md --output=article.mdx
`);
      process.exit(0);
    } else if (arg.startsWith('-')) {
      // Unknown option: ignored, never mistaken for a path.
      continue;
    } else if (!inputGiven) {
      config.input = arg;
      inputGiven = true;
    } else if (!outputGiven) {
      config.output = arg;
      outputGiven = true;
    }
  }

  return config;
}
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
// Names of the components the generated MDX references. Transform functions
// add to this set; addComponentImports() reads it to emit one import per
// component; processMdxContent() clears it at the start of each run.
const usedComponents = new Set();

// Image imports to emit: cleaned image source path -> generated JS variable
// name. Filled by the image transforms, read by addComponentImports(), and
// cleared by processMdxContent() at the start of each run.
const imageImports = new Map();
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Derive a JavaScript identifier for an image import from its path.
 *
 * Takes the last path segment, drops its final extension, replaces every
 * character outside [a-zA-Z0-9] with `_`, and prefixes `img_` when the
 * result would start with a digit.
 *
 * @param {string} src - Image path using `/` as separator.
 * @returns {string} Variable name derived from the file name.
 */
function generateImageVarName(src) {
  const segments = src.split('/');
  const lastSegment = segments[segments.length - 1];
  const stem = lastSegment.replace(/\.[^.]+$/, '');
  const identifier = stem.replace(/[^a-zA-Z0-9]/g, '_');

  if (/^[0-9]/.test(identifier)) {
    return `img_${identifier}`;
  }
  return identifier;
}
| |
|
/**
 * Insert the import statements for every component recorded in
 * `usedComponents` and every image recorded in `imageImports`.
 *
 * The import block goes right after the YAML frontmatter when the content
 * starts with one; otherwise it is prepended to the content.
 *
 * @param {string} content - MDX content.
 * @returns {string} Content with the import block added, or the content
 *   unchanged when nothing needs importing.
 */
function addComponentImports(content) {
  console.log(' π¦ Adding component and image imports...');

  const imports = [];

  if (usedComponents.size > 0) {
    const componentNames = Array.from(usedComponents);
    for (const name of componentNames) {
      imports.push(`import ${name} from '../components/${name}.astro';`);
    }
    console.log(` β Importing components: ${componentNames.join(', ')}`);
  }

  if (imageImports.size > 0) {
    for (const [src, varName] of imageImports) {
      imports.push(`import ${varName} from '${src}';`);
    }
    console.log(` β Importing ${imageImports.size} image(s)`);
  }

  if (imports.length === 0) {
    console.log(' βΉοΈ No imports needed');
    return content;
  }

  const importBlock = imports.join('\n');

  // Frontmatter only counts when the document STARTS with a `---` line and a
  // later line consists of `---` alone. A bare indexOf('---', 3) would also
  // hit horizontal rules, table separators, or a `---` inside a frontmatter
  // value, and splice the imports into the middle of the document.
  const frontmatter = /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---(?=[ \t]*(?:\r?\n|$))/.exec(content);
  if (frontmatter) {
    const frontmatterEnd = frontmatter[0].length;
    return content.slice(0, frontmatterEnd) + '\n\n' + importBlock + '\n' + content.slice(frontmatterEnd);
  }

  // No frontmatter: imports go at the very top.
  return importBlock + '\n\n' + content;
}
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Convert grouped sub-figures (a <figure> wrapping two or more inner
 * <figure> elements plus a shared <figcaption>) into a single <MultiImage>
 * component.
 *
 * Side effects: registers every image in `imageImports` and adds
 * 'MultiImage' to `usedComponents` when at least one group is converted.
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Content with each convertible group replaced.
 */
function convertSubfiguresToMultiImage(content) {
  console.log(' πΌοΈβ¨ Converting subfigures to MultiImage components...');

  const subfigureGroupPattern = /<figure>\s*((?:<figure>[\s\S]*?<\/figure>\s*){2,})<figcaption>([\s\S]*?)<\/figcaption>\s*<\/figure>/g;
  const layoutByCount = { 2: '2-column', 3: '3-column', 4: '4-column' };

  // Strip tags and collapse all whitespace; quoting is left to JSON.stringify.
  const toPlainText = (html) => html.replace(/<[^>]*>/g, '').replace(/\s+/g, ' ').trim();

  let convertedCount = 0;

  const convertedContent = content.replace(subfigureGroupPattern, (match, figuresMatch, globalCaption) => {
    // The span attributes are matched with [^>]* so the match stays inside one
    // opening tag. The former greedy [^&]* ran on to the LAST `></span></p>`
    // of the group and paired the first image with the last caption.
    // Built per group so the /g regex's lastIndex always starts at 0.
    const individualFigurePattern = /<figure>\s*<img src="([^"]*)"[^>]*\/>\s*<p><span id="([^"]*)"[^>]*><\/span><\/p>\s*<figcaption>([\s\S]*?)<\/figcaption>\s*<\/figure>/g;

    const images = [];
    let figureMatch;
    while ((figureMatch = individualFigurePattern.exec(figuresMatch)) !== null) {
      const [, src, id, caption] = figureMatch;

      // Rewrite any ".../output/assets/" prefix to a relative "./assets/".
      const cleanSrc = src.replace(/.*\/output\/assets\//, './assets/');
      const cleanCaption = toPlainText(caption);
      const altText = cleanCaption.length > 100
        ? cleanCaption.substring(0, 100) + '...'
        : cleanCaption;

      const varName = generateImageVarName(cleanSrc);
      imageImports.set(cleanSrc, varName);

      images.push({ src: varName, alt: altText, caption: cleanCaption, id });
    }

    // Nothing parseable inside: keep the original markup rather than replace
    // it with an empty component and lose the figures.
    if (images.length === 0) {
      return match;
    }

    convertedCount++;
    usedComponents.add('MultiImage');

    const layout = layoutByCount[images.length] || 'auto';

    // JSON.stringify yields valid JS string literals whatever quotes or
    // backslashes the captions contain.
    const imagesJson = images
      .map((img) => [
        '    {',
        `      src: ${img.src},`,
        `      alt: ${JSON.stringify(img.alt)},`,
        `      caption: ${JSON.stringify(img.caption)},`,
        `      id: ${JSON.stringify(img.id)}`,
        '    }',
      ].join('\n'))
      .join(',\n');

    return [
      '<MultiImage',
      '  images={[',
      imagesJson,
      '  ]}',
      `  layout="${layout}"`,
      '  zoomable',
      '  downloadable',
      // Expression form: a plain JSX attribute string cannot hold a `"`.
      `  caption={${JSON.stringify(toPlainText(globalCaption))}}`,
      '/>',
    ].join('\n');
  });

  if (convertedCount > 0) {
    console.log(` β Converted ${convertedCount} subfigure group(s) to MultiImage component(s)`);
  } else {
    console.log(' βΉοΈ No subfigure groups found');
  }

  return convertedContent;
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Build the markup for an <Image> component and register what it needs.
 *
 * Side effects: records `src` in `imageImports` (under the name returned by
 * generateImageVarName) and adds 'Image' to `usedComponents`.
 *
 * @param {string} src - Image path; also used as the import specifier.
 * @param {string} [alt=''] - Alternative text (plain text).
 * @param {string} [id=''] - Element id.
 * @param {string} [caption=''] - Caption, emitted inside a single-quoted JS
 *   string, so the caller must already have escaped single quotes.
 * @param {string|number} [width=''] - Width, emitted as a JS expression.
 * @returns {string} Multi-line `<Image ... />` markup.
 */
function createImageComponent(src, alt = '', id = '', caption = '', width = '') {
  const varName = generateImageVarName(src);
  imageImports.set(src, varName);
  usedComponents.add('Image');

  // A raw `"` would end a double-quoted JSX attribute early; JSX attribute
  // strings accept character references, so use &quot;.
  const escapeAttribute = (value) => String(value).replace(/"/g, '&quot;');

  const props = [`src={${varName}}`, 'zoomable', 'downloadable'];
  if (id) props.push(`id="${escapeAttribute(id)}"`);
  if (width) props.push(`width={${width}}`);
  if (alt) props.push(`alt="${escapeAttribute(alt)}"`);
  if (caption) props.push(`caption={'${caption}'}`);

  return `<Image\n  ${props.join('\n  ')}\n/>`;
}
| |
|
/**
 * Replace HTML/Markdown image markup with <Image> components built by
 * createImageComponent.
 *
 * Passes run from the most specific wrapper to the most generic form, so a
 * bare-<img> pass cannot consume the <img> that a wrapper pattern still needs:
 *   1. <figure id> directly containing <img> and <figcaption>
 *   2. <figure id> containing <div class="minipage"><img></div> and <figcaption>
 *   3. <div class="wrapfigure"> r<number> <img> </div>
 *   4. stand-alone <img> (optional style / alt attributes)
 *   5. Markdown ![alt](src){attributes}
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Content with images replaced by <Image> markup.
 */
function transformImages(content) {
  console.log(' πΌοΈ Transforming images to Image components with imports...');

  let hasImages = false;

  // Rewrite any ".../output/assets/" prefix to a relative "./assets/".
  const cleanSrcPath = (src) => src.replace(/.*\/output\/assets\//, './assets/');

  // Strip tags and collapse all whitespace (newlines included).
  const toPlainText = (text) => text.replace(/<[^>]*>/g, '').replace(/\s+/g, ' ').trim();

  // Captions end up inside a single-quoted JS string in the component.
  const escapeSingleQuotes = (text) => text.replace(/'/g, "\\'");

  const cleanAltText = (alt, maxLength = 100) => {
    const cleaned = toPlainText(alt);
    return cleaned.length > maxLength
      ? cleaned.substring(0, maxLength) + '...'
      : cleaned;
  };

  // Shared by both captioned-figure passes. The alt text comes from the
  // UNESCAPED caption so no backslash leaks into the alt attribute.
  const captionedFigure = (src, id, caption) => {
    hasImages = true;
    const plainCaption = toPlainText(caption);
    return createImageComponent(
      cleanSrcPath(src),
      cleanAltText(plainCaption),
      id,
      escapeSingleQuotes(plainCaption),
    );
  };

  let result = content;

  // 1. <figure id> with a direct <img> (optional style attribute).
  result = result.replace(
    /<figure id="([^"]*)">\s*<img src="([^"]*)"(?:\s+style="([^"]*)")?\s*\/>\s*<figcaption>\s*(.*?)\s*<\/figcaption>\s*<\/figure>/gs,
    (match, id, src, style, caption) => captionedFigure(src, id, caption),
  );

  // 2. <figure id> whose image sits inside a minipage div.
  result = result.replace(
    /<figure id="([^"]*)">\s*<div class="minipage">\s*<img src="([^"]*)"[^>]*\/>\s*<\/div>\s*<figcaption[^>]*>(.*?)<\/figcaption>\s*<\/figure>/gs,
    (match, id, src, caption) => captionedFigure(src, id, caption),
  );

  // 3. wrapfigure divs.
  result = result.replace(
    /<div class="wrapfigure">\s*r[\d.]+\s*<img src="([^"]*)"[^>]*\/>\s*<\/div>/gs,
    (match, src) => {
      hasImages = true;
      return createImageComponent(cleanSrcPath(src), 'Figure');
    },
  );

  // 4. Remaining stand-alone <img> tags.
  result = result.replace(
    /<img src="([^"]*)"(?:\s+style="([^"]*)")?\s*(?:alt="([^"]*)")?\s*\/>/g,
    (match, src, style, alt) => {
      hasImages = true;
      return createImageComponent(cleanSrcPath(src), cleanAltText(alt || 'Figure'));
    },
  );

  // 5. Markdown images, with an optional {#id ...} attribute block.
  result = result.replace(
    /!\[([^\]]*)\]\(([^)]+)\)(?:\{([^}]+)\})?/g,
    (match, alt, src, attributes) => {
      hasImages = true;

      let id = '';
      if (attributes) {
        // ':' and '.' are allowed so ids such as "fig:overview" are kept whole.
        const idMatch = attributes.match(/#([\w:.-]+)/);
        if (idMatch) id = idMatch[1];
      }

      return createImageComponent(cleanSrcPath(src), cleanAltText(alt || 'Figure'), id);
    },
  );

  if (hasImages) {
    console.log(' β Image components with imports will be created');
  }

  return result;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Fold a separate <figcaption> into the `caption` prop of the <Image>
 * component it belongs to, and drop the surrounding <figure> wrapper.
 *
 * Handles an <Image> followed by an empty anchor <span> (bare or wrapped in
 * <p>), directly inside <figure> or inside <figure><div class="minipage">.
 *
 * @param {string} content - Content that already contains <Image .../> markup.
 * @returns {string} Content with the matching figures collapsed.
 */
function transformImageFigures(content) {
  console.log(' π§ Transforming figures with Image components and separate figcaptions...');

  // Props of ONE <Image .../>: the capture may not run past the component's
  // own "/>". An unbounded lazy [\s\S]*? could expand over a neighbouring
  // figure and swallow everything in between.
  const IMAGE = '<Image((?:(?!\\/>)[\\s\\S])*)\\/>';
  // Anchor spans: attributes are matched with [^>]* so the match stays inside
  // one tag. The former greedy [^&]* could reach a much later span.
  const BARE_SPAN = '<span[^>]*><\\/span>';
  const WRAPPED_SPAN = '<p><span[^>]*><\\/span><\\/p>';
  const CAPTION = '<figcaption>([\\s\\S]*?)<\\/figcaption>\\s*<\\/figure>';

  // Same four shapes, in the same order, as the original four passes.
  const figurePatterns = [
    new RegExp(`<figure>\\s*${IMAGE}\\s*${BARE_SPAN}\\s*${CAPTION}`, 'g'),
    new RegExp(`<figure>\\s*${IMAGE}\\s*${WRAPPED_SPAN}\\s*${CAPTION}`, 'g'),
    new RegExp(`<figure>\\s*<div class="minipage">\\s*${IMAGE}\\s*${BARE_SPAN}\\s*<\\/div>\\s*${CAPTION}`, 'g'),
    new RegExp(`<figure>\\s*<div class="minipage">\\s*${IMAGE}\\s*${WRAPPED_SPAN}\\s*<\\/div>\\s*${CAPTION}`, 'g'),
  ];

  let hasTransformed = false;

  const attachCaption = (match, imageProps, caption) => {
    hasTransformed = true;

    // Plain-text caption on one line, safe inside a single-quoted JS string.
    const cleanCap = caption
      .replace(/<[^>]*>/g, '')
      .replace(/\s+/g, ' ')
      .replace(/'/g, "\\'")
      .trim();

    return `<Image${imageProps} caption={'${cleanCap}'}/>`;
  };

  let result = content;
  for (const pattern of figurePatterns) {
    result = result.replace(pattern, attachCaption);
  }

  if (hasTransformed) {
    console.log(' β Transformed figures with Image components to use caption props');
  } else {
    console.log(' βΉοΈ No figures with Image components and separate figcaptions found');
  }

  return result;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Turn colour-styled text into class-based spans.
 *
 * Handles both `<span style="color: X">text</span>` and the attribute form
 * `[text]{style="color: X"}`. The colours `hf1` / `hf2` map to
 * `text-hf-primary` / `text-hf-secondary`; any other colour X becomes the
 * class `text-X`.
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Content with styled spans rewritten.
 */
function transformStyledSpans(content) {
  console.log(' π¨ Transforming styled spans...');

  const colorMap = {
    'hf2': 'text-hf-secondary',
    'hf1': 'text-hf-primary'
  };

  const buildSpan = (color, text) => {
    const className = colorMap[color] || `text-${color}`;
    return `<span class="${className}">${text}</span>`;
  };

  const htmlSpans = /<span style="color: ([^"]+)">(.*?)<\/span>/g;
  const bracketedSpans = /\[([^\]]+)\]\{style="color: ([^"]+)"\}/g;

  return content
    .replace(htmlSpans, (whole, color, text) => buildSpan(color, text))
    .replace(bracketedSpans, (whole, text, color) => buildSpan(color, text));
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Convert `<div class="epigraph">` blocks into <Quote> components.
 *
 * The quote is the first `*emphasised*` run inside the block. The author is
 * the last non-empty line, provided it contains neither `*` nor `[`. Blocks
 * where either part is missing are left untouched.
 *
 * Side effect: adds 'Quote' to `usedComponents` when a block is converted.
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Content with convertible epigraphs replaced.
 */
function transformEpigraphs(content) {
  console.log(' π¬ Transforming epigraphs to Quote components...');

  let epigraphsConverted = 0;

  // Strip tags and collapse whitespace.
  const toPlainText = (text) => text.replace(/<[^>]*>/g, '').replace(/\s+/g, ' ').trim();

  const result = content.replace(
    /<div class="epigraph">([\s\S]*?)<\/div>/g,
    (match, body) => {
      const lines = body
        .trim()
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line);

      const quoteMatch = body.match(/\*([^*]+)\*/);
      const quoteText = quoteMatch ? quoteMatch[1].trim() : '';

      const lastLine = lines[lines.length - 1];
      const author = lastLine && !lastLine.includes('*') && !lastLine.includes('[')
        ? lastLine
        : '';

      if (!quoteText || !author) {
        return match;
      }

      epigraphsConverted++;
      usedComponents.add('Quote');

      // The quote is element content, where a Markdown backslash escape works.
      const cleanQuoteText = toPlainText(quoteText).replace(/"/g, '\\"');
      // The author is a JSX attribute string, where a backslash is NOT an
      // escape; `\"` would end the attribute early. Use &quot; instead.
      const cleanAuthor = toPlainText(author).replace(/"/g, '&quot;');

      return `<Quote source="${cleanAuthor}">\n${cleanQuoteText}\n</Quote>`;
    }
  );

  if (epigraphsConverted > 0) {
    console.log(` β Converted ${epigraphsConverted} epigraph(s) to Quote component(s)`);
  } else {
    console.log(' βΉοΈ No epigraphs found');
  }

  return result;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Replace `<span class="highlight">…</span>` with `<mark>…</mark>`.
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Content with highlight spans converted.
 */
function transformHighlightSpans(content) {
  console.log(' π― Transforming highlight spans to mark tags...');

  const highlightPattern = /<span class="highlight">(.*?)<\/span>/g;
  let replaced = 0;

  const result = content.replace(highlightPattern, (whole, inner) => {
    replaced += 1;
    return '<mark>' + inner + '</mark>';
  });

  if (replaced === 0) {
    console.log(' βΉοΈ No highlight spans found');
  } else {
    console.log(` β Converted ${replaced} highlight span(s) to mark tag(s)`);
  }

  return result;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Un-escape backslash-escaped mark tags: `\<mark\>…\</mark\>` becomes
 * `<mark>…</mark>`.
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Content with escaped mark tags restored.
 */
function fixEscapedMarkTags(content) {
  console.log(' π― Fixing escaped mark tags...');

  const escapedMarkPattern = /\\<mark\\>(.*?)\\<\/mark\\>/g;
  let repaired = 0;

  const result = content.replace(escapedMarkPattern, (whole, inner) => {
    repaired += 1;
    return '<mark>' + inner + '</mark>';
  });

  if (repaired === 0) {
    console.log(' βΉοΈ No escaped mark tags found');
  } else {
    console.log(` β Fixed ${repaired} escaped mark tag(s)`);
  }

  return result;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Repair span markup that arrives backslash-escaped or wrapped in <p>.
 *
 * Four passes:
 *   1. `\<span id style\>\</span\>`       -> real anchor span
 *   2. `\<span class\>text\</span\>`      -> real span
 *   3. `<p><span id style></span></p>`    -> unwrapped anchor span
 *   4. `<p><span class>text</span></p>`   -> unwrapped span
 * Anchor spans also get `position- absolute;` corrected to
 * `position: absolute;`. Spans of class `highlight` lose a leading "(n) ".
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Content with the span markup repaired.
 */
function fixHtmlEscaping(content) {
  console.log(' π§ Fixing HTML escaping in spans...');

  let fixedCount = 0;

  const anchorSpan = (match, id, style) => {
    fixedCount++;
    const cleanStyle = style.replace('position- absolute;', 'position: absolute;');
    return `<span id="${id}" style="${cleanStyle}"></span>`;
  };

  const classSpan = (match, className, text) => {
    fixedCount++;
    const cleanText = className === 'highlight'
      ? text.replace(/^\(\d+\)\s*/, '')
      : text;
    return `<span class="${className}">${cleanText}</span>`;
  };

  return finish(
    content
      .replace(/\\<span id="([^"]*)" style="([^"]*)"\\>\\<\/span\\>/g, anchorSpan)
      .replace(/\\<span class="([^"]*)"\\>([^\\]+)\\<\/span\\>/g, classSpan)
      .replace(/<p><span id="([^"]*)" style="([^"]*)"><\/span><\/p>/g, anchorSpan)
      // Lazy quantifier: stop at the FIRST closing </span></p>. The former
      // greedy [^&]* ran to the last one and fused separate paragraphs.
      .replace(/<p><span class="([^"]*)">([^&]*?)<\/span><\/p>/g, classSpan)
  );

  // Log the total once every pass has run, then hand the text back.
  function finish(result) {
    if (fixedCount > 0) {
      console.log(` β Fixed ${fixedCount} escaped span(s)`);
    }
    return result;
  }
}
| |
|
/**
 * Drop a leading "(n) " counter from the text of highlight spans.
 *
 * Only spans whose text contains no nested tag are matched.
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Content with the numbering removed.
 */
function cleanHighlightNumbering(content) {
  console.log(' π’ Removing numbering from highlight spans...');

  const numberedHighlight = /<span class="highlight">(\(\d+\)\s*)([^<]+)<\/span>/g;
  let stripped = 0;

  const result = content.replace(numberedHighlight, (whole, counter, label) => {
    stripped += 1;
    return '<span class="highlight">' + label + '</span>';
  });

  if (stripped > 0) {
    console.log(` β Removed numbering from ${stripped} highlight span(s)`);
  }

  return result;
}
| |
|
/**
 * Strip the `{… reference …}` attribute block from internal reference links,
 * leaving a plain Markdown link `[text](#anchor)`.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with reference links simplified.
 */
function transformReferenceLinks(content) {
  console.log(' π Transforming reference links...');

  const referenceLink = /\[([^\]]+)\]\((#[^)]+)\)\{[^}]*reference[^}]*\}/g;

  // $1 = link text, $2 = "#anchor"; the captures are inserted literally.
  return content.replace(referenceLink, '[$1]($2)');
}
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Make sure the document starts with a frontmatter block.
 *
 * Content that already begins with `---` is returned untouched. Otherwise a
 * frontmatter block is prepended: the result of
 * extractAndGenerateFrontmatter(latexContent) when LaTeX source is given,
 * else a basic block with a fixed title and today's date.
 *
 * @param {string} content - Markdown content.
 * @param {string} [latexContent=''] - LaTeX source to extract metadata from.
 * @returns {string} Content starting with frontmatter.
 */
function ensureFrontmatter(content, latexContent = '') {
  console.log(' π Ensuring proper frontmatter...');

  if (content.startsWith('---')) {
    return content;
  }

  if (latexContent) {
    const extracted = extractAndGenerateFrontmatter(latexContent);
    console.log(' β Generated frontmatter from LaTeX metadata');
    return extracted + content;
  }

  const dateFormat = { year: 'numeric', month: 'short', day: '2-digit' };
  const published = new Date().toLocaleDateString('en-US', dateFormat);

  // The two trailing empty entries produce the blank line after the block.
  const basicFrontmatter = [
    '---',
    'title: "Research Article"',
    `published: "${published}"`,
    'tableOfContentsAutoCollapse: true',
    '---',
    '',
    '',
  ].join('\n');
  console.log(' β Generated basic frontmatter');

  return basicFrontmatter + content;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Normalise inline math written with a mix of `$` and backticks.
 *
 * Three forms are rewritten to `$math$`, in this order:
 *   $`math`$   ->  $math$
 *   `math`$    ->  $math$
 *   $`math`    ->  $math$   (when not followed by `$`)
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with consistent `$…$` delimiters.
 */
function fixMixedMathDelimiters(content) {
  console.log(' π§ Fixing mixed math delimiters...');

  const mixedForms = [
    /\$`([^`]*)`\$/g,
    /`([^`]*)`\$/g,
    /\$`([^`]*)`(?!\$)/g,
  ];

  let repairs = 0;
  const wrapInDollars = (whole, math) => {
    repairs += 1;
    return '$' + math + '$';
  };

  const result = mixedForms.reduce(
    (text, form) => text.replace(form, wrapInDollars),
    content,
  );

  if (repairs > 0) {
    console.log(` β Fixed ${repairs} mixed math delimiter(s)`);
  }

  return result;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Clean up stray delimiters around display math.
 *
 * Steps:
 *   1. When the number of `$$` delimiters is odd, remove a trailing `$$`
 *      that stands alone on its line and has no `$$` after it.
 *   2. Remove every backtick found inside a `$$…$$` block.
 *   3. Put free text that sits between two "``` math" fences on its own
 *      paragraph, separated from the fences by blank lines.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Cleaned content.
 */
function cleanOrphanedMathDelimiters(content) {
  console.log(' π§Ή Cleaning orphaned math delimiters...');
  console.log(' π Content length:', content.length, 'chars');

  let fixedCount = 0;
  let result = content;

  // "No `$$` after this line" is true of the LAST delimiter of every document,
  // including the one that closes a balanced block. So the removal only runs
  // when the delimiters do not pair up.
  const delimiterCount = (result.match(/\$\$/g) || []).length;
  if (delimiterCount % 2 === 1) {
    result = result.replace(/^\$\$\s*$(?!\s*[\s\S]*?\$\$)/gm, () => {
      fixedCount++;
      return '';
    });
  }

  const mathMatches = result.match(/\$\$([\s\S]*?)\$\$/g);
  console.log(` π Found ${mathMatches ? mathMatches.length : 0} math blocks`);

  // Strip backticks inside display math.
  result = result.replace(/\$\$([\s\S]*?)\$\$/g, (match, mathContent) => {
    const backtickCount = (mathContent.match(/`/g) || []).length;
    if (backtickCount === 0) {
      return match;
    }

    console.log(` π§ Found math block with ${backtickCount} backtick(s)`);
    fixedCount++;
    console.log(` π§ Removed ${backtickCount} backtick(s) from math block`);
    return '$$' + mathContent.replace(/`/g, '') + '$$';
  });

  // The old self-replacements of \begin{align} / \end{align} changed no text
  // and only inflated the fix counter, so they are gone.

  // Text squeezed between the end of one math fence and the start of the next.
  result = result.replace(/``` math\s*\n([\s\S]*?)\n```\s*([^`\n]*?)\s*``` math/g, (match, math1, text) => {
    if (text.trim().length > 0 && !text.includes('```')) {
      fixedCount++;
      return '```' + ' math\n' + math1 + '\n```\n\n' + text.trim() + '\n\n```' + ' math';
    }
    return match;
  });

  if (fixedCount > 0) {
    console.log(` β Fixed ${fixedCount} orphaned math delimiter(s)`);
  }

  return result;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Collapse line breaks inside single-dollar math spans (`$…$`).
 *
 * A span that contains a newline has all whitespace runs reduced to one space
 * and its ends trimmed. Spans without a newline are left exactly as they are.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with multi-line inline math flattened.
 */
function cleanSingleLineMathNewlines(content) {
  console.log(' π’ Cleaning newlines in single-dollar math blocks ($...$)...');

  // A `$` not followed by another `$`, lazily up to the next such `$`.
  const inlineMath = /\$(?!\$)([\s\S]*?)\$(?!\$)/g;
  let flattened = 0;

  const result = content.replace(inlineMath, (whole, math) => {
    if (!math.includes('\n')) {
      return whole;
    }

    flattened += 1;
    const singleLine = math.replace(/\s+/g, ' ').trim();
    return '$' + singleLine + '$';
  });

  if (flattened > 0) {
    console.log(` β Cleaned ${flattened} single-dollar math block(s) with newlines`);
  }

  return result;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Put every display math block (`$$…$$`) on its own lines.
 *
 * Each block becomes: newline, `$$`, newline, trimmed math, newline, `$$`,
 * newline.
 *
 * @param {string} content - Markdown content.
 * @returns {string} Content with display math reformatted.
 */
function formatDisplayMathBlocks(content) {
  console.log(' π Formatting display math blocks with proper spacing...');

  const displayMath = /\$\$([\s\S]*?)\$\$/g;
  let blocks = 0;

  const result = content.replace(displayMath, (whole, math) => {
    blocks += 1;
    return ['', '$$', math.trim(), '$$', ''].join('\n');
  });

  if (blocks > 0) {
    console.log(` β Formatted ${blocks} display math block(s) with proper spacing`);
  }

  return result;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Flatten multi-line <figcaption> bodies onto a single line.
 *
 * A caption containing a newline has its whitespace runs collapsed to one
 * space and its ends trimmed; the tag's attributes are kept. Captions without
 * a newline are returned untouched.
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Content with single-line captions.
 */
function cleanFigcaptionNewlines(content) {
  console.log(' π Cleaning newlines in figcaption elements...');

  const figcaption = /<figcaption([^>]*)>([\s\S]*?)<\/figcaption>/g;
  let flattened = 0;

  const result = content.replace(figcaption, (whole, attributes, body) => {
    if (!body.includes('\n')) {
      return whole;
    }

    flattened += 1;
    const singleLine = body.replace(/\s+/g, ' ').trim();
    return '<figcaption' + attributes + '>' + singleLine + '</figcaption>';
  });

  if (flattened === 0) {
    console.log(` βΉοΈ No figcaption elements with newlines found`);
  } else {
    console.log(` β Cleaned ${flattened} figcaption element(s)`);
  }

  return result;
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Delete every HTML comment (`<!-- … -->`, possibly spanning lines).
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Content without HTML comments.
 */
function removeHtmlComments(content) {
  console.log(' ποΈ Removing HTML comments...');

  const htmlComment = /<!--[\s\S]*?-->/g;
  const found = content.match(htmlComment);
  const removed = found ? found.length : 0;

  if (removed > 0) {
    console.log(` β Removed ${removed} HTML comment(s)`);
  }

  return content.replace(htmlComment, '');
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Apply small syntax clean-ups to the content.
 *
 * In order:
 *   1. whitespace between a `>` and the next `<` becomes a single newline;
 *   2. a trailing `{…}` attribute block is dropped from heading lines;
 *   3. backslash-escaped quotes (`\"`, `\'`) are un-escaped.
 *
 * @param {string} content - Markdown/HTML content.
 * @returns {string} Cleaned content.
 */
function cleanMdxSyntax(content) {
  console.log(' π§Ή Cleaning MDX syntax...');

  const tagsOnSeparateLines = content.replace(/>\s*</g, '>\n<');
  const plainHeadings = tagsOnSeparateLines.replace(/^(#{1,6}\s+[^{#\n]+)\{[^}]+\}$/gm, '$1');
  const unescapedQuotes = plainHeadings.replace(/\\("|')/g, '$1');

  return unescapedQuotes;
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Run the full Markdown -> MDX transformation pipeline.
 *
 * Clears the shared `usedComponents` / `imageImports` registries, then applies
 * every transform in a fixed order, finishing with addComponentImports.
 *
 * @param {string} content - Markdown content.
 * @param {string} [latexContent=''] - LaTeX source, forwarded to
 *   ensureFrontmatter.
 * @returns {string} MDX content.
 */
function processMdxContent(content, latexContent = '') {
  console.log('π§ Processing for Astro MDX compatibility...');

  // Start every run with empty registries.
  usedComponents.clear();
  imageImports.clear();

  const withFixedMath = fixMixedMathDelimiters(ensureFrontmatter(content, latexContent));

  // Diagnostic only: number of $$…$$ blocks at this point.
  const displayBlocks = withFixedMath.match(/\$\$([\s\S]*?)\$\$/g) || [];
  console.log(` π Math blocks after mixed delimiters fix: ${displayBlocks.length}`);

  // Order matters: later steps work on the output of earlier ones, and the
  // imports step must run last, once every component and image is registered.
  const remainingSteps = [
    cleanOrphanedMathDelimiters,
    cleanSingleLineMathNewlines,
    formatDisplayMathBlocks,
    removeHtmlComments,
    cleanMdxSyntax,
    convertSubfiguresToMultiImage,
    transformImages,
    transformImageFigures,
    transformStyledSpans,
    transformHighlightSpans,
    fixEscapedMarkTags,
    transformEpigraphs,
    transformReferenceLinks,
    fixHtmlEscaping,
    cleanHighlightNumbering,
    cleanFigcaptionNewlines,
    addComponentImports,
  ];

  return remainingSteps.reduce((text, step) => step(text), withFixedMath);
}
| |
|
/**
 * Convert a Markdown file to an MDX file.
 *
 * Reads `inputFile`, optionally loads the LaTeX source at
 * `<input dir>/../input/main.tex` for metadata, runs processMdxContent and
 * writes the result to `outputFile`.
 *
 * Terminates the process with exit code 1 when the input file does not exist
 * or when the conversion throws.
 *
 * @param {string} inputFile - Path of the Markdown file to read.
 * @param {string} outputFile - Path of the MDX file to write.
 * @returns {void}
 */
function convertToMdx(inputFile, outputFile) {
  console.log('π Modular Markdown to Astro MDX Converter');
  console.log(`π Input: ${inputFile}`);
  console.log(`π Output: ${outputFile}`);

  if (!existsSync(inputFile)) {
    console.error(`β Input file not found: ${inputFile}`);
    process.exit(1);
  }

  try {
    console.log('π Reading Markdown file...');
    const markdownContent = readFileSync(inputFile, 'utf8');

    // The LaTeX source is optional: it only improves the generated frontmatter.
    let latexContent = '';
    try {
      const latexFile = join(dirname(inputFile), '..', 'input', 'main.tex');
      if (existsSync(latexFile)) {
        latexContent = readFileSync(latexFile, 'utf8');
      }
    } catch (error) {
      // Best effort: continue without LaTeX metadata, but say why rather than
      // hide the failure.
      console.warn(`Could not read LaTeX source for metadata: ${error.message}`);
    }

    const mdxContent = processMdxContent(markdownContent, latexContent);

    console.log('πΎ Writing MDX file...');
    writeFileSync(outputFile, mdxContent);

    console.log(`β Conversion completed: ${outputFile}`);

    // Report real byte sizes; string length counts UTF-16 code units.
    const inputSize = Math.round(Buffer.byteLength(markdownContent, 'utf8') / 1024);
    const outputSize = Math.round(Buffer.byteLength(mdxContent, 'utf8') / 1024);
    console.log(`π Input: ${inputSize}KB β Output: ${outputSize}KB`);
  } catch (error) {
    console.error('β Conversion failed:');
    console.error(error.message);
    process.exit(1);
  }
}
| |
|
| | export { convertToMdx }; |
| |
|
/**
 * CLI entry point: parse the command line and run the conversion.
 *
 * @returns {void}
 */
function main() {
  const { input, output } = parseArgs();
  convertToMdx(input, output);
  console.log('π MDX conversion completed!');
}

// Run only when this file is the script Node was started with. Compare file
// system paths rather than a hand-built `file://` URL: import.meta.url is
// percent-encoded (spaces, non-ASCII) and has a different shape on Windows,
// so the string comparison could never match there.
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
  main();
}
| |
|