Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| import { readFileSync, readdirSync, statSync } from 'fs'; | |
| import { join, relative, dirname } from 'path'; | |
| import { fileURLToPath } from 'url'; | |
| /** | |
| * Extract HtmlEmbed, Image components and tables from MDX/Markdown content | |
| * Simple utility to find visual elements and their props | |
| */ | |
/**
 * Minimal Markdown-to-HTML converter for table cells.
 *
 * Handles links, bold, italic, inline code, strikethrough and `[x]` / `[ ]`
 * checkboxes. Generated `<a ...>` / `<code>` markup is parked behind
 * placeholders while the emphasis passes run, so that underscores and
 * asterisks inside URLs, attributes (`target="_blank"`) or code spans are
 * never mistaken for emphasis markers.
 *
 * @param {string} md - Markdown source of a single cell.
 * @returns {string} HTML fragment; an empty string for falsy input.
 */
function markdownToHtml(md) {
  if (!md) return '';

  // Markup that later passes must not touch, restored at the very end.
  const protectedChunks = [];
  const protect = (chunk) => {
    protectedChunks.push(chunk);
    return `\u0000${protectedChunks.length - 1}\u0000`;
  };

  let html = md;

  if (!html.includes('<a ') && !html.includes('<strong>')) {
    // Plain Markdown: escape HTML special characters, leaving entities that
    // are already written out (e.g. `&amp;`, `&#39;`) untouched.
    html = html
      .replace(/&(?!#?\w+;)/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  } else {
    // Already-converted content: keep the existing tags verbatim.
    html = html.replace(/<\/?[a-zA-Z][^>]*>/g, (tag) => protect(tag));
  }

  // Inline code: `code` (handled first so its content is left as-is)
  html = html.replace(/`([^`]+)`/g, (_, code) => protect(`<code>${code}</code>`));

  // Links: [text](url) - the link text stays open to the emphasis passes
  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_, text, url) => {
    const open = protect(`<a href="${url}" target="_blank" rel="noopener">`);
    return `${open}${text}${protect('</a>')}`;
  });

  // Bold: **text** or __text__
  html = html.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
  html = html.replace(/__([^_]+)__/g, '<strong>$1</strong>');

  // Italic: *text* or _text_
  html = html.replace(/(?<![*_])\*([^*]+)\*(?![*_])/g, '<em>$1</em>');
  html = html.replace(/(?<![*_])_([^_]+)_(?![*_])/g, '<em>$1</em>');

  // Strikethrough: ~~text~~
  html = html.replace(/~~([^~]+)~~/g, '<del>$1</del>');

  // Checkboxes (common in tables)
  html = html.replace(/\[x\]/gi, '✅');
  html = html.replace(/\[ \]/g, '❌');

  return html.replace(/\u0000(\d+)\u0000/g, (_, index) => protectedChunks[Number(index)]);
}
/**
 * Extract self-closing `<Image ... />` components from MDX content.
 *
 * Only images whose `src` is a JSX expression (`src={myImage}`) are returned.
 * Quoted attribute values are matched against their own opening quote, so a
 * value may contain the other quote character (`alt="Bob's chart"`), and
 * attribute names are matched as whole names (`data-id` is not read as `id`).
 *
 * @param {string} content - Raw MDX/Markdown source.
 * @returns {Array<{type: 'image', src: string, alt: string, caption: (string|null), id: (string|null)}>}
 *   One entry per image, in order of appearance. `alt` defaults to `'Image'`.
 */
export function extractImages(content) {
  const images = [];
  const imagePattern = /<Image[^>]*\/>/gi;

  // Value of a quoted attribute (name="..." or name='...'), or null if absent/empty.
  const quotedAttr = (tag, name) => {
    const pattern = new RegExp(
      `(?<![\\w-])${name}\\s*=\\s*(["'])((?:(?!\\1)[\\s\\S])+)\\1`,
      'i',
    );
    const found = tag.match(pattern);
    return found ? found[2] : null;
  };

  for (const [tag] of content.matchAll(imagePattern)) {
    // src is a variable reference like {myImage}
    const srcMatch = tag.match(/(?<![\w-])src\s*=\s*\{([^}]+)\}/i);
    if (!srcMatch) continue;

    // caption may be quoted or a JSX template string: caption={`...`}
    let caption = quotedAttr(tag, 'caption');
    if (caption === null) {
      const templateCaption = tag.match(/(?<![\w-])caption\s*=\s*\{`([^`]+)`\}/i);
      caption = templateCaption ? templateCaption[1] : null;
    }

    const alt = quotedAttr(tag, 'alt');

    images.push({
      type: 'image',
      src: srcMatch[1].trim(),
      alt: alt === null ? 'Image' : alt,
      caption,
      id: quotedAttr(tag, 'id'),
    });
  }

  return images;
}
/**
 * Extract Markdown pipe tables from content.
 *
 * A table is a header row, a separator row and at least one data row, each
 * starting and ending with `|`. Both LF and CRLF line endings are accepted.
 * Cells are split on unescaped pipes only, and empty cells are kept so that
 * every row stays aligned with the header columns.
 *
 * @param {string} content - Raw MDX/Markdown source.
 * @returns {Array<{type: 'table', id: string, headers: string[], rows: string[][], raw: string}>}
 *   Tables in order of appearance; cell text is converted with `markdownToHtml`.
 */
export function extractTables(content) {
  const tables = [];
  const tablePattern = /(\|[^\n]+\|\r?\n\|[-:\s|]+\|\r?\n(?:\|[^\n]+\|(?:\r?\n)?)+)/g;

  // Split one table row into HTML cells, dropping only the outer pipes.
  const parseRow = (row) => {
    const inner = row.trim().replace(/^\|/, '').replace(/(?<!\\)\|$/, '');
    return inner
      .split(/(?<!\\)\|/)
      .map((cell) => markdownToHtml(cell.trim().replace(/\\\|/g, '|')));
  };

  let tableIndex = 0;
  for (const match of content.matchAll(tablePattern)) {
    const tableContent = match[1].trim();
    const rows = tableContent.split(/\r?\n/).filter((row) => row.trim());
    if (rows.length < 3) continue;

    tables.push({
      type: 'table',
      id: `table-${tableIndex++}`,
      headers: parseRow(rows[0]),
      // Index 1 is the separator row.
      rows: rows.slice(2).map(parseRow),
      raw: tableContent,
    });
  }

  return tables;
}
/**
 * Extract every `<HtmlEmbed ... />` tag from MDX content together with its props.
 *
 * Tags are read with a small scanner (not a regex) so that `/>` inside string
 * values or inside JSX expressions such as `config={{ ... }}` does not end the
 * tag early. Embeds without a `src` are skipped.
 *
 * Boolean props (`frameless`, `wide`, `skipGallery`) are detected on the tag
 * with all attribute values removed, so text such as
 * `src="d3-wide-chart.html"` does not switch `wide` on. Attribute names are
 * matched as whole names (`data-id` is not read as `id`).
 *
 * NOTE(security): `config={{ ... }}` is evaluated with `new Function`. That
 * executes whatever the MDX file contains; only run this on trusted content.
 *
 * @param {string} content - Raw MDX/Markdown source.
 * @returns {Array<{src: string, title: (string|undefined), desc: (string|undefined),
 *   id: (string|undefined), frameless: boolean, data: (string|undefined),
 *   config: (Object|string|null), wide: boolean, skipGallery: boolean}>}
 *   Embeds in order of appearance. `wide` is true when the tag has a `wide`
 *   prop or starts inside a `<Wide>...</Wide>` block.
 */
export function extractHtmlEmbeds(content) {
  const embeds = [];

  // Record the span of every <Wide>...</Wide> block.
  const widePattern = /<Wide[\s\S]*?>([\s\S]*?)<\/Wide>/gi;
  const wideBlocks = [];
  let wideMatch;
  while ((wideMatch = widePattern.exec(content)) !== null) {
    wideBlocks.push({
      start: wideMatch.index,
      end: wideMatch.index + wideMatch[0].length,
      content: wideMatch[0],
    });
  }
  const isInsideWide = (embedStartIndex) =>
    wideBlocks.some((block) => embedStartIndex >= block.start && embedStartIndex < block.end);

  // Read the tag starting at matchIndex up to its closing "/>", ignoring any
  // "/>" found inside strings or unbalanced JSX braces. Runs to the end of the
  // content when no closing is found.
  const readTag = (matchIndex) => {
    let pos = matchIndex + 10; // After "<HtmlEmbed"
    let tag = '<HtmlEmbed';
    let inString = false;
    let stringDelim = null;
    let braceDepth = 0;
    while (pos < content.length) {
      const char = content[pos];
      const prevChar = pos > 0 ? content[pos - 1] : '';
      tag += char;
      if (!inString) {
        if ((char === '`' || char === '"' || char === "'") && prevChar !== '\\') {
          inString = true;
          stringDelim = char;
        }
      } else if (char === stringDelim && prevChar !== '\\') {
        inString = false;
        stringDelim = null;
      }
      if (!inString) {
        if (char === '{') {
          braceDepth++;
        } else if (char === '}') {
          braceDepth--;
        }
      }
      if (!inString && braceDepth === 0 && char === '/' && pos + 1 < content.length && content[pos + 1] === '>') {
        tag += '>';
        break;
      }
      pos++;
    }
    return tag;
  };

  // Recovery path: the tag contains "config={{" but no "}}", so the first scan
  // did not capture the whole config. Re-scan from the config start for the
  // closing braces followed by "/>", then fall back to a plain "}} />" search.
  const extendPastConfig = (matchIndex, tag) => {
    if (!(tag.includes('config={{') && !tag.includes('}}'))) return tag;

    const configStart = tag.indexOf('config={{');
    let braceCount = 2; // We're inside {{
    let pos = matchIndex + configStart + 9; // After "config={{"
    while (pos < content.length) {
      const char = content[pos];
      const prevChar = pos > 0 ? content[pos - 1] : '';
      if ((char === '"' || char === "'" || char === '`') && prevChar !== '\\') {
        // Skip the whole string so braces inside it are not counted.
        const delim = char;
        pos++;
        while (pos < content.length) {
          if (content[pos] === delim && content[pos - 1] !== '\\') {
            break;
          }
          if (delim === '`' && content[pos] === '$' && pos + 1 < content.length && content[pos + 1] === '{') {
            // Skip a template interpolation ${...} without counting its braces.
            pos += 2;
            let innerBraces = 1;
            while (pos < content.length && innerBraces > 0) {
              if (content[pos] === '{') innerBraces++;
              if (content[pos] === '}') innerBraces--;
              pos++;
            }
            continue;
          }
          pos++;
        }
        pos++; // Skip the closing quote
        continue;
      }
      if (char === '{') braceCount++;
      if (char === '}') {
        braceCount--;
        if (braceCount === 0) {
          // Closing braces found: expect "/>" after optional whitespace.
          pos++;
          while (pos < content.length && /\s/.test(content[pos])) {
            pos++;
          }
          if (pos < content.length && content[pos] === '/' && pos + 1 < content.length && content[pos + 1] === '>') {
            return content.substring(matchIndex, pos + 2);
          }
        }
      }
      pos++;
    }

    const after = content.substring(matchIndex + tag.length);
    const endPattern = after.match(/\}\}\s*\/>/);
    if (endPattern) {
      return content.substring(matchIndex, matchIndex + tag.length + endPattern.index + endPattern[0].length);
    }
    return tag;
  };

  // Value of attrName written as {`...`}, '...' or "..." (tried in that order),
  // trimmed; undefined when the attribute is absent.
  const extractAttr = (attrName, tag) => {
    const prefix = `(?<![\\w-])${attrName}\\s*=\\s*`;
    const patterns = [
      new RegExp(prefix + '\\{`([\\s\\S]*?)`\\}', 'i'),
      new RegExp(prefix + "'([\\s\\S]*?)'", 'i'),
      new RegExp(prefix + '"([\\s\\S]*?)"', 'i'),
    ];
    for (const pattern of patterns) {
      const found = tag.match(pattern);
      if (found) return found[1].trim();
    }
    return undefined;
  };

  // Copy of the tag with every quoted string and {...} expression removed, so
  // only attribute names and bare props remain.
  const stripAttrValues = (tag) => {
    let bare = '';
    let quote = null;
    let depth = 0;
    for (let i = 0; i < tag.length; i++) {
      const ch = tag[i];
      const escaped = i > 0 && tag[i - 1] === '\\';
      if (quote !== null) {
        if (ch === quote && !escaped) quote = null;
      } else if ((ch === '"' || ch === "'" || ch === '`') && !escaped) {
        quote = ch;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        if (depth > 0) depth--;
      } else if (depth === 0) {
        bare += ch;
      }
    }
    return bare;
  };

  // Parse config={{ ... }} into an object; null when absent or unparsable.
  const parseJsxConfig = (tag) => {
    const jsxConfigMatch = tag.match(/config\s*=\s*\{\{/i);
    if (!jsxConfigMatch) return null;

    let config = null;
    try {
      const startPos = tag.indexOf('{{', jsxConfigMatch.index) + 2;
      let braceCount = 1; // Start at 1 because we're inside {{
      let inString = false;
      let stringChar = null;
      let pos = startPos;
      for (; pos < tag.length; pos++) {
        const char = tag[pos];
        const prevChar = pos > 0 ? tag[pos - 1] : '';
        if (!inString) {
          if (char === '`') {
            inString = true;
            stringChar = '`';
          } else if ((char === '"' || char === "'") && prevChar !== '\\') {
            inString = true;
            stringChar = char;
          }
        } else if (char === stringChar && prevChar !== '\\') {
          inString = false;
          stringChar = null;
        }
        // Braces inside strings (including template `${...}`) are never
        // counted, so they cannot unbalance the match.
        if (!inString) {
          if (char === '{') {
            braceCount++;
          } else if (char === '}') {
            braceCount--;
            if (braceCount === 0) {
              break;
            }
          }
        }
      }
      if (braceCount !== 0) {
        throw new Error(`Unbalanced braces: braceCount=${braceCount}`);
      }

      const jsxContent = tag.substring(startPos, pos).trim();
      try {
        // Evaluate as a JavaScript object literal (see security note above).
        const jsCode = `({${jsxContent}})`;
        config = new Function('return ' + jsCode)();
      } catch (evalError) {
        // Fallback: coerce the literal into JSON and parse that.
        let jsonStr = '{' + jsxContent + '}';
        // Quote unquoted keys
        for (let pass = 0; pass < 5; pass++) {
          jsonStr = jsonStr.replace(/([{,\[\s])([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:/g, '$1"$2":');
          jsonStr = jsonStr.replace(/^([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:/gm, '"$1":');
        }
        // Replace single quotes with double quotes
        jsonStr = jsonStr.replace(/'/g, '"');
        // Remove trailing commas
        jsonStr = jsonStr.replace(/,\s*([}\]])/g, '$1');
        try {
          config = JSON.parse(jsonStr);
        } catch (jsonError) {
          // Both methods failed - log warning but don't throw
          console.warn('[extract-embeds] Config parsing failed:', jsonError.message);
        }
      }
    } catch (e) {
      // Keep config as null; the component handles a missing config.
      console.warn('[extract-embeds] Config parsing failed:', e.message);
    }
    return config;
  };

  const embedPattern = /<HtmlEmbed/gi;
  let embedMatch;
  while ((embedMatch = embedPattern.exec(content)) !== null) {
    const matchIndex = embedMatch.index;
    const tag = extendPastConfig(matchIndex, readTag(matchIndex));

    // src is required
    const src = extractAttr('src', tag);
    if (!src) continue;

    const title = extractAttr('title', tag);
    const desc = extractAttr('desc', tag);
    const id = extractAttr('id', tag);
    const data = extractAttr('data', tag);

    const bareTag = stripAttrValues(tag);
    const frameless = /\bframeless\b/i.test(bareTag);
    const wideAttr = /\bwide\b/i.test(bareTag);
    const skipGallery = /\bskipGallery\b/i.test(bareTag);

    let config = parseJsxConfig(tag);
    // Fallback: string-based config attribute (JSON if it parses, else raw string)
    if (!config) {
      const configAttr = extractAttr('config', tag);
      if (configAttr) {
        try {
          config = JSON.parse(configAttr);
        } catch (e) {
          config = configAttr;
        }
      }
    }

    embeds.push({
      src,
      title,
      desc,
      id,
      frameless,
      data,
      config,
      wide: isInsideWide(matchIndex) || wideAttr,
      skipGallery,
    });
  }

  return embeds;
}
/**
 * Walk a directory tree and collect the paths of all `.mdx` files.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [baseDir=dir] - Passed through unchanged to recursive calls.
 * @param {string[]} [files=[]] - Accumulator; found paths are appended to it.
 * @param {boolean} [skipDemo=true] - When true, directories named `demo` are not entered.
 * @returns {string[]} The `files` accumulator.
 */
function findMdxFiles(dir, baseDir = dir, files = [], skipDemo = true) {
  readdirSync(dir).forEach((name) => {
    const entryPath = join(dir, name);

    if (statSync(entryPath).isDirectory()) {
      const isSkippedDemo = skipDemo && name === 'demo';
      if (!isSkippedDemo) {
        findMdxFiles(entryPath, baseDir, files, skipDemo);
      }
      return;
    }

    if (name.endsWith('.mdx')) {
      files.push(entryPath);
    }
  });

  return files;
}
/**
 * Work out which chapter files an article uses, in order of first use.
 *
 * Chapters are default imports from `./chapters/...`; usage is a self-closing
 * tag without props (e.g. `<Introduction />`). Imported chapters that are
 * never used are not returned.
 *
 * @param {string} articleContent - Source of the article MDX file.
 * @param {string} contentDir - Directory the import paths are joined onto.
 * @returns {string[]} Chapter file paths, de-duplicated, ordered by first usage.
 */
function parseArticleChapters(articleContent, contentDir) {
  // Component name -> chapter file path. The leading dot is escaped so that
  // only relative "./chapters/..." imports qualify.
  const chapterPathByComponent = new Map();
  const importPattern = /import\s+(\w+)\s+from\s+["'](\.\/chapters\/[^"']+)["']/g;
  for (const [, componentName, importPath] of articleContent.matchAll(importPattern)) {
    chapterPathByComponent.set(componentName, join(contentDir, importPath));
  }

  // A Set keeps first-insertion order and drops repeated usages.
  const orderedPaths = new Set();
  const usagePattern = /<(\w+)\s*\/>/g;
  for (const [, componentName] of articleContent.matchAll(usagePattern)) {
    const chapterPath = chapterPathByComponent.get(componentName);
    if (chapterPath !== undefined) {
      orderedPaths.add(chapterPath);
    }
  }

  return [...orderedPaths];
}
/**
 * Load the HtmlEmbeds declared in the MDX content files, following the
 * article structure.
 *
 * Order: `article.mdx` itself, then the chapters in the order the article uses
 * them, then every remaining `.mdx` file found under the content directory
 * (`demo` directories are not scanned). If processing the article throws, all
 * `.mdx` files found by the directory scan are processed instead.
 *
 * The content directory is `../content` relative to this module, or
 * `../../src/content`, or `../../../src/content`, whichever is tried first
 * and exists (the last one is used unchecked).
 *
 * @returns {Array<Object>} Embeds with an added `sourceFile`, de-duplicated by
 *   `src`; for a repeated `src` the first occurrence is the one kept.
 */
export function loadEmbedsFromMDX() {
  const __filename = fileURLToPath(import.meta.url);
  const __dirname = dirname(__filename);

  // Dev layout first, then the two build layouts.
  let contentDir = join(__dirname, '../content');
  if (!statSync(contentDir, { throwIfNoEntry: false })) {
    contentDir = join(__dirname, '../../src/content');
  }
  if (!statSync(contentDir, { throwIfNoEntry: false })) {
    contentDir = join(__dirname, '../../../src/content');
  }

  const allEmbeds = [];
  const articleFile = join(contentDir, 'article.mdx');

  // Extract the embeds of one file's content, tag them and append them.
  const appendEmbeds = (fileContent, sourceFile) => {
    const embeds = extractHtmlEmbeds(fileContent);
    embeds.forEach((embed) => {
      embed.sourceFile = sourceFile;
    });
    allEmbeds.push(...embeds);
  };

  // Read one file and append its embeds, labelled with its content-relative path.
  const appendEmbedsFromFile = (filePath) => {
    const fileContent = readFileSync(filePath, 'utf-8');
    appendEmbeds(fileContent, `content/${relative(contentDir, filePath)}`);
  };

  try {
    const articleContent = readFileSync(articleFile, 'utf-8');
    appendEmbeds(articleContent, 'content/article.mdx');

    // Chapters, in the order the article uses them
    const chapterOrder = parseArticleChapters(articleContent, contentDir);
    for (const chapterPath of chapterOrder) {
      try {
        appendEmbedsFromFile(chapterPath);
      } catch (error) {
        console.error(`Error reading chapter ${chapterPath}:`, error);
      }
    }

    // Any other MDX file not covered above
    const allMdxFiles = findMdxFiles(contentDir, contentDir, [], true);
    const processedFiles = new Set([articleFile, ...chapterOrder]);
    for (const filePath of allMdxFiles) {
      if (processedFiles.has(filePath)) continue;
      try {
        appendEmbedsFromFile(filePath);
      } catch (error) {
        console.error(`Error reading ${filePath}:`, error);
      }
    }
  } catch (error) {
    console.error('Error processing article:', error);
    // Fallback: process every MDX file found by the directory scan
    const mdxFiles = findMdxFiles(contentDir, contentDir, [], true);
    for (const filePath of mdxFiles) {
      try {
        appendEmbedsFromFile(filePath);
      } catch (err) {
        console.error(`Error reading ${filePath}:`, err);
      }
    }
  }

  // De-duplicate by src, keeping the FIRST occurrence. (Building the Map
  // straight from entries would keep the last occurrence's data instead.)
  const firstBySrc = new Map();
  for (const embed of allEmbeds) {
    if (!firstBySrc.has(embed.src)) {
      firstBySrc.set(embed.src, embed);
    }
  }
  return [...firstBySrc.values()];
}
/**
 * Read the value of one attribute from a JSX tag's source text.
 *
 * Three value forms are tried in this order: JSX template string
 * (attr={`...`}), single-quoted, double-quoted. Values may span lines and are
 * trimmed. The name is matched case-insensitively and as a whole attribute
 * name, so `id` does not match inside `data-id` or `gridId`.
 *
 * @param {string} attrName - Attribute name to look for.
 * @param {string} tagContent - Source text of the tag.
 * @returns {string|undefined} The trimmed value, or undefined when not found.
 */
function extractAttrFromTag(attrName, tagContent) {
  // Escape the name so it is always treated literally inside the pattern.
  const safeName = attrName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const prefix = `(?<![\\w-])${safeName}\\s*=\\s*`;

  const valuePatterns = [
    new RegExp(prefix + '\\{`([\\s\\S]*?)`\\}', 'i'),
    new RegExp(prefix + "'([\\s\\S]*?)'", 'i'),
    new RegExp(prefix + '"([\\s\\S]*?)"', 'i'),
  ];

  for (const pattern of valuePatterns) {
    const found = tagContent.match(pattern);
    if (found) return found[1].trim();
  }
  return undefined;
}
/**
 * Tell whether a character offset falls within any `<Wide>...</Wide>` block.
 *
 * @param {string} content - Source text to search.
 * @param {number} position - Zero-based offset into `content`.
 * @returns {boolean} True when the offset lies between the start of an opening
 *   `<Wide` tag (inclusive) and the end of its closing tag (exclusive).
 */
function isPositionInsideWide(content, position) {
  const wideBlocks = content.matchAll(/<Wide[\s\S]*?>([\s\S]*?)<\/Wide>/gi);

  for (const block of wideBlocks) {
    const blockStart = block.index;
    const blockEnd = blockStart + block[0].length;
    const isBefore = position < blockStart;
    const isAfter = position >= blockEnd;
    if (!isBefore && !isAfter) {
      return true;
    }
  }

  return false;
}
/**
 * Extract every visual element (HtmlEmbed, Image, Markdown table) from content
 * together with the offset at which it starts.
 *
 * - Embeds: the tag is scanned up to its closing `/>` while skipping strings
 *   and JSX braces. Boolean props are detected on the tag with attribute
 *   values removed, and `config={{ ... }}` is brace-matched without counting
 *   braces that sit inside string literals.
 * - Images: only `<Image ... />` with `src={...}`; quoted `alt`/`caption`
 *   values may contain the other quote character, and `caption` may also be a
 *   JSX template string.
 * - Tables: cells are split on unescaped pipes and empty cells are kept so
 *   rows stay aligned with the header.
 *
 * NOTE(security): `config` is evaluated with `new Function`, which executes
 * whatever the MDX file contains; only run this on trusted content.
 *
 * @param {string} content - Raw MDX/Markdown source.
 * @returns {Array<Object>} Items with `type` of `'embed'`, `'image'` or
 *   `'table'` and a numeric `position`, sorted by `position`.
 */
function extractAllVisualsWithPosition(content) {
  const visuals = [];

  // Read the tag starting at `start` up to its closing "/>", ignoring "/>"
  // inside strings or unbalanced JSX braces.
  const readEmbedTag = (start) => {
    let pos = start + 10; // After "<HtmlEmbed"
    let tagContent = '<HtmlEmbed';
    let inString = false;
    let stringDelim = null;
    let braceDepth = 0;
    while (pos < content.length) {
      const char = content[pos];
      const prevChar = pos > 0 ? content[pos - 1] : '';
      tagContent += char;
      if (!inString) {
        if ((char === '`' || char === '"' || char === "'") && prevChar !== '\\') {
          inString = true;
          stringDelim = char;
        }
      } else if (char === stringDelim && prevChar !== '\\') {
        inString = false;
        stringDelim = null;
      }
      if (!inString) {
        if (char === '{') braceDepth++;
        else if (char === '}') braceDepth--;
      }
      if (!inString && braceDepth === 0 && char === '/' && pos + 1 < content.length && content[pos + 1] === '>') {
        tagContent += '>';
        break;
      }
      pos++;
    }
    return tagContent;
  };

  // Copy of the tag with quoted strings and {...} expressions removed, leaving
  // only attribute names and bare props.
  const stripAttrValues = (tag) => {
    let bare = '';
    let quote = null;
    let depth = 0;
    for (let i = 0; i < tag.length; i++) {
      const ch = tag[i];
      const escaped = i > 0 && tag[i - 1] === '\\';
      if (quote !== null) {
        if (ch === quote && !escaped) quote = null;
      } else if ((ch === '"' || ch === "'" || ch === '`') && !escaped) {
        quote = ch;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        if (depth > 0) depth--;
      } else if (depth === 0) {
        bare += ch;
      }
    }
    return bare;
  };

  // Parse config={{ ... }} into an object; null when absent or unparsable.
  const parseJsxConfig = (tagContent) => {
    const jsxConfigMatch = tagContent.match(/config\s*=\s*\{\{/i);
    if (!jsxConfigMatch) return null;
    try {
      const configStart = tagContent.indexOf('{{', jsxConfigMatch.index) + 2;
      let braceCount = 1;
      let configEnd = configStart;
      let quote = null;
      for (let i = configStart; i < tagContent.length && braceCount > 0; i++) {
        const ch = tagContent[i];
        if (quote !== null) {
          // Inside a string literal: braces here are plain text.
          if (ch === quote && tagContent[i - 1] !== '\\') quote = null;
          continue;
        }
        if (ch === '"' || ch === "'" || ch === '`') {
          quote = ch;
          continue;
        }
        if (ch === '{') braceCount++;
        if (ch === '}') braceCount--;
        if (braceCount === 0) configEnd = i;
      }
      const jsxContent = tagContent.substring(configStart, configEnd).trim();
      return new Function('return ({' + jsxContent + '})')();
    } catch (e) {
      console.warn('[extract-embeds] Config parsing failed:', e.message);
      return null;
    }
  };

  // Value of a quoted attribute (name="..." or name='...'), or null.
  const quotedAttr = (tag, name) => {
    const pattern = new RegExp(
      `(?<![\\w-])${name}\\s*=\\s*(["'])((?:(?!\\1)[\\s\\S])+)\\1`,
      'i',
    );
    const found = tag.match(pattern);
    return found ? found[2] : null;
  };

  // Split one table row into HTML cells, dropping only the outer pipes.
  const parseRow = (row) => {
    const inner = row.trim().replace(/^\|/, '').replace(/(?<!\\)\|$/, '');
    return inner
      .split(/(?<!\\)\|/)
      .map((cell) => markdownToHtml(cell.trim().replace(/\\\|/g, '|')));
  };

  // HtmlEmbeds with position and all props
  for (const hit of content.matchAll(/<HtmlEmbed/gi)) {
    const position = hit.index;
    const tagContent = readEmbedTag(position);

    const src = extractAttrFromTag('src', tagContent);
    if (!src) continue;

    const bareTag = stripAttrValues(tagContent);
    const wideAttr = /\bwide\b/i.test(bareTag);

    visuals.push({
      type: 'embed',
      position,
      src,
      title: extractAttrFromTag('title', tagContent),
      desc: extractAttrFromTag('desc', tagContent),
      id: extractAttrFromTag('id', tagContent),
      data: extractAttrFromTag('data', tagContent),
      frameless: /\bframeless\b/i.test(bareTag),
      config: parseJsxConfig(tagContent),
      wide: isPositionInsideWide(content, position) || wideAttr,
      skipGallery: /\bskipGallery\b/i.test(bareTag),
    });
  }

  // Images with position
  for (const hit of content.matchAll(/<Image[^>]*\/>/gi)) {
    const tag = hit[0];
    const srcMatch = tag.match(/(?<![\w-])src\s*=\s*\{([^}]+)\}/i);
    if (!srcMatch) continue;

    let caption = quotedAttr(tag, 'caption');
    if (caption === null) {
      const templateCaption = tag.match(/(?<![\w-])caption\s*=\s*\{`([^`]+)`\}/i);
      caption = templateCaption ? templateCaption[1] : null;
    }
    const alt = quotedAttr(tag, 'alt');

    visuals.push({
      type: 'image',
      position: hit.index,
      src: srcMatch[1].trim(),
      alt: alt === null ? 'Image' : alt,
      caption,
    });
  }

  // Tables with position (header row, separator row, one or more data rows)
  const tablePattern = /(\|[^\n]+\|\r?\n\|[-:\s|]+\|\r?\n(?:\|[^\n]+\|(?:\r?\n)?)+)/g;
  let tableIndex = 0;
  for (const hit of content.matchAll(tablePattern)) {
    const tableContent = hit[1].trim();
    const rows = tableContent.split(/\r?\n/).filter((row) => row.trim());
    if (rows.length < 3) continue;

    visuals.push({
      type: 'table',
      position: hit.index,
      id: `table-${tableIndex++}`,
      headers: parseRow(rows[0]),
      // Index 1 is the separator row.
      rows: rows.slice(2).map(parseRow),
    });
  }

  // Order of appearance
  visuals.sort((a, b) => a.position - b.position);
  return visuals;
}
/**
 * Load all visual elements (embeds, images, tables) from the MDX content files.
 *
 * Order: `article.mdx`, then the chapters in the order the article uses them,
 * then every remaining `.mdx` file found under the content directory. Files
 * located in a `demo` directory are skipped; the check accepts both `/` and
 * `\` as path separators. Within a file, items keep their order of appearance.
 *
 * If the article cannot be processed, the error is logged and whatever was
 * collected so far (possibly nothing) is returned.
 *
 * @returns {Array<Object>} Visual items, each with an added `sourceFile`.
 *   Duplicates are kept.
 */
export function loadAllVisualsFromMDX() {
  const __filename = fileURLToPath(import.meta.url);
  const __dirname = dirname(__filename);

  // Dev layout first, then the two build layouts.
  let contentDir = join(__dirname, '../content');
  if (!statSync(contentDir, { throwIfNoEntry: false })) {
    contentDir = join(__dirname, '../../src/content');
  }
  if (!statSync(contentDir, { throwIfNoEntry: false })) {
    contentDir = join(__dirname, '../../../src/content');
  }

  const allVisuals = [];
  const articleFile = join(contentDir, 'article.mdx');

  // True when the path has a "demo" directory segment, with either separator.
  const isDemoPath = (filePath) => /[\\/]demo[\\/]/.test(filePath);

  // Extract the visuals of one file's content, tag them and append them.
  const appendVisuals = (fileContent, sourceFile) => {
    const fileVisuals = extractAllVisualsWithPosition(fileContent);
    fileVisuals.forEach((item) => {
      item.sourceFile = sourceFile;
    });
    allVisuals.push(...fileVisuals);
  };

  // Read one file and append its visuals, labelled with its content-relative path.
  const appendVisualsFromFile = (filePath) => {
    const fileContent = readFileSync(filePath, 'utf-8');
    appendVisuals(fileContent, `content/${relative(contentDir, filePath)}`);
  };

  try {
    const articleContent = readFileSync(articleFile, 'utf-8');
    appendVisuals(articleContent, 'content/article.mdx');

    // Chapters, in the order the article uses them
    const chapterOrder = parseArticleChapters(articleContent, contentDir);
    for (const chapterPath of chapterOrder) {
      if (isDemoPath(chapterPath)) continue;
      try {
        appendVisualsFromFile(chapterPath);
      } catch (error) {
        console.error(`Error reading chapter ${chapterPath}:`, error);
      }
    }

    // Other MDX files (not in demo)
    const allMdxFiles = findMdxFiles(contentDir, contentDir, [], true);
    const processedFiles = new Set([articleFile, ...chapterOrder]);
    for (const filePath of allMdxFiles) {
      if (processedFiles.has(filePath) || isDemoPath(filePath)) continue;
      try {
        appendVisualsFromFile(filePath);
      } catch (error) {
        console.error(`Error reading ${filePath}:`, error);
      }
    }
  } catch (error) {
    console.error('Error processing article:', error);
  }

  // Keep all occurrences (no deduplication)
  // Duplicates will be numbered in dataviz.astro (e.g., d3-line-chart, d3-line-chart-2)
  return allVisuals;
}