/* * Copyright 2023 Vercel, Inc. * Adapted from: https://github.com/vercel/streamdown/blob/main/packages/streamdown/lib/parse-blocks.tsx */ import { Lexer } from "marked"; /** * Parses markdown into independent blocks for efficient memoization during streaming. * Blocks are split at natural boundaries while keeping related content together. */ export function parseMarkdownIntoBlocks(markdown: string): string[] { // Check if the markdown contains footnotes (references or definitions) // Footnote references: [^1], [^label], etc. // Footnote definitions: [^1]: text, [^label]: text, etc. // Use atomic groups or possessive quantifiers to prevent backtracking const hasFootnoteReference = /\[\^[^\]\s]{1,200}\](?!:)/.test(markdown); const hasFootnoteDefinition = /\[\^[^\]\s]{1,200}\]:/.test(markdown); // If footnotes are present, return the entire document as a single block // This ensures footnote references and definitions remain in the same mdast tree if (hasFootnoteReference || hasFootnoteDefinition) { return [markdown]; } const tokens = Lexer.lex(markdown, { gfm: true }); // Post-process to merge consecutive blocks that belong together const mergedBlocks: string[] = []; const htmlStack: string[] = []; // Track opening HTML tags for (let i = 0; i < tokens.length; i++) { const token = tokens[i]; const currentBlock = token.raw; // Check if we're inside an HTML block if (htmlStack.length > 0) { // We're inside an HTML block, merge with the previous block mergedBlocks[mergedBlocks.length - 1] += currentBlock; // Check if this token closes an HTML tag if (token.type === "html") { const closingTagMatch = currentBlock.match(/<\/(\w+)>/); if (closingTagMatch) { const closingTag = closingTagMatch[1]; // Check if this closes the most recent opening tag if (htmlStack[htmlStack.length - 1] === closingTag) { htmlStack.pop(); } } } continue; } // Check if this is an opening HTML block tag if (token.type === "html" && token.block) { const openingTagMatch = currentBlock.match(/<(\w+)[\s>]/); if (openingTagMatch) { const tagName = openingTagMatch[1]; // Check if this is a self-closing tag or if there's a closing tag in the same block const hasClosingTag = currentBlock.includes(``); if (!hasClosingTag) { // This is an opening tag without a closing tag in the same block htmlStack.push(tagName); } } } // Math block merging logic (existing) // Check if this is a standalone $$ that might be a closing delimiter if (currentBlock.trim() === "$$" && mergedBlocks.length > 0) { const previousBlock = mergedBlocks.at(-1); if (!previousBlock) { mergedBlocks.push(currentBlock); continue; } // Check if the previous block starts with $$ but doesn't end with $$ const prevStartsWith$$ = previousBlock.trimStart().startsWith("$$"); const prevDollarCount = (previousBlock.match(/\$\$/g) || []).length; // If previous block has odd number of $$ and starts with $$, merge them if (prevStartsWith$$ && prevDollarCount % 2 === 1) { mergedBlocks[mergedBlocks.length - 1] = previousBlock + currentBlock; continue; } } // Check if current block ends with $$ and previous block started with $$ but didn't close if (mergedBlocks.length > 0 && currentBlock.trimEnd().endsWith("$$")) { const previousBlock = mergedBlocks.at(-1); if (!previousBlock) { mergedBlocks.push(currentBlock); continue; } const prevStartsWith$$ = previousBlock.trimStart().startsWith("$$"); const prevDollarCount = (previousBlock.match(/\$\$/g) || []).length; const currDollarCount = (currentBlock.match(/\$\$/g) || []).length; // If previous block has unclosed math (odd $$) and current block ends with $$ // AND current block doesn't start with $$, it's likely a continuation if ( prevStartsWith$$ && prevDollarCount % 2 === 1 && !currentBlock.trimStart().startsWith("$$") && currDollarCount === 1 ) { mergedBlocks[mergedBlocks.length - 1] = previousBlock + currentBlock; continue; } } mergedBlocks.push(currentBlock); } return mergedBlocks; }