/*
 * Copyright 2023 Vercel, Inc.
 * Source: https://github.com/vercel/streamdown/blob/main/packages/streamdown/lib/parse-incomplete-markdown.ts
 */

// Each "trailing marker" pattern matches the LAST opening marker in the text
// together with everything that follows it up to the end of the string.
const linkImagePattern = /(!?\[)([^\]]*?)$/;
const boldPattern = /(\*\*)([^*]*?)$/;
const italicPattern = /(__)([^_]*?)$/;
const boldItalicPattern = /(\*\*\*)([^*]*?)$/;
const singleAsteriskPattern = /(\*)([^*]*?)$/;
const singleUnderscorePattern = /(_)([^_]*?)$/;
const inlineCodePattern = /(`)([^`]*?)$/;
const strikethroughPattern = /(~~)([^~]*?)$/;

// `[text](partial-url` or `![text](partial-url` with no closing `)` at end of string
const incompleteLinkUrlPattern = /(!?)\[([^\]]+)\]\(([^)]+)$/;
// Content consisting solely of whitespace and/or emphasis marker characters
const onlyWhitespaceOrMarkersPattern = /^[\s_~*`]*$/;
// A line prefix that is nothing but a list bullet (`-`, `*`, `+`) and whitespace
const bareListItemPrefixPattern = /^[\s]*[-*+][\s]+$/;
// A "word" character for the purpose of word-internal underscore detection
const wordCharPattern = /[\p{L}\p{N}_]/u;

/**
 * Reports whether the text contains at least one fence pair (an even, non-zero
 * number of ``` sequences) and at least one newline.
 */
const hasCompleteCodeBlock = (text: string): boolean => {
  const tripleBackticks = (text.match(/```/g) || []).length;
  return (
    tripleBackticks > 0 && tripleBackticks % 2 === 0 && text.includes("\n")
  );
};

/**
 * Returns the start index of the currently open fenced code block,
 * or -1 when every fence has been closed (or there are none).
 */
const getOpenCodeFenceIndex = (text: string): number => {
  let openFenceIndex = -1;
  let inFence = false;

  for (const match of text.matchAll(/```/g)) {
    const index = match.index ?? -1;
    if (index === -1) {
      continue;
    }
    if (inFence) {
      // This fence closes the current block
      inFence = false;
      openFenceIndex = -1;
    } else {
      // This fence opens a new block
      inFence = true;
      openFenceIndex = index;
    }
  }

  return openFenceIndex;
};

/**
 * Reports whether `position` lies inside code: either between two ``` fences
 * (or after an unclosed one), or after an odd number of single backticks that
 * are not themselves inside a fence.
 */
const isInsideCodeBlock = (text: string, position: number): boolean => {
  let inInlineCode = false;
  let inFencedCode = false;

  for (let i = 0; i < position; i++) {
    if (text.startsWith("```", i)) {
      inFencedCode = !inFencedCode;
      i += 2; // Skip the remaining two backticks of the fence
      continue;
    }
    // Single backticks only toggle inline code outside of fenced blocks
    if (!inFencedCode && text[i] === "`") {
      inInlineCode = !inInlineCode;
    }
  }

  return inInlineCode || inFencedCode;
};

/**
 * Handles an incomplete trailing link or image.
 *
 * - `[text](partial-url`  -> `[text](streamdown:incomplete-link)`
 * - `![alt](partial-url`  -> removed entirely
 * - `[partial text`       -> `[partial text](streamdown:incomplete-link)`
 * - `![partial alt`       -> removed entirely
 *
 * Brackets that start inside a fenced code block or an inline code span are
 * left untouched: there they are code (array literals, index expressions,
 * calls such as `handlers[name](arg`), not markdown links.
 */
const handleIncompleteLinksAndImages = (text: string): string => {
  // First check for incomplete URLs: [text](partial-url without closing )
  const incompleteUrlMatch = text.match(incompleteLinkUrlPattern);
  if (incompleteUrlMatch) {
    // The pattern is anchored at the end, so the match start can always be
    // derived from the match length if `index` is unavailable.
    const matchStart =
      incompleteUrlMatch.index ?? text.length - incompleteUrlMatch[0].length;
    if (isInsideCodeBlock(text, matchStart)) {
      return text;
    }

    const isImage = incompleteUrlMatch[1] === "!";
    const linkText = incompleteUrlMatch[2];
    const beforeLink = text.substring(0, matchStart);

    if (isImage) {
      // For images with incomplete URLs, remove them entirely
      return beforeLink;
    }
    // For links with incomplete URLs, replace the URL with placeholder and close it
    return `${beforeLink}[${linkText}](streamdown:incomplete-link)`;
  }

  // Then check for incomplete link text: [partial-text without closing ]
  const linkMatch = text.match(linkImagePattern);
  if (linkMatch) {
    // Use the regex's own match position rather than searching for the marker
    // again: a later "![" inside the unfinished alt text must not be mistaken
    // for the start of the match.
    const matchStart = linkMatch.index ?? text.length - linkMatch[0].length;
    if (isInsideCodeBlock(text, matchStart)) {
      return text;
    }

    const isImage = linkMatch[1].startsWith("!");
    if (isImage) {
      // For images, we still remove them as they can't show skeleton
      return text.substring(0, matchStart);
    }
    // For links, preserve the text and close the link with a
    // special placeholder URL that indicates it's incomplete
    return `${text}](streamdown:incomplete-link)`;
  }

  return text;
};

/**
 * Reports whether an emphasis marker is the first thing in a list item
 * (the line holds only a bullet and whitespace before the marker) while the
 * content following the marker already continues onto another line.
 */
const contentSpillsPastListItem = (
  text: string,
  markerIndex: number,
  contentAfterMarker: string
): boolean => {
  const lastNewlineBeforeMarker = text
    .substring(0, markerIndex)
    .lastIndexOf("\n");
  const lineStart =
    lastNewlineBeforeMarker === -1 ? 0 : lastNewlineBeforeMarker + 1;
  const lineBeforeMarker = text.substring(lineStart, markerIndex);

  return (
    bareListItemPrefixPattern.test(lineBeforeMarker) &&
    contentAfterMarker.includes("\n")
  );
};

/** Completes incomplete bold formatting (`**`) by appending a closing `**`. */
const handleIncompleteBold = (text: string): string => {
  // Don't process if the text contains a complete code block
  if (hasCompleteCodeBlock(text)) {
    return text;
  }

  const boldMatch = text.match(boldPattern);
  if (!boldMatch) {
    return text;
  }

  // Don't close if there's no meaningful content after the opening marker
  const contentAfterMarker = boldMatch[2];
  if (
    !contentAfterMarker ||
    onlyWhitespaceOrMarkersPattern.test(contentAfterMarker)
  ) {
    return text;
  }

  const markerIndex = text.lastIndexOf(boldMatch[1]);

  // Don't process if the marker is inside an incomplete code block
  const openFenceIndex = getOpenCodeFenceIndex(text);
  if (openFenceIndex !== -1 && markerIndex > openFenceIndex) {
    return text;
  }

  // Don't complete a list-item marker whose content spans to another line
  if (contentSpillsPastListItem(text, markerIndex, contentAfterMarker)) {
    return text;
  }

  const asteriskPairs = (text.match(/\*\*/g) || []).length;
  return asteriskPairs % 2 === 1 ? `${text}**` : text;
};

/** Completes incomplete double-underscore formatting (`__`) by appending `__`. */
const handleIncompleteDoubleUnderscoreItalic = (text: string): string => {
  // Don't process if the text contains a complete code block
  if (hasCompleteCodeBlock(text)) {
    return text;
  }

  const italicMatch = text.match(italicPattern);
  if (!italicMatch) {
    return text;
  }

  // Don't close if there's no meaningful content after the opening marker
  const contentAfterMarker = italicMatch[2];
  if (
    !contentAfterMarker ||
    onlyWhitespaceOrMarkersPattern.test(contentAfterMarker)
  ) {
    return text;
  }

  const markerIndex = text.lastIndexOf(italicMatch[1]);

  // Don't process if the marker is inside an incomplete code block
  const openFenceIndex = getOpenCodeFenceIndex(text);
  if (openFenceIndex !== -1 && markerIndex > openFenceIndex) {
    return text;
  }

  // Don't complete a list-item marker whose content spans to another line
  if (contentSpillsPastListItem(text, markerIndex, contentAfterMarker)) {
    return text;
  }

  const underscorePairs = (text.match(/__/g) || []).length;
  return underscorePairs % 2 === 1 ? `${text}__` : text;
};

/**
 * Counts single asterisks that are not adjacent to another asterisk,
 * not escaped with a backslash, and not list markers.
 */
const countSingleAsterisks = (text: string): number => {
  let count = 0;

  for (let index = 0; index < text.length; index++) {
    if (text[index] !== "*") {
      continue;
    }

    const prevChar = text[index - 1];
    const nextChar = text[index + 1];

    // Skip if escaped with backslash
    if (prevChar === "\\") {
      continue;
    }

    // A list marker is an asterisk preceded only by whitespace on its line
    // and followed by a space or tab.
    const lineStartIndex = text.lastIndexOf("\n", index - 1) + 1;
    const beforeAsterisk = text.substring(lineStartIndex, index);
    if (
      beforeAsterisk.trim() === "" &&
      (nextChar === " " || nextChar === "\t")
    ) {
      continue;
    }

    if (prevChar !== "*" && nextChar !== "*") {
      count++;
    }
  }

  return count;
};

/** Completes incomplete single-asterisk italic formatting (`*`). */
const handleIncompleteSingleAsteriskItalic = (text: string): string => {
  // Don't process if the text contains a complete code block
  if (hasCompleteCodeBlock(text)) {
    return text;
  }

  if (!text.match(singleAsteriskPattern)) {
    return text;
  }

  // Find the first single asterisk position (not part of **)
  let firstSingleAsteriskIndex = -1;
  for (let i = 0; i < text.length; i++) {
    if (text[i] === "*" && text[i - 1] !== "*" && text[i + 1] !== "*") {
      firstSingleAsteriskIndex = i;
      break;
    }
  }
  if (firstSingleAsteriskIndex === -1) {
    return text;
  }

  // Don't process if the marker is inside an incomplete code block
  const openFenceIndex = getOpenCodeFenceIndex(text);
  if (openFenceIndex !== -1 && firstSingleAsteriskIndex > openFenceIndex) {
    return text;
  }

  // Don't close if content after the asterisk is only whitespace or markers
  const contentAfterFirstAsterisk = text.substring(
    firstSingleAsteriskIndex + 1
  );
  if (
    !contentAfterFirstAsterisk ||
    onlyWhitespaceOrMarkersPattern.test(contentAfterFirstAsterisk)
  ) {
    return text;
  }

  return countSingleAsterisks(text) % 2 === 1 ? `${text}*` : text;
};

/**
 * Reports whether `position` falls inside a math span, i.e. after an unclosed
 * `$` (inline) or `$$` (block) delimiter. Backslash-escaped dollars are ignored.
 */
const isWithinMathBlock = (text: string, position: number): boolean => {
  let inInlineMath = false;
  let inBlockMath = false;

  for (let i = 0; i < text.length && i < position; i++) {
    // Skip escaped dollar signs
    if (text[i] === "\\" && text[i + 1] === "$") {
      i++; // Skip the next character
      continue;
    }

    if (text[i] === "$") {
      if (text[i + 1] === "$") {
        // Block math ($$) takes precedence and resets inline state
        inBlockMath = !inBlockMath;
        i++; // Skip the second $
        inInlineMath = false;
      } else if (!inBlockMath) {
        // Only toggle inline math if not in block math
        inInlineMath = !inInlineMath;
      }
    }
  }

  return inInlineMath || inBlockMath;
};

/**
 * Counts single underscores that are not adjacent to another underscore,
 * not escaped, not inside math, and not word-internal (e.g. `snake_case`).
 */
const countSingleUnderscores = (text: string): number => {
  let count = 0;

  for (let index = 0; index < text.length; index++) {
    if (text[index] !== "_") {
      continue;
    }

    const prevChar = text[index - 1];
    const nextChar = text[index + 1];

    // Skip if escaped with backslash
    if (prevChar === "\\") {
      continue;
    }
    // Skip if within math block
    if (isWithinMathBlock(text, index)) {
      continue;
    }
    // Skip if underscore is word-internal (between word characters)
    if (
      prevChar &&
      nextChar &&
      wordCharPattern.test(prevChar) &&
      wordCharPattern.test(nextChar)
    ) {
      continue;
    }

    if (prevChar !== "_" && nextChar !== "_") {
      count++;
    }
  }

  return count;
};

/**
 * Completes incomplete single-underscore italic formatting (`_`). When the
 * text ends with newline(s), the closing underscore is inserted before them.
 */
const handleIncompleteSingleUnderscoreItalic = (text: string): string => {
  // Don't process if the text contains a complete code block
  if (hasCompleteCodeBlock(text)) {
    return text;
  }

  if (!text.match(singleUnderscorePattern)) {
    return text;
  }

  // Find the first single underscore (not part of __, not escaped, not in
  // math, and not word-internal)
  let firstSingleUnderscoreIndex = -1;
  for (let i = 0; i < text.length; i++) {
    if (
      text[i] === "_" &&
      text[i - 1] !== "_" &&
      text[i + 1] !== "_" &&
      text[i - 1] !== "\\" &&
      !isWithinMathBlock(text, i)
    ) {
      const prevChar = i > 0 ? text[i - 1] : "";
      const nextChar = i < text.length - 1 ? text[i + 1] : "";
      if (
        prevChar &&
        nextChar &&
        wordCharPattern.test(prevChar) &&
        wordCharPattern.test(nextChar)
      ) {
        continue;
      }
      firstSingleUnderscoreIndex = i;
      break;
    }
  }
  if (firstSingleUnderscoreIndex === -1) {
    return text;
  }

  // Don't process if the marker is inside an incomplete code block
  const openFenceIndex = getOpenCodeFenceIndex(text);
  if (openFenceIndex !== -1 && firstSingleUnderscoreIndex > openFenceIndex) {
    return text;
  }

  // Don't close if content after the underscore is only whitespace or markers
  const contentAfterFirstUnderscore = text.substring(
    firstSingleUnderscoreIndex + 1
  );
  if (
    !contentAfterFirstUnderscore ||
    onlyWhitespaceOrMarkersPattern.test(contentAfterFirstUnderscore)
  ) {
    return text;
  }

  if (countSingleUnderscores(text) % 2 === 1) {
    // If text ends with newline(s), insert underscore before them
    const trailingNewlineMatch = text.match(/\n+$/);
    if (trailingNewlineMatch) {
      const textBeforeNewlines = text.slice(0, -trailingNewlineMatch[0].length);
      return `${textBeforeNewlines}_${trailingNewlineMatch[0]}`;
    }
    return `${text}_`;
  }

  return text;
};

/** Checks if the backtick at position `i` is part of a ``` sequence. */
const isPartOfTripleBacktick = (text: string, i: number): boolean => {
  const isTripleStart = text.substring(i, i + 3) === "```";
  const isTripleMiddle = i > 0 && text.substring(i - 1, i + 2) === "```";
  const isTripleEnd = i > 1 && text.substring(i - 2, i + 1) === "```";
  return isTripleStart || isTripleMiddle || isTripleEnd;
};

/** Counts backticks that are not part of a triple-backtick sequence. */
const countSingleBackticks = (text: string): number => {
  let count = 0;
  for (let i = 0; i < text.length; i++) {
    if (text[i] === "`" && !isPartOfTripleBacktick(text, i)) {
      count++;
    }
  }
  return count;
};

/**
 * Completes incomplete inline code formatting (`` ` ``).
 * Leaves the text alone when it is inside an incomplete fenced block or when
 * the text already contains complete multi-line fenced blocks.
 */
const handleIncompleteInlineCode = (text: string): string => {
  // Single-line text that starts with ``` and ends with `` or ```:
  // treat as inline triple-backtick code, e.g. ```code``` or ```python code```
  const inlineTripleBacktickMatch = text.match(/^```[^`\n]*```?$/);
  if (inlineTripleBacktickMatch && !text.includes("\n")) {
    // Ends with exactly 2 backticks: supply the missing third
    if (text.endsWith("``") && !text.endsWith("```")) {
      return `${text}\``;
    }
    // Already complete inline triple backticks
    return text;
  }

  const allTripleBackticks = (text.match(/```/g) || []).length;
  const insideIncompleteCodeBlock = allTripleBackticks % 2 === 1;

  // Complete multi-line code blocks (even pairs of ```): don't add backticks
  if (
    allTripleBackticks > 0 &&
    allTripleBackticks % 2 === 0 &&
    text.includes("\n")
  ) {
    return text;
  }

  // Text ending with a fence (optionally followed by a newline) and an even
  // fence count is a complete code block, not an incomplete one
  if (text.endsWith("```\n") || text.endsWith("```")) {
    if (allTripleBackticks % 2 === 0) {
      return text;
    }
  }

  const inlineCodeMatch = text.match(inlineCodePattern);
  if (inlineCodeMatch && !insideIncompleteCodeBlock) {
    // Don't close if there's no meaningful content after the opening marker
    const contentAfterMarker = inlineCodeMatch[2];
    if (
      !contentAfterMarker ||
      onlyWhitespaceOrMarkersPattern.test(contentAfterMarker)
    ) {
      return text;
    }

    if (countSingleBackticks(text) % 2 === 1) {
      return `${text}\``;
    }
  }

  return text;
};

/** Completes incomplete strikethrough formatting (`~~`) by appending `~~`. */
const handleIncompleteStrikethrough = (text: string): string => {
  const strikethroughMatch = text.match(strikethroughPattern);
  if (!strikethroughMatch) {
    return text;
  }

  // Don't close if there's no meaningful content after the opening marker
  const contentAfterMarker = strikethroughMatch[2];
  if (
    !contentAfterMarker ||
    onlyWhitespaceOrMarkersPattern.test(contentAfterMarker)
  ) {
    return text;
  }

  const tildePairs = (text.match(/~~/g) || []).length;
  return tildePairs % 2 === 1 ? `${text}~~` : text;
};

/**
 * Counts single dollar signs that are not adjacent to another dollar sign
 * and not escaped. Not called within this module.
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const _countSingleDollarSigns = (text: string): number => {
  let count = 0;
  for (let index = 0; index < text.length; index++) {
    if (text[index] !== "$") {
      continue;
    }
    const prevChar = text[index - 1];
    const nextChar = text[index + 1];
    // Skip if escaped with backslash
    if (prevChar === "\\") {
      continue;
    }
    if (prevChar !== "$" && nextChar !== "$") {
      count++;
    }
  }
  return count;
};

/**
 * Completes incomplete block KaTeX formatting (`$$`). When a newline follows
 * the first `$$` and the text does not already end with a newline, the
 * closing `$$` is placed on its own line.
 */
const handleIncompleteBlockKatex = (text: string): string => {
  const dollarPairs = (text.match(/\$\$/g) || []).length;

  // An even number of $$ means every block is closed
  if (dollarPairs % 2 === 0) {
    return text;
  }

  const firstDollarIndex = text.indexOf("$$");
  const hasNewlineAfterStart =
    firstDollarIndex !== -1 && text.indexOf("\n", firstDollarIndex) !== -1;

  // For multi-line blocks, add newline before closing $$ if not present
  if (hasNewlineAfterStart && !text.endsWith("\n")) {
    return `${text}\n$$`;
  }

  // For single-line blocks or when already ending with newline, just add $$
  return `${text}$$`;
};

/**
 * Counts triple-asterisk markers: every run of consecutive asterisks
 * contributes `floor(runLength / 3)` markers.
 */
const countTripleAsterisks = (text: string): number => {
  let count = 0;
  for (const run of text.match(/\*+/g) || []) {
    if (run.length >= 3) {
      count += Math.floor(run.length / 3);
    }
  }
  return count;
};

/** Completes incomplete bold-italic formatting (`***`) by appending `***`. */
const handleIncompleteBoldItalic = (text: string): string => {
  // Don't process if the text contains a complete code block
  if (hasCompleteCodeBlock(text)) {
    return text;
  }

  // Text made only of 4+ asterisks (e.g. ****) is not an incomplete ***
  if (/^\*{4,}$/.test(text)) {
    return text;
  }

  const boldItalicMatch = text.match(boldItalicPattern);
  if (!boldItalicMatch) {
    return text;
  }

  // Don't close if there's no meaningful content after the opening marker
  const contentAfterMarker = boldItalicMatch[2];
  if (
    !contentAfterMarker ||
    onlyWhitespaceOrMarkersPattern.test(contentAfterMarker)
  ) {
    return text;
  }

  const markerIndex = text.lastIndexOf(boldItalicMatch[1]);

  // Don't process if the marker is inside an incomplete code block
  const openFenceIndex = getOpenCodeFenceIndex(text);
  if (openFenceIndex !== -1 && markerIndex > openFenceIndex) {
    return text;
  }

  return countTripleAsterisks(text) % 2 === 1 ? `${text}***` : text;
};

/**
 * Repairs markdown that was cut off mid-token (for example while streaming),
 * so that partial syntax does not render as stray marker characters.
 *
 * Incomplete links/images are handled first; if a placeholder link was
 * appended, the result is returned immediately so the link text is preserved
 * as-is. Otherwise emphasis, inline code, strikethrough and block math
 * markers are closed in turn.
 *
 * @param text - Possibly truncated markdown. Falsy or non-string values are
 *   returned unchanged.
 * @returns The markdown with incomplete trailing tokens closed or removed.
 */
export const parseIncompleteMarkdown = (text: string): string => {
  if (!text || typeof text !== "string") {
    return text;
  }

  // Handle incomplete links and images first
  const processedResult = handleIncompleteLinksAndImages(text);

  // If we added an incomplete link marker, don't process other formatting
  if (processedResult.endsWith("](streamdown:incomplete-link)")) {
    return processedResult;
  }

  let result = processedResult;

  // Handle triple asterisks first (most specific)
  result = handleIncompleteBoldItalic(result);
  result = handleIncompleteBold(result);
  result = handleIncompleteDoubleUnderscoreItalic(result);
  result = handleIncompleteSingleAsteriskItalic(result);
  result = handleIncompleteSingleUnderscoreItalic(result);
  result = handleIncompleteInlineCode(result);
  result = handleIncompleteStrikethrough(result);

  // Handle KaTeX formatting (only block math with $$).
  // Inline KaTeX with single $ is not handled: those are likely currency symbols.
  result = handleIncompleteBlockKatex(result);

  return result;
};