victor HF Staff committed on
Commit
cedc9cf
·
1 Parent(s): 234c161

Add incomplete markdown parser for streaming support

Browse files

Introduces parseIncompleteMarkdown to preprocess and complete partial markdown tokens, improving smooth streaming and rendering of incomplete markdown. Updates processTokens and processTokensSync in marked.ts to use this preprocessing step before lexing content.

src/lib/utils/marked.ts CHANGED
@@ -8,6 +8,7 @@ type SimpleSource = {
8
  link: string;
9
  };
10
  import hljs from "highlight.js";
 
11
 
12
  interface katexBlockToken extends Tokens.Generic {
13
  type: "katexBlock";
@@ -199,8 +200,11 @@ type TextToken = {
199
  };
200
 
201
  export async function processTokens(content: string, sources: SimpleSource[]): Promise<Token[]> {
 
 
 
202
  const marked = createMarkedInstance(sources);
203
- const tokens = marked.lexer(content);
204
 
205
  const processedTokens = await Promise.all(
206
  tokens.map(async (token) => {
@@ -225,8 +229,11 @@ export async function processTokens(content: string, sources: SimpleSource[]): P
225
  }
226
 
227
  export function processTokensSync(content: string, sources: SimpleSource[]): Token[] {
 
 
 
228
  const marked = createMarkedInstance(sources);
229
- const tokens = marked.lexer(content);
230
  return tokens.map((token) => {
231
  if (token.type === "code") {
232
  return {
 
8
  link: string;
9
  };
10
  import hljs from "highlight.js";
11
+ import { parseIncompleteMarkdown } from "./parseIncompleteMarkdown";
12
 
13
  interface katexBlockToken extends Tokens.Generic {
14
  type: "katexBlock";
 
200
  };
201
 
202
  export async function processTokens(content: string, sources: SimpleSource[]): Promise<Token[]> {
203
+ // Apply incomplete markdown preprocessing for smooth streaming
204
+ const processedContent = parseIncompleteMarkdown(content);
205
+
206
  const marked = createMarkedInstance(sources);
207
+ const tokens = marked.lexer(processedContent);
208
 
209
  const processedTokens = await Promise.all(
210
  tokens.map(async (token) => {
 
229
  }
230
 
231
  export function processTokensSync(content: string, sources: SimpleSource[]): Token[] {
232
+ // Apply incomplete markdown preprocessing for smooth streaming
233
+ const processedContent = parseIncompleteMarkdown(content);
234
+
235
  const marked = createMarkedInstance(sources);
236
+ const tokens = marked.lexer(processedContent);
237
  return tokens.map((token) => {
238
  if (token.type === "code") {
239
  return {
src/lib/utils/parseIncompleteMarkdown.ts ADDED
@@ -0,0 +1,606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2023 Vercel, Inc.
3
+ * Source: https://github.com/vercel/streamdown/blob/main/packages/streamdown/lib/parse-incomplete-markdown.ts
4
+ */
5
+
6
+ const linkImagePattern = /(!?\[)([^\]]*?)$/;
7
+ const boldPattern = /(\*\*)([^*]*?)$/;
8
+ const italicPattern = /(__)([^_]*?)$/;
9
+ const boldItalicPattern = /(\*\*\*)([^*]*?)$/;
10
+ const singleAsteriskPattern = /(\*)([^*]*?)$/;
11
+ const singleUnderscorePattern = /(_)([^_]*?)$/;
12
+ const inlineCodePattern = /(`)([^`]*?)$/;
13
+ const strikethroughPattern = /(~~)([^~]*?)$/;
14
+
15
+ // Helper function to check if we have a complete code block
16
+ const hasCompleteCodeBlock = (text: string): boolean => {
17
+ const tripleBackticks = (text.match(/```/g) || []).length;
18
+ return (
19
+ tripleBackticks > 0 && tripleBackticks % 2 === 0 && text.includes("\n")
20
+ );
21
+ };
22
+
23
+ // Handles incomplete links and images by preserving them with a special marker
24
+ const handleIncompleteLinksAndImages = (text: string): string => {
25
+ // First check for incomplete URLs: [text](partial-url or ![text](partial-url without closing )
26
+ // Pattern: !?[text](url-without-closing-paren at end of string
27
+ const incompleteLinkUrlPattern = /(!?)\[([^\]]+)\]\(([^)]+)$/;
28
+ const incompleteLinkUrlMatch = text.match(incompleteLinkUrlPattern);
29
+
30
+ if (incompleteLinkUrlMatch) {
31
+ const isImage = incompleteLinkUrlMatch[1] === "!";
32
+ const linkText = incompleteLinkUrlMatch[2];
33
+ const partialUrl = incompleteLinkUrlMatch[3];
34
+
35
+ // Find the start position of this link/image pattern
36
+ const matchStart = text.lastIndexOf(
37
+ `${isImage ? "!" : ""}[${linkText}](${partialUrl}`
38
+ );
39
+ const beforeLink = text.substring(0, matchStart);
40
+
41
+ if (isImage) {
42
+ // For images with incomplete URLs, remove them entirely
43
+ return beforeLink;
44
+ }
45
+
46
+ // For links with incomplete URLs, replace the URL with placeholder and close it
47
+ return `${beforeLink}[${linkText}](streamdown:incomplete-link)`;
48
+ }
49
+
50
+ // Then check for incomplete link text: [partial-text without closing ]
51
+ const linkMatch = text.match(linkImagePattern);
52
+
53
+ if (linkMatch) {
54
+ const isImage = linkMatch[1].startsWith("!");
55
+
56
+ // For images, we still remove them as they can't show skeleton
57
+ if (isImage) {
58
+ const startIndex = text.lastIndexOf(linkMatch[1]);
59
+ return text.substring(0, startIndex);
60
+ }
61
+
62
+ // For links, preserve the text and close the link with a
63
+ // special placeholder URL that indicates it's incomplete
64
+ return `${text}](streamdown:incomplete-link)`;
65
+ }
66
+
67
+ return text;
68
+ };
69
+
70
+ // Completes incomplete bold formatting (**)
71
+ const handleIncompleteBold = (text: string): string => {
72
+ // Don't process if inside a complete code block
73
+ if (hasCompleteCodeBlock(text)) {
74
+ return text;
75
+ }
76
+
77
+ const boldMatch = text.match(boldPattern);
78
+
79
+ if (boldMatch) {
80
+ // Don't close if there's no meaningful content after the opening markers
81
+ // boldMatch[2] contains the content after **
82
+ // Check if content is only whitespace or other emphasis markers
83
+ const contentAfterMarker = boldMatch[2];
84
+ if (!contentAfterMarker || /^[\s_~*`]*$/.test(contentAfterMarker)) {
85
+ return text;
86
+ }
87
+
88
+ // Check if the bold marker is in a list item context
89
+ // Find the position of the matched bold marker
90
+ const markerIndex = text.lastIndexOf(boldMatch[1]);
91
+ const beforeMarker = text.substring(0, markerIndex);
92
+ const lastNewlineBeforeMarker = beforeMarker.lastIndexOf("\n");
93
+ const lineStart =
94
+ lastNewlineBeforeMarker === -1 ? 0 : lastNewlineBeforeMarker + 1;
95
+ const lineBeforeMarker = text.substring(lineStart, markerIndex);
96
+
97
+ // Check if this line is a list item with just the bold marker
98
+ if (/^[\s]*[-*+][\s]+$/.test(lineBeforeMarker)) {
99
+ // This is a list item with just emphasis markers
100
+ // Check if content after marker spans multiple lines
101
+ const hasNewlineInContent = contentAfterMarker.includes("\n");
102
+ if (hasNewlineInContent) {
103
+ // Don't complete if the content spans to another line
104
+ return text;
105
+ }
106
+ }
107
+
108
+ const asteriskPairs = (text.match(/\*\*/g) || []).length;
109
+ if (asteriskPairs % 2 === 1) {
110
+ return `${text}**`;
111
+ }
112
+ }
113
+
114
+ return text;
115
+ };
116
+
117
+ // Completes incomplete italic formatting with double underscores (__)
118
+ const handleIncompleteDoubleUnderscoreItalic = (text: string): string => {
119
+ const italicMatch = text.match(italicPattern);
120
+
121
+ if (italicMatch) {
122
+ // Don't close if there's no meaningful content after the opening markers
123
+ // italicMatch[2] contains the content after __
124
+ // Check if content is only whitespace or other emphasis markers
125
+ const contentAfterMarker = italicMatch[2];
126
+ if (!contentAfterMarker || /^[\s_~*`]*$/.test(contentAfterMarker)) {
127
+ return text;
128
+ }
129
+
130
+ // Check if the underscore marker is in a list item context
131
+ // Find the position of the matched underscore marker
132
+ const markerIndex = text.lastIndexOf(italicMatch[1]);
133
+ const beforeMarker = text.substring(0, markerIndex);
134
+ const lastNewlineBeforeMarker = beforeMarker.lastIndexOf("\n");
135
+ const lineStart =
136
+ lastNewlineBeforeMarker === -1 ? 0 : lastNewlineBeforeMarker + 1;
137
+ const lineBeforeMarker = text.substring(lineStart, markerIndex);
138
+
139
+ // Check if this line is a list item with just the underscore marker
140
+ if (/^[\s]*[-*+][\s]+$/.test(lineBeforeMarker)) {
141
+ // This is a list item with just emphasis markers
142
+ // Check if content after marker spans multiple lines
143
+ const hasNewlineInContent = contentAfterMarker.includes("\n");
144
+ if (hasNewlineInContent) {
145
+ // Don't complete if the content spans to another line
146
+ return text;
147
+ }
148
+ }
149
+
150
+ const underscorePairs = (text.match(/__/g) || []).length;
151
+ if (underscorePairs % 2 === 1) {
152
+ return `${text}__`;
153
+ }
154
+ }
155
+
156
+ return text;
157
+ };
158
+
159
+ // Counts single asterisks that are not part of double asterisks, not escaped, and not list markers
160
+ const countSingleAsterisks = (text: string): number => {
161
+ return text.split("").reduce((acc, char, index) => {
162
+ if (char === "*") {
163
+ const prevChar = text[index - 1];
164
+ const nextChar = text[index + 1];
165
+ // Skip if escaped with backslash
166
+ if (prevChar === "\\") {
167
+ return acc;
168
+ }
169
+ // Check if this is a list marker (asterisk at start of line followed by space)
170
+ // Look backwards to find the start of the current line
171
+ let lineStartIndex = index;
172
+ for (let i = index - 1; i >= 0; i--) {
173
+ if (text[i] === "\n") {
174
+ lineStartIndex = i + 1;
175
+ break;
176
+ }
177
+ if (i === 0) {
178
+ lineStartIndex = 0;
179
+ break;
180
+ }
181
+ }
182
+ // Check if this asterisk is at the beginning of a line (with optional whitespace)
183
+ const beforeAsterisk = text.substring(lineStartIndex, index);
184
+ if (
185
+ beforeAsterisk.trim() === "" &&
186
+ (nextChar === " " || nextChar === "\t")
187
+ ) {
188
+ // This is likely a list marker, don't count it
189
+ return acc;
190
+ }
191
+ if (prevChar !== "*" && nextChar !== "*") {
192
+ return acc + 1;
193
+ }
194
+ }
195
+ return acc;
196
+ }, 0);
197
+ };
198
+
199
+ // Completes incomplete italic formatting with single asterisks (*)
200
+ const handleIncompleteSingleAsteriskItalic = (text: string): string => {
201
+ // Don't process if inside a complete code block
202
+ if (hasCompleteCodeBlock(text)) {
203
+ return text;
204
+ }
205
+
206
+ const singleAsteriskMatch = text.match(singleAsteriskPattern);
207
+
208
+ if (singleAsteriskMatch) {
209
+ // Find the first single asterisk position (not part of **)
210
+ let firstSingleAsteriskIndex = -1;
211
+ for (let i = 0; i < text.length; i++) {
212
+ if (text[i] === "*" && text[i - 1] !== "*" && text[i + 1] !== "*") {
213
+ firstSingleAsteriskIndex = i;
214
+ break;
215
+ }
216
+ }
217
+
218
+ if (firstSingleAsteriskIndex === -1) {
219
+ return text;
220
+ }
221
+
222
+ // Get content after the first single asterisk
223
+ const contentAfterFirstAsterisk = text.substring(
224
+ firstSingleAsteriskIndex + 1
225
+ );
226
+
227
+ // Check if there's meaningful content after the asterisk
228
+ // Don't close if content is only whitespace or emphasis markers
229
+ if (
230
+ !contentAfterFirstAsterisk ||
231
+ /^[\s_~*`]*$/.test(contentAfterFirstAsterisk)
232
+ ) {
233
+ return text;
234
+ }
235
+
236
+ const singleAsterisks = countSingleAsterisks(text);
237
+ if (singleAsterisks % 2 === 1) {
238
+ return `${text}*`;
239
+ }
240
+ }
241
+
242
+ return text;
243
+ };
244
+
245
+ // Check if a position is within a math block (between $ or $$)
246
+ const isWithinMathBlock = (text: string, position: number): boolean => {
247
+ // Count dollar signs before this position
248
+ let inInlineMath = false;
249
+ let inBlockMath = false;
250
+
251
+ for (let i = 0; i < text.length && i < position; i++) {
252
+ // Skip escaped dollar signs
253
+ if (text[i] === "\\" && text[i + 1] === "$") {
254
+ i++; // Skip the next character
255
+ continue;
256
+ }
257
+
258
+ if (text[i] === "$") {
259
+ // Check for block math ($$)
260
+ if (text[i + 1] === "$") {
261
+ inBlockMath = !inBlockMath;
262
+ i++; // Skip the second $
263
+ inInlineMath = false; // Block math takes precedence
264
+ } else if (!inBlockMath) {
265
+ // Only toggle inline math if not in block math
266
+ inInlineMath = !inInlineMath;
267
+ }
268
+ }
269
+ }
270
+
271
+ return inInlineMath || inBlockMath;
272
+ };
273
+
274
+ // Counts single underscores that are not part of double underscores, not escaped, and not in math blocks
275
+ const countSingleUnderscores = (text: string): number => {
276
+ return text.split("").reduce((acc, char, index) => {
277
+ if (char === "_") {
278
+ const prevChar = text[index - 1];
279
+ const nextChar = text[index + 1];
280
+ // Skip if escaped with backslash
281
+ if (prevChar === "\\") {
282
+ return acc;
283
+ }
284
+ // Skip if within math block
285
+ if (isWithinMathBlock(text, index)) {
286
+ return acc;
287
+ }
288
+ // Skip if underscore is word-internal (between word characters)
289
+ if (
290
+ prevChar &&
291
+ nextChar &&
292
+ /[\p{L}\p{N}_]/u.test(prevChar) &&
293
+ /[\p{L}\p{N}_]/u.test(nextChar)
294
+ ) {
295
+ return acc;
296
+ }
297
+ if (prevChar !== "_" && nextChar !== "_") {
298
+ return acc + 1;
299
+ }
300
+ }
301
+ return acc;
302
+ }, 0);
303
+ };
304
+
305
+ // Completes incomplete italic formatting with single underscores (_)
306
+ const handleIncompleteSingleUnderscoreItalic = (text: string): string => {
307
+ // Don't process if inside a complete code block
308
+ if (hasCompleteCodeBlock(text)) {
309
+ return text;
310
+ }
311
+
312
+ const singleUnderscoreMatch = text.match(singleUnderscorePattern);
313
+
314
+ if (singleUnderscoreMatch) {
315
+ // Find the first single underscore position (not part of __ and not word-internal)
316
+ let firstSingleUnderscoreIndex = -1;
317
+ for (let i = 0; i < text.length; i++) {
318
+ if (
319
+ text[i] === "_" &&
320
+ text[i - 1] !== "_" &&
321
+ text[i + 1] !== "_" &&
322
+ text[i - 1] !== "\\" &&
323
+ !isWithinMathBlock(text, i)
324
+ ) {
325
+ // Check if underscore is word-internal (between word characters)
326
+ const prevChar = i > 0 ? text[i - 1] : "";
327
+ const nextChar = i < text.length - 1 ? text[i + 1] : "";
328
+ if (
329
+ prevChar &&
330
+ nextChar &&
331
+ /[\p{L}\p{N}_]/u.test(prevChar) &&
332
+ /[\p{L}\p{N}_]/u.test(nextChar)
333
+ ) {
334
+ continue;
335
+ }
336
+
337
+ firstSingleUnderscoreIndex = i;
338
+ break;
339
+ }
340
+ }
341
+
342
+ if (firstSingleUnderscoreIndex === -1) {
343
+ return text;
344
+ }
345
+
346
+ // Get content after the first single underscore
347
+ const contentAfterFirstUnderscore = text.substring(
348
+ firstSingleUnderscoreIndex + 1
349
+ );
350
+
351
+ // Check if there's meaningful content after the underscore
352
+ // Don't close if content is only whitespace or emphasis markers
353
+ if (
354
+ !contentAfterFirstUnderscore ||
355
+ /^[\s_~*`]*$/.test(contentAfterFirstUnderscore)
356
+ ) {
357
+ return text;
358
+ }
359
+
360
+ const singleUnderscores = countSingleUnderscores(text);
361
+ if (singleUnderscores % 2 === 1) {
362
+ // If text ends with newline(s), insert underscore before them
363
+ const trailingNewlineMatch = text.match(/\n+$/);
364
+ if (trailingNewlineMatch) {
365
+ const textBeforeNewlines = text.slice(
366
+ 0,
367
+ -trailingNewlineMatch[0].length
368
+ );
369
+ return `${textBeforeNewlines}_${trailingNewlineMatch[0]}`;
370
+ }
371
+ return `${text}_`;
372
+ }
373
+ }
374
+
375
+ return text;
376
+ };
377
+
378
+ // Checks if a backtick at position i is part of a triple backtick sequence
379
+ const isPartOfTripleBacktick = (text: string, i: number): boolean => {
380
+ const isTripleStart = text.substring(i, i + 3) === "```";
381
+ const isTripleMiddle = i > 0 && text.substring(i - 1, i + 2) === "```";
382
+ const isTripleEnd = i > 1 && text.substring(i - 2, i + 1) === "```";
383
+
384
+ return isTripleStart || isTripleMiddle || isTripleEnd;
385
+ };
386
+
387
+ // Counts single backticks that are not part of triple backticks
388
+ const countSingleBackticks = (text: string): number => {
389
+ let count = 0;
390
+ for (let i = 0; i < text.length; i++) {
391
+ if (text[i] === "`" && !isPartOfTripleBacktick(text, i)) {
392
+ count++;
393
+ }
394
+ }
395
+ return count;
396
+ };
397
+
398
+ // Completes incomplete inline code formatting (`)
399
+ // Avoids completing if inside an incomplete code block
400
+ const handleIncompleteInlineCode = (text: string): string => {
401
+ // Check if we have inline triple backticks (starts with ``` and should end with ```)
402
+ // This pattern should ONLY match truly inline code (no newlines)
403
+ // Examples: ```code``` or ```python code```
404
+ const inlineTripleBacktickMatch = text.match(/^```[^`\n]*```?$/);
405
+ if (inlineTripleBacktickMatch && !text.includes("\n")) {
406
+ // Check if it ends with exactly 2 backticks (incomplete)
407
+ if (text.endsWith("``") && !text.endsWith("```")) {
408
+ return `${text}\``;
409
+ }
410
+ // Already complete inline triple backticks
411
+ return text;
412
+ }
413
+
414
+ // Check if we're inside a multi-line code block (complete or incomplete)
415
+ const allTripleBackticks = (text.match(/```/g) || []).length;
416
+ const insideIncompleteCodeBlock = allTripleBackticks % 2 === 1;
417
+
418
+ // Don't modify text if we have complete multi-line code blocks (even pairs of ```)
419
+ if (
420
+ allTripleBackticks > 0 &&
421
+ allTripleBackticks % 2 === 0 &&
422
+ text.includes("\n")
423
+ ) {
424
+ // We have complete multi-line code blocks, don't add any backticks
425
+ return text;
426
+ }
427
+
428
+ // Special case: if text ends with ```\n (triple backticks followed by newline)
429
+ // This is actually a complete code block, not incomplete
430
+ if (text.endsWith("```\n") || text.endsWith("```")) {
431
+ // Count all triple backticks - if even, it's complete
432
+ if (allTripleBackticks % 2 === 0) {
433
+ return text;
434
+ }
435
+ }
436
+
437
+ const inlineCodeMatch = text.match(inlineCodePattern);
438
+
439
+ if (inlineCodeMatch && !insideIncompleteCodeBlock) {
440
+ // Don't close if there's no meaningful content after the opening marker
441
+ // inlineCodeMatch[2] contains the content after `
442
+ // Check if content is only whitespace or other emphasis markers
443
+ const contentAfterMarker = inlineCodeMatch[2];
444
+ if (!contentAfterMarker || /^[\s_~*`]*$/.test(contentAfterMarker)) {
445
+ return text;
446
+ }
447
+
448
+ const singleBacktickCount = countSingleBackticks(text);
449
+ if (singleBacktickCount % 2 === 1) {
450
+ return `${text}\``;
451
+ }
452
+ }
453
+
454
+ return text;
455
+ };
456
+
457
+ // Completes incomplete strikethrough formatting (~~)
458
+ const handleIncompleteStrikethrough = (text: string): string => {
459
+ const strikethroughMatch = text.match(strikethroughPattern);
460
+
461
+ if (strikethroughMatch) {
462
+ // Don't close if there's no meaningful content after the opening markers
463
+ // strikethroughMatch[2] contains the content after ~~
464
+ // Check if content is only whitespace or other emphasis markers
465
+ const contentAfterMarker = strikethroughMatch[2];
466
+ if (!contentAfterMarker || /^[\s_~*`]*$/.test(contentAfterMarker)) {
467
+ return text;
468
+ }
469
+
470
+ const tildePairs = (text.match(/~~/g) || []).length;
471
+ if (tildePairs % 2 === 1) {
472
+ return `${text}~~`;
473
+ }
474
+ }
475
+
476
+ return text;
477
+ };
478
+
479
+ // Counts single dollar signs that are not part of double dollar signs and not escaped
480
+ const _countSingleDollarSigns = (text: string): number => {
481
+ return text.split("").reduce((acc, char, index) => {
482
+ if (char === "$") {
483
+ const prevChar = text[index - 1];
484
+ const nextChar = text[index + 1];
485
+ // Skip if escaped with backslash
486
+ if (prevChar === "\\") {
487
+ return acc;
488
+ }
489
+ if (prevChar !== "$" && nextChar !== "$") {
490
+ return acc + 1;
491
+ }
492
+ }
493
+ return acc;
494
+ }, 0);
495
+ };
496
+
497
+ // Completes incomplete block KaTeX formatting ($$)
498
+ const handleIncompleteBlockKatex = (text: string): string => {
499
+ // Count all $$ pairs in the text
500
+ const dollarPairs = (text.match(/\$\$/g) || []).length;
501
+
502
+ // If we have an even number of $$, the block is complete
503
+ if (dollarPairs % 2 === 0) {
504
+ return text;
505
+ }
506
+
507
+ // If we have an odd number, add closing $$
508
+ // Check if this looks like a multi-line math block (contains newlines after opening $$)
509
+ const firstDollarIndex = text.indexOf("$$");
510
+ const hasNewlineAfterStart =
511
+ firstDollarIndex !== -1 && text.indexOf("\n", firstDollarIndex) !== -1;
512
+
513
+ // For multi-line blocks, add newline before closing $$ if not present
514
+ if (hasNewlineAfterStart && !text.endsWith("\n")) {
515
+ return `${text}\n$$`;
516
+ }
517
+
518
+ // For inline blocks or when already ending with newline, just add $$
519
+ return `${text}$$`;
520
+ };
521
+
522
+ // Counts triple asterisks that are not part of quadruple or more asterisks
523
+ const countTripleAsterisks = (text: string): number => {
524
+ let count = 0;
525
+ const matches = text.match(/\*+/g) || [];
526
+
527
+ for (const match of matches) {
528
+ // Count how many complete triple asterisks are in this sequence
529
+ const asteriskCount = match.length;
530
+ if (asteriskCount >= 3) {
531
+ // Each group of exactly 3 asterisks counts as one triple asterisk marker
532
+ count += Math.floor(asteriskCount / 3);
533
+ }
534
+ }
535
+
536
+ return count;
537
+ };
538
+
539
+ // Completes incomplete bold-italic formatting (***)
540
+ const handleIncompleteBoldItalic = (text: string): string => {
541
+ // Don't process if inside a complete code block
542
+ if (hasCompleteCodeBlock(text)) {
543
+ return text;
544
+ }
545
+
546
+ // Don't process if text is only asterisks and has 4 or more consecutive asterisks
547
+ // This prevents cases like **** from being treated as incomplete ***
548
+ if (/^\*{4,}$/.test(text)) {
549
+ return text;
550
+ }
551
+
552
+ const boldItalicMatch = text.match(boldItalicPattern);
553
+
554
+ if (boldItalicMatch) {
555
+ // Don't close if there's no meaningful content after the opening markers
556
+ // boldItalicMatch[2] contains the content after ***
557
+ // Check if content is only whitespace or other emphasis markers
558
+ const contentAfterMarker = boldItalicMatch[2];
559
+ if (!contentAfterMarker || /^[\s_~*`]*$/.test(contentAfterMarker)) {
560
+ return text;
561
+ }
562
+
563
+ const tripleAsteriskCount = countTripleAsterisks(text);
564
+ if (tripleAsteriskCount % 2 === 1) {
565
+ return `${text}***`;
566
+ }
567
+ }
568
+
569
+ return text;
570
+ };
571
+
572
+ // Parses markdown text and removes incomplete tokens to prevent partial rendering
573
+ export const parseIncompleteMarkdown = (text: string): string => {
574
+ if (!text || typeof text !== "string") {
575
+ return text;
576
+ }
577
+
578
+ let result = text;
579
+
580
+ // Handle incomplete links and images first
581
+ const processedResult = handleIncompleteLinksAndImages(result);
582
+
583
+ // If we added an incomplete link marker, don't process other formatting
584
+ // as the content inside the link should be preserved as-is
585
+ if (processedResult.endsWith("](streamdown:incomplete-link)")) {
586
+ return processedResult;
587
+ }
588
+
589
+ result = processedResult;
590
+
591
+ // Handle various formatting completions
592
+ // Handle triple asterisks first (most specific)
593
+ result = handleIncompleteBoldItalic(result);
594
+ result = handleIncompleteBold(result);
595
+ result = handleIncompleteDoubleUnderscoreItalic(result);
596
+ result = handleIncompleteSingleAsteriskItalic(result);
597
+ result = handleIncompleteSingleUnderscoreItalic(result);
598
+ result = handleIncompleteInlineCode(result);
599
+ result = handleIncompleteStrikethrough(result);
600
+
601
+ // Handle KaTeX formatting (only block math with $$)
602
+ result = handleIncompleteBlockKatex(result);
603
+ // Note: We don't handle inline KaTeX with single $ as they're likely currency symbols
604
+
605
+ return result;
606
+ };