victor HF Staff committed on
Commit
ec5d85c
·
unverified ·
1 Parent(s): 6a98172

Markdown rendering update (#1989)

Browse files

* Enable markdown rendering via web worker and improve code highlighting

* LanguageFn typing

src/lib/components/chat/MarkdownRenderer.svelte CHANGED
@@ -1,11 +1,10 @@
1
  <script lang="ts">
2
  import { processBlocks, processBlocksSync, type BlockToken } from "$lib/utils/marked";
3
- // import MarkdownWorker from "$lib/workers/markdownWorker?worker";
4
  import MarkdownBlock from "./MarkdownBlock.svelte";
5
  import { browser } from "$app/environment";
6
 
7
- import DOMPurify from "isomorphic-dompurify";
8
- import { onMount } from "svelte";
9
  import { updateDebouncer } from "$lib/utils/updates";
10
 
11
  interface Props {
@@ -17,53 +16,51 @@
17
  let { content, sources = [], loading = false }: Props = $props();
18
 
19
  let blocks: BlockToken[] = $state(processBlocksSync(content, sources));
 
 
20
 
21
- async function processContent(
22
- content: string,
23
- sources: { title?: string; link: string }[]
24
- ): Promise<BlockToken[]> {
25
- // Note: Worker support for blocks can be added later if needed
26
- // For now, use direct processing which is still efficient due to block memoization
27
- return processBlocks(content, sources);
28
  }
29
 
30
  $effect(() => {
31
  if (!browser) {
32
  blocks = processBlocksSync(content, sources);
33
- } else {
34
- (async () => {
35
- updateDebouncer.startRender();
36
- blocks = await processContent(content, sources).then(async (processedBlocks) =>
37
- Promise.all(
38
- processedBlocks.map(async (block) => ({
39
- ...block,
40
- tokens: await Promise.all(
41
- block.tokens.map(async (token) => {
42
- if (token.type === "text") {
43
- token.html = DOMPurify.sanitize(await token.html);
44
- }
45
- return token;
46
- })
47
- ),
48
- }))
49
- )
50
- );
51
 
52
- updateDebouncer.endRender();
53
- })();
 
 
54
  }
 
 
 
 
 
 
 
55
  });
56
 
57
  onMount(() => {
58
- // todo: fix worker, seems to be transmitting a lot of data
59
- // worker = browser && window.Worker ? new MarkdownWorker() : null;
 
 
 
 
 
 
 
60
 
61
- DOMPurify.addHook("afterSanitizeAttributes", (node) => {
62
- if (node.tagName === "A") {
63
- node.setAttribute("target", "_blank");
64
- node.setAttribute("rel", "noreferrer");
65
- }
66
- });
67
  });
68
  </script>
69
 
 
1
  <script lang="ts">
2
  import { processBlocks, processBlocksSync, type BlockToken } from "$lib/utils/marked";
3
+ import MarkdownWorker from "$lib/workers/markdownWorker?worker";
4
  import MarkdownBlock from "./MarkdownBlock.svelte";
5
  import { browser } from "$app/environment";
6
 
7
+ import { onMount, onDestroy } from "svelte";
 
8
  import { updateDebouncer } from "$lib/utils/updates";
9
 
10
  interface Props {
 
16
  let { content, sources = [], loading = false }: Props = $props();
17
 
18
  let blocks: BlockToken[] = $state(processBlocksSync(content, sources));
19
+ let worker: Worker | null = null;
20
+ let latestRequestId = 0;
21
 
22
+ function handleBlocks(result: BlockToken[], requestId: number) {
23
+ if (requestId !== latestRequestId) return;
24
+ blocks = result;
25
+ updateDebouncer.endRender();
 
 
 
26
  }
27
 
28
  $effect(() => {
29
  if (!browser) {
30
  blocks = processBlocksSync(content, sources);
31
+ return;
32
+ }
33
+
34
+ const requestId = ++latestRequestId;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ if (worker) {
37
+ updateDebouncer.startRender();
38
+ worker.postMessage({ type: "process", content, sources, requestId });
39
+ return;
40
  }
41
+
42
+ (async () => {
43
+ updateDebouncer.startRender();
44
+ const processed = await processBlocks(content, sources);
45
+ // Only apply if this is still the latest request
46
+ handleBlocks(processed, requestId);
47
+ })();
48
  });
49
 
50
  onMount(() => {
51
+ if (typeof Worker !== "undefined") {
52
+ worker = new MarkdownWorker();
53
+ worker.onmessage = (event: MessageEvent) => {
54
+ const data = event.data as { type?: string; blocks?: BlockToken[]; requestId?: number };
55
+ if (data?.type !== "processed" || !data.blocks || data.requestId === undefined) return;
56
+ handleBlocks(data.blocks, data.requestId);
57
+ };
58
+ }
59
+ });
60
 
61
+ onDestroy(() => {
62
+ worker?.terminate();
63
+ worker = null;
 
 
 
64
  });
65
  </script>
66
 
src/lib/utils/marked.ts CHANGED
@@ -7,10 +7,55 @@ type SimpleSource = {
7
  title?: string;
8
  link: string;
9
  };
10
- import hljs from "highlight.js";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  import { parseIncompleteMarkdown } from "./parseIncompleteMarkdown";
12
  import { parseMarkdownIntoBlocks } from "./parseBlocks";
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  interface katexBlockToken extends Tokens.Generic {
15
  type: "katexBlock";
16
  raw: string;
@@ -159,6 +204,27 @@ function addInlineCitations(md: string, webSearchSources: SimpleSource[] = []):
159
  });
160
  }
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  function createMarkedInstance(sources: SimpleSource[]): Marked {
163
  return new Marked({
164
  hooks: {
@@ -166,8 +232,12 @@ function createMarkedInstance(sources: SimpleSource[]): Marked {
166
  },
167
  extensions: [katexBlockExtension, katexInlineExtension],
168
  renderer: {
169
- link: (href, title, text) =>
170
- `<a href="${href?.replace(/>$/, "")}" target="_blank" rel="noreferrer">${text}</a>`,
 
 
 
 
171
  html: (html) => escapeHTML(html),
172
  },
173
  gfm: true,
@@ -200,6 +270,13 @@ type TextToken = {
200
  html: string | Promise<string>;
201
  };
202
 
 
 
 
 
 
 
 
203
  export async function processTokens(content: string, sources: SimpleSource[]): Promise<Token[]> {
204
  // Apply incomplete markdown preprocessing for smooth streaming
205
  const processedContent = parseIncompleteMarkdown(content);
@@ -213,7 +290,7 @@ export async function processTokens(content: string, sources: SimpleSource[]): P
213
  return {
214
  type: "code" as const,
215
  lang: token.lang,
216
- code: hljs.highlightAuto(token.text, hljs.getLanguage(token.lang)?.aliases).value,
217
  rawCode: token.text,
218
  isClosed: isFencedBlockClosed(token.raw ?? ""),
219
  };
@@ -240,7 +317,7 @@ export function processTokensSync(content: string, sources: SimpleSource[]): Tok
240
  return {
241
  type: "code" as const,
242
  lang: token.lang,
243
- code: hljs.highlightAuto(token.text, hljs.getLanguage(token.lang)?.aliases).value,
244
  rawCode: token.text,
245
  isClosed: isFencedBlockClosed(token.raw ?? ""),
246
  };
@@ -282,12 +359,18 @@ export async function processBlocks(
282
 
283
  return await Promise.all(
284
  blocks.map(async (blockContent, index) => {
 
 
 
 
285
  const tokens = await processTokens(blockContent, sources);
286
- return {
287
  id: `${index}-${hashString(blockContent)}`,
288
  content: blockContent,
289
  tokens,
290
  };
 
 
291
  })
292
  );
293
  }
@@ -299,11 +382,17 @@ export function processBlocksSync(content: string, sources: SimpleSource[] = [])
299
  const blocks = parseMarkdownIntoBlocks(content);
300
 
301
  return blocks.map((blockContent, index) => {
 
 
 
 
302
  const tokens = processTokensSync(blockContent, sources);
303
- return {
304
  id: `${index}-${hashString(blockContent)}`,
305
  content: blockContent,
306
  tokens,
307
  };
 
 
308
  });
309
  }
 
7
  title?: string;
8
  link: string;
9
  };
10
+ import hljs from "highlight.js/lib/core";
11
+ import type { LanguageFn } from "highlight.js";
12
+ import javascript from "highlight.js/lib/languages/javascript";
13
+ import typescript from "highlight.js/lib/languages/typescript";
14
+ import json from "highlight.js/lib/languages/json";
15
+ import bash from "highlight.js/lib/languages/bash";
16
+ import shell from "highlight.js/lib/languages/shell";
17
+ import python from "highlight.js/lib/languages/python";
18
+ import go from "highlight.js/lib/languages/go";
19
+ import rust from "highlight.js/lib/languages/rust";
20
+ import java from "highlight.js/lib/languages/java";
21
+ import csharp from "highlight.js/lib/languages/csharp";
22
+ import cpp from "highlight.js/lib/languages/cpp";
23
+ import cLang from "highlight.js/lib/languages/c";
24
+ import xml from "highlight.js/lib/languages/xml";
25
+ import css from "highlight.js/lib/languages/css";
26
+ import scss from "highlight.js/lib/languages/scss";
27
+ import markdownLang from "highlight.js/lib/languages/markdown";
28
+ import yaml from "highlight.js/lib/languages/yaml";
29
+ import sql from "highlight.js/lib/languages/sql";
30
+ import plaintext from "highlight.js/lib/languages/plaintext";
31
  import { parseIncompleteMarkdown } from "./parseIncompleteMarkdown";
32
  import { parseMarkdownIntoBlocks } from "./parseBlocks";
33
 
34
+ const bundledLanguages: [string, LanguageFn][] = [
35
+ ["javascript", javascript],
36
+ ["typescript", typescript],
37
+ ["json", json],
38
+ ["bash", bash],
39
+ ["shell", shell],
40
+ ["python", python],
41
+ ["go", go],
42
+ ["rust", rust],
43
+ ["java", java],
44
+ ["csharp", csharp],
45
+ ["cpp", cpp],
46
+ ["c", cLang],
47
+ ["xml", xml],
48
+ ["html", xml],
49
+ ["css", css],
50
+ ["scss", scss],
51
+ ["markdown", markdownLang],
52
+ ["yaml", yaml],
53
+ ["sql", sql],
54
+ ["plaintext", plaintext],
55
+ ];
56
+
57
+ bundledLanguages.forEach(([name, language]) => hljs.registerLanguage(name, language));
58
+
59
  interface katexBlockToken extends Tokens.Generic {
60
  type: "katexBlock";
61
  raw: string;
 
204
  });
205
  }
206
 
207
/**
 * Validate and normalise a link destination before it is written into an
 * `href="..."` attribute by the link renderer.
 *
 * Returns `undefined` when the href is missing or uses a script-capable scheme
 * (`javascript:`, `vbscript:`, `data:text/html`). Otherwise returns the trimmed
 * href with a single trailing `>` removed and every `"` percent-encoded so the
 * value cannot terminate the surrounding double-quoted attribute.
 *
 * The scheme check runs on a normalised copy of the href because browsers
 * decode HTML character references inside attribute values and URL parsers
 * drop tabs/newlines, so `javascript&#58;...` or `java\tscript:...` would
 * otherwise slip past a plain `startsWith` test.
 *
 * @param href Raw link destination as produced by the markdown parser.
 * @returns A value safe to interpolate into a double-quoted href, or `undefined`.
 */
function sanitizeHref(href?: string | null): string | undefined {
	if (!href) return undefined;
	const trimmed = href.trim();

	// Decode one numeric character reference; out-of-range values decode to nothing.
	const fromCodePoint = (code: number): string =>
		Number.isFinite(code) && code >= 0 && code <= 0x10ffff ? String.fromCodePoint(code) : "";

	// Build the form the browser would effectively see when resolving the scheme.
	const normalized = trimmed
		.replace(/&#x([0-9a-f]+);?/gi, (_match: string, hex: string) =>
			fromCodePoint(parseInt(hex, 16))
		)
		.replace(/&#(\d+);?/g, (_match: string, dec: string) => fromCodePoint(parseInt(dec, 10)))
		.replace(/&colon;/gi, ":")
		.replace(/&(tab|newline);/gi, "")
		// Strip ASCII/C1 control characters and spaces, which URL parsers ignore or trim.
		.replace(/[\u0000-\u0020\u007f-\u009f]/g, "")
		.toLowerCase();

	const blockedPrefixes = ["javascript:", "vbscript:", "data:text/html"];
	if (blockedPrefixes.some((prefix) => normalized.startsWith(prefix))) {
		return undefined;
	}

	// Percent-encode quotes so the value cannot break out of href="...".
	return trimmed.replace(/>$/, "").replace(/"/g, "%22");
}
216
+
217
/**
 * Produce highlighted HTML for a code block.
 *
 * When `lang` names a language known to the highlighter, that grammar is used
 * (with `ignoreIllegals` enabled). If no language is given, the language is
 * not registered, or explicit highlighting throws, the highlighter's
 * auto-detection is used instead.
 *
 * @param text Raw code to highlight.
 * @param lang Optional language name from the code fence.
 * @returns Highlighted HTML markup.
 */
function highlightCode(text: string, lang?: string): string {
	// No usable language hint: let the highlighter guess.
	if (!lang || !hljs.getLanguage(lang)) {
		return hljs.highlightAuto(text).value;
	}

	try {
		const highlighted = hljs.highlight(text, { language: lang, ignoreIllegals: true });
		return highlighted.value;
	} catch {
		// Explicit highlighting failed; fall back to auto-detection.
		return hljs.highlightAuto(text).value;
	}
}
227
+
228
  function createMarkedInstance(sources: SimpleSource[]): Marked {
229
  return new Marked({
230
  hooks: {
 
232
  },
233
  extensions: [katexBlockExtension, katexInlineExtension],
234
  renderer: {
235
+ link: (href, title, text) => {
236
+ const safeHref = sanitizeHref(href);
237
+ return safeHref
238
+ ? `<a href="${safeHref}" target="_blank" rel="noreferrer">${text}</a>`
239
+ : `<span>${escapeHTML(text ?? "")}</span>`;
240
+ },
241
  html: (html) => escapeHTML(html),
242
  },
243
  gfm: true,
 
270
  html: string | Promise<string>;
271
  };
272
 
273
+ const blockCache = new Map<string, BlockToken>();
274
+
275
/**
 * Build the lookup key used for `blockCache`.
 *
 * The key combines the block's position, a hash of its content, and the links
 * of all sources (joined with `|`).
 *
 * @param index        Position of the block within the document.
 * @param blockContent Raw markdown of the block.
 * @param sources      Sources whose links contribute to the key.
 * @returns The cache key string.
 */
function cacheKey(index: number, blockContent: string, sources: SimpleSource[]) {
	const links: string[] = [];
	for (const source of sources) {
		links.push(source.link);
	}
	const contentHash = hashString(blockContent);
	return `${index}-${contentHash}|${links.join("|")}`;
}
279
+
280
  export async function processTokens(content: string, sources: SimpleSource[]): Promise<Token[]> {
281
  // Apply incomplete markdown preprocessing for smooth streaming
282
  const processedContent = parseIncompleteMarkdown(content);
 
290
  return {
291
  type: "code" as const,
292
  lang: token.lang,
293
+ code: highlightCode(token.text, token.lang),
294
  rawCode: token.text,
295
  isClosed: isFencedBlockClosed(token.raw ?? ""),
296
  };
 
317
  return {
318
  type: "code" as const,
319
  lang: token.lang,
320
+ code: highlightCode(token.text, token.lang),
321
  rawCode: token.text,
322
  isClosed: isFencedBlockClosed(token.raw ?? ""),
323
  };
 
359
 
360
  return await Promise.all(
361
  blocks.map(async (blockContent, index) => {
362
+ const key = cacheKey(index, blockContent, sources);
363
+ const cached = blockCache.get(key);
364
+ if (cached) return cached;
365
+
366
  const tokens = await processTokens(blockContent, sources);
367
+ const block: BlockToken = {
368
  id: `${index}-${hashString(blockContent)}`,
369
  content: blockContent,
370
  tokens,
371
  };
372
+ blockCache.set(key, block);
373
+ return block;
374
  })
375
  );
376
  }
 
382
  const blocks = parseMarkdownIntoBlocks(content);
383
 
384
  return blocks.map((blockContent, index) => {
385
+ const key = cacheKey(index, blockContent, sources);
386
+ const cached = blockCache.get(key);
387
+ if (cached) return cached;
388
+
389
  const tokens = processTokensSync(blockContent, sources);
390
+ const block: BlockToken = {
391
  id: `${index}-${hashString(blockContent)}`,
392
  content: blockContent,
393
  tokens,
394
  };
395
+ blockCache.set(key, block);
396
+ return block;
397
  });
398
  }
src/lib/workers/markdownWorker.ts CHANGED
@@ -3,17 +3,19 @@ type SimpleSource = {
3
  title?: string;
4
  link: string;
5
  };
6
- import { processTokens, type Token } from "$lib/utils/marked";
7
 
8
  export type IncomingMessage = {
9
  type: "process";
10
  content: string;
11
  sources: SimpleSource[];
 
12
  };
13
 
14
  export type OutgoingMessage = {
15
  type: "processed";
16
- tokens: Token[];
 
17
  };
18
 
19
  // Flag to track if the worker is currently processing a message
@@ -31,9 +33,11 @@ async function processMessage() {
31
  isProcessing = true;
32
 
33
  try {
34
- const { content, sources } = nextMessage;
35
- const processedTokens = await processTokens(content, sources);
36
- postMessage(JSON.parse(JSON.stringify({ type: "processed", tokens: processedTokens })));
 
 
37
  } finally {
38
  isProcessing = false;
39
 
 
3
  title?: string;
4
  link: string;
5
  };
6
+ import { processBlocks, type BlockToken } from "$lib/utils/marked";
7
 
8
  export type IncomingMessage = {
9
  type: "process";
10
  content: string;
11
  sources: SimpleSource[];
12
+ requestId: number;
13
  };
14
 
15
  export type OutgoingMessage = {
16
  type: "processed";
17
+ blocks: BlockToken[];
18
+ requestId: number;
19
  };
20
 
21
  // Flag to track if the worker is currently processing a message
 
33
  isProcessing = true;
34
 
35
  try {
36
+ const { content, sources, requestId } = nextMessage;
37
+ const processedBlocks = await processBlocks(content, sources);
38
+ postMessage(
39
+ JSON.parse(JSON.stringify({ type: "processed", blocks: processedBlocks, requestId }))
40
+ );
41
  } finally {
42
  isProcessing = false;
43