victor HF Staff committed on
Commit
7b56bb5
·
unverified ·
1 Parent(s): 02a3822

feat(markdown): allow video and audio tags (#2037)

Browse files

* feat(markdown): allow video and audio tags in markdown

- Add custom image renderer to convert video/audio URLs to media tags
- Use DOMPurify to sanitize raw HTML, allowing only video/audio/source
- Add comprehensive tests for media rendering and XSS prevention

* feat(mcp): increase tool timeout from 30s to 60s

* feat(mcp): update tool prompt to mention video/audio support

* test: update markdown test for DOMPurify behavior

src/lib/components/chat/MarkdownRenderer.svelte.test.ts CHANGED
@@ -32,7 +32,8 @@ describe("MarkdownRenderer", () => {
32
  it("doesnt render raw html directly", () => {
33
  render(MarkdownRenderer, { content: "<button>Click me</button>" });
34
  expect(page.getByRole("button").elements).toHaveLength(0);
35
- expect(page.getByRole("paragraph")).toHaveTextContent("<button>Click me</button>");
 
36
  });
37
  it("renders latex", () => {
38
  const { baseElement } = render(MarkdownRenderer, { content: "$(oo)^2$" });
 
32
  it("doesnt render raw html directly", () => {
33
  render(MarkdownRenderer, { content: "<button>Click me</button>" });
34
  expect(page.getByRole("button").elements).toHaveLength(0);
35
+ // DOMPurify strips disallowed tags but preserves text content
36
+ expect(page.getByRole("paragraph")).toHaveTextContent("Click me");
37
  });
38
  it("renders latex", () => {
39
  const { baseElement } = render(MarkdownRenderer, { content: "$(oo)^2$" });
src/lib/server/mcp/httpClient.ts CHANGED
@@ -13,7 +13,7 @@ export interface McpServerConfig {
13
  headers?: Record<string, string>;
14
  }
15
 
16
- const DEFAULT_TIMEOUT_MS = 30_000;
17
 
18
  export type McpToolTextResponse = {
19
  text: string;
 
13
  headers?: Record<string, string>;
14
  }
15
 
16
+ const DEFAULT_TIMEOUT_MS = 60_000;
17
 
18
  export type McpToolTextResponse = {
19
  text: string;
src/lib/server/textGeneration/mcp/toolInvocation.ts CHANGED
@@ -69,7 +69,7 @@ export async function* executeToolCalls({
69
  toPrimitive,
70
  processToolOutput,
71
  abortSignal,
72
- toolTimeoutMs = 30_000,
73
  }: ExecuteToolCallsParams): AsyncGenerator<ToolExecutionEvent, void, undefined> {
74
  const toolMessages: ChatCompletionMessageParam[] = [];
75
  const toolRuns: ToolRun[] = [];
 
69
  toPrimitive,
70
  processToolOutput,
71
  abortSignal,
72
+ toolTimeoutMs = 60_000,
73
  }: ExecuteToolCallsParams): AsyncGenerator<ToolExecutionEvent, void, undefined> {
74
  const toolMessages: ChatCompletionMessageParam[] = [];
75
  const toolRuns: ToolRun[] = [];
src/lib/server/textGeneration/utils/toolPrompt.ts CHANGED
@@ -14,7 +14,7 @@ export function buildToolPreprompt(tools: OpenAiTool[]): string {
14
  return [
15
  `You can use the following tools if helpful: ${names.join(", ")}.`,
16
  `Today's date: ${currentDate}.`,
17
- `If a tool generates an image, you can inline it directly: ![alt text](image_url).`,
18
  `If a tool needs an image, set its image field ("input_image", "image", or "image_url") to a reference like "image_1", "image_2", etc. (ordered by when the user uploaded them).`,
19
  `Default to image references; only use a full http(s) URL when the tool description explicitly asks for one, or reuse a URL a previous tool returned.`,
20
  ].join(" ");
 
14
  return [
15
  `You can use the following tools if helpful: ${names.join(", ")}.`,
16
  `Today's date: ${currentDate}.`,
17
+ `If a tool generates an image, video, or audio, you can inline it using ![alt](url) or raw <video>/<audio> HTML tags. Video (.mp4, .webm) and audio (.mp3, .wav) URLs will render as playable media.`,
18
  `If a tool needs an image, set its image field ("input_image", "image", or "image_url") to a reference like "image_1", "image_2", etc. (ordered by when the user uploaded them).`,
19
  `Default to image references; only use a full http(s) URL when the tool description explicitly asks for one, or reuse a URL a previous tool returned.`,
20
  ].join(" ");
src/lib/utils/marked.spec.ts ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "vitest";
2
+ import { processTokensSync } from "./marked";
3
+
4
+ function renderHtml(md: string): string {
5
+ const tokens = processTokensSync(md, []);
6
+ const textToken = tokens.find((token) => token.type === "text");
7
+ if (!textToken || textToken.type !== "text") return "";
8
+ return typeof textToken.html === "string" ? textToken.html : "";
9
+ }
10
+
11
+ describe("marked basic rendering", () => {
12
+ test("renders bold text", () => {
13
+ const html = renderHtml("**bold**");
14
+ expect(html).toContain("<strong>bold</strong>");
15
+ });
16
+
17
+ test("renders links", () => {
18
+ const html = renderHtml("[link](https://example.com)");
19
+ expect(html).toContain('<a href="https://example.com"');
20
+ expect(html).toContain("link</a>");
21
+ });
22
+
23
+ test("renders paragraphs", () => {
24
+ const html = renderHtml("hello world");
25
+ expect(html).toContain("<p>hello world</p>");
26
+ });
27
+ });
28
+
29
+ describe("marked image renderer", () => {
30
+ test("renders video extensions as <video>", () => {
31
+ const html = renderHtml("![](https://example.com/clip.mp4)");
32
+ expect(html).toContain("<video controls");
33
+ expect(html).toContain('<source src="https://example.com/clip.mp4">');
34
+ });
35
+
36
+ test("renders audio extensions as <audio>", () => {
37
+ const html = renderHtml("![](https://example.com/clip.mp3)");
38
+ expect(html).toContain("<audio controls");
39
+ expect(html).toContain('<source src="https://example.com/clip.mp3">');
40
+ });
41
+
42
+ test("renders non-video images as <img>", () => {
43
+ const html = renderHtml("![](https://example.com/pic.png)");
44
+ expect(html).toContain('<img src="https://example.com/pic.png"');
45
+ });
46
+
47
+ test("renders video with query params", () => {
48
+ const html = renderHtml("![](https://example.com/clip.mp4?token=abc)");
49
+ expect(html).toContain("<video controls");
50
+ expect(html).toContain("clip.mp4?token=abc");
51
+ });
52
+ });
53
+
54
+ describe("marked html video tag support", () => {
55
+ test("allows raw <video> tags with controls", () => {
56
+ const html = renderHtml('<video controls src="https://example.com/video.mp4"></video>');
57
+ expect(html).toContain("<video");
58
+ expect(html).toContain("controls");
59
+ expect(html).toContain('src="https://example.com/video.mp4"');
60
+ });
61
+
62
+ test("allows <video> with nested <source> tags", () => {
63
+ const html = renderHtml(
64
+ '<video controls><source src="https://example.com/video.webm" type="video/webm"></video>'
65
+ );
66
+ expect(html).toContain("<video");
67
+ expect(html).toContain("<source");
68
+ expect(html).toContain('src="https://example.com/video.webm"');
69
+ });
70
+
71
+ test("strips disallowed attributes from video tags", () => {
72
+ const html = renderHtml('<video onclick="alert(1)" src="https://example.com/v.mp4"></video>');
73
+ expect(html).toContain("<video");
74
+ expect(html).not.toContain("onclick");
75
+ });
76
+
77
+ test("strips javascript: URLs from media sources", () => {
78
+ const html = renderHtml('<video controls src="javascript:alert(1)"></video>');
79
+ expect(html).not.toContain("javascript:");
80
+ });
81
+
82
+ test("strips disallowed html tags", () => {
83
+ const html = renderHtml("<script>alert(1)</script>");
84
+ expect(html).not.toContain("<script>");
85
+ });
86
+
87
+ test("allows <audio> tags with controls", () => {
88
+ const html = renderHtml(
89
+ '<audio controls><source src="https://example.com/audio.mp3" type="audio/mpeg"></audio>'
90
+ );
91
+ expect(html).toContain("<audio");
92
+ expect(html).toContain("<source");
93
+ expect(html).toContain('type="audio/mpeg"');
94
+ });
95
+ });
src/lib/utils/marked.ts CHANGED
@@ -2,6 +2,7 @@ import katex from "katex";
2
  import "katex/dist/contrib/mhchem.mjs";
3
  import { Marked } from "marked";
4
  import type { Tokens, TokenizerExtension, RendererExtension } from "marked";
 
5
  // Simple type to replace removed WebSearchSource
6
  type SimpleSource = {
7
  title?: string;
@@ -56,6 +57,36 @@ const bundledLanguages: [string, LanguageFn][] = [
56
 
57
  bundledLanguages.forEach(([name, language]) => hljs.registerLanguage(name, language));
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  interface katexBlockToken extends Tokens.Generic {
60
  type: "katexBlock";
61
  raw: string;
@@ -238,7 +269,23 @@ function createMarkedInstance(sources: SimpleSource[]): Marked {
238
  ? `<a href="${escapeHTML(safeHref)}" target="_blank" rel="noreferrer">${text}</a>`
239
  : `<span>${escapeHTML(text ?? "")}</span>`;
240
  },
241
- html: (html) => escapeHTML(html),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  },
243
  gfm: true,
244
  breaks: true,
 
2
  import "katex/dist/contrib/mhchem.mjs";
3
  import { Marked } from "marked";
4
  import type { Tokens, TokenizerExtension, RendererExtension } from "marked";
5
+ import DOMPurify from "isomorphic-dompurify";
6
  // Simple type to replace removed WebSearchSource
7
  type SimpleSource = {
8
  title?: string;
 
57
 
58
  bundledLanguages.forEach(([name, language]) => hljs.registerLanguage(name, language));
59
 
60
+ // DOMPurify config for allowing video/audio tags in markdown
61
+ const DOMPURIFY_CONFIG = {
62
+ ALLOWED_TAGS: ["video", "audio", "source"],
63
+ ALLOWED_ATTR: [
64
+ "src",
65
+ "type",
66
+ "controls",
67
+ "autoplay",
68
+ "loop",
69
+ "muted",
70
+ "playsinline",
71
+ "poster",
72
+ "width",
73
+ "height",
74
+ "preload",
75
+ ],
76
+ };
77
+
78
+ // Media URL detection
79
+ const VIDEO_EXTENSIONS = /\.(mp4|webm|ogg|mov|m4v)([?#]|$)/i;
80
+ const AUDIO_EXTENSIONS = /\.(mp3|wav|m4a|aac|flac)([?#]|$)/i;
81
+
82
+ function isVideoUrl(url: string): boolean {
83
+ return VIDEO_EXTENSIONS.test(url);
84
+ }
85
+
86
+ function isAudioUrl(url: string): boolean {
87
+ return AUDIO_EXTENSIONS.test(url);
88
+ }
89
+
90
  interface katexBlockToken extends Tokens.Generic {
91
  type: "katexBlock";
92
  raw: string;
 
269
  ? `<a href="${escapeHTML(safeHref)}" target="_blank" rel="noreferrer">${text}</a>`
270
  : `<span>${escapeHTML(text ?? "")}</span>`;
271
  },
272
+ image: (href, title, text) => {
273
+ const safeHref = sanitizeHref(href);
274
+ if (!safeHref) return `<span>${escapeHTML(text ?? "")}</span>`;
275
+
276
+ const safeSrc = escapeHTML(safeHref);
277
+ const safeTitle = title ? ` title="${escapeHTML(title)}"` : "";
278
+ const safeAlt = escapeHTML(text ?? "");
279
+
280
+ if (isVideoUrl(safeHref)) {
281
+ return `<video controls${safeTitle}><source src="${safeSrc}">${safeAlt}</video>`;
282
+ }
283
+ if (isAudioUrl(safeHref)) {
284
+ return `<audio controls${safeTitle}><source src="${safeSrc}">${safeAlt}</audio>`;
285
+ }
286
+ return `<img src="${safeSrc}" alt="${safeAlt}"${safeTitle} />`;
287
+ },
288
+ html: (html) => DOMPurify.sanitize(html, DOMPURIFY_CONFIG),
289
  },
290
  gfm: true,
291
  breaks: true,