|
|
import katex from "katex"; |
|
|
import "katex/dist/contrib/mhchem.mjs"; |
|
|
import { Marked } from "marked"; |
|
|
import type { Tokens, TokenizerExtension, RendererExtension } from "marked"; |
|
|
import type { WebSearchSource } from "$lib/types/WebSearch"; |
|
|
import hljs from "highlight.js"; |
|
|
|
|
|
/**
 * Token emitted by the block-level KaTeX extension for display math.
 */
interface katexBlockToken extends Tokens.Generic {
  /** Discriminant; equals the name of the block extension. */
  type: "katexBlock";
  /** Block math is always rendered in display mode. */
  displayMode: true;
  /** The complete matched source, delimiters included. */
  raw: string;
  /** The TeX source found between the delimiters. */
  text: string;
}
|
|
|
|
|
/**
 * Token emitted by the inline-level KaTeX extension for in-text math.
 */
interface katexInlineToken extends Tokens.Generic {
  /** Discriminant; equals the name of the inline extension. */
  type: "katexInline";
  /** Inline math is never rendered in display mode. */
  displayMode: false;
  /** The complete matched source, delimiters included. */
  raw: string;
  /** The TeX source found between the delimiters. */
  text: string;
}
|
|
|
|
|
/**
 * Marked extension for display math. It recognises `$$ ... $$` and
 * `\[ ... \]` at block level and renders the enclosed TeX with KaTeX.
 */
export const katexBlockExtension: TokenizerExtension & RendererExtension = {
  name: "katexBlock",
  level: "block",

  /**
   * Locates the first candidate opener (`$$` or `\[`) in `src`.
   *
   * @param src - remaining source text to scan
   * @returns index of the first opener, or `-1` when none is present
   */
  start(src: string): number | undefined {
    return src.search(/(\${2}|\\\[)/);
  },

  /**
   * Tries to read a display-math span from the very start of `src`.
   *
   * @param src - source text beginning at the current lexer position
   * @returns a `katexBlock` token whose `text` is the trimmed TeX, or
   *          `undefined` when `src` does not start with a complete span
   */
  tokenizer(src: string): katexBlockToken | undefined {
    // Tried in order: `$$ ... $$` first, then `\[ ... \]`.
    const delimiterRules = [/^\${2}([\s\S]+?)\${2}/, /^\\\[([\s\S]+?)\\\]/];

    for (const rule of delimiterRules) {
      const found = rule.exec(src);
      if (found) {
        return {
          type: "katexBlock",
          raw: found[0],
          text: found[1].trim(),
          displayMode: true,
        };
      }
    }

    return undefined;
  },

  /**
   * Renders a `katexBlock` token to KaTeX markup.
   *
   * @returns the rendered markup, or `undefined` for any other token type
   */
  renderer(token) {
    if (token.type !== "katexBlock") {
      return undefined;
    }

    // throwOnError: false makes KaTeX render invalid TeX inline instead of throwing.
    return katex.renderToString(token.text, {
      throwOnError: false,
      displayMode: token.displayMode,
    });
  },
};
|
|
|
|
|
/**
 * Marked extension for inline math. It recognises `$ ... $` and
 * `\( ... \)` at inline level and renders the enclosed TeX with KaTeX.
 */
const katexInlineExtension: TokenizerExtension & RendererExtension = {
  name: "katexInline",
  level: "inline",

  /**
   * Locates the first candidate opener (`$` or `\(`) in `src`.
   *
   * @param src - remaining source text to scan
   * @returns index of the first opener, or `-1` when none is present
   */
  start(src: string): number | undefined {
    return src.search(/(\$|\\\()/);
  },

  /**
   * Tries to read an inline-math span from the very start of `src`.
   *
   * @param src - source text beginning at the current lexer position
   * @returns a `katexInline` token whose `text` is the trimmed TeX, or
   *          `undefined` when `src` does not start with a complete span
   */
  tokenizer(src: string): katexInlineToken | undefined {
    // Tried in order: `$ ... $` (no `$` allowed inside), then `\( ... \)`.
    const delimiterRules = [/^\$([^$]+?)\$/, /^\\\(([\s\S]+?)\\\)/];

    for (const rule of delimiterRules) {
      const found = rule.exec(src);
      if (found) {
        return {
          type: "katexInline",
          raw: found[0],
          text: found[1].trim(),
          displayMode: false,
        };
      }
    }

    return undefined;
  },

  /**
   * Renders a `katexInline` token to KaTeX markup.
   *
   * @returns the rendered markup, or `undefined` for any other token type
   */
  renderer(token) {
    if (token.type !== "katexInline") {
      return undefined;
    }

    // throwOnError: false makes KaTeX render invalid TeX inline instead of throwing.
    return katex.renderToString(token.text, {
      throwOnError: false,
      displayMode: token.displayMode,
    });
  },
};
|
|
|
|
|
/** Entity that replaces each character which must not appear verbatim in HTML. */
const HTML_ESCAPE_ENTITIES: Record<string, string> = {
  "<": "&lt;",
  ">": "&gt;",
  "&": "&amp;",
  "'": "&#39;",
  '"': "&quot;",
};

/**
 * Escapes the characters `<`, `>`, `&`, `'` and `"` so that `content` can be
 * emitted as literal text instead of being interpreted as markup.
 *
 * @param content - raw text that may contain HTML-significant characters
 * @returns `content` with each of those characters replaced by its entity
 */
function escapeHTML(content: string): string {
  // The character class and the table keys are the same five characters,
  // so the fallback only exists to satisfy the type checker.
  return content.replace(/[<>&"']/g, (ch) => HTML_ESCAPE_ENTITIES[ch] ?? ch);
}
|
|
|
|
|
/**
 * Turns numeric citation markers such as `[1]` into superscript links that
 * point at the matching web-search source.
 *
 * Markers are 1-based: `[n]` refers to `webSearchSources[n - 1]`. A marker is
 * left exactly as written when it is `[0]` or when no source exists for it.
 *
 * @param md - text in which to replace citation markers
 * @param webSearchSources - sources the markers refer to, in citation order
 * @returns `md` with every resolvable marker replaced by ` <sup><a …>n</a></sup>`
 */
function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
  const linkStyle =
    "color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";

  return md.replace(/\[(\d+)\]/g, (marker: string, digits: string) => {
    const position = Number(digits);
    // Position 0 can never name a source because markers are 1-based.
    const source = position === 0 ? undefined : webSearchSources[position - 1];
    if (!source) {
      return marker;
    }

    const anchor = `<a href="${source.link}" target="_blank" rel="noreferrer" style="${linkStyle}">${position}</a>`;
    return ` <sup>${anchor}</sup>`;
  });
}
|
|
|
|
|
/**
 * Builds the Marked instance used to render message content.
 *
 * The instance uses GFM with line breaks, registers the KaTeX block and inline
 * extensions, escapes raw HTML instead of passing it through, makes every link
 * open in a new tab, and post-processes the rendered HTML to turn `[n]`
 * markers into citation links for `sources`.
 *
 * @param sources - web-search sources that citation markers refer to
 * @returns a configured `Marked` instance
 */
function createMarkedInstance(sources: WebSearchSource[]): Marked {
  return new Marked({
    gfm: true,
    breaks: true,
    extensions: [katexBlockExtension, katexInlineExtension],
    hooks: {
      // Citations are resolved on the final HTML, after markdown rendering.
      postprocess: (html) => addInlineCitations(html, sources),
    },
    renderer: {
      // Raw HTML in the input is shown as text rather than rendered.
      html: (html) => escapeHTML(html),
      link: (href, title, text) => {
        // Drops a single trailing ">" from the URL.
        // NOTE(review): presumably guards against a stray closing bracket — confirm.
        const target = href?.replace(/>$/, "");
        return `<a href="${target}" target="_blank" rel="noreferrer">${text}</a>`;
      },
    },
  });
}
|
|
/** A fenced or indented code block, pre-highlighted for display. */
type CodeToken = {
  type: "code";
  /** Original, unhighlighted source of the code block. */
  rawCode: string;
  /** Highlighted markup produced by highlight.js. */
  code: string;
  /** Language tag taken from the code token. */
  lang: string;
};
|
|
|
|
|
/** Any non-code chunk of the message, rendered to HTML by Marked. */
type TextToken = {
  /** Rendered HTML; may still be a promise, since `marked.parse` can return one. */
  html: string | Promise<string>;
  type: "text";
};
|
|
|
|
|
/**
 * Splits `content` into top-level markdown tokens and converts each one into
 * a renderable `Token`.
 *
 * Code tokens are highlighted with highlight.js; every other token is
 * rendered to HTML from its raw source by Marked.
 *
 * @param content - markdown source to process
 * @param sources - web-search sources used to resolve `[n]` citation markers
 * @returns one `Token` per top-level markdown token, in document order
 */
export async function processTokens(content: string, sources: WebSearchSource[]): Promise<Token[]> {
  const marked = createMarkedInstance(sources);

  const pending = marked.lexer(content).map(async (token) => {
    if (token.type !== "code") {
      // The parse result is passed through as-is, so `html` may be a promise.
      return { type: "text" as const, html: marked.parse(token.raw) };
    }

    // Auto-detection is limited to the aliases of the declared language, when it is known.
    const languageSubset = hljs.getLanguage(token.lang)?.aliases;
    return {
      type: "code" as const,
      lang: token.lang,
      code: hljs.highlightAuto(token.text, languageSubset).value,
      rawCode: token.text,
    };
  });

  return Promise.all(pending);
}
|
|
|
|
|
/**
 * Synchronous counterpart of `processTokens`: splits `content` into top-level
 * markdown tokens and converts each one into a renderable `Token`.
 *
 * Code tokens are highlighted with highlight.js; every other token is
 * rendered to HTML from its raw source by Marked.
 *
 * @param content - markdown source to process
 * @param sources - web-search sources used to resolve `[n]` citation markers
 * @returns one `Token` per top-level markdown token, in document order
 */
export function processTokensSync(content: string, sources: WebSearchSource[]): Token[] {
  const marked = createMarkedInstance(sources);
  const processed: Token[] = [];

  for (const token of marked.lexer(content)) {
    if (token.type === "code") {
      // Auto-detection is limited to the aliases of the declared language, when it is known.
      const languageSubset = hljs.getLanguage(token.lang)?.aliases;
      const highlighted = hljs.highlightAuto(token.text, languageSubset);
      processed.push({
        type: "code" as const,
        lang: token.lang,
        code: highlighted.value,
        rawCode: token.text,
      });
    } else {
      processed.push({ type: "text" as const, html: marked.parse(token.raw) });
    }
  }

  return processed;
}
|
|
|
|
|
/** A processed chunk of message content: either highlighted code or rendered HTML text. */
export type Token = CodeToken | TextToken;
|
|
|