| import katex from "katex"; |
| import "katex/dist/contrib/mhchem.mjs"; |
| import { Marked } from "marked"; |
| import type { Tokens, TokenizerExtension, RendererExtension } from "marked"; |
| import type { WebSearchSource } from "$lib/types/WebSearch"; |
| import hljs from "highlight.js"; |
|
|
/**
 * Custom marked token describing a display-mode (block) math expression.
 *
 * Emitted by the `katexBlock` tokenizer and consumed by its renderer.
 */
interface katexBlockToken extends Tokens.Generic {
  /** Discriminator matching the extension name. */
  type: "katexBlock";
  /** The exact source text consumed by the tokenizer, delimiters included. */
  raw: string;
  /** The LaTeX between the delimiters, with surrounding whitespace trimmed. */
  text: string;
  /** Block math is always rendered in KaTeX display mode. */
  displayMode: true;
}
|
|
/**
 * Custom marked token describing an inline math expression.
 *
 * Emitted by the `katexInline` tokenizer and consumed by its renderer.
 */
interface katexInlineToken extends Tokens.Generic {
  /** Discriminator matching the extension name. */
  type: "katexInline";
  /** The exact source text consumed by the tokenizer, delimiters included. */
  raw: string;
  /** The LaTeX between the delimiters, with surrounding whitespace trimmed. */
  text: string;
  /** Inline math is never rendered in KaTeX display mode. */
  displayMode: false;
}
|
|
/**
 * marked block-level extension that recognises display math and renders it with KaTeX.
 *
 * Two delimiter pairs are accepted: `$$ … $$` and `\[ … \]`.
 */
export const katexBlockExtension: TokenizerExtension & RendererExtension = {
  name: "katexBlock",
  level: "block",

  /**
   * Hints where a block-math token might begin.
   *
   * @returns the index of the first `$$` or `\[` in `src`, or -1 when neither occurs.
   */
  start(src: string): number | undefined {
    const opener = /(\${2}|\\\[)/.exec(src);
    return opener === null ? -1 : opener.index;
  },

  /**
   * Attempts to read a block-math token from the very beginning of `src`.
   *
   * @returns the token, or `undefined` when `src` does not start with a complete
   * delimited expression.
   */
  tokenizer(src: string): katexBlockToken | undefined {
    // Tried in order; the first pattern that matches at the start of `src` wins.
    const delimiterPatterns = [
      /^\${2}([\s\S]+?)\${2}/, // $$ … $$
      /^\\\[([\s\S]+?)\\\]/, // \[ … \]
    ];

    for (const pattern of delimiterPatterns) {
      const found = pattern.exec(src);
      if (found !== null) {
        return {
          type: "katexBlock",
          raw: found[0],
          text: found[1].trim(),
          displayMode: true,
        };
      }
    }

    return undefined;
  },

  /**
   * Renders a `katexBlock` token to a KaTeX HTML string.
   *
   * `throwOnError: false` is passed to KaTeX so invalid LaTeX does not throw here.
   * Tokens of any other type yield `undefined`.
   */
  renderer(token) {
    if (token.type !== "katexBlock") {
      return undefined;
    }
    return katex.renderToString(token.text, {
      throwOnError: false,
      displayMode: token.displayMode,
    });
  },
};
|
|
/**
 * marked inline-level extension that recognises inline math and renders it with KaTeX.
 *
 * Two delimiter pairs are accepted: `$ … $` and `\( … \)`.
 */
const katexInlineExtension: TokenizerExtension & RendererExtension = {
  name: "katexInline",
  level: "inline",

  /**
   * Hints where an inline-math token might begin.
   *
   * @returns the index of the first `$` or `\(` in `src`, or -1 when neither occurs.
   */
  start(src: string): number | undefined {
    const opener = /(\$|\\\()/.exec(src);
    return opener === null ? -1 : opener.index;
  },

  /**
   * Attempts to read an inline-math token from the very beginning of `src`.
   *
   * @returns the token, or `undefined` when `src` does not start with a complete
   * delimited expression.
   */
  tokenizer(src: string): katexInlineToken | undefined {
    // Tried in order; the first pattern that matches at the start of `src` wins.
    const delimiterPatterns = [
      /^\$([^$]+?)\$/, // $ … $  (the body may not contain another `$`)
      /^\\\(([\s\S]+?)\\\)/, // \( … \)
    ];

    for (const pattern of delimiterPatterns) {
      const found = pattern.exec(src);
      if (found !== null) {
        return {
          type: "katexInline",
          raw: found[0],
          text: found[1].trim(),
          displayMode: false,
        };
      }
    }

    return undefined;
  },

  /**
   * Renders a `katexInline` token to a KaTeX HTML string.
   *
   * `throwOnError: false` is passed to KaTeX so invalid LaTeX does not throw here.
   * Tokens of any other type yield `undefined`.
   */
  renderer(token) {
    if (token.type !== "katexInline") {
      return undefined;
    }
    return katex.renderToString(token.text, {
      throwOnError: false,
      displayMode: token.displayMode,
    });
  },
};
|
|
/**
 * Escapes the characters that are significant in HTML (`<`, `>`, `&`, `'`, `"`)
 * so that `content` is displayed as literal text instead of being parsed as markup.
 *
 * @param content - Arbitrary text, possibly containing markup.
 * @returns `content` with each of the five characters replaced by its entity.
 */
function escapeHTML(content: string): string {
  // Each character must map to its entity; mapping a character to itself would
  // make this function a no-op and let raw markup through.
  const entities: Record<string, string> = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "'": "&#39;",
    '"': "&quot;",
  };
  return content.replace(/[<>&"']/g, (ch) => entities[ch] ?? ch);
}
|
|
/**
 * Replaces numeric citation markers such as `[2]` with superscript links to the
 * corresponding web search source.
 *
 * Markers are 1-based: `[n]` refers to `webSearchSources[n - 1]`. A marker is left
 * untouched when it is `[0]` or when no source exists at that position.
 *
 * @param md - Text to scan for `[n]` markers.
 * @param webSearchSources - Sources the markers refer to; defaults to none.
 * @returns `md` with every resolvable marker replaced by ` <sup><a …>n</a></sup>`.
 */
function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
  // NOTE(review): `hover:text-decoration` is not a valid inline-style declaration;
  // the string is kept verbatim so the emitted markup stays the same.
  const linkStyle =
    "color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";

  // Source links are external data. Escape the characters that could terminate the
  // attribute value or open a tag before interpolating into `href="…"`.
  const escapeAttribute = (value: string): string =>
    value.replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

  return md.replace(/\[(\d+)\]/g, (marker: string, digits: string) => {
    const index = Number(digits);
    const source = index > 0 ? webSearchSources[index - 1] : undefined;
    if (!source) {
      return marker;
    }
    const href = escapeAttribute(source.link);
    return ` <sup><a href="${href}" target="_blank" rel="noreferrer" style="${linkStyle}">${index}</a></sup>`;
  });
}
|
|
/**
 * Builds a Marked parser configured for rendering chat messages.
 *
 * Configuration applied:
 * - GitHub-flavoured markdown with `breaks` enabled.
 * - The KaTeX block and inline extensions.
 * - Links rendered with `target="_blank"` and `rel="noreferrer"`; a single trailing
 *   `>` is stripped from the href.
 * - Raw HTML in the markdown is escaped instead of being passed through.
 * - After rendering, `[n]` markers are turned into citation links for `sources`.
 *
 * @param sources - Web search sources that citation markers refer to.
 */
function createMarkedInstance(sources: WebSearchSource[]): Marked {
  return new Marked({
    gfm: true,
    breaks: true,
    extensions: [katexBlockExtension, katexInlineExtension],
    renderer: {
      link: (href, title, text) => {
        const target = href?.replace(/>$/, "");
        return `<a href="${target}" target="_blank" rel="noreferrer">${text}</a>`;
      },
      html: (rawHtml) => escapeHTML(rawHtml),
    },
    hooks: {
      postprocess: (rendered) => addInlineCitations(rendered, sources),
    },
  });
}
/** A fenced or indented code block, pre-highlighted for display. */
type CodeToken = {
  type: "code";
  /** Language tag taken from the markdown code token. */
  lang: string;
  /** Highlighted markup produced by highlight.js. */
  code: string;
  /** The unmodified code text. */
  rawCode: string;
};
|
|
/** Any non-code markdown token, rendered to HTML. */
type TextToken = {
  type: "text";
  /** Rendered HTML, or a promise of it, exactly as returned by the markdown parser. */
  html: string | Promise<string>;
};
|
|
/**
 * Splits markdown `content` into top-level tokens and prepares each for display.
 *
 * Code tokens are highlighted with highlight.js; every other token is rendered to
 * HTML from its raw source by a parser configured for `sources`.
 *
 * @param content - Markdown text to process.
 * @param sources - Web search sources used to resolve `[n]` citation markers.
 * @returns the processed tokens, in document order.
 */
export async function processTokens(content: string, sources: WebSearchSource[]): Promise<Token[]> {
  const marked = createMarkedInstance(sources);

  const pending = marked.lexer(content).map(async (token) => {
    if (token.type !== "code") {
      // The parse result is stored as returned; it is not awaited here.
      return { type: "text" as const, html: marked.parse(token.raw) };
    }

    // Restrict auto-detection to the aliases of the declared language, if known.
    const languageSubset = hljs.getLanguage(token.lang)?.aliases;
    return {
      type: "code" as const,
      lang: token.lang,
      code: hljs.highlightAuto(token.text, languageSubset).value,
      rawCode: token.text,
    };
  });

  return await Promise.all(pending);
}
|
|
/**
 * Synchronous counterpart of `processTokens`.
 *
 * Code tokens are highlighted with highlight.js; every other token is rendered to
 * HTML from its raw source by a parser configured for `sources`.
 *
 * @param content - Markdown text to process.
 * @param sources - Web search sources used to resolve `[n]` citation markers.
 * @returns the processed tokens, in document order.
 */
export function processTokensSync(content: string, sources: WebSearchSource[]): Token[] {
  const marked = createMarkedInstance(sources);
  const processed: Token[] = [];

  for (const token of marked.lexer(content)) {
    if (token.type === "code") {
      // Restrict auto-detection to the aliases of the declared language, if known.
      const languageSubset = hljs.getLanguage(token.lang)?.aliases;
      processed.push({
        type: "code" as const,
        lang: token.lang,
        code: hljs.highlightAuto(token.text, languageSubset).value,
        rawCode: token.text,
      });
    } else {
      processed.push({ type: "text" as const, html: marked.parse(token.raw) });
    }
  }

  return processed;
}
|
|
/** A processed markdown token: either highlighted code or rendered text. */
export type Token = CodeToken | TextToken;
|
|