|
|
import katex from "katex"; |
|
|
import "katex/dist/contrib/mhchem.mjs"; |
|
|
import { Marked } from "marked"; |
|
|
import type { Tokens, TokenizerExtension, RendererExtension } from "marked"; |
|
|
|
|
|
/**
 * Minimal description of a cited source.
 *
 * Only `link` is required; `title` may be omitted.
 */
type SimpleSource = {
  /** URL of the source. */
  link: string;
  /** Optional human-readable title of the source. */
  title?: string;
};
|
|
import hljs from "highlight.js/lib/core"; |
|
|
import type { LanguageFn } from "highlight.js"; |
|
|
import javascript from "highlight.js/lib/languages/javascript"; |
|
|
import typescript from "highlight.js/lib/languages/typescript"; |
|
|
import json from "highlight.js/lib/languages/json"; |
|
|
import bash from "highlight.js/lib/languages/bash"; |
|
|
import shell from "highlight.js/lib/languages/shell"; |
|
|
import python from "highlight.js/lib/languages/python"; |
|
|
import go from "highlight.js/lib/languages/go"; |
|
|
import rust from "highlight.js/lib/languages/rust"; |
|
|
import java from "highlight.js/lib/languages/java"; |
|
|
import csharp from "highlight.js/lib/languages/csharp"; |
|
|
import cpp from "highlight.js/lib/languages/cpp"; |
|
|
import cLang from "highlight.js/lib/languages/c"; |
|
|
import xml from "highlight.js/lib/languages/xml"; |
|
|
import css from "highlight.js/lib/languages/css"; |
|
|
import scss from "highlight.js/lib/languages/scss"; |
|
|
import markdownLang from "highlight.js/lib/languages/markdown"; |
|
|
import yaml from "highlight.js/lib/languages/yaml"; |
|
|
import sql from "highlight.js/lib/languages/sql"; |
|
|
import plaintext from "highlight.js/lib/languages/plaintext"; |
|
|
import { parseIncompleteMarkdown } from "./parseIncompleteMarkdown"; |
|
|
import { parseMarkdownIntoBlocks } from "./parseBlocks"; |
|
|
|
|
|
/**
 * Name/grammar pairs registered with the highlight.js core build below.
 * "html" is registered with the same grammar as "xml".
 */
const bundledLanguages: [string, LanguageFn][] = [
  ["javascript", javascript],
  ["typescript", typescript],
  ["json", json],
  ["bash", bash],
  ["shell", shell],
  ["python", python],
  ["go", go],
  ["rust", rust],
  ["java", java],
  ["csharp", csharp],
  ["cpp", cpp],
  ["c", cLang],
  ["xml", xml],
  ["html", xml],
  ["css", css],
  ["scss", scss],
  ["markdown", markdownLang],
  ["yaml", yaml],
  ["sql", sql],
  ["plaintext", plaintext],
];

// Register every bundled grammar once, at module load.
for (const [name, language] of bundledLanguages) {
  hljs.registerLanguage(name, language);
}
|
|
|
|
|
/**
 * Token emitted by the block-level KaTeX tokenizer.
 */
interface katexBlockToken extends Tokens.Generic {
  /** Discriminator for this token kind. */
  type: "katexBlock";
  /** Block tokens are always rendered in display mode. */
  displayMode: true;
  /** The full matched source, delimiters included. */
  raw: string;
  /** The trimmed content between the delimiters. */
  text: string;
}
|
|
|
|
|
/**
 * Token emitted by the inline KaTeX tokenizer.
 */
interface katexInlineToken extends Tokens.Generic {
  /** Discriminator for this token kind. */
  type: "katexInline";
  /** Inline tokens are never rendered in display mode. */
  displayMode: false;
  /** The full matched source, delimiters included. */
  raw: string;
  /** The trimmed content between the delimiters. */
  text: string;
}
|
|
|
|
|
/**
 * Block-level marked extension that recognises `$$ … $$` and `\[ … \]`
 * and renders the enclosed text with KaTeX in display mode.
 */
export const katexBlockExtension: TokenizerExtension & RendererExtension = {
  name: "katexBlock",
  level: "block",

  /**
   * Reports where the next block-math opener (`$$` or `\[`) occurs in `src`.
   *
   * @returns The opener's index, or `-1` when `src` contains none.
   */
  start(src: string): number | undefined {
    const opener = /(\${2}|\\\[)/.exec(src);
    return opener === null ? -1 : opener.index;
  },

  /**
   * Attempts to consume a math block at the very start of `src`.
   *
   * @returns A `katexBlock` token, or `undefined` when `src` does not begin
   *   with a complete `$$ … $$` or `\[ … \]` span.
   */
  tokenizer(src: string): katexBlockToken | undefined {
    // Tried in order: the `$$ … $$` form first, then the `\[ … \]` form.
    const rules = [/^\${2}([\s\S]+?)\${2}/, /^\\\[([\s\S]+?)\\\]/];

    for (const rule of rules) {
      const found = rule.exec(src);
      if (found) {
        return {
          type: "katexBlock",
          raw: found[0],
          text: found[1].trim(),
          displayMode: true,
        };
      }
    }

    return undefined;
  },

  /**
   * Renders a `katexBlock` token to an HTML string. Tokens of any other type
   * yield `undefined`.
   */
  renderer(token) {
    if (token.type !== "katexBlock") {
      return undefined;
    }
    // throwOnError is disabled for this call.
    return katex.renderToString(token.text, {
      throwOnError: false,
      displayMode: token.displayMode,
    });
  },
};
|
|
|
|
|
/**
 * Inline marked extension that recognises `$ … $` and `\( … \)` and renders
 * the enclosed text with KaTeX in inline (non-display) mode.
 */
const katexInlineExtension: TokenizerExtension & RendererExtension = {
  name: "katexInline",
  level: "inline",

  /**
   * Reports where the next inline-math opener (`$` or `\(`) occurs in `src`.
   *
   * @returns The opener's index, or `-1` when `src` contains none.
   */
  start(src: string): number | undefined {
    const opener = /(\$|\\\()/.exec(src);
    return opener === null ? -1 : opener.index;
  },

  /**
   * Attempts to consume an inline math span at the very start of `src`.
   *
   * @returns A `katexInline` token, or `undefined` when `src` does not begin
   *   with a complete `$ … $` or `\( … \)` span.
   */
  tokenizer(src: string): katexInlineToken | undefined {
    // Tried in order: the `$ … $` form (no `$` allowed inside) first, then
    // the `\( … \)` form.
    const rules = [/^\$([^$]+?)\$/, /^\\\(([\s\S]+?)\\\)/];

    for (const rule of rules) {
      const found = rule.exec(src);
      if (found) {
        return {
          type: "katexInline",
          raw: found[0],
          text: found[1].trim(),
          displayMode: false,
        };
      }
    }

    return undefined;
  },

  /**
   * Renders a `katexInline` token to an HTML string. Tokens of any other type
   * yield `undefined`.
   */
  renderer(token) {
    if (token.type !== "katexInline") {
      return undefined;
    }
    // throwOnError is disabled for this call.
    return katex.renderToString(token.text, {
      throwOnError: false,
      displayMode: token.displayMode,
    });
  },
};
|
|
|
|
|
/**
 * Escapes the five HTML-significant characters (`<`, `>`, `&`, `"`, `'`) so
 * that `content` can be embedded in markup as inert text.
 *
 * The leading ampersand of every replacement entity is spelled with the
 * string escape `\u0026`. The runtime value is identical, but the entity text
 * can no longer be collapsed back into the raw character by tooling that
 * HTML-decodes source files, which would turn this table into a no-op.
 *
 * @param content - Raw text that may contain markup characters.
 * @returns `content` with each of the five characters replaced by its entity.
 */
function escapeHTML(content: string): string {
  const entities: Record<string, string> = {
    "<": "\u0026lt;",
    ">": "\u0026gt;",
    "&": "\u0026amp;",
    "'": "\u0026#39;",
    '"': "\u0026quot;",
  };
  // The character class and the table cover the same set, so the `??`
  // fallback only guards against the two drifting apart.
  return content.replace(/[<>&"']/g, (ch) => entities[ch] ?? ch);
}
|
|
|
|
|
/**
 * Replaces numeric citation markers such as `[3]` with a superscript link to
 * the corresponding web-search source.
 *
 * Markers are 1-based: `[n]` refers to `webSearchSources[n - 1]`. A marker is
 * left untouched when it is `[0]` or when no source exists at that position.
 *
 * @param md - Text (markdown or rendered HTML) to scan for markers.
 * @param webSearchSources - Sources the markers refer to; defaults to none.
 * @returns `md` with every resolvable marker replaced by ` <sup><a …>n</a></sup>`.
 */
function addInlineCitations(md: string, webSearchSources: SimpleSource[] = []): string {
  const linkStyle =
    "color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";

  return md.replace(/\[(\d+)\]/g, (marker: string, digits: string) => {
    const index = Number(digits);
    const source = index === 0 ? undefined : webSearchSources[index - 1];
    if (!source) {
      return marker;
    }

    // The link is placed inside a double-quoted attribute, so a literal `"`
    // would terminate it and let the rest of the link inject attributes.
    // Percent-encoding keeps the URL equivalent while closing that hole.
    const href = String(source.link).replace(/"/g, "%22");

    return ` <sup><a href="${href}" target="_blank" rel="noreferrer" style="${linkStyle}">${index}</a></sup>`;
  });
}
|
|
|
|
|
/**
 * Validates and normalises a link target before it is written into an
 * `href="…"` attribute.
 *
 * @param href - Candidate link target; may be missing.
 * @returns `undefined` when `href` is empty/missing or uses a script-capable
 *   scheme (`javascript:`, `vbscript:`, `data:text/html`). Otherwise the
 *   trimmed href with a single trailing `>` removed and every `"`
 *   percent-encoded.
 */
function sanitizeHref(href?: string | null): string | undefined {
  if (!href) return undefined;

  const trimmed = href.trim();

  // URL parsers discard leading C0 control characters/spaces and drop every
  // tab, LF and CR before resolving the scheme, so "java\tscript:" still
  // executes in a browser. Apply the same normalisation before checking.
  const schemeProbe = trimmed
    // eslint-disable-next-line no-control-regex
    .replace(/^[\u0000-\u0020]+/, "")
    .replace(/[\t\n\r]/g, "")
    .toLowerCase();

  const blockedPrefixes = ["javascript:", "vbscript:", "data:text/html"];
  if (blockedPrefixes.some((prefix) => schemeProbe.startsWith(prefix))) {
    return undefined;
  }

  // A raw `"` would close the surrounding attribute; "%22" is the equivalent
  // URL spelling and cannot break out of it.
  return trimmed.replace(/>$/, "").replace(/"/g, "%22");
}
|
|
|
|
|
/**
 * Produces highlighted HTML for a code snippet.
 *
 * Highlights with the requested language when it is registered; otherwise,
 * or if that attempt throws, falls back to automatic language detection.
 *
 * @param text - Source code to highlight.
 * @param lang - Optional language name.
 * @returns The highlighted markup string.
 */
function highlightCode(text: string, lang?: string): string {
  const autoDetect = (): string => hljs.highlightAuto(text).value;

  if (!lang || !hljs.getLanguage(lang)) {
    return autoDetect();
  }

  try {
    return hljs.highlight(text, { language: lang, ignoreIllegals: true }).value;
  } catch {
    // Best effort: a failure in the explicit grammar is not fatal, use
    // auto-detection instead.
    return autoDetect();
  }
}
|
|
|
|
|
/**
 * Builds the Marked instance used to lex and render message content.
 *
 * The instance has GFM and line breaks enabled, carries the block and inline
 * KaTeX extensions, post-processes rendered HTML with `addInlineCitations`,
 * and overrides two renderers: links go through `sanitizeHref`, and raw HTML
 * is escaped.
 *
 * @param sources - Sources passed to the citation post-processing step.
 */
function createMarkedInstance(sources: SimpleSource[]): Marked {
  return new Marked({
    gfm: true,
    breaks: true,
    extensions: [katexBlockExtension, katexInlineExtension],
    hooks: {
      postprocess: (html) => addInlineCitations(html, sources),
    },
    renderer: {
      link: (href, title, text) => {
        const safeHref = sanitizeHref(href);
        if (!safeHref) {
          // Rejected target: keep the label, escaped, but emit no anchor.
          return `<span>${escapeHTML(text ?? "")}</span>`;
        }
        return `<a href="${safeHref}" target="_blank" rel="noreferrer">${text}</a>`;
      },
      // Raw HTML in the markdown is shown as text rather than interpreted.
      html: (html) => escapeHTML(html),
    },
  });
}
|
|
/**
 * Tells whether the raw source of a code token ends with a closing fence.
 *
 * Anything that does not start with a fence (empty input, indented code) is
 * reported as closed. For fenced input, the last line must be a closing
 * fence: the same character as the opener, at least as long as the opener,
 * indented by at most three spaces, and with nothing else on the line.
 *
 * @param raw - Raw markdown of the code token, possibly still streaming.
 * @returns `true` when the block is closed or is not fenced at all.
 */
function isFencedBlockClosed(raw?: string): boolean {
  if (!raw) return true;

  // Trailing whitespace and NUL characters are ignored.
  // eslint-disable-next-line no-control-regex
  const trimmed = raw.replace(/[\s\u0000]+$/, "");

  // The opener is a run of one fence character; anything after the run
  // (for example a language name) is the info string, not part of the fence.
  const opening = /^ {0,3}(`{3,}|~{3,})/.exec(trimmed);
  if (!opening) {
    return true;
  }
  const openingFence = opening[1] ?? "";

  const lastBreak = trimmed.lastIndexOf("\n");
  if (lastBreak === -1) {
    // Only the opening line exists so far.
    return false;
  }

  const closing = /^ {0,3}(`{3,}|~{3,})$/.exec(trimmed.slice(lastBreak + 1));
  if (!closing) {
    return false;
  }
  const closingFence = closing[1] ?? "";

  return closingFence[0] === openingFence[0] && closingFence.length >= openingFence.length;
}
|
|
|
|
|
/**
 * A processed code block.
 */
type CodeToken = {
  type: "code";
  /** Language name taken from the lexer's code token. */
  lang: string;
  /** Original, unhighlighted code text. */
  rawCode: string;
  /** Highlighted markup for the code. */
  code: string;
  /** Whether the block's raw source was judged to be closed. */
  isClosed: boolean;
};
|
|
|
|
|
/**
 * A processed non-code token.
 */
type TextToken = {
  type: "text";
  /** Rendered HTML, either ready or still pending as a promise. */
  html: string | Promise<string>;
};
|
|
|
|
|
/**
 * Module-level cache of processed blocks, keyed by the string `cacheKey`
 * builds. NOTE(review): nothing visible in this file evicts entries, so the
 * map grows for the lifetime of the module — confirm that is acceptable.
 */
const blockCache: Map<string, BlockToken> = new Map();
|
|
|
|
|
/**
 * Builds the cache key for one block.
 *
 * The key has the form `<index>-<hash of blockContent>|<links joined by "|">`.
 *
 * @param index - Position of the block within its message.
 * @param blockContent - Markdown source of the block.
 * @param sources - Sources whose links take part in the key.
 */
function cacheKey(index: number, blockContent: string, sources: SimpleSource[]): string {
  const links: string[] = [];
  for (const source of sources) {
    links.push(source.link);
  }
  const blockPart = `${index}-${hashString(blockContent)}`;
  return [blockPart, links.join("|")].join("|");
}
|
|
|
|
|
/**
 * Lexes `content` and converts each top-level token into a renderable
 * `Token`, resolving rendered HTML before returning.
 *
 * Code tokens become `CodeToken`s carrying highlighted markup, the raw code
 * and a closed/open flag. Every other token is rendered to HTML from its raw
 * source.
 *
 * @param content - Markdown to process; first passed through
 *   `parseIncompleteMarkdown`.
 * @param sources - Sources handed to the Marked instance for citations.
 * @returns The processed tokens, in lexer order.
 */
export async function processTokens(content: string, sources: SimpleSource[]): Promise<Token[]> {
  const processedContent = parseIncompleteMarkdown(content);

  const marked = createMarkedInstance(sources);
  const tokens = marked.lexer(processedContent);

  return Promise.all(
    tokens.map(async (token) => {
      if (token.type === "code") {
        return {
          type: "code" as const,
          lang: token.lang,
          code: highlightCode(token.text, token.lang),
          rawCode: token.text,
          isClosed: isFencedBlockClosed(token.raw ?? ""),
        };
      }

      return {
        type: "text" as const,
        // `parse` may hand back a promise; awaiting it here means callers of
        // the async variant receive settled HTML. A plain string remains
        // valid for `TextToken.html`.
        html: await marked.parse(token.raw),
      };
    })
  );
}
|
|
|
|
|
/**
 * Synchronous counterpart of `processTokens`.
 *
 * Lexes `content` (after `parseIncompleteMarkdown`) and converts each
 * top-level token: code tokens become `CodeToken`s, everything else becomes a
 * `TextToken` whose `html` is whatever `marked.parse` returns.
 *
 * @param content - Markdown to process.
 * @param sources - Sources handed to the Marked instance for citations.
 * @returns The processed tokens, in lexer order.
 */
export function processTokensSync(content: string, sources: SimpleSource[]): Token[] {
  const processedContent = parseIncompleteMarkdown(content);
  const marked = createMarkedInstance(sources);

  const result: Token[] = [];
  for (const token of marked.lexer(processedContent)) {
    if (token.type !== "code") {
      result.push({ type: "text" as const, html: marked.parse(token.raw) });
      continue;
    }

    result.push({
      type: "code" as const,
      lang: token.lang,
      code: highlightCode(token.text, token.lang),
      rawCode: token.text,
      isClosed: isFencedBlockClosed(token.raw ?? ""),
    });
  }
  return result;
}
|
|
|
|
|
/** A processed token: either highlighted code or rendered text. */
export type Token = TextToken | CodeToken;
|
|
|
|
|
/**
 * One processed markdown block.
 */
export type BlockToken = {
  /** Identifier built from the block's index and a hash of its content. */
  id: string;
  /** Processed tokens of the block. */
  tokens: Token[];
  /** Markdown source of the block. */
  content: string;
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Computes a short, non-cryptographic hash of a string.
 *
 * Folds every UTF-16 code unit into a 32-bit integer (`hash * 31 + unit`,
 * wrapped to 32 bits at each step) and returns the absolute value in base 36.
 *
 * @param str - Text to hash.
 * @returns Lower-case base-36 digest; `"0"` for the empty string.
 */
function hashString(str: string): string {
  let acc = 0;
  let position = 0;
  while (position < str.length) {
    const unit = str.charCodeAt(position);
    // `| 0` wraps the running value to a signed 32-bit integer.
    acc = ((acc << 5) - acc + unit) | 0;
    position += 1;
  }
  return Math.abs(acc).toString(36);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Splits `content` into markdown blocks and processes each one, reusing
 * cached results where the cache key matches.
 *
 * @param content - Markdown to split with `parseMarkdownIntoBlocks`.
 * @param sources - Sources used for citations and as part of the cache key;
 *   defaults to none.
 * @returns One `BlockToken` per block, in block order.
 */
export async function processBlocks(
  content: string,
  sources: SimpleSource[] = []
): Promise<BlockToken[]> {
  const resolveBlock = async (blockContent: string, index: number): Promise<BlockToken> => {
    const key = cacheKey(index, blockContent, sources);

    const hit = blockCache.get(key);
    if (hit) {
      return hit;
    }

    const tokens = await processTokens(blockContent, sources);
    const fresh: BlockToken = {
      id: `${index}-${hashString(blockContent)}`,
      content: blockContent,
      tokens,
    };
    blockCache.set(key, fresh);
    return fresh;
  };

  const blocks = parseMarkdownIntoBlocks(content);
  return await Promise.all(blocks.map((blockContent, index) => resolveBlock(blockContent, index)));
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Synchronous counterpart of `processBlocks`.
 *
 * Splits `content` into markdown blocks and processes each one with
 * `processTokensSync`, reusing cached results where the cache key matches.
 *
 * @param content - Markdown to split with `parseMarkdownIntoBlocks`.
 * @param sources - Sources used for citations and as part of the cache key;
 *   defaults to none.
 * @returns One `BlockToken` per block, in block order.
 */
export function processBlocksSync(content: string, sources: SimpleSource[] = []): BlockToken[] {
  const blocks = parseMarkdownIntoBlocks(content);

  return blocks.map((blockContent, index) => {
    const key = cacheKey(index, blockContent, sources);

    let block = blockCache.get(key);
    if (!block) {
      const tokens = processTokensSync(blockContent, sources);
      block = {
        id: `${index}-${hashString(blockContent)}`,
        content: blockContent,
        tokens,
      };
      blockCache.set(key, block);
    }
    return block;
  });
}
|
|
|