import type { Element as HastElement, ElementContent, Root as HastRoot } from "hast"; import { getSharedHighlighter, isSupportedLang, normalizeLang, SHIKI_THEMES } from "../../shared/shiki-config.js"; import { detectShikiLang } from "../../shared/detect-lang.js"; import type { Transformer } from "./types.js";
/**
* Syntax-highlights every `<pre><code>` block emitted by
* the TipTap `codeBlock` node using the shared Shiki highlighter, and tags
* `<pre>` with `data-lang="X"` so the stylesheet can render the language
* label via `pre::after { content: attr(data-lang) }`.
*
* Rationale: TipTap's `generateHTML()` produces plain text inside `<code>`,
* because syntax highlighting is a view-only ProseMirror plugin that does not
* run server-side. We do the highlighting here instead of shipping the JS to
* every reader (no CDN, no FOUC, offline-safe). Using the same Shiki config
* as the editor guarantees identical supported languages and identical token
* colors in edit vs. published view.
*
* Line numbers are rendered as `<span class="code-line-num" aria-hidden="true">N</span>` inserted
* as the first child of every Shiki-emitted `<span class="line">`. The editor
* uses PM widget decorations to inject the exact same markup, so a single
* stylesheet rule targets both views. This markup-based approach survives
* soft-wrap (the number sits on the first visual row of its source line) and
* avoids the pitfalls of CSS counters or PM's overlapping-decoration merging.
*/
/**
 * Serialize a list of hast nodes back to an HTML string.
 *
 * Hand-rolled (no dependency on `hast-util-to-html`) because Shiki already
 * produces a tree that only contains element and text nodes with trivial
 * attributes (class, style). Nodes of any other type are skipped.
 *
 * @param nodes - Sibling nodes to serialize, in order.
 * @returns The concatenated HTML for `nodes`; `""` when the list is empty.
 */
function hastToHtml(nodes: readonly ElementContent[]): string {
  let out = "";
  for (const n of nodes) {
    if (n.type === "text") {
      out += escapeHtmlText(n.value);
      continue;
    }
    // Only elements and text are serialized; everything else is dropped.
    if (n.type !== "element") continue;

    out += `<${n.tagName}`;
    const props = n.properties ?? {};
    for (const [key, value] of Object.entries(props)) {
      // Missing and `false` properties produce no attribute at all.
      if (value === undefined || value === null || value === false) continue;
      const attr = propToAttr(key);
      // Array values (e.g. a class list) are written space-separated.
      const str = Array.isArray(value) ? value.join(" ") : String(value);
      out += ` ${attr}="${escapeAttr(str)}"`;
    }

    if (isVoidElement(n.tagName)) {
      // Void elements have no children and no end tag.
      out += " />";
    } else {
      out += ">";
      out += hastToHtml(n.children);
      // The end tag needs its leading "</"; without it the element is left
      // unterminated and the tag name leaks into the output as text.
      out += `</${n.tagName}>`;
    }
  }
  return out;
}
/**
 * Map a property key to the attribute name written into the HTML.
 *
 * `className` and `htmlFor` get their HTML spellings; every other key is
 * simply lower-cased.
 *
 * @param key - Property key as found on the node's `properties` object.
 * @returns The attribute name to serialize.
 */
function propToAttr(key: string): string {
  switch (key) {
    case "className":
      return "class";
    case "htmlFor":
      return "for";
    default:
      return key.toLowerCase();
  }
}
/**
 * Escape a string for use as HTML text content.
 *
 * Replaces `&`, `<` and `>` with their character references so that source
 * code containing markup-like characters is rendered literally instead of
 * being parsed as HTML.
 *
 * @param s - Raw text.
 * @returns `s` with `&`, `<`, `>` replaced by `&amp;`, `&lt;`, `&gt;`.
 */
function escapeHtmlText(s: string): string {
  return s.replace(/[&<>]/g, (c) => (c === "&" ? "&amp;" : c === "<" ? "&lt;" : "&gt;"));
}
/**
 * Escape a string for use inside a double-quoted HTML attribute value.
 *
 * Only `&` and `"` need replacing in that context: `&` would start a
 * character reference and `"` would terminate the attribute.
 *
 * @param s - Raw attribute value.
 * @returns `s` with `&` and `"` replaced by `&amp;` and `&quot;`.
 */
function escapeAttr(s: string): string {
  return s.replace(/[&"]/g, (c) => (c === "&" ? "&amp;" : "&quot;"));
}
/**
 * Tag names the HTML standard defines as void elements: they never have
 * children and must not be given an end tag.
 */
const VOID_ELEMENT_TAGS: ReadonlySet<string> = new Set([
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "source",
  "track",
  "wbr",
]);

/**
 * Tell whether an element must be serialized without children or end tag.
 *
 * @param tag - Element tag name; compared as-is, so it is expected in
 *   lower case.
 * @returns `true` when `tag` is an HTML void element.
 */
function isVoidElement(tag: string): boolean {
  return VOID_ELEMENT_TAGS.has(tag);
}
/**
 * Read the language name out of a `<code>` element's class list.
 *
 * TipTap writes `class="language-python"`; the part after `language-` is
 * returned as written. When no such class is present the result is `""`,
 * which callers treat as "no explicit language".
 *
 * @param code - The `<code>` element to inspect.
 * @returns The language name, or `""` if none is declared.
 */
function extractLang(code: Element): string {
  const classAttr = code.getAttribute("class") || "";
  const found = /language-([\w+-]+)/i.exec(classAttr);
  return found?.[1] ?? "";
}
/**
 * Prepend `<span class="code-line-num" aria-hidden="true">N</span>` to every
 * Shiki `.line` wrapper, numbering the lines from 1 in order of appearance.
 *
 * The markup matches what the editor's widget decorations produce, so the
 * stylesheet can target a single selector in both views.
 *
 * @param codeChildren - Children of the Shiki-generated `<code>` element.
 *   Matching line elements are mutated in place; other nodes are untouched.
 */
function injectLineNumbers(codeChildren: ElementContent[]): void {
  let lineNo = 0;
  codeChildren.forEach((node) => {
    if (node.type !== "element") return;

    // Shiki's HAST stores the class as a plain `class` string (not
    // `className`); look at both in case the config changes.
    const properties = node.properties ?? {};
    const rawClass = properties.class ?? properties.className;

    let tokens: string[] = [];
    if (Array.isArray(rawClass)) {
      tokens = rawClass.map((token) => String(token));
    } else if (typeof rawClass === "string") {
      tokens = rawClass.split(/\s+/);
    }
    if (!tokens.includes("line")) return;

    lineNo += 1;
    const marker: HastElement = {
      type: "element",
      tagName: "span",
      properties: { class: "code-line-num", "aria-hidden": "true" },
      children: [{ type: "text", value: String(lineNo) }],
    };
    // The number goes first so it precedes the line's own tokens.
    node.children.unshift(marker);
  });
}
/**
 * Transformer that replaces the plain-text content of every `pre > code`
 * element with Shiki-highlighted markup.
 *
 * For each block it:
 * - skips blocks whose `<pre>` carries the `mermaid` class or whose text is
 *   empty;
 * - takes the language from the `language-*` class, falling back to
 *   `detectShikiLang` on the source text, and highlights as `"text"` when
 *   the result is not a supported language;
 * - leaves the block untouched if `codeToHast` throws or its output does not
 *   contain a `<pre><code>` pair;
 * - otherwise injects line numbers, rewrites the `<code>` contents, adds the
 *   `shiki` class to `<pre>`, sets or removes `data-lang`, and appends the
 *   inline style Shiki put on its own `<pre>`.
 */
export const highlightCodeTransformer: Transformer = {
  name: "highlightCode",
  async apply(document) {
    const codeElements = Array.from(document.querySelectorAll("pre > code"));
    if (codeElements.length === 0) return;

    // Only fetch the highlighter once we know there is work to do.
    const highlighter = await getSharedHighlighter();

    for (const codeEl of codeElements) {
      const pre = codeEl.parentElement;
      if (pre?.tagName.toLowerCase() !== "pre") continue;
      if (pre.classList.contains("mermaid")) continue;

      const source = codeEl.textContent;
      if (!source) continue;

      // Fall back to auto-detection when the block has no explicit language,
      // so language-less blocks (the common case in authored docs) still get
      // highlighted. Same logic runs in the editor for an identical result.
      const declaredLang = normalizeLang(extractLang(codeEl as unknown as Element));
      const lang = declaredLang || detectShikiLang(source);

      let tree: HastRoot;
      try {
        tree = highlighter.codeToHast(source, {
          lang: isSupportedLang(lang) ? lang : "text",
          themes: SHIKI_THEMES,
          defaultColor: false,
        }) as HastRoot;
      } catch {
        // Best effort: a block Shiki cannot process keeps its plain text.
        continue;
      }

      const shikiPre = tree.children.find(
        (node): node is HastElement => node.type === "element" && node.tagName === "pre",
      );
      if (!shikiPre) continue;
      const shikiCode = shikiPre.children.find(
        (node): node is HastElement => node.type === "element" && node.tagName === "code",
      );
      if (!shikiCode) continue;

      injectLineNumbers(shikiCode.children);
      codeEl.innerHTML = hastToHtml(shikiCode.children);
      pre.classList.add("shiki");

      if (isSupportedLang(lang)) {
        pre.setAttribute("data-lang", lang);
      } else {
        pre.removeAttribute("data-lang");
      }

      // Carry over the inline style from Shiki's <pre>, keeping whatever
      // style the original <pre> already had.
      const themeStyle = shikiPre.properties?.style;
      if (typeof themeStyle === "string" && themeStyle !== "") {
        const inlineStyle = pre.getAttribute("style") || "";
        const merged = inlineStyle ? `${inlineStyle};${themeStyle}` : themeStyle;
        pre.setAttribute("style", merged);
      }
    }
  },
};