File size: 6,476 Bytes
import type { Element as HastElement, ElementContent, Root as HastRoot } from "hast";
import { getSharedHighlighter, isSupportedLang, normalizeLang, SHIKI_THEMES } from "../../shared/shiki-config.js";
import { detectShikiLang } from "../../shared/detect-lang.js";
import type { Transformer } from "./types.js";
/**
* Syntax-highlights every `<pre><code class="language-X">` block emitted by
* the TipTap `codeBlock` node using the shared Shiki highlighter, and tags
* `<pre>` with `data-lang="X"` so the stylesheet can render the language
* label via `pre::after { content: attr(data-lang) }`.
*
* Rationale: TipTap's `generateHTML()` produces plain text inside `<code>`,
* because syntax highlighting is a view-only ProseMirror plugin that does not
* run server-side. We do the highlighting here instead of shipping the JS to
* every reader (no CDN, no FOUC, offline-safe). Using the same Shiki config
* as the editor guarantees identical supported languages and identical token
* colors in edit vs. published view.
*
* Line numbers are rendered as `<span class="code-line-num">N</span>` inserted
* as the first child of every Shiki-emitted `<span class="line">`. The editor
* uses PM widget decorations to inject the exact same markup, so a single
* stylesheet rule targets both views. This markup-based approach survives
* soft-wrap (the number sits on the first visual row of its source line) and
* avoids the pitfalls of CSS counters or PM's overlapping-decoration merging.
*/
/**
 * Turn a list of hast nodes into an HTML string.
 *
 * This is a deliberately small serializer rather than a dependency on
 * `hast-util-to-html`: only text and element nodes are emitted, and any other
 * node type in the list is silently dropped.
 *
 * Attribute handling: properties whose value is `undefined`, `null` or
 * `false` are omitted, array values are joined with a single space, and
 * everything else is stringified. Void elements are written self-closed
 * (`<br />`) and never receive children or an end tag.
 *
 * @param nodes - Sibling hast nodes to serialize, in document order.
 * @returns The concatenated HTML for all nodes.
 */
function hastToHtml(nodes: readonly ElementContent[]): string {
  const chunks: string[] = [];
  for (const node of nodes) {
    switch (node.type) {
      case "text":
        chunks.push(escapeHtmlText(node.value));
        break;
      case "element": {
        const attrs = Object.entries(node.properties || {})
          .filter(([, value]) => value !== undefined && value !== null && value !== false)
          .map(([key, value]) => {
            const text = Array.isArray(value) ? value.join(" ") : String(value);
            return ` ${propToAttr(key)}="${escapeAttr(text)}"`;
          })
          .join("");
        const openTag = `<${node.tagName}${attrs}`;
        if (isVoidElement(node.tagName)) {
          chunks.push(`${openTag} />`);
        } else {
          // Children are serialized recursively between the start and end tag.
          chunks.push(`${openTag}>${hastToHtml(node.children)}</${node.tagName}>`);
        }
        break;
      }
      default:
        // Any other node type is intentionally not serialized.
        break;
    }
  }
  return chunks.join("");
}
/**
 * Map a hast property name to the HTML attribute name written to the output.
 *
 * `className` and `htmlFor` are translated to `class` and `for`; every other
 * key is simply lower-cased (no camelCase-to-kebab conversion is performed).
 *
 * @param key - Property name as it appears on the hast node.
 * @returns The attribute name to emit.
 */
function propToAttr(key: string): string {
  switch (key) {
    case "className":
      return "class";
    case "htmlFor":
      return "for";
    default:
      return key.toLowerCase();
  }
}
/**
 * Escape a string for use as HTML text content.
 *
 * Replaces `&`, `<` and `>` with their named character references so that
 * source code containing markup-like characters is rendered literally instead
 * of being parsed as HTML when the result is assigned to `innerHTML`.
 *
 * @param s - Raw text taken from a hast text node.
 * @returns The text with `&`, `<` and `>` replaced by `&amp;`, `&lt;`, `&gt;`.
 */
function escapeHtmlText(s: string): string {
  return s.replace(/[&<>]/g, (c) => (c === "&" ? "&amp;" : c === "<" ? "&lt;" : "&gt;"));
}
/**
 * Escape a string for use inside a double-quoted HTML attribute value.
 *
 * Only `&` and `"` need replacing in that context: `&` would otherwise start
 * a character reference and `"` would terminate the attribute.
 *
 * @param s - Raw attribute value.
 * @returns The value with `&` and `"` replaced by `&amp;` and `&quot;`.
 */
function escapeAttr(s: string): string {
  return s.replace(/[&"]/g, (c) => (c === "&" ? "&amp;" : "&quot;"));
}
/**
 * Tag names of the HTML void elements: elements that can never have children
 * and must not be written with an end tag.
 */
const VOID_ELEMENTS: ReadonlySet<string> = new Set([
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "source",
  "track",
  "wbr",
]);

/**
 * Report whether a tag is an HTML void element.
 *
 * The comparison is case-sensitive; callers are expected to pass the
 * lower-case tag name.
 *
 * @param tag - Element tag name.
 * @returns `true` when the element must be serialized without an end tag.
 */
function isVoidElement(tag: string): boolean {
  return VOID_ELEMENTS.has(tag);
}
/**
 * Read the language token from a `<code>` element's `class` attribute.
 *
 * Looks for the first `language-<token>` occurrence (case-insensitive prefix,
 * token made of word characters, `+` and `-`), e.g. `class="language-python"`
 * yields `"python"`. The token is returned exactly as written, without case
 * normalization.
 *
 * @param code - The `<code>` element to inspect.
 * @returns The language token, or `""` when the attribute is missing or
 *   contains no `language-*` entry; the caller decides what to do then.
 */
function extractLang(code: Element): string {
  const classAttr = code.getAttribute("class") ?? "";
  const found = /language-([\w+-]+)/i.exec(classAttr);
  if (found === null) {
    return "";
  }
  return found[1];
}
/**
 * Add a line-number marker to every highlighted line, in place.
 *
 * Each element child whose class list contains the token `line` receives a
 * `<span class="code-line-num" aria-hidden="true">N</span>` as its new first
 * child, where N counts matching lines starting at 1. Text nodes and elements
 * without the `line` class are left untouched and do not advance the counter.
 *
 * @param codeChildren - Children of the highlighted `<code>` node; matching
 *   elements are mutated.
 */
function injectLineNumbers(codeChildren: ElementContent[]): void {
  let lineNo = 0;
  codeChildren.forEach((node) => {
    if (node.type !== "element") return;

    // The class may be stored under `class` or `className`, and as either a
    // string or an array, so normalize all of those shapes to a token list.
    const attrs = node.properties ?? {};
    const rawClass = attrs.class ?? attrs.className;
    let tokens: string[] = [];
    if (Array.isArray(rawClass)) {
      tokens = rawClass.map(String);
    } else if (typeof rawClass === "string") {
      tokens = rawClass.split(/\s+/);
    }
    if (!tokens.includes("line")) return;

    lineNo += 1;
    const marker: HastElement = {
      type: "element",
      tagName: "span",
      properties: { class: "code-line-num", "aria-hidden": "true" },
      children: [{ type: "text", value: String(lineNo) }],
    };
    node.children.unshift(marker);
  });
}
/**
 * Transformer that replaces the plain-text contents of every `pre > code`
 * block with Shiki-highlighted markup.
 *
 * For each block it: resolves the language (explicit `language-*` class
 * first, auto-detection otherwise), highlights the source, injects line-number
 * spans, writes the result back into `<code>`, adds the `shiki` class to
 * `<pre>`, sets or clears `data-lang`, and appends Shiki's inline style to any
 * style already on `<pre>`. Blocks that are empty, live inside a `mermaid`
 * `<pre>`, or fail to highlight are left unchanged.
 */
export const highlightCodeTransformer: Transformer = {
  name: "highlightCode",
  async apply(document) {
    const codeNodes = Array.from(document.querySelectorAll("pre > code"));
    if (codeNodes.length === 0) return;

    const shiki = await getSharedHighlighter();

    for (const codeNode of codeNodes) {
      const host = codeNode.parentElement;
      if (!host) continue;
      if (host.tagName.toLowerCase() !== "pre") continue;
      if (host.classList.contains("mermaid")) continue;

      const text = codeNode.textContent;
      if (!text) continue;

      // An explicit language wins; otherwise auto-detect from the source so
      // that blocks without a language class still get highlighted.
      const declared = normalizeLang(extractLang(codeNode as unknown as Element));
      const lang = declared || detectShikiLang(text);

      let tree: HastRoot;
      try {
        const grammar = isSupportedLang(lang) ? lang : "text";
        tree = shiki.codeToHast(text, {
          lang: grammar,
          themes: SHIKI_THEMES,
          defaultColor: false,
        }) as HastRoot;
      } catch {
        // Best effort: a block that cannot be highlighted stays as plain text.
        continue;
      }

      const shikiPre = tree.children.find(
        (c): c is HastElement => c.type === "element" && c.tagName === "pre",
      );
      if (!shikiPre) continue;
      const shikiCode = shikiPre.children.find(
        (c): c is HastElement => c.type === "element" && c.tagName === "code",
      );
      if (!shikiCode) continue;

      injectLineNumbers(shikiCode.children);
      codeNode.innerHTML = hastToHtml(shikiCode.children);
      host.classList.add("shiki");

      if (!isSupportedLang(lang)) {
        host.removeAttribute("data-lang");
      } else {
        host.setAttribute("data-lang", lang);
      }

      // Carry over Shiki's inline style from its own <pre>, keeping whatever
      // style the target <pre> already had in front of it.
      const themeStyle = shikiPre.properties?.style;
      if (typeof themeStyle === "string" && themeStyle) {
        const inlineStyle = host.getAttribute("style") || "";
        const merged = [inlineStyle, themeStyle].filter(Boolean).join(";");
        host.setAttribute("style", merged);
      }
    }
  },
};
|