File size: 6,476 Bytes
7843436
 
bf2abd0
7843436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf2abd0
 
 
 
 
 
7843436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import type { Element as HastElement, ElementContent, Root as HastRoot } from "hast";
import { getSharedHighlighter, isSupportedLang, normalizeLang, SHIKI_THEMES } from "../../shared/shiki-config.js";
import { detectShikiLang } from "../../shared/detect-lang.js";
import type { Transformer } from "./types.js";

/**
 * Syntax-highlights every `<pre><code class="language-X">` block emitted by
 * the TipTap `codeBlock` node using the shared Shiki highlighter, and tags
 * `<pre>` with `data-lang="X"` so the stylesheet can render the language
 * label via `pre::after { content: attr(data-lang) }`.
 *
 * Rationale: TipTap's `generateHTML()` produces plain text inside `<code>`,
 * because syntax highlighting is a view-only ProseMirror plugin that does not
 * run server-side. We do the highlighting here instead of shipping the JS to
 * every reader (no CDN, no FOUC, offline-safe). Using the same Shiki config
 * as the editor guarantees identical supported languages and identical token
 * colors in edit vs. published view.
 *
 * Line numbers are rendered as `<span class="code-line-num">N</span>` inserted
 * as the first child of every Shiki-emitted `<span class="line">`. The editor
 * uses PM widget decorations to inject the exact same markup, so a single
 * stylesheet rule targets both views. This markup-based approach survives
 * soft-wrap (the number sits on the first visual row of its source line) and
 * avoids the pitfalls of CSS counters or PM's overlapping-decoration merging.
 */

/**
 * Turn a list of hast nodes into an HTML string without pulling in
 * `hast-util-to-html`.
 *
 * Only two node kinds produce output:
 *  - text nodes are emitted with `&`, `<` and `>` escaped;
 *  - element nodes are emitted as a tag with their properties as attributes,
 *    followed (for non-void tags) by their recursively serialized children.
 * Any other node type contributes nothing.
 *
 * Properties whose value is `undefined`, `null` or `false` are omitted;
 * array values are joined with a single space.
 *
 * @param nodes - hast nodes to serialize, in document order.
 * @returns The concatenated HTML for all nodes.
 */
function hastToHtml(nodes: readonly ElementContent[]): string {
  const pieces: string[] = [];
  for (const node of nodes) {
    switch (node.type) {
      case "text":
        pieces.push(escapeHtmlText(node.value));
        break;
      case "element": {
        const attrs = Object.entries(node.properties || {})
          .filter(([, raw]) => raw !== undefined && raw !== null && raw !== false)
          .map(([name, raw]) => {
            const text = Array.isArray(raw) ? raw.join(" ") : String(raw);
            return ` ${propToAttr(name)}="${escapeAttr(text)}"`;
          })
          .join("");
        const openTag = `<${node.tagName}${attrs}`;
        if (isVoidElement(node.tagName)) {
          // Void tags are self-closed and never get children or an end tag.
          pieces.push(`${openTag} />`);
        } else {
          pieces.push(`${openTag}>${hastToHtml(node.children)}</${node.tagName}>`);
        }
        break;
      }
      default:
        // Other node kinds are intentionally skipped.
        break;
    }
  }
  return pieces.join("");
}

/**
 * Map a hast property name to the HTML attribute name used when serializing.
 *
 * `className` and `htmlFor` get their HTML spellings; every other key is
 * simply lower-cased.
 *
 * @param key - Property name as it appears on the hast element.
 * @returns The attribute name to write into the tag.
 */
function propToAttr(key: string): string {
  switch (key) {
    case "className":
      return "class";
    case "htmlFor":
      return "for";
    default:
      return key.toLowerCase();
  }
}

/**
 * Escape a string for use as HTML text content.
 *
 * Replaces `&`, `<` and `>` with their entities. The ampersand is handled
 * first so the entities produced by the later steps are not escaped again.
 *
 * @param s - Raw text.
 * @returns Text that is safe to place between tags.
 */
function escapeHtmlText(s: string): string {
  const ampersandsDone = s.replace(/&/g, "&amp;");
  const lessThanDone = ampersandsDone.replace(/</g, "&lt;");
  return lessThanDone.replace(/>/g, "&gt;");
}

/**
 * Escape a string for use inside a double-quoted HTML attribute value.
 *
 * Only `&` and `"` are rewritten; all other characters pass through as-is.
 * The ampersand is handled first so `&quot;` is not escaped a second time.
 *
 * @param s - Raw attribute value.
 * @returns Value that can be wrapped in double quotes.
 */
function escapeAttr(s: string): string {
  const ampersandsDone = s.split("&").join("&amp;");
  return ampersandsDone.split('"').join("&quot;");
}

/**
 * Tag names of the HTML void elements (elements that can have neither
 * children nor an end tag), as listed by the HTML Standard.
 */
const VOID_ELEMENT_TAGS: ReadonlySet<string> = new Set([
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "source",
  "track",
  "wbr",
]);

/**
 * Report whether a tag is an HTML void element and must therefore be
 * serialized without children or a closing tag.
 *
 * The comparison is case-sensitive and expects a lower-case tag name.
 *
 * @param tag - Element tag name.
 * @returns `true` when the tag is a void element, `false` otherwise.
 */
function isVoidElement(tag: string): boolean {
  return VOID_ELEMENT_TAGS.has(tag);
}

/**
 * Read the language name out of a `<code>` element's `class` attribute.
 *
 * Looks for the first `language-<name>` token (case-insensitive prefix),
 * where `<name>` may contain word characters, `+` and `-`.
 *
 * @param code - The `<code>` element to inspect.
 * @returns The captured language name, or `""` when the attribute is absent
 *          or contains no `language-` token.
 */
function extractLang(code: Element): string {
  const classAttr = code.getAttribute("class") ?? "";
  const found = /language-([\w+-]+)/i.exec(classAttr);
  if (found === null) {
    return "";
  }
  return found[1];
}

/**
 * Number the highlighted lines in place.
 *
 * For each element child whose class list contains the token `line`, a
 * `<span class="code-line-num" aria-hidden="true">N</span>` node is inserted
 * as that element's first child, with `N` counting up from 1. Non-element
 * children and elements without the `line` class are left untouched and do
 * not advance the counter.
 *
 * @param codeChildren - Children of the highlighted `<code>` element; the
 *                       matching entries are mutated.
 */
function injectLineNumbers(codeChildren: ElementContent[]): void {
  let lineNo = 0;
  codeChildren.forEach((node) => {
    if (node.type !== "element") return;

    // The class may live under `class` or `className`, and may be either a
    // space-separated string or an array of tokens; normalize to a token list.
    const attrs = node.properties ?? {};
    const rawClass = attrs.class ?? attrs.className;
    let tokens: string[];
    if (Array.isArray(rawClass)) {
      tokens = rawClass.map(String);
    } else if (typeof rawClass === "string") {
      tokens = rawClass.split(/\s+/);
    } else {
      tokens = [];
    }
    if (!tokens.includes("line")) return;

    lineNo += 1;
    const marker: HastElement = {
      type: "element",
      tagName: "span",
      properties: { class: "code-line-num", "aria-hidden": "true" },
      children: [{ type: "text", value: String(lineNo) }],
    };
    node.children.unshift(marker);
  });
}

/**
 * Transformer that swaps the contents of each `pre > code` element for
 * Shiki-highlighted markup with line-number spans.
 *
 * For every processed block it also adds the `shiki` class to the `<pre>`,
 * sets or removes `data-lang` depending on whether the resolved language is
 * supported, and appends Shiki's inline `style` to any style already on the
 * `<pre>`. Blocks that are empty, sit inside a `<pre class="mermaid">`, or
 * fail to highlight are left as they were.
 */
export const highlightCodeTransformer: Transformer = {
  name: "highlightCode",
  async apply(document) {
    const codeNodes = Array.from(document.querySelectorAll("pre > code"));
    if (codeNodes.length === 0) return;

    const shiki = await getSharedHighlighter();

    for (const codeNode of codeNodes) {
      // Only handle <code> directly inside a non-mermaid <pre>.
      const preNode = codeNode.parentElement;
      if (!preNode || preNode.tagName.toLowerCase() !== "pre" || preNode.classList.contains("mermaid")) {
        continue;
      }

      const text = codeNode.textContent;
      if (!text) continue;

      // Use the declared language when it normalizes to something; otherwise
      // fall back to detection from the block's content.
      const declaredLang = extractLang(codeNode as unknown as Element);
      const lang = normalizeLang(declaredLang) || detectShikiLang(text);

      let tree: HastRoot;
      try {
        tree = highlighter_codeToHast();
      } catch {
        // Best effort: a block that cannot be highlighted stays untouched.
        continue;
      }

      const highlightedPre = tree.children.find(
        (node): node is HastElement => node.type === "element" && node.tagName === "pre",
      );
      const highlightedCode = highlightedPre?.children.find(
        (node): node is HastElement => node.type === "element" && node.tagName === "code",
      );
      if (!highlightedCode) continue;

      injectLineNumbers(highlightedCode.children);
      codeNode.innerHTML = hastToHtml(highlightedCode.children);

      preNode.classList.add("shiki");
      if (!isSupportedLang(lang)) {
        preNode.removeAttribute("data-lang");
      } else {
        preNode.setAttribute("data-lang", lang);
      }

      // Carry Shiki's inline style over to the real <pre>, keeping any style
      // that was already present.
      const themeStyle = highlightedPre?.properties?.style;
      if (typeof themeStyle !== "string" || !themeStyle) continue;
      const inlineStyle = preNode.getAttribute("style") || "";
      const mergedStyle = inlineStyle ? `${inlineStyle};${themeStyle}` : themeStyle;
      preNode.setAttribute("style", mergedStyle);

      // Local helper kept next to its single use so the try block stays short.
      function highlighter_codeToHast(): HastRoot {
        return shiki.codeToHast(text as string, {
          lang: isSupportedLang(lang) ? lang : "text",
          themes: SHIKI_THEMES,
          defaultColor: false,
        }) as HastRoot;
      }
    }
  },
};