import type { Element as HastElement, ElementContent, Root as HastRoot } from "hast";
import { getSharedHighlighter, isSupportedLang, normalizeLang, SHIKI_THEMES } from "../../shared/shiki-config.js";
import { detectShikiLang } from "../../shared/detect-lang.js";
import type { Transformer } from "./types.js";

/**
 * Syntax-highlights every `<pre><code>` block emitted by
 * the TipTap `codeBlock` node using the shared Shiki highlighter, and tags
 * `<pre>` with `data-lang="X"` so the stylesheet can render the language
 * label via `pre::after { content: attr(data-lang) }`.
 *
 * Rationale: TipTap's `generateHTML()` produces plain text inside `<code>`,
 * because syntax highlighting is a view-only ProseMirror plugin that does not
 * run server-side. We do the highlighting here instead of shipping the JS to
 * every reader (no CDN, no FOUC, offline-safe). Using the same Shiki config
 * as the editor guarantees identical supported languages and identical token
 * colors in edit vs. published view.
 *
 * Line numbers are rendered as `<span class="code-line-num" aria-hidden="true">N</span>` inserted
 * as the first child of every Shiki-emitted `<span class="line">`. The editor
 * uses PM widget decorations to inject the exact same markup, so a single
 * stylesheet rule targets both views. This markup-based approach survives
 * soft-wrap (the number sits on the first visual row of its source line) and
 * avoids the pitfalls of CSS counters or PM's overlapping-decoration merging.
 */

/**
 * Serialize a hast fragment back to an HTML string.
 *
 * Hand-rolled (no dependency on `hast-util-to-html`) because Shiki already
 * produces a tree that only contains elements + text nodes with trivial
 * attributes (class, style). Text nodes are escaped, elements are serialized
 * recursively, and any other node type is skipped.
 *
 * @param nodes - Sibling hast nodes to serialize, in order.
 * @returns The concatenated HTML for all `nodes`.
 */
function hastToHtml(nodes: readonly ElementContent[]): string {
  let out = "";
  for (const n of nodes) {
    if (n.type === "text") {
      out += escapeHtmlText(n.value);
      continue;
    }
    if (n.type !== "element") continue;

    out += `<${n.tagName}`;
    for (const [key, value] of Object.entries(n.properties ?? {})) {
      // Absent and `false` properties produce no attribute at all.
      if (value === undefined || value === null || value === false) continue;
      // Array values (e.g. a class list) are written space-separated.
      const str = Array.isArray(value) ? value.join(" ") : String(value);
      out += ` ${propToAttr(key)}="${escapeAttr(str)}"`;
    }

    if (isVoidElement(n.tagName)) {
      // Void elements have no children and must not get a closing tag.
      out += " />";
    } else {
      // Every non-void element needs its matching end tag; without it the
      // serialized markup is left unbalanced.
      out += `>${hastToHtml(n.children)}</${n.tagName}>`;
    }
  }
  return out;
}

/**
 * Translate a hast property name into the HTML attribute name to emit.
 *
 * `className` and `htmlFor` map to `class` and `for`; every other key is
 * simply lower-cased.
 *
 * @param key - Property name as found on a hast element.
 * @returns The attribute name to write into the serialized tag.
 */
function propToAttr(key: string): string {
  switch (key) {
    case "className":
      return "class";
    case "htmlFor":
      return "for";
    default:
      return key.toLowerCase();
  }
}

/**
 * Escape a string for use as HTML text content.
 *
 * Replaces `&`, `<` and `>` with their character references so that source
 * code containing markup-like characters is rendered literally instead of
 * being parsed as HTML.
 *
 * @param s - Raw text.
 * @returns `s` with `&`, `<`, `>` replaced by `&amp;`, `&lt;`, `&gt;`.
 */
function escapeHtmlText(s: string): string {
  return s.replace(/[&<>]/g, (c) => (c === "&" ? "&amp;" : c === "<" ? "&lt;" : "&gt;"));
}

/**
 * Escape a string for use inside a double-quoted HTML attribute value.
 *
 * Only `&` and `"` are replaced: those are the two characters that could
 * start a character reference or terminate the quoted value.
 *
 * @param s - Raw attribute value.
 * @returns `s` with `&` and `"` replaced by `&amp;` and `&quot;`.
 */
function escapeAttr(s: string): string {
  return s.replace(/[&"]/g, (c) => (c === "&" ? "&amp;" : "&quot;"));
}

/**
 * Tag names of the HTML void elements: elements that never have children
 * and must not be serialized with a closing tag.
 */
const HTML_VOID_ELEMENTS: ReadonlySet<string> = new Set([
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "source",
  "track",
  "wbr",
]);

/**
 * Tell whether `tag` names an HTML void element.
 *
 * The comparison is case-sensitive; lower-case tag names are expected.
 *
 * @param tag - Element tag name.
 * @returns `true` when the element must be emitted without a closing tag.
 */
function isVoidElement(tag: string): boolean {
  return HTML_VOID_ELEMENTS.has(tag);
}

/**
 * Read the language name out of a `<code>` element's class list.
 *
 * TipTap writes `class="language-python"`; the part after `language-` is
 * returned as written (the match itself is case-insensitive).
 *
 * @param code - The `<code>` element to inspect.
 * @returns The captured language name, or `""` when the class attribute is
 *   missing or carries no `language-*` token.
 */
function extractLang(code: Element): string {
  const classAttr = code.getAttribute("class") || "";
  const found = /language-([\w+-]+)/i.exec(classAttr);
  return found?.[1] ?? "";
}

/**
 * Prepend `<span class="code-line-num" aria-hidden="true">N</span>` to every
 * element in `codeChildren` whose class list contains `line` (Shiki's
 * per-line wrapper). Matches the editor's widget markup exactly so the
 * stylesheet can target a single selector in both views.
 *
 * Numbering starts at 1 and only advances on `line` elements; text nodes and
 * other elements are left untouched. The tree is modified in place.
 *
 * @param codeChildren - Children of the Shiki-generated `<code>` element.
 */
function injectLineNumbers(codeChildren: ElementContent[]): void {
  let lineNo = 0;
  for (const node of codeChildren) {
    if (node.type !== "element") continue;

    // Shiki's HAST stores the class as a plain `class` string (not
    // `className`), so look at both in case the config changes.
    const attrs = node.properties ?? {};
    const rawClass = attrs.class ?? attrs.className;

    let tokens: string[];
    if (Array.isArray(rawClass)) {
      tokens = rawClass.map(String);
    } else if (typeof rawClass === "string") {
      tokens = rawClass.split(/\s+/);
    } else {
      tokens = [];
    }
    if (!tokens.includes("line")) continue;

    lineNo += 1;
    node.children.unshift({
      type: "element",
      tagName: "span",
      properties: { class: "code-line-num", "aria-hidden": "true" },
      children: [{ type: "text", value: String(lineNo) }],
    });
  }
}

/**
 * Transformer that replaces the plain-text content of every `pre > code`
 * block with Shiki-highlighted markup, including line-number spans.
 *
 * For each highlighted block the enclosing `<pre>` gets the `shiki` class,
 * a `data-lang` attribute (removed when the language is not supported) and
 * Shiki's inline style appended to any style already present. Blocks inside
 * `pre.mermaid`, empty blocks, and blocks for which highlighting throws or
 * yields no `<code>` element are left untouched.
 */
export const highlightCodeTransformer: Transformer = {
  name: "highlightCode",
  async apply(document) {
    const codeBlocks = Array.from(document.querySelectorAll("pre > code"));
    if (codeBlocks.length === 0) return;

    // Only obtain the highlighter once we know there is something to do.
    const shiki = await getSharedHighlighter();

    for (const codeEl of codeBlocks) {
      const preEl = codeEl.parentElement;
      if (!preEl) continue;
      if (preEl.tagName.toLowerCase() !== "pre") continue;
      if (preEl.classList.contains("mermaid")) continue;

      const text = codeEl.textContent;
      if (!text) continue;

      // Fall back to auto-detection when the block has no explicit language,
      // so language-less blocks (the common case in authored docs) still get
      // highlighted. Same logic runs in the editor for an identical result.
      const declaredLang = extractLang(codeEl as unknown as Element);
      const lang = normalizeLang(declaredLang) || detectShikiLang(text);

      let tree: HastRoot;
      try {
        tree = shiki.codeToHast(text, {
          lang: isSupportedLang(lang) ? lang : "text",
          themes: SHIKI_THEMES,
          defaultColor: false,
        }) as HastRoot;
      } catch {
        // Best effort: if highlighting fails, keep the block as plain text.
        continue;
      }

      // Shiki wraps its output in <pre><code>; only the <code> children are
      // transplanted into the existing DOM element.
      const shikiPre = tree.children.find(
        (c): c is HastElement => c.type === "element" && c.tagName === "pre",
      );
      const shikiCode = shikiPre?.children.find(
        (c): c is HastElement => c.type === "element" && c.tagName === "code",
      );
      if (!shikiCode) continue;

      injectLineNumbers(shikiCode.children);
      codeEl.innerHTML = hastToHtml(shikiCode.children);

      preEl.classList.add("shiki");
      if (!isSupportedLang(lang)) {
        preEl.removeAttribute("data-lang");
      } else {
        preEl.setAttribute("data-lang", lang);
      }

      // Carry over the inline style Shiki put on its own <pre>, appending it
      // to whatever style the target <pre> already has.
      const themeStyle = shikiPre?.properties?.style;
      if (typeof themeStyle === "string" && themeStyle) {
        const currentStyle = preEl.getAttribute("style");
        preEl.setAttribute("style", currentStyle ? `${currentStyle};${themeStyle}` : themeStyle);
      }
    }
  },
};