tfrere HF Staff committed on
Commit
34087bd
·
1 Parent(s): 8d245e0

fix: server-side bibliography formatting and citation linking

Browse files

- Add format-bibliography.ts: formats CSL-JSON entries server-side
using citation-js, independent of client-side renderedHtml
- Publisher now reads Y.Map("citations") and Y.Map("settings")
directly from the Y.Doc to build bibliography at publish time
- Inline citations are converted to clickable links pointing to
their bibliography entry (with :target highlighting)
- Supports both numeric (ieee/vancouver) and author-date styles
- Falls back to client-side renderedHtml if server formatting fails
- Add CSS for .citation-inline and .csl-entry:target

Made-with: Cursor

backend/src/publisher/format-bibliography.ts ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Server-side bibliography formatting using citation-js.
3
+ *
4
+ * Formats CSL-JSON entries into HTML bibliography, independent
5
+ * of any client-side rendering.
6
+ */
7
+
8
+ import { Cite } from "@citation-js/core";
9
+ import "@citation-js/plugin-bibtex";
10
+ import "@citation-js/plugin-csl";
11
+
12
/**
 * Render CSL-JSON entries as an HTML bibliography.
 *
 * @param entries - Records handed as-is to the citation-js `Cite` constructor.
 * @param style - Template name forwarded to citation-js as `template`
 *   (defaults to `"apa"`).
 * @returns The string produced by `Cite#format("bibliography", …)`, or `""`
 *   when `entries` is empty (citation-js is not invoked in that case).
 */
export async function formatBibliographyServer(
  entries: any[],
  style: string = "apa"
): Promise<string> {
  // Nothing to format: short-circuit before constructing a Cite instance.
  if (!entries.length) {
    return "";
  }

  const formatOptions = {
    format: "html",
    template: style,
    lang: "en-US",
  };
  const rendered = new Cite(entries).format("bibliography", formatOptions);

  return rendered as string;
}
backend/src/publisher/html-renderer.ts CHANGED
@@ -34,21 +34,31 @@ export interface PublishMeta {
34
  pdfUrl?: string;
35
  }
36
 
 
 
 
 
 
 
37
  /**
38
  * Render TipTap JSON document into a complete, self-contained HTML page.
39
  */
40
  export function renderArticleHTML(
41
  json: Record<string, unknown>,
42
  meta: PublishMeta,
43
- css: PublishCSS
 
 
44
  ): string {
45
  const extensions = getServerExtensions();
46
 
47
  // Extract bibliography HTML before generateHTML escapes it as an attribute
48
- const biblioHtml = extractBibliographyHtml(json);
 
 
49
 
50
  const bodyHtml = generateHTML(json as any, extensions);
51
- const enrichedBody = postProcess(bodyHtml, biblioHtml);
52
 
53
  const authorNames = meta.authors.map((a) => a.name);
54
  const authorsStr = authorNames.join(", ");
@@ -415,6 +425,32 @@ details[data-component="accordion"] > .accordion-content > *:last-child { margin
415
  .citation a { color: rgba(0, 0, 0, 0.6); text-decoration: underline; }
416
  .citation.short { margin-top: -4px; }
417
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
  .references-block .footer-heading { margin: 0; }
419
  .references-block ol { padding: 0 0 0 15px; }
420
  .references-block li { margin-bottom: 1em; }
@@ -443,6 +479,7 @@ details[data-component="accordion"] > .accordion-content > *:last-child { margin
443
  [data-theme="dark"] .footer { border-top-color: rgba(255, 255, 255, 0.15); color: rgba(200, 200, 200, 0.8); }
444
  [data-theme="dark"] .citation { background: rgba(255, 255, 255, 0.04); border-color: rgba(255, 255, 255, 0.15); color: rgba(200, 200, 200, 1); }
445
  [data-theme="dark"] .citation a { color: rgba(255, 255, 255, 0.75); }
 
446
  [data-theme="dark"] .footer a { color: var(--primary-color); }
447
  [data-theme="dark"] .template-credit p { color: rgba(200, 200, 200, 0.6); }
448
  [data-theme="dark"] .template-credit a { color: rgba(200, 200, 200, 0.7); border-bottom-color: rgba(255, 255, 255, 0.2); }
@@ -894,7 +931,7 @@ function extractBibliographyHtml(json: Record<string, unknown>): string {
894
  * - Transform accordion divs into <details><summary>
895
  * - Transform htmlEmbed into iframes
896
  */
897
- function postProcess(html: string, biblioHtml: string): string {
898
  let result = html;
899
 
900
  // Accordion: div[data-component="accordion"] → <details><summary>
@@ -911,15 +948,46 @@ function postProcess(html: string, biblioHtml: string): string {
911
  }
912
  );
913
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
914
  // Bibliography: replace empty placeholder with the pre-extracted HTML.
915
- // renderedHtml was cleared before generateHTML, so the attribute is now
916
- // empty and the div is clean (no > chars breaking the regex).
917
  result = result.replace(
918
  /<div([^>]*data-type="bibliography"[^>]*)><\/div>/gi,
919
  (_match, attrs: string) => {
920
  const cleanAttrs = attrs.replace(/\s*renderedhtml="[^"]*"/i, "");
921
  if (biblioHtml) {
922
- return `<div${cleanAttrs}><h2 class="bibliography-title">References</h2><div class="bibliography-content">${biblioHtml}</div></div>`;
 
 
 
 
 
923
  }
924
  return `<div${cleanAttrs}><p class="bibliography-empty">No citations</p></div>`;
925
  }
@@ -977,6 +1045,23 @@ function postProcess(html: string, biblioHtml: string): string {
977
  return result;
978
  }
979
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
980
  function escapeHtml(str: string): string {
981
  return str
982
  .replace(/&/g, "&amp;")
 
34
  pdfUrl?: string;
35
  }
36
 
37
/**
 * Citation inputs handed to the renderer alongside the document JSON.
 */
export interface CitationData {
  /**
   * Bibliography records, one per cited key that could be resolved.
   * Typed as opaque records instead of `any` so consumers must narrow a
   * field before using it; values typed `any[]` remain assignable.
   */
  entries: Record<string, unknown>[];
  /** Citation keys, de-duplicated, in the order the renderer should use. */
  orderedKeys: string[];
  /** Citation style name (for example "apa", "ieee" or "vancouver"). */
  style: string;
}
42
+
43
  /**
44
  * Render TipTap JSON document into a complete, self-contained HTML page.
45
  */
46
  export function renderArticleHTML(
47
  json: Record<string, unknown>,
48
  meta: PublishMeta,
49
+ css: PublishCSS,
50
+ citationData?: CitationData,
51
+ serverBiblioHtml?: string
52
  ): string {
53
  const extensions = getServerExtensions();
54
 
55
  // Extract bibliography HTML before generateHTML escapes it as an attribute
56
+ // (fallback to client-side renderedHtml if server formatting not available)
57
+ const clientBiblioHtml = extractBibliographyHtml(json);
58
+ const biblioHtml = serverBiblioHtml || clientBiblioHtml;
59
 
60
  const bodyHtml = generateHTML(json as any, extensions);
61
+ const enrichedBody = postProcess(bodyHtml, biblioHtml, citationData);
62
 
63
  const authorNames = meta.authors.map((a) => a.name);
64
  const authorsStr = authorNames.join(", ");
 
425
  .citation a { color: rgba(0, 0, 0, 0.6); text-decoration: underline; }
426
  .citation.short { margin-top: -4px; }
427
 
428
+ .citation-inline {
429
+ color: var(--primary-color, #958df1);
430
+ text-decoration: none;
431
+ text-decoration-line: underline;
432
+ text-decoration-color: transparent;
433
+ text-underline-offset: 2px;
434
+ cursor: pointer;
435
+ transition: text-decoration-color 0.15s;
436
+ }
437
+ .citation-inline:hover {
438
+ text-decoration-color: currentColor;
439
+ }
440
+
441
+ .bibliography-content .csl-entry {
442
+ margin-bottom: 0.75em;
443
+ padding-left: 1.5em;
444
+ text-indent: -1.5em;
445
+ font-size: 0.9em;
446
+ line-height: 1.5;
447
+ }
448
+ .bibliography-content .csl-entry:target {
449
+ background: rgba(149, 141, 241, 0.1);
450
+ border-radius: 4px;
451
+ padding: 4px 4px 4px 1.5em;
452
+ }
453
+
454
  .references-block .footer-heading { margin: 0; }
455
  .references-block ol { padding: 0 0 0 15px; }
456
  .references-block li { margin-bottom: 1em; }
 
479
  [data-theme="dark"] .footer { border-top-color: rgba(255, 255, 255, 0.15); color: rgba(200, 200, 200, 0.8); }
480
  [data-theme="dark"] .citation { background: rgba(255, 255, 255, 0.04); border-color: rgba(255, 255, 255, 0.15); color: rgba(200, 200, 200, 1); }
481
  [data-theme="dark"] .citation a { color: rgba(255, 255, 255, 0.75); }
482
+ [data-theme="dark"] .bibliography-content .csl-entry:target { background: rgba(149, 141, 241, 0.15); }
483
  [data-theme="dark"] .footer a { color: var(--primary-color); }
484
  [data-theme="dark"] .template-credit p { color: rgba(200, 200, 200, 0.6); }
485
  [data-theme="dark"] .template-credit a { color: rgba(200, 200, 200, 0.7); border-bottom-color: rgba(255, 255, 255, 0.2); }
 
931
  * - Transform accordion divs into <details><summary>
932
  * - Transform htmlEmbed into iframes
933
  */
934
+ function postProcess(html: string, biblioHtml: string, citationData?: CitationData): string {
935
  let result = html;
936
 
937
  // Accordion: div[data-component="accordion"] → <details><summary>
 
948
  }
949
  );
950
 
951
+ // Inline citations: replace <span data-type="citation" ...>label</span>
952
+ // with proper numbered links pointing to bibliography entries.
953
+ const NUMERIC_STYLES = new Set(["ieee", "vancouver"]);
954
+ const isNumeric = citationData ? NUMERIC_STYLES.has(citationData.style) : false;
955
+ const citationKeyOrder: string[] = [];
956
+
957
+ result = result.replace(
958
+ /<span[^>]*data-type="citation"[^>]*>[\s\S]*?<\/span>/g,
959
+ (match) => {
960
+ const keyMatch = match.match(/(?:\skey="|data-key=")([^"]*)"/);
961
+ if (!keyMatch) return match;
962
+ const key = keyMatch[1];
963
+
964
+ if (!citationKeyOrder.includes(key)) citationKeyOrder.push(key);
965
+ const idx = citationKeyOrder.indexOf(key) + 1;
966
+
967
+ // Extract the label text from the original HTML
968
+ const labelMatch = match.match(/>([^<]*)<\/span>/);
969
+ const originalLabel = labelMatch ? labelMatch[1] : `[${key}]`;
970
+
971
+ // For numeric styles, always use number; for author-date, keep original label
972
+ const displayLabel = isNumeric ? `[${idx}]` : originalLabel;
973
+
974
+ return `<a href="#ref-${escapeHtml(key)}" class="citation-inline" id="cite-${escapeHtml(key)}-${idx}" title="${escapeHtml(key)}">${displayLabel}</a>`;
975
+ }
976
+ );
977
+
978
  // Bibliography: replace empty placeholder with the pre-extracted HTML.
979
+ // Add id anchors to bibliography entries for citation linking.
 
980
  result = result.replace(
981
  /<div([^>]*data-type="bibliography"[^>]*)><\/div>/gi,
982
  (_match, attrs: string) => {
983
  const cleanAttrs = attrs.replace(/\s*renderedhtml="[^"]*"/i, "");
984
  if (biblioHtml) {
985
+ // Wrap bibliography HTML with entry anchors
986
+ let enrichedBiblio = biblioHtml;
987
+ if (citationData && citationData.orderedKeys.length > 0) {
988
+ enrichedBiblio = addBibliographyAnchors(biblioHtml, citationData.orderedKeys);
989
+ }
990
+ return `<div${cleanAttrs}><h2 class="bibliography-title">References</h2><div class="bibliography-content">${enrichedBiblio}</div></div>`;
991
  }
992
  return `<div${cleanAttrs}><p class="bibliography-empty">No citations</p></div>`;
993
  }
 
1045
  return result;
1046
  }
1047
 
1048
/**
 * Add `id` anchors to bibliography entries so inline citations can link to them.
 *
 * Every `<div>` opening tag that carries `class="csl-entry"` receives an
 * `id="ref-<key>"` attribute. The key is resolved in this order:
 *
 * 1. the entry's own `data-csl-entry-id` attribute, when the markup has one.
 *    NOTE(review): citation-js HTML output is expected to emit this attribute
 *    (placed before `class`) — confirm against the installed version. Using it
 *    keeps anchors correct when the style sorts the bibliography differently
 *    from document order, or when a cited key has no entry.
 * 2. `orderedKeys[n]` for the n-th entry (HTML-attribute escaped);
 * 3. `entry-<n>` when neither of the above yields a non-empty key.
 *
 * Entries that already have an `id` attribute are left untouched.
 *
 * @param html - Bibliography HTML containing `csl-entry` divs.
 * @param orderedKeys - Citation keys used as a positional fallback.
 * @returns `html` with an `id` added to each matching entry div.
 */
function addBibliographyAnchors(html: string, orderedKeys: string[]): string {
  // Keys from the document model may contain characters that would break
  // out of the attribute value, so escape them before interpolation.
  const escapeAttr = (value: string): string =>
    value
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");

  let position = 0;
  return html.replace(
    // Tolerate extra attributes before/after the class attribute.
    /<div\b([^>]*\sclass="csl-entry"[^>]*)>/g,
    (openTag: string, attrs: string) => {
      const index = position++;

      // Never emit a second id attribute on the same element.
      if (/\sid="/.test(attrs)) return openTag;

      // The embedded id was matched inside a quoted attribute value, so it is
      // reused verbatim; only positional fallback keys need escaping.
      const embeddedId = /\sdata-csl-entry-id="([^"]*)"/.exec(attrs)?.[1];
      const key = embeddedId || escapeAttr(orderedKeys[index] || `entry-${index}`);

      return `<div${attrs} id="ref-${key}">`;
    }
  );
}
1064
+
1065
  function escapeHtml(str: string): string {
1066
  return str
1067
  .replace(/&/g, "&amp;")
backend/src/publisher/index.ts CHANGED
@@ -10,7 +10,8 @@ import { fileURLToPath } from "url";
10
  import * as Y from "yjs";
11
  import { DATA_DIR as SHARED_DATA_DIR, docPath } from "../utils.js";
12
  import { TiptapTransformer } from "@hocuspocus/transformer";
13
- import { renderArticleHTML, type PublishMeta } from "./html-renderer.js";
 
14
  import { getServerExtensions } from "./extensions.js";
15
  import { isPdfEnabled, generatePdfAndThumbnail } from "./pdf-generator.js";
16
  import {
@@ -187,6 +188,44 @@ function extractFromYDoc(ydoc: Y.Doc): {
187
  };
188
  }
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  export interface PublishResult {
191
  htmlUrl: string | null;
192
  pdfUrl: string | null;
@@ -213,6 +252,9 @@ export async function publishDocument(docName: string, token?: string): Promise<
213
  // Extract content
214
  const { json, frontmatter, authors, affiliations } = extractFromYDoc(ydoc);
215
 
 
 
 
216
  const meta: PublishMeta = {
217
  title: (frontmatter.title as string) || docName,
218
  subtitle: (frontmatter.subtitle as string) || undefined,
@@ -243,8 +285,19 @@ export async function publishDocument(docName: string, token?: string): Promise<
243
  console.log("[publish] CSS variables length:", css.variables.length, "article length:", css.article.length);
244
  console.log("[publish] Meta:", JSON.stringify(meta));
245
 
 
 
 
 
 
 
 
 
 
 
 
246
  // First pass: generate HTML with og:image + PDF link pre-injected
247
- let html = renderArticleHTML(json, meta, css);
248
  console.log("[publish] Generated HTML length:", html.length);
249
 
250
  // Generate PDF + thumbnail from the HTML
@@ -261,7 +314,7 @@ export async function publishDocument(docName: string, token?: string): Promise<
261
  // Clear URLs from meta if generation failed
262
  if (!thumbnail) delete meta.ogImage;
263
  if (!pdf) delete meta.pdfUrl;
264
- html = renderArticleHTML(json, meta, css);
265
  }
266
  }
267
 
@@ -277,7 +330,7 @@ export async function publishDocument(docName: string, token?: string): Promise<
277
  else delete localMeta.ogImage;
278
  if (pdf) localMeta.pdfUrl = `/published/${safeDirName}/article.pdf`;
279
  else delete localMeta.pdfUrl;
280
- const localHtml = renderArticleHTML(json, localMeta, css);
281
 
282
  fsWrite(join(publishDir, "index.html"), localHtml);
283
  if (pdf) fsWrite(join(publishDir, "article.pdf"), pdf);
 
10
  import * as Y from "yjs";
11
  import { DATA_DIR as SHARED_DATA_DIR, docPath } from "../utils.js";
12
  import { TiptapTransformer } from "@hocuspocus/transformer";
13
+ import { renderArticleHTML, type PublishMeta, type CitationData } from "./html-renderer.js";
14
+ import { formatBibliographyServer } from "./format-bibliography.js";
15
  import { getServerExtensions } from "./extensions.js";
16
  import { isPdfEnabled, generatePdfAndThumbnail } from "./pdf-generator.js";
17
  import {
 
188
  };
189
  }
190
 
191
/**
 * Gather citation data for publishing from a Y.Doc and its TipTap JSON.
 *
 * - The style is read from the "settings" map under `citationStyle`,
 *   falling back to "apa" when unset or falsy.
 * - `orderedKeys` lists every `citation` node key found while walking
 *   `json.content` depth-first, keeping only the first occurrence of each.
 * - `entries` holds the "citations" map value for each ordered key that has
 *   one, with `id` set to the key. Keys without a stored entry stay in
 *   `orderedKeys` but contribute nothing to `entries`.
 *
 * @param ydoc - Document whose "citations" and "settings" maps are read.
 * @param json - TipTap JSON whose `content` array is traversed.
 * @returns The collected entries, ordered keys and style.
 */
function extractCitationsFromYDoc(ydoc: Y.Doc, json: Record<string, unknown>): CitationData {
  const citationsMap = ydoc.getMap("citations");
  const settingsMap = ydoc.getMap("settings");

  const style = (settingsMap.get("citationStyle") as string) || "apa";

  // First-seen order of citation keys across the whole node tree.
  const orderedKeys: string[] = [];
  const collectKeys = (nodes: any[]): void => {
    nodes.forEach((node) => {
      const citedKey = node.type === "citation" ? node.attrs?.key : undefined;
      if (citedKey && !orderedKeys.includes(citedKey as string)) {
        orderedKeys.push(citedKey as string);
      }
      if (Array.isArray(node.content)) collectKeys(node.content);
    });
  };

  const topLevel = (json as any)?.content;
  if (Array.isArray(topLevel)) collectKeys(topLevel);

  // Resolve each key against the shared map, skipping keys with no entry.
  const entries: any[] = orderedKeys.flatMap((key) => {
    const stored = citationsMap.get(key);
    return stored ? [{ ...(stored as any), id: key }] : [];
  });

  console.log("[publish] Citations from Y.Doc:", orderedKeys.length, "keys, style:", style);
  console.log("[publish] Citation map size:", citationsMap.size);

  return { entries, orderedKeys, style };
}
228
+
229
  export interface PublishResult {
230
  htmlUrl: string | null;
231
  pdfUrl: string | null;
 
252
  // Extract content
253
  const { json, frontmatter, authors, affiliations } = extractFromYDoc(ydoc);
254
 
255
+ // Extract citation data from Y.Map for server-side bibliography formatting
256
+ const citationData = extractCitationsFromYDoc(ydoc, json);
257
+
258
  const meta: PublishMeta = {
259
  title: (frontmatter.title as string) || docName,
260
  subtitle: (frontmatter.subtitle as string) || undefined,
 
285
  console.log("[publish] CSS variables length:", css.variables.length, "article length:", css.article.length);
286
  console.log("[publish] Meta:", JSON.stringify(meta));
287
 
288
+ // Format bibliography server-side from citation entries
289
+ let biblioHtml = "";
290
+ if (citationData.entries.length > 0) {
291
+ try {
292
+ biblioHtml = await formatBibliographyServer(citationData.entries, citationData.style);
293
+ console.log("[publish] Bibliography formatted server-side:", citationData.entries.length, "entries");
294
+ } catch (err) {
295
+ console.error("[publish] Server-side bibliography formatting failed:", err);
296
+ }
297
+ }
298
+
299
  // First pass: generate HTML with og:image + PDF link pre-injected
300
+ let html = renderArticleHTML(json, meta, css, citationData, biblioHtml);
301
  console.log("[publish] Generated HTML length:", html.length);
302
 
303
  // Generate PDF + thumbnail from the HTML
 
314
  // Clear URLs from meta if generation failed
315
  if (!thumbnail) delete meta.ogImage;
316
  if (!pdf) delete meta.pdfUrl;
317
+ html = renderArticleHTML(json, meta, css, citationData, biblioHtml);
318
  }
319
  }
320
 
 
330
  else delete localMeta.ogImage;
331
  if (pdf) localMeta.pdfUrl = `/published/${safeDirName}/article.pdf`;
332
  else delete localMeta.pdfUrl;
333
+ const localHtml = renderArticleHTML(json, localMeta, css, citationData, biblioHtml);
334
 
335
  fsWrite(join(publishDir, "index.html"), localHtml);
336
  if (pdf) fsWrite(join(publishDir, "article.pdf"), pdf);