thibaud frere committed on
Commit
2903163
·
1 Parent(s): f43302a

update references

Browse files
app/.astro/astro/content.d.ts CHANGED
@@ -1,238 +0,0 @@
1
- declare module 'astro:content' {
2
- interface Render {
3
- '.mdx': Promise<{
4
- Content: import('astro').MarkdownInstance<{}>['Content'];
5
- headings: import('astro').MarkdownHeading[];
6
- remarkPluginFrontmatter: Record<string, any>;
7
- components: import('astro').MDXInstance<{}>['components'];
8
- }>;
9
- }
10
- }
11
-
12
- declare module 'astro:content' {
13
- interface RenderResult {
14
- Content: import('astro/runtime/server/index.js').AstroComponentFactory;
15
- headings: import('astro').MarkdownHeading[];
16
- remarkPluginFrontmatter: Record<string, any>;
17
- }
18
- interface Render {
19
- '.md': Promise<RenderResult>;
20
- }
21
-
22
- export interface RenderedContent {
23
- html: string;
24
- metadata?: {
25
- imagePaths: Array<string>;
26
- [key: string]: unknown;
27
- };
28
- }
29
- }
30
-
31
- declare module 'astro:content' {
32
- type Flatten<T> = T extends { [K: string]: infer U } ? U : never;
33
-
34
- export type CollectionKey = keyof AnyEntryMap;
35
- export type CollectionEntry<C extends CollectionKey> = Flatten<AnyEntryMap[C]>;
36
-
37
- export type ContentCollectionKey = keyof ContentEntryMap;
38
- export type DataCollectionKey = keyof DataEntryMap;
39
-
40
- type AllValuesOf<T> = T extends any ? T[keyof T] : never;
41
- type ValidContentEntrySlug<C extends keyof ContentEntryMap> = AllValuesOf<
42
- ContentEntryMap[C]
43
- >['slug'];
44
-
45
- /** @deprecated Use `getEntry` instead. */
46
- export function getEntryBySlug<
47
- C extends keyof ContentEntryMap,
48
- E extends ValidContentEntrySlug<C> | (string & {}),
49
- >(
50
- collection: C,
51
- // Note that this has to accept a regular string too, for SSR
52
- entrySlug: E,
53
- ): E extends ValidContentEntrySlug<C>
54
- ? Promise<CollectionEntry<C>>
55
- : Promise<CollectionEntry<C> | undefined>;
56
-
57
- /** @deprecated Use `getEntry` instead. */
58
- export function getDataEntryById<C extends keyof DataEntryMap, E extends keyof DataEntryMap[C]>(
59
- collection: C,
60
- entryId: E,
61
- ): Promise<CollectionEntry<C>>;
62
-
63
- export function getCollection<C extends keyof AnyEntryMap, E extends CollectionEntry<C>>(
64
- collection: C,
65
- filter?: (entry: CollectionEntry<C>) => entry is E,
66
- ): Promise<E[]>;
67
- export function getCollection<C extends keyof AnyEntryMap>(
68
- collection: C,
69
- filter?: (entry: CollectionEntry<C>) => unknown,
70
- ): Promise<CollectionEntry<C>[]>;
71
-
72
- export function getEntry<
73
- C extends keyof ContentEntryMap,
74
- E extends ValidContentEntrySlug<C> | (string & {}),
75
- >(entry: {
76
- collection: C;
77
- slug: E;
78
- }): E extends ValidContentEntrySlug<C>
79
- ? Promise<CollectionEntry<C>>
80
- : Promise<CollectionEntry<C> | undefined>;
81
- export function getEntry<
82
- C extends keyof DataEntryMap,
83
- E extends keyof DataEntryMap[C] | (string & {}),
84
- >(entry: {
85
- collection: C;
86
- id: E;
87
- }): E extends keyof DataEntryMap[C]
88
- ? Promise<DataEntryMap[C][E]>
89
- : Promise<CollectionEntry<C> | undefined>;
90
- export function getEntry<
91
- C extends keyof ContentEntryMap,
92
- E extends ValidContentEntrySlug<C> | (string & {}),
93
- >(
94
- collection: C,
95
- slug: E,
96
- ): E extends ValidContentEntrySlug<C>
97
- ? Promise<CollectionEntry<C>>
98
- : Promise<CollectionEntry<C> | undefined>;
99
- export function getEntry<
100
- C extends keyof DataEntryMap,
101
- E extends keyof DataEntryMap[C] | (string & {}),
102
- >(
103
- collection: C,
104
- id: E,
105
- ): E extends keyof DataEntryMap[C]
106
- ? Promise<DataEntryMap[C][E]>
107
- : Promise<CollectionEntry<C> | undefined>;
108
-
109
- /** Resolve an array of entry references from the same collection */
110
- export function getEntries<C extends keyof ContentEntryMap>(
111
- entries: {
112
- collection: C;
113
- slug: ValidContentEntrySlug<C>;
114
- }[],
115
- ): Promise<CollectionEntry<C>[]>;
116
- export function getEntries<C extends keyof DataEntryMap>(
117
- entries: {
118
- collection: C;
119
- id: keyof DataEntryMap[C];
120
- }[],
121
- ): Promise<CollectionEntry<C>[]>;
122
-
123
- export function render<C extends keyof AnyEntryMap>(
124
- entry: AnyEntryMap[C][string],
125
- ): Promise<RenderResult>;
126
-
127
- export function reference<C extends keyof AnyEntryMap>(
128
- collection: C,
129
- ): import('astro/zod').ZodEffects<
130
- import('astro/zod').ZodString,
131
- C extends keyof ContentEntryMap
132
- ? {
133
- collection: C;
134
- slug: ValidContentEntrySlug<C>;
135
- }
136
- : {
137
- collection: C;
138
- id: keyof DataEntryMap[C];
139
- }
140
- >;
141
- // Allow generic `string` to avoid excessive type errors in the config
142
- // if `dev` is not running to update as you edit.
143
- // Invalid collection names will be caught at build time.
144
- export function reference<C extends string>(
145
- collection: C,
146
- ): import('astro/zod').ZodEffects<import('astro/zod').ZodString, never>;
147
-
148
- type ReturnTypeOrOriginal<T> = T extends (...args: any[]) => infer R ? R : T;
149
- type InferEntrySchema<C extends keyof AnyEntryMap> = import('astro/zod').infer<
150
- ReturnTypeOrOriginal<Required<ContentConfig['collections'][C]>['schema']>
151
- >;
152
-
153
- type ContentEntryMap = {
154
- "chapters": {
155
- "00_abstract.mdx": {
156
- id: "00_abstract.mdx";
157
- slug: "00_abstract";
158
- body: string;
159
- collection: "chapters";
160
- data: any
161
- } & { render(): Render[".mdx"] };
162
- "01_introduction.mdx": {
163
- id: "01_introduction.mdx";
164
- slug: "01_introduction";
165
- body: string;
166
- collection: "chapters";
167
- data: any
168
- } & { render(): Render[".mdx"] };
169
- "02_classic_robotics.mdx": {
170
- id: "02_classic_robotics.mdx";
171
- slug: "02_classic_robotics";
172
- body: string;
173
- collection: "chapters";
174
- data: any
175
- } & { render(): Render[".mdx"] };
176
- "03_reinforcement_learning.mdx": {
177
- id: "03_reinforcement_learning.mdx";
178
- slug: "03_reinforcement_learning";
179
- body: string;
180
- collection: "chapters";
181
- data: any
182
- } & { render(): Render[".mdx"] };
183
- "04_imitation_learning.mdx": {
184
- id: "04_imitation_learning.mdx";
185
- slug: "04_imitation_learning";
186
- body: string;
187
- collection: "chapters";
188
- data: any
189
- } & { render(): Render[".mdx"] };
190
- "06_next_directions.mdx": {
191
- id: "06_next_directions.mdx";
192
- slug: "06_next_directions";
193
- body: string;
194
- collection: "chapters";
195
- data: any
196
- } & { render(): Render[".mdx"] };
197
- "07_conclusions.mdx": {
198
- id: "07_conclusions.mdx";
199
- slug: "07_conclusions";
200
- body: string;
201
- collection: "chapters";
202
- data: any
203
- } & { render(): Render[".mdx"] };
204
- "A_foreword.mdx": {
205
- id: "A_foreword.mdx";
206
- slug: "a_foreword";
207
- body: string;
208
- collection: "chapters";
209
- data: any
210
- } & { render(): Render[".mdx"] };
211
- };
212
- "test": {
213
- "converted-sample.mdx": {
214
- id: "converted-sample.mdx";
215
- slug: "converted-sample";
216
- body: string;
217
- collection: "test";
218
- data: any
219
- } & { render(): Render[".mdx"] };
220
- };
221
-
222
- };
223
-
224
- type DataEntryMap = {
225
- "assets": {
226
- "data/somedata": {
227
- id: "data/somedata";
228
- collection: "assets";
229
- data: any
230
- };
231
- };
232
-
233
- };
234
-
235
- type AnyEntryMap = ContentEntryMap & DataEntryMap;
236
-
237
- export type ContentConfig = never;
238
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/astro.config.mjs CHANGED
@@ -56,7 +56,7 @@ export default defineConfig({
56
  rehypePlugins: [
57
  rehypeSlug,
58
  [rehypeAutolinkHeadings, { behavior: 'wrap' }],
59
- rehypeKatex,
60
  [rehypeCitation, {
61
  bibliography: 'src/content/bibliography.bib',
62
  linkCitations: true,
 
56
  rehypePlugins: [
57
  rehypeSlug,
58
  [rehypeAutolinkHeadings, { behavior: 'wrap' }],
59
+ [rehypeKatex, { trust: true }],
60
  [rehypeCitation, {
61
  bibliography: 'src/content/bibliography.bib',
62
  linkCitations: true,
app/scripts/latex-to-markdown/filters/equation-ids.lua ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --[[
2
+ Pandoc Lua filter to add IDs to equations using KaTeX \htmlId syntax
3
+
4
+ This filter processes display math equations and inline math that contain
5
+ \label{} commands, and wraps them with \htmlId{clean-id}{content} for KaTeX.
6
+
7
+ Requirements:
8
+ - KaTeX renderer with trust: true option
9
+ - Equations with \label{} commands in LaTeX
10
+ --]]
11
+
-- Prefixes stripped from equation labels. Lua patterns have no alternation
-- operator ("(eq|equation)" would match that text literally), so every
-- prefix is compared explicitly, longest first.
local EQUATION_LABEL_PREFIXES = { "equation:", "eq:" }

-- Turn a LaTeX label into a string that is safe to use as an HTML id.
--
-- A leading "eq:" / "equation:" prefix is dropped, every character outside
-- [A-Za-z0-9_-] (colons included) becomes a dash, runs of dashes collapse to
-- a single dash, and one leading / trailing dash is trimmed. If nothing is
-- left, the original string with its colons replaced by dashes is returned.
-- Non-string input (including nil) is returned unchanged.
function clean_identifier(id_str)
  if type(id_str) ~= "string" then
    return id_str
  end

  local clean = id_str
  for _, prefix in ipairs(EQUATION_LABEL_PREFIXES) do
    if clean:sub(1, #prefix) == prefix then
      clean = clean:sub(#prefix + 1)
      break
    end
  end

  -- The extra parentheses keep only gsub's first return value (the string)
  -- and drop the substitution count.
  clean = (clean:gsub("[^a-zA-Z0-9_%-]", "-")) -- colons and other unsafe chars
  clean = (clean:gsub("%-+", "-"))             -- collapse repeated dashes
  clean = (clean:gsub("^%-", ""))              -- trim leading dash
  clean = (clean:gsub("%-$", ""))              -- trim trailing dash

  -- Never hand back an empty identifier.
  if clean == "" then
    clean = (id_str:gsub(":", "-"))
  end

  return clean
end
33
+
-- Pandoc filter hook for Math elements (inline and display).
--
-- When the math source carries a \label{...}, the first label is converted
-- with clean_identifier, every \label{...} is removed, the equation/align
-- (starred or not) \begin/\end wrappers are stripped, and the remaining
-- source is wrapped as \htmlId{<id>}{<math>}. Elements without a label are
-- returned untouched.
function Math(el)
  local source = el.text

  local label = source:match("\\label%{([^}]+)%}")
  if not label then
    return el
  end

  local id = clean_identifier(label)

  -- Drop every \label{...}, then trim surrounding whitespace.
  local body = source:gsub("\\label%{[^}]+%}", "")
  body = body:gsub("%s*$", ""):gsub("^%s*", "")

  -- Strip the environment wrappers; "%*" is the escaped star of the
  -- unnumbered variants.
  for _, env in ipairs({ "equation", "align", "equation%*", "align%*" }) do
    body = body:gsub("\\begin%{" .. env .. "%}", ""):gsub("\\end%{" .. env .. "%}", "")
  end

  -- Removing the wrappers can expose new leading/trailing whitespace.
  body = body:gsub("%s*$", ""):gsub("^%s*", "")

  return pandoc.Math(el.mathtype, "\\htmlId{" .. id .. "}{" .. body .. "}")
end
70
+
-- Pandoc filter hook for RawInline elements holding raw LaTeX/TeX.
--
-- If the raw text contains a \label{...} and, once all labels are removed,
-- still contains a \begin{equation... or \begin{align... environment, a new
-- RawInline with the labels stripped is returned. In every other case the
-- element is returned unchanged.
--
-- NOTE: no id is attached here; the label is only removed. The identifier
-- that used to be computed from the label was never used and is gone.
function RawInline(el)
  if el.format ~= "latex" and el.format ~= "tex" then
    return el
  end

  local content = el.text
  if not content:match("\\label%{([^}]+)%}") then
    return el
  end

  -- local assignment keeps only gsub's first return value (the string)
  local stripped = content:gsub("\\label%{[^}]+%}", "")

  if stripped:match("\\begin%{equation") or stripped:match("\\begin%{align") then
    return pandoc.RawInline(el.format, stripped)
  end

  return el
end
96
+
-- Pandoc filter hook for RawBlock elements holding raw LaTeX/TeX.
--
-- If the raw text contains a \label{...}, a new RawBlock with every
-- \label{...} removed is returned; otherwise the element is returned
-- unchanged.
--
-- NOTE: no id is attached here; the label is only removed. The identifier
-- that used to be computed from the label was never used and is gone.
function RawBlock(el)
  if el.format ~= "latex" and el.format ~= "tex" then
    return el
  end

  local content = el.text
  if not content:match("\\label%{([^}]+)%}") then
    return el
  end

  -- local assignment keeps only gsub's first return value (the string)
  local stripped = content:gsub("\\label%{[^}]+%}", "")
  return pandoc.RawBlock(el.format, stripped)
end
app/scripts/latex-to-markdown/latex-converter.mjs CHANGED
@@ -6,6 +6,7 @@ import { join, dirname, basename } from 'path';
6
  import { fileURLToPath } from 'url';
7
  import { cleanBibliography } from './bib-cleaner.mjs';
8
  import { postProcessMarkdown } from './post-processor.mjs';
 
9
 
10
  const __filename = fileURLToPath(import.meta.url);
11
  const __dirname = dirname(__filename);
@@ -167,6 +168,11 @@ function preprocessLatexFile(inputFile, outputDir) {
167
  }
168
  }
169
 
 
 
 
 
 
170
  // Write the preprocessed file
171
  writeFileSync(tempFile, content);
172
  return tempFile;
@@ -226,7 +232,8 @@ export function convertLatexToMarkdown(inputFile, outputDir) {
226
  const mediaDir = join(outputDir, 'assets', 'image');
227
  ensureDirectory(mediaDir);
228
  const inputDir = dirname(inputFile);
229
- const pandocCommand = `pandoc "${preprocessedFile}" -f latex+latex_macros -t gfm+tex_math_dollars --shift-heading-level-by=1 --wrap=none ${bibOption} --extract-media="${mediaDir}" --resource-path="${inputDir}" -o "${outputFile}"`;
 
230
 
231
  console.log(` Running: ${pandocCommand}`);
232
  execSync(pandocCommand, { stdio: 'pipe' });
 
6
  import { fileURLToPath } from 'url';
7
  import { cleanBibliography } from './bib-cleaner.mjs';
8
  import { postProcessMarkdown } from './post-processor.mjs';
9
+ import { preprocessLatexReferences } from './reference-preprocessor.mjs';
10
 
11
  const __filename = fileURLToPath(import.meta.url);
12
  const __dirname = dirname(__filename);
 
168
  }
169
  }
170
 
171
+ // Apply reference preprocessing AFTER input inclusion to ensure all references are captured
172
+ console.log('🔧 Preprocessing LaTeX references for MDX compatibility...');
173
+ const referenceResult = preprocessLatexReferences(content);
174
+ content = referenceResult.content;
175
+
176
  // Write the preprocessed file
177
  writeFileSync(tempFile, content);
178
  return tempFile;
 
232
  const mediaDir = join(outputDir, 'assets', 'image');
233
  ensureDirectory(mediaDir);
234
  const inputDir = dirname(inputFile);
235
+ const equationFilterPath = join(__dirname, 'filters', 'equation-ids.lua');
236
+ const pandocCommand = `pandoc "${preprocessedFile}" -f latex+latex_macros -t gfm+tex_math_dollars --shift-heading-level-by=1 --wrap=none ${bibOption} --extract-media="${mediaDir}" --resource-path="${inputDir}" --lua-filter="${equationFilterPath}" -o "${outputFile}"`;
237
 
238
  console.log(` Running: ${pandocCommand}`);
239
  execSync(pandocCommand, { stdio: 'pipe' });
app/scripts/latex-to-markdown/output/main.md CHANGED
The diff for this file is too large to render. See raw diff
 
app/scripts/latex-to-markdown/output/main.mdx CHANGED
The diff for this file is too large to render. See raw diff
 
app/scripts/latex-to-markdown/post-processor.mjs CHANGED
@@ -197,6 +197,65 @@ function injectCodeSnippets(content, inputDir = null) {
197
  return processedContent;
198
  }
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  /**
201
  * Main post-processing function that applies all cleanup steps
202
  * @param {string} content - Raw Markdown content from Pandoc
@@ -215,6 +274,8 @@ export function postProcessMarkdown(content, inputDir = null) {
215
  processedContent = fixMathCommands(processedContent);
216
  processedContent = fixUnicodeIssues(processedContent);
217
  processedContent = fixMultilineMath(processedContent);
 
 
218
 
219
  // Inject code snippets if input directory is provided
220
  if (inputDir) {
 
197
  return processedContent;
198
  }
199
 
/**
 * Replace colons with dashes inside identifier-carrying attributes.
 *
 * Handles three attributes:
 * - `href`, but only in-page anchors (values starting with `#`), so the
 *   scheme colon of external URLs such as `https://…` or `mailto:…` is
 *   left intact;
 * - `data-reference`;
 * - `id`.
 *
 * Every colon in a matching value is replaced, not just one.
 *
 * @param {string} content - Markdown content
 * @returns {string} - Content with the attribute values rewritten
 */
function fixAllAttributes(content) {
  console.log(' 🔗 Fixing all attributes with colons...');

  let fixedCount = 0;

  // Rebuild one attribute with all colons in its value turned into dashes.
  const dashify = (name, value) => {
    fixedCount++;
    return `${name}="${value.replace(/:/g, '-')}"`;
  };

  // Each pattern requires at least one colon so untouched attributes are
  // neither rewritten nor counted.
  content = content.replace(/href="(#[^"]*:[^"]*)"/g, (_match, value) => dashify('href', value));
  content = content.replace(/data-reference="([^"]*:[^"]*)"/g, (_match, value) => dashify('data-reference', value));
  content = content.replace(/id="([^"]*:[^"]*)"/g, (_match, value) => dashify('id', value));

  if (fixedCount > 0) {
    console.log(` ✅ Fixed ${fixedCount} attribute(s) with colons`);
  }

  return content;
}
234
+
/**
 * Replace colons with dashes in bracketed link text.
 *
 * Targets anchors of the form `<a ...>[text:with:colons]</a>`. The bracketed
 * text may not contain `]`, so a match can never run from one link into a
 * later one, and every colon inside the brackets is replaced.
 *
 * @param {string} content - Markdown content
 * @returns {string} - Content with the link texts rewritten
 */
function fixLinkTextContent(content) {
  console.log(' 📝 Fixing link text content with colons...');

  let fixedCount = 0;

  const cleanedContent = content.replace(
    /<a([^>]*)>\[([^\]]*:[^\]]*)\]<\/a>/g,
    (_match, attributes, text) => {
      fixedCount++;
      return `<a${attributes}>[${text.replace(/:/g, '-')}]</a>`;
    }
  );

  if (fixedCount > 0) {
    console.log(` ✅ Fixed ${fixedCount} link text(s) with colons`);
  }

  return cleanedContent;
}
258
+
259
  /**
260
  * Main post-processing function that applies all cleanup steps
261
  * @param {string} content - Raw Markdown content from Pandoc
 
274
  processedContent = fixMathCommands(processedContent);
275
  processedContent = fixUnicodeIssues(processedContent);
276
  processedContent = fixMultilineMath(processedContent);
277
+ processedContent = fixAllAttributes(processedContent);
278
+ processedContent = fixLinkTextContent(processedContent);
279
 
280
  // Inject code snippets if input directory is provided
281
  if (inputDir) {
app/scripts/latex-to-markdown/reference-preprocessor.mjs ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * LaTeX Reference Preprocessor
5
+ *
6
+ * This module cleans up LaTeX references BEFORE Pandoc conversion to ensure
7
+ * consistent, MDX-compatible identifiers throughout the document.
8
+ *
9
+ * What it does:
10
+ * - Removes prefixes from labels: \label{sec:intro} → \label{intro}
11
+ * - Updates corresponding refs: \ref{sec:intro} → \ref{intro}
12
+ * - Handles all reference types: sec:, fig:, eq:, table:, etc.
13
+ * - Maintains consistency between labels and references
14
+ */
15
+
/**
 * Collect the identifiers used by label, reference and citation commands.
 *
 * - `labels`: arguments of `\label{...}`
 * - `refs`:   arguments of `\ref{...}`, `\eqref{...}`, `\autoref{...}`,
 *             `\pageref{...}` and `\nameref{...}`
 * - `cites`:  keys of `\cite`, `\citet`, `\citep` (optionally starred and
 *             with optional `[...]` arguments); comma-separated key lists are
 *             split and empty keys are dropped
 *
 * @param {string} content - LaTeX content
 * @returns {Object} - Object with `labels`, `refs` and `cites` Sets
 */
function extractReferences(content) {
  const references = {
    labels: new Set(),
    refs: new Set(),
    cites: new Set()
  };

  for (const [, label] of content.matchAll(/\\label\{([^}]+)\}/g)) {
    references.labels.add(label);
  }

  // \eqref & co. point at the same labels as \ref, so they are collected too.
  for (const [, target] of content.matchAll(/\\(?:eq|auto|page|name)?ref\{([^}]+)\}/g)) {
    references.refs.add(target);
  }

  // Optional arguments such as \citep[p.~3]{key} sit between the command
  // name and the key list.
  for (const [, keys] of content.matchAll(/\\cite[tp]?\*?(?:\[[^\]]*\])*\{([^}]+)\}/g)) {
    for (const key of keys.split(',')) {
      const trimmed = key.trim();
      if (trimmed) {
        references.cites.add(trimmed);
      }
    }
  }

  return references;
}
50
+
/**
 * Build the mapping from original identifiers to MDX-safe identifiers.
 *
 * For each label/ref identifier the well-known type prefix (`sec:`, `fig:`,
 * `eq:`, …) is removed, colons and any character outside `[a-zA-Z0-9_-]`
 * become dashes, repeated dashes are collapsed and leading/trailing dashes
 * are trimmed. If that leaves nothing, the identifier with its colons
 * replaced by dashes is used.
 *
 * Clean identifiers are kept unique: when the short form is already owned by
 * another identifier (e.g. `sec:intro` and `fig:intro`), the later one keeps
 * its prefix (`fig-intro`), with a numeric suffix as a last resort.
 * Identifiers that are already clean always keep their own name.
 *
 * @param {Object} references - References object from extractReferences
 * @returns {Map} - Mapping from original to clean identifiers
 */
function createCleanMapping(references) {
  const typePrefix = /^(sec|section|ch|chapter|fig|figure|eq|equation|tab|table|lst|listing|app|appendix):/i;

  const sanitize = (value) =>
    value
      .replace(/:/g, '-')
      .replace(/[^a-zA-Z0-9_-]/g, '-') // Replace any other problematic characters
      .replace(/-+/g, '-')             // Collapse multiple dashes
      .replace(/^-|-$/g, '');          // Remove leading/trailing dashes

  // Preferred clean form: prefix removed, with the non-empty fallback.
  const shortForm = (id) => sanitize(id.replace(typePrefix, '')) || id.replace(/:/g, '-');

  const identifiers = [...new Set([...references.labels, ...references.refs])];

  // Identifiers that need no change reserve their own name up front so a
  // renamed identifier can never take it over.
  const taken = new Set(identifiers.filter((id) => shortForm(id) === id));

  const mapping = new Map();
  for (const id of identifiers) {
    let cleanId = shortForm(id);

    if (cleanId !== id) {
      if (taken.has(cleanId)) {
        // Collision: fall back to the prefixed form, then to a counter.
        const base = sanitize(id) || cleanId;
        let candidate = base;
        for (let n = 2; taken.has(candidate); n++) {
          candidate = `${base}-${n}`;
        }
        cleanId = candidate;
      }
      taken.add(cleanId);
    }

    mapping.set(id, cleanId);
  }

  return mapping;
}
84
+
/**
 * Rewrite label and reference commands according to the identifier mapping.
 *
 * `\label{...}`, `\ref{...}`, `\eqref{...}`, `\autoref{...}`,
 * `\pageref{...}` and `\nameref{...}` are rewritten in a single pass over the
 * content. A command is changed only when its argument is a key of `mapping`
 * whose clean value differs from it; everything else is left as is.
 *
 * @param {string} content - Original LaTeX content
 * @param {Map} mapping - Identifier mapping (original → clean)
 * @returns {{content: string, changesCount: number}} - Cleaned LaTeX content
 *   and the number of commands that were rewritten
 */
function applyMapping(content, mapping) {
  let changesCount = 0;

  // A replacer function (not a replacement string) is used so that `$`
  // sequences inside an identifier are never read as replacement patterns.
  const cleanedContent = content.replace(
    /\\(label|(?:eq|auto|page|name)?ref)\{([^}]+)\}/g,
    (match, command, id) => {
      const clean = mapping.get(id);
      if (clean === undefined || clean === id) {
        return match;
      }
      changesCount++;
      return `\\${command}{${clean}}`;
    }
  );

  return { content: cleanedContent, changesCount };
}
118
+
/**
 * Backslash-escape every regular-expression metacharacter in a string so it
 * can be embedded literally in a RegExp source.
 * @param {string} string - String to escape
 * @returns {string} - Escaped string
 */
function escapeRegex(string) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return string.replace(metacharacters, (character) => `\\${character}`);
}
127
+
/**
 * Main preprocessing function: extract the identifiers, build the clean
 * mapping, apply it, and log a short summary.
 *
 * The summary lists up to three renamed identifiers and then the number of
 * further renamed identifiers; identifiers that did not change are not
 * counted.
 *
 * @param {string} latexContent - Original LaTeX content
 * @returns {Object} - `content` (cleaned LaTeX), `changesCount`, `mapping`
 *   (original → clean) and `references` (the extracted identifier sets)
 */
export function preprocessLatexReferences(latexContent) {
  console.log('🔧 Preprocessing LaTeX references for MDX compatibility...');

  // 1. Extract all references
  const references = extractReferences(latexContent);
  console.log(` 📊 Found: ${references.labels.size} labels, ${references.refs.size} refs`);

  // 2. Create clean mapping
  const mapping = createCleanMapping(references);

  // 3. Apply mapping
  const { content, changesCount } = applyMapping(latexContent, mapping);

  if (changesCount > 0) {
    console.log(` ✅ Cleaned ${changesCount} reference(s) for MDX compatibility`);

    // Only identifiers that actually changed are worth showing or counting.
    const maxExamples = 3;
    const renamed = [...mapping].filter(([original, clean]) => original !== clean);
    for (const [original, clean] of renamed.slice(0, maxExamples)) {
      console.log(` ${original} → ${clean}`);
    }
    if (renamed.length > maxExamples) {
      console.log(` ... and ${renamed.length - maxExamples} more`);
    }
  } else {
    console.log(' ℹ️ No reference cleanup needed');
  }

  return {
    content,
    changesCount,
    mapping,
    references
  };
}
app/src/content/article.mdx CHANGED
The diff for this file is too large to render. See raw diff