thibaud frere committed on
Commit
5897d8c
·
1 Parent(s): 2903163

update links and highlights

Browse files
app/scripts/latex-to-markdown/input/sections/01_introduction.tex CHANGED
@@ -32,7 +32,7 @@ This tutorial is structured as follows:
32
  \begin{itemize}
33
  \item Section~\ref{sec:classical} reviews classical robotics foundations, introducing the limitations of dynamics-based approaches to robotics.
34
  \item Section~\ref{sec:learning-rl} elaborates on the limitations of dynamics-based methods, and introduces RL as a practical approach to solve robotics problems, considering its upsides and potential limitations.
35
- \item Section~\ref{sec:learning-imitation} further describes robot learning techniques that aim at solving single-task learning, leveraging BC techniques to autonomously reproduce specific expert demonstrations.
36
  \item Section~\ref{sec:learning-foundation} presents recent contributions on developing generalist models for robotics applications, by learning from large corpora of multi-task \& multi-robot data (\emph{robotics foundation models}).
37
  % \item Lastly, Section~\ref{sec:extensions} covers emerging directions in robot learning research, introducing recent works in post-training techniques for robotics foundation models, as well as recent works in world models for robotics.
38
  \end{itemize}
 
32
  \begin{itemize}
33
  \item Section~\ref{sec:classical} reviews classical robotics foundations, introducing the limitations of dynamics-based approaches to robotics.
34
  \item Section~\ref{sec:learning-rl} elaborates on the limitations of dynamics-based methods, and introduces RL as a practical approach to solve robotics problems, considering its upsides and potential limitations.
35
+ \item Section~\ref{sec:robot-imitation-learning} further describes robot learning techniques that aim at solving single-task learning, leveraging BC techniques to autonomously reproduce specific expert demonstrations.
36
  \item Section~\ref{sec:learning-foundation} presents recent contributions on developing generalist models for robotics applications, by learning from large corpora of multi-task \& multi-robot data (\emph{robotics foundation models}).
37
  % \item Lastly, Section~\ref{sec:extensions} covers emerging directions in robot learning research, introducing recent works in post-training techniques for robotics foundation models, as well as recent works in world models for robotics.
38
  \end{itemize}
app/scripts/latex-to-markdown/input/sections/04_imitation_learning.tex CHANGED
@@ -1,5 +1,5 @@
1
  \section{Robot (Imitation) Learning}
2
- \label{sec:learning-imitation}
3
 
4
  \epigraph{\emph{The best material model for a cat is another, or preferably the same cat}}{Norbert Wiener}
5
 
 
1
  \section{Robot (Imitation) Learning}
2
+ \label{sec:robot-imitation-learning}
3
 
4
  \epigraph{\emph{The best material model for a cat is another, or preferably the same cat}}{Norbert Wiener}
5
 
app/scripts/latex-to-markdown/latex-converter.mjs CHANGED
@@ -233,7 +233,7 @@ export function convertLatexToMarkdown(inputFile, outputDir) {
233
  ensureDirectory(mediaDir);
234
  const inputDir = dirname(inputFile);
235
  const equationFilterPath = join(__dirname, 'filters', 'equation-ids.lua');
236
- const pandocCommand = `pandoc "${preprocessedFile}" -f latex+latex_macros -t gfm+tex_math_dollars --shift-heading-level-by=1 --wrap=none ${bibOption} --extract-media="${mediaDir}" --resource-path="${inputDir}" --lua-filter="${equationFilterPath}" -o "${outputFile}"`;
237
 
238
  console.log(` Running: ${pandocCommand}`);
239
  execSync(pandocCommand, { stdio: 'pipe' });
 
233
  ensureDirectory(mediaDir);
234
  const inputDir = dirname(inputFile);
235
  const equationFilterPath = join(__dirname, 'filters', 'equation-ids.lua');
236
+ const pandocCommand = `pandoc "${preprocessedFile}" -f latex+latex_macros -t gfm+tex_math_dollars+raw_html --shift-heading-level-by=1 --wrap=none ${bibOption} --extract-media="${mediaDir}" --resource-path="${inputDir}" --lua-filter="${equationFilterPath}" -o "${outputFile}"`;
237
 
238
  console.log(` Running: ${pandocCommand}`);
239
  execSync(pandocCommand, { stdio: 'pipe' });
app/scripts/latex-to-markdown/mdx-converter.mjs CHANGED
@@ -323,6 +323,56 @@ function transformStyledSpans(content) {
323
  * @param {string} content - MDX content
324
  * @returns {string} - Content with transformed links
325
  */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  function transformReferenceLinks(content) {
327
  console.log(' 🔗 Transforming reference links...');
328
 
@@ -543,6 +593,8 @@ function processMdxContent(content, latexContent = '') {
543
  processedContent = transformImages(processedContent);
544
  processedContent = transformStyledSpans(processedContent);
545
  processedContent = transformReferenceLinks(processedContent);
 
 
546
  processedContent = cleanFigcaptionNewlines(processedContent);
547
 
548
  // Add component imports at the end
 
323
  * @param {string} content - MDX content
324
  * @returns {string} - Content with transformed links
325
  */
326
+ function fixHtmlEscaping(content) {
327
+ console.log(' 🔧 Fixing HTML escaping in spans...');
328
+
329
+ let fixedCount = 0;
330
+
331
+ // Fix escaped HTML in anchor spans with various escaping patterns
332
+ // Pattern 1: \<span id="..." style="..."\>\</span\>
333
+ content = content.replace(/\\<span id="([^"]*)" style="([^"]*)"\\>\\<\/span\\>/g, (match, id, style) => {
334
+ fixedCount++;
335
+ // Fix common style issues like "position- absolute;" -> "position: absolute;"
336
+ const cleanStyle = style.replace('position- absolute;', 'position: absolute;');
337
+ return `<span id="${id}" style="${cleanStyle}"></span>`;
338
+ });
339
+
340
+ // Pattern 2: \<span class="..."\>...\</span\>
341
+ content = content.replace(/\\<span class="([^"]*)"\\>([^\\]+)\\<\/span\\>/g, (match, className, text) => {
342
+ fixedCount++;
343
+ // Remove numbering like (1), (2), (3) from highlight spans
344
+ let cleanText = text;
345
+ if (className === 'highlight') {
346
+ cleanText = text.replace(/^\(\d+\)\s*/, '');
347
+ }
348
+ return `<span class="${className}">${cleanText}</span>`;
349
+ });
350
+
351
+
352
+ if (fixedCount > 0) {
353
+ console.log(` ✅ Fixed ${fixedCount} escaped span(s)`);
354
+ }
355
+
356
+ return content;
357
+ }
358
+
359
+ function cleanHighlightNumbering(content) {
360
+ console.log(' 🔢 Removing numbering from highlight spans...');
361
+
362
+ let cleanedCount = 0;
363
+ // Clean numbering from non-escaped highlight spans too
364
+ content = content.replace(/<span class="highlight">(\(\d+\)\s*)([^<]+)<\/span>/g, (match, numbering, text) => {
365
+ cleanedCount++;
366
+ return `<span class="highlight">${text}</span>`;
367
+ });
368
+
369
+ if (cleanedCount > 0) {
370
+ console.log(` ✅ Removed numbering from ${cleanedCount} highlight span(s)`);
371
+ }
372
+
373
+ return content;
374
+ }
375
+
376
  function transformReferenceLinks(content) {
377
  console.log(' 🔗 Transforming reference links...');
378
 
 
593
  processedContent = transformImages(processedContent);
594
  processedContent = transformStyledSpans(processedContent);
595
  processedContent = transformReferenceLinks(processedContent);
596
+ processedContent = fixHtmlEscaping(processedContent);
597
+ processedContent = cleanHighlightNumbering(processedContent);
598
  processedContent = cleanFigcaptionNewlines(processedContent);
599
 
600
  // Add component imports at the end
app/scripts/latex-to-markdown/old-latex-converter/README.md DELETED
@@ -1,107 +0,0 @@
1
- # Convertisseur LaTeX vers Markdown
2
-
3
- Conversion robuste de projets LaTeX complexes vers Markdown/MDX pour Astro.
4
-
5
- ## 🚀 Usage rapide
6
-
7
- ```bash
8
- # Conversion standard
9
- node scripts/latex-converter/index.mjs
10
-
11
- # Avec nettoyage du dossier de sortie
12
- node scripts/latex-converter/index.mjs --clean
13
-
14
- # Chemins personnalisés
15
- node scripts/latex-converter/index.mjs \
16
- --input=../tools/latex-to-markdown/input \
17
- --output=src/content \
18
- --clean
19
- ```
20
-
21
- ## 📁 Architecture
22
-
23
- ```
24
- scripts/latex-converter/
25
- ├── index.mjs # Point d'entrée principal
26
- ├── config.mjs # Configuration et mappings
27
- ├── preprocessor.mjs # Préprocesseur LaTeX
28
- ├── bibliography-cleaner.mjs # Nettoyeur de bibliographie
29
- ├── converter.mjs # Convertisseur principal
30
- └── README.md # Documentation
31
- ```
32
-
33
- ## 🔧 Fonctionnalités
34
-
35
- ### ✅ Ce qui est géré
36
- - **412+ commandes personnalisées** (math, text, projet-spécifique)
37
- - **Environnements custom** (`tldr`, `callout`, `finding`)
38
- - **41 figures** avec organisation par chapitre
39
- - **2247 entrées bibliographiques** avec nettoyage automatique
40
- - **Citations** et références croisées
41
- - **Structure MDX** compatible Astro
42
-
43
- ### 🛠️ Transformations automatiques
44
-
45
- #### Commandes LaTeX → Markdown
46
- ```latex
47
- \lerobot → **LeRobot**
48
- \lerobotdataset → `LeRobotDataset`
49
- \huggingface → 🤗 **Hugging Face**
50
- \eg → e.g.,
51
- \X → \mathcal{X}
52
- ```
53
-
54
- #### Environnements → Callouts
55
- ```latex
56
- \begin{tldr}
57
- Content here
58
- \end{tldr}
59
- ```
60
-
61
- ```markdown
62
- > **TL;DR**
63
- > Content here
64
- ```
65
-
66
- #### Bibliographie
67
- - `{{Title}}` → `Title` (suppression doubles accolades)
68
- - `\&` → `&` (déséchappement)
69
- - Nettoyage général du formatting
70
-
71
- ## 📊 Statistiques exemple
72
-
73
- ```
74
- ⏱️ Time: 1.02s
75
- 📄 Files: 9 sections converties
76
- 🖼️ Figures: 41 images copiées
77
- 📚 Citations: Détection automatique
78
- 🔧 Commands replaced: 34 transformations
79
- 📦 Environments processed: 4 environnements
80
- 📚 Bibliography: 159 entries, 403 fixes
81
- ```
82
-
83
- ## 🎯 Résultat
84
-
85
- Structure finale dans `src/content/`:
86
- ```
87
- src/content/
88
- ├── article.mdx # Article principal avec imports
89
- ├── bibliography.bib # Bibliographie nettoyée
90
- ├── chapters/ # Sections converties
91
- │ ├── 00_abstract.mdx
92
- │ ├── 01_introduction.mdx
93
- │ └── ...
94
- └── assets/image/ # Figures organisées
95
- ├── ch1/
96
- ├── ch2/
97
- └── ...
98
- ```
99
-
100
- ## ⚠️ Prérequis
101
-
102
- - **Pandoc** installé (`brew install pandoc`)
103
- - Node.js avec support ESM
104
-
105
- ## 🔍 Debugging
106
-
107
- Les warnings sont normaux pour les sections avec math complexe non supporté par Pandoc. Le convertisseur continue et produit un résultat utilisable.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/scripts/latex-to-markdown/old-latex-converter/bibliography-cleaner.mjs DELETED
@@ -1,123 +0,0 @@
1
- /**
2
- * Nettoyeur de bibliographie - Corrige les doubles accolades et problèmes de formatting
3
- */
4
-
5
- export class BibliographyCleaner {
6
- constructor() {
7
- this.stats = {
8
- entriesProcessed: 0,
9
- doubleAccoladesFixed: 0,
10
- escapedCharsFixed: 0,
11
- mathExpressionsFixed: 0
12
- };
13
- }
14
-
15
- cleanContent(content) {
16
- let cleaned = content;
17
-
18
- // Count entries
19
- this.stats.entriesProcessed = (content.match(/@\w+\{/g) || []).length;
20
-
21
- // Fix double accolades
22
- cleaned = this.fixDoubleAccolades(cleaned);
23
-
24
- // Fix escaped characters
25
- cleaned = this.fixEscapedCharacters(cleaned);
26
-
27
- // Fix malformed math expressions
28
- cleaned = this.fixMathExpressions(cleaned);
29
-
30
- // General cleanup
31
- cleaned = this.generalCleanup(cleaned);
32
-
33
- return cleaned;
34
- }
35
-
36
- fixDoubleAccolades(content) {
37
- let fixed = content;
38
- let fixCount = 0;
39
-
40
- fixed = fixed.replace(/\{\{([^}]+)\}\}/g, (match, inner) => {
41
- fixCount++;
42
-
43
- // Keep accolades for important terms
44
- if (/^[A-Z][A-Z0-9]*$/.test(inner) || // Acronyms like "API", "ML"
45
- /^[A-Z][a-z]*(?:\s+[A-Z][a-z]*)*$/.test(inner) || // Proper nouns
46
- inner.includes('++') || // Languages like "C++"
47
- inner.includes('$') // Math
48
- ) {
49
- return `{${inner}}`;
50
- }
51
-
52
- return inner;
53
- });
54
-
55
- this.stats.doubleAccoladesFixed = fixCount;
56
- return fixed;
57
- }
58
-
59
- fixEscapedCharacters(content) {
60
- let fixed = content;
61
- let fixCount = 0;
62
-
63
- const replacements = [
64
- [/\\&/g, '&'],
65
- [/\\\$/g, '$'],
66
- [/\\%/g, '%'],
67
- [/\\#/g, '#'],
68
- [/\\_/g, '_']
69
- ];
70
-
71
- for (const [pattern, replacement] of replacements) {
72
- const matches = fixed.match(pattern);
73
- if (matches) {
74
- fixCount += matches.length;
75
- fixed = fixed.replace(pattern, replacement);
76
- }
77
- }
78
-
79
- this.stats.escapedCharsFixed = fixCount;
80
- return fixed;
81
- }
82
-
83
- fixMathExpressions(content) {
84
- let fixed = content;
85
- let fixCount = 0;
86
-
87
- // Fix specific problematic patterns
88
- const mathFixes = [
89
- // ${$\pi_$}0$ → $\pi_0$
90
- [/\$\{\$\\pi_\$\}([0-9]+)\$/g, '$\\pi_$1$'],
91
- // ${$something$}text$ → $something_text$
92
- [/\$\{\$([^}]+)\$\}([^$]*)\$/g, '$$$1_$2$$'],
93
- // Fix other malformed patterns
94
- [/\$\{([^}]+)\}\$/g, '$$$1$$'],
95
- [/\$([^$]*)\\\$([^$]*)\$/g, '$$$1$2$$']
96
- ];
97
-
98
- for (const [pattern, replacement] of mathFixes) {
99
- const matches = fixed.match(pattern);
100
- if (matches) {
101
- fixCount += matches.length;
102
- fixed = fixed.replace(pattern, replacement);
103
- }
104
- }
105
-
106
- this.stats.mathExpressionsFixed = fixCount;
107
- return fixed;
108
- }
109
-
110
- generalCleanup(content) {
111
- let cleaned = content;
112
-
113
- // Normalize whitespace
114
- cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
115
- cleaned = cleaned.trim() + '\n';
116
-
117
- return cleaned;
118
- }
119
-
120
- getStats() {
121
- return this.stats;
122
- }
123
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/scripts/latex-to-markdown/old-latex-converter/config.mjs DELETED
@@ -1,59 +0,0 @@
1
- /**
2
- * Configuration et mappings pour la conversion LaTeX vers Markdown
3
- */
4
-
5
- export const COMMAND_MAPPINGS = {
6
- // Math shortcuts
7
- 'X': '\\mathcal{X}',
8
- 'Z': '\\mathcal{Z}',
9
- 'G': '\\mathcal{G}',
10
- 'D': '\\mathcal{D}',
11
- 'F': '\\mathcal{F}',
12
- 'R': '\\mathcal{R}',
13
-
14
- // Text commands
15
- 'eg': 'e.g.,',
16
- 'ie': 'i.e.,',
17
- 'versus': 'vs.',
18
- 'wrt': 'w.r.t.',
19
- 'etc': 'etc.',
20
-
21
- // Project-specific
22
- 'lerobot': '**LeRobot**',
23
- 'lerobotdataset': '`LeRobotDataset`',
24
- 'huggingface': '🤗 **Hugging Face**',
25
-
26
- // Functions
27
- 'qfunction': 'Q-function',
28
- 'qopt': 'Q^*'
29
- };
30
-
31
- export const ENVIRONMENT_MAPPINGS = {
32
- 'tldr': {
33
- start: '> **TL;DR**\n> ',
34
- end: '\n',
35
- type: 'callout'
36
- },
37
- 'callout': {
38
- start: '> **Note**\n> ',
39
- end: '\n',
40
- type: 'callout'
41
- },
42
- 'finding': {
43
- start: '> **🔍 Finding**: ',
44
- end: '\n',
45
- type: 'finding'
46
- }
47
- };
48
-
49
- export const PANDOC_OPTIONS = [
50
- '--from=latex',
51
- '--to=markdown',
52
- '--wrap=preserve',
53
- '--markdown-headings=atx'
54
- ];
55
-
56
- export const DEFAULT_PATHS = {
57
- input: '../tools/latex-to-markdown/input',
58
- output: 'src/content'
59
- };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/scripts/latex-to-markdown/old-latex-converter/converter.mjs DELETED
@@ -1,456 +0,0 @@
1
- /**
2
- * Convertisseur principal LaTeX vers Markdown
3
- */
4
-
5
- import { spawn } from 'node:child_process';
6
- import { promises as fs } from 'node:fs';
7
- import { resolve, dirname, basename, join } from 'node:path';
8
-
9
- import { LaTeXPreprocessor } from './preprocessor.mjs';
10
- import { RobustLaTeXPreprocessor } from './robust-preprocessor.mjs';
11
- import { BibliographyCleaner } from './bibliography-cleaner.mjs';
12
- import { DEFAULT_PATHS, PANDOC_OPTIONS } from './config.mjs';
13
-
14
- export class LaTeXConverter {
15
- constructor() {
16
- this.preprocessor = new LaTeXPreprocessor();
17
- this.robustPreprocessor = new RobustLaTeXPreprocessor();
18
- this.bibCleaner = new BibliographyCleaner();
19
- this.stats = {
20
- totalFiles: 0,
21
- totalFigures: 0,
22
- totalCitations: 0,
23
- conversionTime: 0
24
- };
25
- this.warnings = [];
26
- this.errors = [];
27
- }
28
-
29
- async convert(inputDir, outputDir, options = {}) {
30
- const startTime = Date.now();
31
-
32
- console.log('🚀 LaTeX to Markdown Converter');
33
- console.log(`📁 Input: ${inputDir}`);
34
- console.log(`📁 Output: ${outputDir}`);
35
-
36
- try {
37
- // Setup
38
- await this.setupOutput(outputDir, options.clean);
39
-
40
- // Convert sections
41
- await this.convertSections(inputDir, outputDir);
42
-
43
- // Handle assets
44
- await this.handleAssets(inputDir, outputDir);
45
-
46
- // Create main article
47
- await this.createMainArticle(outputDir);
48
-
49
- // Generate report
50
- this.stats.conversionTime = Date.now() - startTime;
51
- this.generateReport();
52
-
53
- console.log('🎉 Conversion completed successfully!');
54
- return true;
55
-
56
- } catch (error) {
57
- this.errors.push(`Conversion failed: ${error.message}`);
58
- throw error;
59
- }
60
- }
61
-
62
- async setupOutput(outputDir, clean = false) {
63
- if (clean) {
64
- console.log('🧹 Cleaning output directory...');
65
- await fs.rm(outputDir, { recursive: true, force: true });
66
- }
67
-
68
- await fs.mkdir(outputDir, { recursive: true });
69
- await fs.mkdir(join(outputDir, 'chapters'), { recursive: true });
70
- await fs.mkdir(join(outputDir, 'assets', 'image'), { recursive: true });
71
- }
72
-
73
- async convertSections(inputDir, outputDir) {
74
- console.log('\n📄 Converting sections...');
75
-
76
- const sectionsDir = join(inputDir, 'sections');
77
- const outputChaptersDir = join(outputDir, 'chapters');
78
-
79
- try {
80
- const files = await fs.readdir(sectionsDir);
81
- const texFiles = files.filter(f => f.endsWith('.tex'));
82
-
83
- for (const file of texFiles) {
84
- const inputPath = join(sectionsDir, file);
85
- const outputPath = join(outputChaptersDir, file.replace('.tex', '.mdx'));
86
-
87
- console.log(` Converting ${file}...`);
88
- await this.convertSingleFile(inputPath, outputPath);
89
- }
90
-
91
- this.stats.totalFiles = texFiles.length;
92
-
93
- } catch (error) {
94
- this.errors.push(`Section conversion failed: ${error.message}`);
95
- }
96
- }
97
-
98
- async convertSingleFile(inputPath, outputPath) {
99
- try {
100
- // Read and preprocess with robust preprocessor
101
- let content = await fs.readFile(inputPath, 'utf-8');
102
- content = this.robustPreprocessor.preprocessContent(content, basename(inputPath));
103
-
104
- // Create temp file for Pandoc
105
- const tempPath = inputPath + '.temp';
106
- await fs.writeFile(tempPath, content);
107
-
108
- // Convert with Pandoc
109
- const pandocArgs = [tempPath, '-o', outputPath, ...PANDOC_OPTIONS];
110
- await this.runPandoc(pandocArgs);
111
-
112
- // Cleanup
113
- await fs.unlink(tempPath);
114
-
115
- // Post-process
116
- await this.postProcessFile(outputPath);
117
-
118
- } catch (error) {
119
- this.warnings.push(`Failed to convert ${basename(inputPath)}: ${error.message}`);
120
- }
121
- }
122
-
123
- async runPandoc(args) {
124
- return new Promise((resolve, reject) => {
125
- const child = spawn('pandoc', args, {
126
- stdio: ['pipe', 'pipe', 'pipe'],
127
- shell: false
128
- });
129
-
130
- let stderr = '';
131
- child.stderr.on('data', (data) => {
132
- stderr += data.toString();
133
- });
134
-
135
- child.on('error', reject);
136
- child.on('exit', (code) => {
137
- if (code === 0) {
138
- resolve();
139
- } else {
140
- reject(new Error(`Pandoc failed: ${stderr}`));
141
- }
142
- });
143
- });
144
- }
145
-
146
- fixMalformedMath(content) {
147
- let fixed = content;
148
-
149
- // Fix problematic expressions like ${$\pi_$}0$
150
- fixed = fixed.replace(/\$\{\$([^$}]+)\$\}([^$]*)\$/g, '$$$1_{$2}$$');
151
-
152
- // Fix nested math delimiters
153
- fixed = fixed.replace(/\$\$([^$]*)\$([^$]*)\$([^$]*)\$\$/g, '$$$1 $2 $3$$');
154
-
155
- // Fix incomplete math expressions
156
- fixed = fixed.replace(/\$([^$]*)\{([^}]*)\$([^$]*)\$/g, '$$$1\\{$2\\}$3$$');
157
-
158
- // Fix math with unescaped braces
159
- fixed = fixed.replace(/\$([^$]*)\{([^}]*)\}([^$]*)\$/g, '$$$1\\{$2\\}$3$$');
160
-
161
- // Fix common pi expressions
162
- fixed = fixed.replace(/\$\\pi_\$([0-9]+)\$/g, '$\\pi_$1$');
163
- fixed = fixed.replace(/\$\{\\pi_\}([0-9]+)\$/g, '$\\pi_$1$');
164
-
165
- // Fix doubled dollar signs (but preserve display math)
166
- fixed = fixed.replace(/\$\$\$+/g, '$$');
167
-
168
- // Ensure proper spacing around math
169
- fixed = fixed.replace(/([a-zA-Z])\$([^$]+)\$([a-zA-Z])/g, '$1 $$$2$$ $3');
170
-
171
- return fixed;
172
- }
173
-
174
- fixMDXUrls(content) {
175
- let fixed = content;
176
-
177
- // Fix all escaped markdown that should be unescaped for MDX
178
- fixed = fixed.replace(/\\\*/g, '*');
179
- fixed = fixed.replace(/\\\[/g, '[');
180
- fixed = fixed.replace(/\\\]/g, ']');
181
- fixed = fixed.replace(/\\\(/g, '(');
182
- fixed = fixed.replace(/\\\)/g, ')');
183
- fixed = fixed.replace(/\\>/g, '>');
184
- fixed = fixed.replace(/\\!/g, '!');
185
-
186
- // Fix angle bracket URLs that are MDX-incompatible
187
- fixed = fixed.replace(/\*\*<(https?:\/\/[^>]+)>\*\*/g, '**[$1]($1)**');
188
- fixed = fixed.replace(/<(https?:\/\/[^>]+)>/g, '[$1]($1)');
189
-
190
- // Fix malformed math expressions with escaped braces
191
- fixed = fixed.replace(/\\\{/g, '{');
192
- fixed = fixed.replace(/\\\}/g, '}');
193
-
194
- // Escape all braces in math expressions for MDX compatibility
195
- fixed = fixed.replace(/\$([^$]*)\$/g, (match, mathContent) => {
196
- const escaped = mathContent.replace(/\{/g, '\\{').replace(/\}/g, '\\}');
197
- return `$${escaped}$`;
198
- });
199
-
200
- fixed = fixed.replace(/\$\$([^$]*)\$\$/g, (match, mathContent) => {
201
- const escaped = mathContent.replace(/\{/g, '\\{').replace(/\}/g, '\\}');
202
- return `$$${escaped}$$`;
203
- });
204
-
205
- // Fix Section references that are malformed
206
- fixed = fixed.replace(/Section\s+([a-zA-Z-]+:[a-zA-Z0-9-]+)\\/g, 'the referenced figure');
207
- fixed = fixed.replace(/Figure\s+Section\s+([a-zA-Z-]+:[a-zA-Z0-9-]+)\\/g, 'the referenced figure');
208
-
209
- return fixed;
210
- }
211
-
212
- async postProcessFile(filePath) {
213
- try {
214
- let content = await fs.readFile(filePath, 'utf-8');
215
-
216
- // Fix common issues
217
- content = content.replace(/\\\\#/g, '#');
218
- content = content.replace(/\\\\!/g, '!');
219
- content = content.replace(/\\\\\*/g, '*');
220
-
221
- // Fix citations
222
- content = content.replace(/\\citep\{([^}]+)\}/g, '[@$1]');
223
- content = content.replace(/\\citet\{([^}]+)\}/g, '@$1');
224
- content = content.replace(/\\cite\{([^}]+)\}/g, '[@$1]');
225
-
226
- // Remove section labels from headers
227
- content = content.replace(/^(#{1,6}.*?)\s*\{#[^}]+\}/gm, '$1');
228
-
229
- // Fix complex LaTeX references like [\[sec:xxx\]](#sec:xxx){reference-type="ref" reference="sec:xxx"}
230
- content = content.replace(/\[\\?\[([^\]]+)\\?\]\]\(#[^)]+\)\{[^}]*reference[^}]*\}/g, 'Section $1');
231
-
232
- // Fix simple references [\[ref\]](#ref)
233
- content = content.replace(/\[\\?\[([^\]]+)\\?\]\]\(#[^)]+\)/g, '$1');
234
-
235
- // Fix remaining malformed references like "Section Section sec:classical\"
236
- content = content.replace(/Section\s+Section\s+([^\\]+)\\/g, 'Section $1');
237
- content = content.replace(/Section\s+Section\s+([^\\]+)/g, 'Section $1');
238
-
239
- // Remove remaining LaTeX labels and references
240
- content = content.replace(/\\label\{[^}]+\}/g, '');
241
- content = content.replace(/\\ref\{[^}]+\}/g, '[Reference]');
242
-
243
- // Clean up section references with colons (be more specific)
244
- content = content.replace(/Section\s+sec:([a-zA-Z-]+)/g, 'the following section');
245
-
246
- // Fix broken section references that got mangled
247
- content = content.replace(/Section\s+secs[a-zA-Z]*\s+/g, 'The following section ');
248
- content = content.replace(/Section\s+sec[a-zA-Z]*\s+/g, 'The following section ');
249
-
250
- // Count citations
251
- const citations = content.match(/\[@[^\]]+\]/g) || [];
252
- this.stats.totalCitations += citations.length;
253
-
254
- // Fix malformed math expressions
255
- content = this.fixMalformedMath(content);
256
-
257
- // Fix MDX-incompatible URLs (post-pandoc)
258
- content = this.fixMDXUrls(content);
259
-
260
- // Final cleanup
261
- content = content.replace(/\n{3,}/g, '\n\n');
262
- content = content.replace(/\\texttt\{([^}]+)\}/g, '`$1`');
263
- content = content.replace(/\\textbf\{([^}]+)\}/g, '**$1**');
264
- content = content.replace(/\\emph\{([^}]+)\}/g, '*$1*');
265
- content = content.trim();
266
-
267
- await fs.writeFile(filePath, content);
268
-
269
- } catch (error) {
270
- this.warnings.push(`Post-processing failed for ${basename(filePath)}: ${error.message}`);
271
- }
272
- }
273
-
274
- async handleAssets(inputDir, outputDir) {
275
- console.log('\n🖼️ Handling assets...');
276
-
277
- // Copy figures
278
- try {
279
- const figuresInputDir = join(inputDir, 'figures');
280
- const assetsOutputDir = join(outputDir, 'assets', 'image');
281
-
282
- await this.copyDirectoryRecursive(figuresInputDir, assetsOutputDir);
283
- this.stats.totalFigures = await this.countFiles(assetsOutputDir, /\.(png|jpg|jpeg|pdf|svg)$/i);
284
-
285
- console.log(` 📊 Copied ${this.stats.totalFigures} figures`);
286
- } catch (error) {
287
- this.warnings.push(`Could not copy figures: ${error.message}`);
288
- }
289
-
290
- // Handle bibliography
291
- try {
292
- const bibPath = join(inputDir, 'main.bib');
293
- const outputBibPath = join(outputDir, 'bibliography.bib');
294
-
295
- // Copy and clean bibliography
296
- let bibContent = await fs.readFile(bibPath, 'utf-8');
297
- bibContent = this.bibCleaner.cleanContent(bibContent);
298
- await fs.writeFile(outputBibPath, bibContent);
299
-
300
- const bibStats = this.bibCleaner.getStats();
301
- console.log(` 📚 Bibliography: ${bibStats.entriesProcessed} entries, ${bibStats.doubleAccoladesFixed} fixes, ${bibStats.mathExpressionsFixed} math fixes`);
302
-
303
- } catch (error) {
304
- this.warnings.push(`Could not handle bibliography: ${error.message}`);
305
- }
306
- }
307
-
308
- async copyDirectoryRecursive(src, dest) {
309
- await fs.mkdir(dest, { recursive: true });
310
- const entries = await fs.readdir(src, { withFileTypes: true });
311
-
312
- for (const entry of entries) {
313
- const srcPath = join(src, entry.name);
314
- const destPath = join(dest, entry.name);
315
-
316
- if (entry.isDirectory()) {
317
- await this.copyDirectoryRecursive(srcPath, destPath);
318
- } else {
319
- await fs.copyFile(srcPath, destPath);
320
- }
321
- }
322
- }
323
-
324
- async countFiles(dir, pattern) {
325
- let count = 0;
326
- try {
327
- const entries = await fs.readdir(dir, { withFileTypes: true });
328
-
329
- for (const entry of entries) {
330
- if (entry.isDirectory()) {
331
- count += await this.countFiles(join(dir, entry.name), pattern);
332
- } else if (pattern.test(entry.name)) {
333
- count++;
334
- }
335
- }
336
- } catch {
337
- // Directory doesn't exist
338
- }
339
-
340
- return count;
341
- }
342
-
343
- async createMainArticle(outputDir) {
344
- console.log('\n📝 Creating main article...');
345
-
346
- try {
347
- const chaptersDir = join(outputDir, 'chapters');
348
- const files = await fs.readdir(chaptersDir);
349
- const mdxFiles = files.filter(f => f.endsWith('.mdx')).sort();
350
-
351
- const frontmatter = this.generateFrontmatter();
352
- const { imports, components } = this.generateChapterImports(mdxFiles);
353
-
354
- const articleContent = frontmatter + imports + '\n\n' + components;
355
-
356
- const articlePath = join(outputDir, 'article.mdx');
357
- await fs.writeFile(articlePath, articleContent);
358
-
359
- console.log(` 📄 Created article.mdx with ${mdxFiles.length} chapters`);
360
-
361
- } catch (error) {
362
- this.errors.push(`Failed to create main article: ${error.message}`);
363
- }
364
- }
365
-
366
- generateFrontmatter() {
367
- const now = new Date().toISOString().split('T')[0];
368
-
369
- return `---
370
- title: "Robot Learning: A Tutorial"
371
- subtitle: "From Classical Robotics to Foundation Models"
372
- description: "A comprehensive guide to modern robot learning techniques"
373
- date: "${now}"
374
- authors:
375
- - name: "Francesco Capuano"
376
- affiliations: [1, 2]
377
- - name: "Adil Zouitine"
378
- affiliations: [2]
379
- - name: "Pepijn Kooijmans"
380
- affiliations: [2]
381
- - name: "Thomas Wolf"
382
- affiliations: [2]
383
- - name: "Michel Aractingi"
384
- affiliations: [2]
385
- affiliations:
386
- - name: "École Normale Supérieure Paris-Saclay"
387
- url: "https://ens-paris-saclay.fr"
388
- - name: "Hugging Face"
389
- url: "https://huggingface.co"
390
- tags:
391
- - robotics
392
- - machine-learning
393
- - tutorial
394
- bibliography: bibliography.bib
395
- converted_from: "LaTeX"
396
- ---
397
-
398
- `;
399
- }
400
-
401
- generateChapterImports(mdxFiles) {
402
- let imports = '';
403
- let components = '';
404
-
405
- mdxFiles.forEach(file => {
406
- const sectionName = basename(file, '.mdx');
407
- const componentName = this.formatComponentName(sectionName);
408
-
409
- imports += `import ${componentName} from "./chapters/${sectionName}.mdx";\n`;
410
- components += `<${componentName} />\n\n`;
411
- });
412
-
413
- return { imports, components };
414
- }
415
-
416
- formatComponentName(sectionName) {
417
- let componentName = sectionName
418
- .split(/[_-]/)
419
- .map(part => part.charAt(0).toUpperCase() + part.slice(1))
420
- .join('');
421
-
422
- if (/^\d/.test(componentName)) {
423
- componentName = 'Chapter' + componentName;
424
- }
425
-
426
- if (componentName === 'AForword') componentName = 'Foreword';
427
- if (componentName === 'Chapter00Abstract') componentName = 'Abstract';
428
-
429
- return componentName;
430
- }
431
-
432
- generateReport() {
433
- console.log('\n📊 Conversion Report:');
434
- console.log('=====================');
435
- console.log(`⏱️ Time: ${(this.stats.conversionTime / 1000).toFixed(2)}s`);
436
- console.log(`📄 Files: ${this.stats.totalFiles}`);
437
- console.log(`🖼️ Figures: ${this.stats.totalFigures}`);
438
- console.log(`📚 Citations: ${this.stats.totalCitations}`);
439
- console.log(`⚠️ Warnings: ${this.warnings.length}`);
440
- console.log(`❌ Errors: ${this.errors.length}`);
441
-
442
- const robustStats = this.robustPreprocessor.getStats();
443
- console.log(`🔧 Commands replaced: ${robustStats.commandsReplaced}`);
444
- console.log(`📦 Environments processed: ${robustStats.environmentsProcessed}`);
445
- console.log(`🖼️ Figures processed: ${robustStats.figuresProcessed}`);
446
- console.log(`📐 Math expressions fixed: ${robustStats.mathExpressionsFixed}`);
447
-
448
- if (this.warnings.length > 0 && this.warnings.length <= 3) {
449
- console.log('\n⚠️ Warnings:');
450
- this.warnings.forEach(w => console.log(` - ${w}`));
451
- } else if (this.warnings.length > 3) {
452
- console.log(`\n⚠️ ${this.warnings.length} warnings:`);
453
- this.warnings.forEach(w => console.log(` - ${w.substring(0, 150)}...`));
454
- }
455
- }
456
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/scripts/latex-to-markdown/old-latex-converter/image-transformer.mjs DELETED
@@ -1,179 +0,0 @@
1
- /**
2
- * Transformateur d'images : Markdown → ResponsiveImage Astro
3
- * Convertit les images markdown en composants ResponsiveImage optimisés
4
- */
5
-
6
- import { promises as fs } from 'node:fs';
7
- import { dirname, basename, extname, resolve, relative } from 'node:path';
8
-
9
- export class ImageTransformer {
10
- constructor() {
11
- this.stats = {
12
- filesProcessed: 0,
13
- imagesTransformed: 0,
14
- importsAdded: 0
15
- };
16
- }
17
-
18
- async transformImagesInDirectory(contentDir) {
19
- const chaptersDir = resolve(contentDir, 'chapters');
20
-
21
- try {
22
- const files = await fs.readdir(chaptersDir);
23
- const mdxFiles = files.filter(file => file.endsWith('.mdx'));
24
-
25
- for (const file of mdxFiles) {
26
- const filePath = resolve(chaptersDir, file);
27
- await this.transformImagesInFile(filePath, contentDir);
28
- this.stats.filesProcessed++;
29
- }
30
-
31
- console.log(`📸 Image transformation completed:`);
32
- console.log(` 📄 Files processed: ${this.stats.filesProcessed}`);
33
- console.log(` 🖼️ Images transformed: ${this.stats.imagesTransformed}`);
34
- console.log(` 📦 Imports added: ${this.stats.importsAdded}`);
35
-
36
- } catch (error) {
37
- console.error('Error transforming images:', error.message);
38
- }
39
- }
40
-
41
- async transformImagesInFile(filePath, contentDir) {
42
- try {
43
- let content = await fs.readFile(filePath, 'utf-8');
44
-
45
- const imageInfo = this.extractImageInfo(content);
46
- if (imageInfo.length === 0) {
47
- return; // No images to transform
48
- }
49
-
50
- const imports = this.generateImports(imageInfo, filePath, contentDir);
51
- const transformedContent = this.transformImageReferences(content, imageInfo);
52
-
53
- // Add imports at the top of the file
54
- const finalContent = this.addImportsToFile(transformedContent, imports);
55
-
56
- await fs.writeFile(filePath, finalContent);
57
-
58
- this.stats.imagesTransformed += imageInfo.length;
59
- this.stats.importsAdded += imports.length;
60
-
61
- } catch (error) {
62
- console.error(`Error processing ${filePath}:`, error.message);
63
- }
64
- }
65
-
66
- extractImageInfo(content) {
67
- // More robust regex that handles complex alt text with brackets and parentheses
68
- const imageRegex = /!\[([^\]]*(?:\[[^\]]*\][^\]]*)*)\]\(([^)]+)\)(?:\s*(#[^\s]+))?/g;
69
- const images = [];
70
- let match;
71
-
72
- while ((match = imageRegex.exec(content)) !== null) {
73
- const [fullMatch, alt, src, id] = match;
74
-
75
- // Only process relative image paths (not external URLs)
76
- if (!src.startsWith('http') && !src.startsWith('//')) {
77
- images.push({
78
- fullMatch,
79
- alt: alt || 'Figure',
80
- src,
81
- id: id ? id.substring(1) : null, // Remove # from id
82
- variableName: this.generateVariableName(src)
83
- });
84
- }
85
- }
86
-
87
- return images;
88
- }
89
-
90
- generateVariableName(imagePath) {
91
- // Convert path to valid variable name
92
- // assets/image/ch4/ch4-bc-trajectories.png → ch4BcTrajectories
93
- const filename = basename(imagePath, extname(imagePath));
94
-
95
- return filename
96
- .replace(/[-_]/g, ' ')
97
- .replace(/\b\w/g, l => l.toUpperCase())
98
- .replace(/\s/g, '')
99
- .replace(/^\d+/, 'Fig$&'); // Prefix with Fig if starts with number
100
- }
101
-
102
- generateImports(imageInfo, filePath, contentDir) {
103
- const imports = [];
104
-
105
- // Add ResponsiveImage import
106
- imports.push("import ResponsiveImage from '../../components/ResponsiveImage.astro'");
107
-
108
- // Add image imports
109
- for (const image of imageInfo) {
110
- const relativePath = this.getRelativeImagePath(image.src, filePath, contentDir);
111
- imports.push(`import ${image.variableName} from '${relativePath}'`);
112
- }
113
-
114
- return imports;
115
- }
116
-
117
- getRelativeImagePath(imageSrc, filePath, contentDir) {
118
- // Convert absolute image path to relative from chapter file
119
- // From: chapters/04_imitation_learning.mdx
120
- // To: ../assets/image/ch4/ch4-bc-trajectories.png
121
-
122
- const chapterDir = dirname(filePath);
123
- const imageAbsolutePath = resolve(contentDir, imageSrc);
124
- const relativePath = relative(chapterDir, imageAbsolutePath);
125
-
126
- return relativePath.startsWith('.') ? relativePath : `./${relativePath}`;
127
- }
128
-
129
- transformImageReferences(content, imageInfo) {
130
- let transformed = content;
131
-
132
- for (const image of imageInfo) {
133
- const componentTag = this.generateResponsiveImageTag(image);
134
- transformed = transformed.replace(image.fullMatch, componentTag);
135
- }
136
-
137
- return transformed;
138
- }
139
-
140
- generateResponsiveImageTag(image) {
141
- const props = [
142
- `src={${image.variableName}}`,
143
- `alt="${image.alt}"`
144
- ];
145
-
146
- if (image.id) {
147
- props.push(`id="${image.id}"`);
148
- }
149
-
150
- return `<ResponsiveImage ${props.join(' ')} />`;
151
- }
152
-
153
- addImportsToFile(content, imports) {
154
- if (imports.length === 0) {
155
- return content;
156
- }
157
-
158
- // Check if there are already imports at the top
159
- const lines = content.split('\n');
160
- let insertIndex = 0;
161
-
162
- // Skip existing imports
163
- while (insertIndex < lines.length &&
164
- (lines[insertIndex].startsWith('import ') ||
165
- lines[insertIndex].trim() === '')) {
166
- insertIndex++;
167
- }
168
-
169
- // Insert imports
170
- const importBlock = imports.join('\n') + '\n\n';
171
- lines.splice(insertIndex, 0, importBlock);
172
-
173
- return lines.join('\n');
174
- }
175
-
176
- getStats() {
177
- return this.stats;
178
- }
179
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/scripts/latex-to-markdown/old-latex-converter/index.mjs DELETED
@@ -1,75 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * Point d'entrée principal pour la conversion LaTeX vers Markdown
4
- *
5
- * Usage: node scripts/latex-converter/index.mjs [--input=path] [--output=path] [--clean]
6
- */
7
-
8
- import { resolve } from 'node:path';
9
- import { spawn } from 'node:child_process';
10
- import process from 'node:process';
11
-
12
- import { LaTeXConverter } from './converter.mjs';
13
- import { ImageTransformer } from './image-transformer.mjs';
14
- import { DEFAULT_PATHS } from './config.mjs';
15
-
16
- function parseArgs(argv) {
17
- const out = {};
18
- for (const arg of argv.slice(2)) {
19
- if (!arg.startsWith('--')) continue;
20
- const [k, v] = arg.replace(/^--/, '').split('=');
21
- out[k] = v === undefined ? true : v;
22
- }
23
- return out;
24
- }
25
-
26
- async function checkPandoc() {
27
- try {
28
- const child = spawn('pandoc', ['--version'], { stdio: 'pipe' });
29
- return new Promise((resolve) => {
30
- child.on('exit', (code) => resolve(code === 0));
31
- child.on('error', () => resolve(false));
32
- });
33
- } catch {
34
- return false;
35
- }
36
- }
37
-
38
- async function main() {
39
- const cwd = process.cwd();
40
- const args = parseArgs(process.argv);
41
-
42
- // Vérifier Pandoc
43
- const hasPandoc = await checkPandoc();
44
- if (!hasPandoc) {
45
- console.error('❌ Pandoc n\'est pas installé.');
46
- console.error(' macOS: brew install pandoc');
47
- console.error(' Ubuntu: apt-get install pandoc');
48
- process.exit(1);
49
- }
50
-
51
- // Chemins
52
- const inputDir = resolve(cwd, args.input || DEFAULT_PATHS.input);
53
- const outputDir = resolve(cwd, args.output || DEFAULT_PATHS.output);
54
-
55
- try {
56
- const converter = new LaTeXConverter();
57
- await converter.convert(inputDir, outputDir, {
58
- clean: args.clean || false
59
- });
60
-
61
- // Transform images to ResponsiveImage components
62
- console.log('\n📸 Transforming images to ResponsiveImage components...');
63
- const imageTransformer = new ImageTransformer();
64
- await imageTransformer.transformImagesInDirectory(outputDir);
65
-
66
- } catch (error) {
67
- console.error('❌ Conversion échouée:', error.message);
68
- process.exit(1);
69
- }
70
- }
71
-
72
- main().catch(err => {
73
- console.error('❌ Erreur fatale:', err);
74
- process.exit(1);
75
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/scripts/latex-to-markdown/old-latex-converter/preprocessor.mjs DELETED
@@ -1,115 +0,0 @@
1
- /**
2
- * Préprocesseur LaTeX - Nettoie et simplifie le contenu LaTeX
3
- */
4
-
5
- import { COMMAND_MAPPINGS, ENVIRONMENT_MAPPINGS } from './config.mjs';
6
-
7
- export class LaTeXPreprocessor {
8
- constructor() {
9
- this.stats = {
10
- commandsReplaced: 0,
11
- environmentsProcessed: 0,
12
- figuresFixed: 0
13
- };
14
- }
15
-
16
- preprocessContent(content) {
17
- let processed = content;
18
-
19
- // Remove comments
20
- processed = processed.replace(/%.*$/gm, '');
21
-
22
- // Apply command mappings
23
- processed = this.applyCommandMappings(processed);
24
-
25
- // Process custom environments
26
- processed = this.processCustomEnvironments(processed);
27
-
28
- // Fix figures
29
- processed = this.fixFigures(processed);
30
-
31
- // General cleanup
32
- processed = processed.replace(/\n{3,}/g, '\n\n');
33
- processed = processed.trim();
34
-
35
- return processed;
36
- }
37
-
38
- applyCommandMappings(content) {
39
- let processed = content;
40
-
41
- for (const [command, replacement] of Object.entries(COMMAND_MAPPINGS)) {
42
- const regex = new RegExp(`\\\\${command.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}(?![a-zA-Z])`, 'g');
43
- const matches = processed.match(regex);
44
- if (matches) {
45
- this.stats.commandsReplaced += matches.length;
46
- processed = processed.replace(regex, replacement);
47
- }
48
- }
49
-
50
- return processed;
51
- }
52
-
53
- processCustomEnvironments(content) {
54
- let processed = content;
55
-
56
- // Convert tldr environment
57
- processed = processed.replace(
58
- /\\begin\{tldr\}(.*?)\\end\{tldr\}/gs,
59
- (match, content) => {
60
- this.stats.environmentsProcessed++;
61
- return `> **TL;DR**\n> ${content.trim()}\n`;
62
- }
63
- );
64
-
65
- // Convert callout environment
66
- processed = processed.replace(
67
- /\\begin\{callout\}\{([^}]*)\}(.*?)\\end\{callout\}/gs,
68
- (match, title, content) => {
69
- this.stats.environmentsProcessed++;
70
- return `> **${title}**\n> ${content.trim()}\n`;
71
- }
72
- );
73
-
74
- // Convert finding environment
75
- processed = processed.replace(
76
- /\\finding\{([^}]*)\}\{([^}]*)\}/g,
77
- (match, number, content) => {
78
- this.stats.environmentsProcessed++;
79
- return `> **🔍 Finding ${number}**: ${content}\n`;
80
- }
81
- );
82
-
83
- return processed;
84
- }
85
-
86
- fixFigures(content) {
87
- let fixed = content;
88
-
89
- // Fix complex figure environments
90
- const figurePattern = /\\begin\{figure\}[\s\S]*?\\includegraphics(?:\[[^\]]*\])?\{([^}]+)\}[\s\S]*?\\caption\{([^}]+)\}[\s\S]*?(?:\\label\{([^}]+)\})?[\s\S]*?\\end\{figure\}/g;
91
-
92
- fixed = fixed.replace(figurePattern, (match, imagePath, caption, label) => {
93
- this.stats.figuresFixed++;
94
- const cleanPath = imagePath.replace(/^figures\//, 'assets/image/');
95
- const labelAttr = label ? ` {#fig-${label}}` : '';
96
- return `\n![${caption}](${cleanPath})${labelAttr}\n\n*${caption}*\n`;
97
- });
98
-
99
- // Fix simple includegraphics
100
- fixed = fixed.replace(
101
- /\\includegraphics(?:\[[^\]]*\])?\{([^}]+)\}/g,
102
- (match, imagePath) => {
103
- this.stats.figuresFixed++;
104
- const cleanPath = imagePath.replace(/^figures\//, 'assets/image/');
105
- return `![Image](${cleanPath})`;
106
- }
107
- );
108
-
109
- return fixed;
110
- }
111
-
112
- getStats() {
113
- return this.stats;
114
- }
115
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/scripts/latex-to-markdown/old-latex-converter/robust-preprocessor.mjs DELETED
@@ -1,399 +0,0 @@
1
- /**
2
- * Préprocesseur LaTeX Ultra-Robuste
3
- * Gère les cas complexes qui font planter Pandoc
4
- */
5
-
6
- export class RobustLaTeXPreprocessor {
7
- constructor() {
8
- this.stats = {
9
- figuresProcessed: 0,
10
- citationsFixed: 0,
11
- mathExpressionsFixed: 0,
12
- environmentsProcessed: 0,
13
- commandsReplaced: 0
14
- };
15
- this.debugMode = false;
16
- }
17
-
18
- preprocessContent(content, filename = 'unknown') {
19
- if (this.debugMode) {
20
- console.log(` 🔍 [DEBUG] Processing ${filename}...`);
21
- }
22
-
23
- let processed = content;
24
-
25
- // Phase 1: Structure cleanup (most important first)
26
- processed = this.phase1_StructureCleanup(processed);
27
-
28
- // Phase 2: Content transformation
29
- processed = this.phase2_ContentTransformation(processed);
30
-
31
- // Phase 3: Final polish
32
- processed = this.phase3_FinalPolish(processed);
33
-
34
- return processed;
35
- }
36
-
37
- phase1_StructureCleanup(content) {
38
- let cleaned = content;
39
-
40
- // Remove comments (but preserve structure)
41
- cleaned = cleaned.replace(/%.*$/gm, '');
42
-
43
- // Fix broken line breaks that split words
44
- cleaned = this.fixBrokenLineBreaks(cleaned);
45
-
46
- // Fix broken equation environments
47
- cleaned = this.fixBrokenEquations(cleaned);
48
-
49
- // Fix broken figure environments BEFORE processing
50
- cleaned = this.fixComplexFigures(cleaned);
51
-
52
- // Handle problematic environments early
53
- cleaned = this.handleProblematicEnvironments(cleaned);
54
-
55
- return cleaned;
56
- }
57
-
58
- fixBrokenLineBreaks(content) {
59
- let fixed = content;
60
-
61
- // Fix hyphenated words broken across lines
62
- // "length-\nT\nT" → "length-T"
63
- fixed = fixed.replace(/([a-zA-Z])-\s*\n\s*([A-Z])\s*\n\s*\2/g, '$1-$2');
64
-
65
- // Fix broken compound words
66
- // "some-\nword" → "some-word"
67
- fixed = fixed.replace(/([a-zA-Z])-\s*\n\s*([a-z])/g, '$1-$2');
68
-
69
- // Fix sentences that got broken inappropriately
70
- // "word.Sentence" → "word. Sentence"
71
- fixed = fixed.replace(/([a-z])\.([A-Z])/g, '$1. $2');
72
-
73
- return fixed;
74
- }
75
-
76
- fixBrokenEquations(content) {
77
- let fixed = content;
78
-
79
- // Fix mixed equation environments
80
- // "\end{equation}$" → "$$"
81
- fixed = fixed.replace(/\\end\{equation\}\$/g, '$$');
82
- fixed = fixed.replace(/\$\\begin\{equation\}/g, '$$');
83
-
84
- // Fix broken align environments
85
- fixed = fixed.replace(/([^$])\s*&=\s*/g, '$1 &= ');
86
-
87
- // Fix multiline math that lost structure
88
- fixed = fixed.replace(/\$([^$]*?)&=([^$]*?)\$/g, '$$\\begin{align}\n$1 &= $2\n\\end{align}$$');
89
-
90
- return fixed;
91
- }
92
-
93
- fixComplexFigures(content) {
94
- let fixed = content;
95
-
96
- // Strategy: Convert complex figures to simple markdown BEFORE Pandoc sees them
97
- const figurePattern = /\\begin\{figure\*?\}([\s\S]*?)\\end\{figure\*?\}/g;
98
- const wrapfigurePattern = /\\begin\{wrapfigure\}(?:\[[^\]]*\])?\{[^}]*\}\{[^}]*\}([\s\S]*?)\\end\{wrapfigure\}/g;
99
-
100
- fixed = fixed.replace(figurePattern, (match, figureContent) => {
101
- this.stats.figuresProcessed++;
102
-
103
- // Extract components safely
104
- const imageMatch = figureContent.match(/\\includegraphics(?:\[[^\]]*\])?\{([^}]+)\}/);
105
- const captionMatch = figureContent.match(/\\caption\{([\s\S]*?)\}(?=\s*(?:\\label|\\end|\}|$))/);
106
- const labelMatch = figureContent.match(/\\label\{([^}]+)\}/);
107
-
108
- if (!imageMatch) {
109
- return match; // Keep original if we can't parse it
110
- }
111
-
112
- const imagePath = imageMatch[1].replace(/^figures\//, 'assets/image/');
113
- let caption = captionMatch ? captionMatch[1].trim() : 'Figure';
114
- const label = labelMatch ? labelMatch[1] : '';
115
-
116
- // Clean caption thoroughly
117
- caption = this.cleanCaption(caption);
118
-
119
- // Generate clean markdown
120
- const labelAttr = label ? ` {#fig-${label}}` : '';
121
-
122
- return `\n\n![${caption}](${imagePath})${labelAttr}\n\n*${caption}*\n\n`;
123
- });
124
-
125
- // Also handle wrapfigure environments
126
- fixed = fixed.replace(wrapfigurePattern, (match, figureContent) => {
127
- this.stats.figuresProcessed++;
128
-
129
- // Extract components safely
130
- const imageMatch = figureContent.match(/\\includegraphics(?:\[[^\]]*\])?\{([^}]+)\}/);
131
- const captionMatch = figureContent.match(/\\caption\{([\s\S]*?)\}(?=\s*(?:\\label|\\end|\}|$))/);
132
- const labelMatch = figureContent.match(/\\label\{([^}]+)\}/);
133
-
134
- if (!imageMatch) {
135
- return match; // Keep original if we can't parse it
136
- }
137
-
138
- const imagePath = imageMatch[1].replace(/^figures\//, 'assets/image/');
139
- let caption = captionMatch ? captionMatch[1].trim() : 'Figure';
140
- const label = labelMatch ? labelMatch[1] : '';
141
-
142
- // Clean caption thoroughly
143
- caption = this.cleanCaption(caption);
144
-
145
- // Generate clean markdown (simpler for wrapfigure)
146
- const labelAttr = label ? ` {#fig-${label}}` : '';
147
-
148
- return `\n\n![${caption}](${imagePath})${labelAttr}\n\n`;
149
- });
150
-
151
- return fixed;
152
- }
153
-
154
- cleanCaption(caption) {
155
- let cleaned = caption;
156
-
157
- // Handle citations in captions properly
158
- cleaned = cleaned.replace(/~\\cite[tp]?\{([^}]+)\}/g, ' [@$1]');
159
- cleaned = cleaned.replace(/\\cite[tp]?\{([^}]+)\}/g, '[@$1]');
160
-
161
- // Remove problematic LaTeX commands
162
- cleaned = cleaned.replace(/\\textit\{([^}]+)\}/g, '*$1*');
163
- cleaned = cleaned.replace(/\\textbf\{([^}]+)\}/g, '**$1**');
164
- cleaned = cleaned.replace(/\\emph\{([^}]+)\}/g, '*$1*');
165
-
166
- // Fix \textsc with complex content
167
- cleaned = cleaned.replace(/\\textsc\{([^}]*\([^)]*\)[^}]*)\}/g, '**$1**');
168
-
169
- // Handle nested braces safely
170
- let depth = 0;
171
- let result = '';
172
- for (let i = 0; i < cleaned.length; i++) {
173
- const char = cleaned[i];
174
- if (char === '{') {
175
- depth++;
176
- if (depth === 1) continue; // Skip opening brace
177
- } else if (char === '}') {
178
- depth--;
179
- if (depth === 0) continue; // Skip closing brace
180
- } else {
181
- result += char;
182
- }
183
- }
184
-
185
- return result.trim();
186
- }
187
-
188
- handleProblematicEnvironments(content) {
189
- let fixed = content;
190
-
191
- // Handle algorithm environments
192
- fixed = fixed.replace(/\\begin\{algorithm\}([\s\S]*?)\\end\{algorithm\}/g, (match, algContent) => {
193
- return '\n```\nAlgorithm:\n' + algContent.replace(/\\[a-zA-Z]+/g, '') + '\n```\n';
194
- });
195
-
196
- // Handle complex math environments
197
- fixed = fixed.replace(/\\begin\{align\*?\}([\s\S]*?)\\end\{align\*?\}/g, (match, mathContent) => {
198
- const cleaned = mathContent.replace(/\\&/g, '').replace(/\\\\/g, '\n');
199
- return '\n$$\n' + cleaned + '\n$$\n';
200
- });
201
-
202
- return fixed;
203
- }
204
-
205
- phase2_ContentTransformation(content) {
206
- let transformed = content;
207
-
208
- // Apply command mappings (safer order)
209
- transformed = this.applyCommandMappings(transformed);
210
-
211
- // Process custom environments
212
- transformed = this.processCustomEnvironments(transformed);
213
-
214
- // Handle remaining citations
215
- transformed = this.processCitations(transformed);
216
-
217
- return transformed;
218
- }
219
-
220
- applyCommandMappings(content) {
221
- let processed = content;
222
-
223
- // Safe command replacements (most common first)
224
- const safeCommands = {
225
- 'eg': 'e.g.,',
226
- 'ie': 'i.e.,',
227
- 'versus': 'vs.',
228
- 'wrt': 'w.r.t.',
229
- 'etc': 'etc.',
230
- 'lerobot': '**LeRobot**',
231
- 'lerobotdataset': '`LeRobotDataset`',
232
- 'huggingface': '🤗 **Hugging Face**',
233
- 'qfunction': 'Q-function',
234
- 'qopt': 'Q^*',
235
- // Robotics-specific commands from handles.tex
236
- 'actionchunk': '\\mathbf{A}',
237
- 'actionexpert': '\\mathbf{v}_\\theta',
238
- 'pizero': '\\pi_0',
239
- 'statespace': '\\mathcal{S}',
240
- 'actionspace': '\\mathcal{A}',
241
- 'obsspace': '\\mathcal{O}',
242
- 'dynamics': '\\mathcal{D}',
243
- 'stateplusone': 's_{t+1}',
244
- 'state': 's_t',
245
- 'action': 'a_t',
246
- 'transition': '(s_t, a_t, s_{t+1})',
247
- 'sars': '(s_t, a_t, r_t, s_{t+1})',
248
- 'transitiongiven': '(s_{t+1} | s_t, a_t)',
249
- 'transitionprob': '\\mathbb{P}(s_{t+1} | s_t, a_t)',
250
- 'trajectory': '(s_0, a_0, r_0, s_1, a_1, r_1, \\dots, s_{T-1}, a_{T-1}, r_{T-1}, s_T)',
251
- 'Jpi': 'J(\\pi_\\theta)',
252
- 'supp': '\\text{supp}',
253
- 'DKL': '\\text{D}_{\\text{KL}}',
254
- 'FK': '\\text{FK}',
255
- 'targetvel': '\\dot{p}^*',
256
- 'targetpos': 'p^*'
257
- };
258
-
259
- for (const [command, replacement] of Object.entries(safeCommands)) {
260
- const regex = new RegExp(`\\\\${command}(?![a-zA-Z])`, 'g');
261
- const matches = processed.match(regex);
262
- if (matches) {
263
- this.stats.commandsReplaced += matches.length;
264
- processed = processed.replace(regex, replacement);
265
- }
266
- }
267
-
268
- // Math commands (more careful)
269
- const mathCommands = ['X', 'Z', 'G', 'D', 'F', 'R', 'S', 'T', 'U', 'Y'];
270
- mathCommands.forEach(letter => {
271
- const regex = new RegExp(`\\\\${letter}(?![a-zA-Z])`, 'g');
272
- processed = processed.replace(regex, `\\mathcal{${letter}}`);
273
- });
274
-
275
- // Handle commands with subscripts (like \actionchunk_t)
276
- processed = processed.replace(/\\actionchunk_t/g, '\\mathbf{A}_t');
277
- processed = processed.replace(/\\actionexpert_([a-zA-Z0-9]+)/g, '\\mathbf{v}_{\\theta_$1}');
278
- processed = processed.replace(/\\state_([a-zA-Z0-9]+)/g, 's_{$1}');
279
- processed = processed.replace(/\\action_([a-zA-Z0-9]+)/g, 'a_{$1}');
280
-
281
- // Fix problematic \textsc commands with complex content
282
- processed = processed.replace(/\\textsc\{([^{}]*\([^)]*\)[^{}]*)\}/g, '**$1**');
283
- processed = processed.replace(/\\textsc\{([^}]+)\}/g, '**$1**');
284
-
285
- // Fix \url commands to make them MDX-compatible
286
- processed = processed.replace(/\\textbf\{\\url\{([^}]+)\}\}/g, '**[$1]($1)**');
287
- processed = processed.replace(/\\url\{([^}]+)\}/g, '[$1]($1)');
288
-
289
- return processed;
290
- }
291
-
292
- processCustomEnvironments(content) {
293
- let processed = content;
294
-
295
- // TL;DR environment
296
- processed = processed.replace(
297
- /\\begin\{tldr\}([\s\S]*?)\\end\{tldr\}/g,
298
- (match, content) => {
299
- this.stats.environmentsProcessed++;
300
- return `\n> **TL;DR**\n> ${content.trim()}\n\n`;
301
- }
302
- );
303
-
304
- // Callout environment
305
- processed = processed.replace(
306
- /\\begin\{callout\}\{([^}]*)\}([\s\S]*?)\\end\{callout\}/g,
307
- (match, title, content) => {
308
- this.stats.environmentsProcessed++;
309
- return `\n> **${title}**\n> ${content.trim()}\n\n`;
310
- }
311
- );
312
-
313
- // Finding command
314
- processed = processed.replace(
315
- /\\finding\{([^}]*)\}\{([^}]*)\}/g,
316
- (match, number, content) => {
317
- this.stats.environmentsProcessed++;
318
- return `\n> **🔍 Finding ${number}**: ${content}\n\n`;
319
- }
320
- );
321
-
322
- return processed;
323
- }
324
-
325
- processCitations(content) {
326
- let processed = content;
327
-
328
- // Handle different citation types
329
- processed = processed.replace(/\\citep\{([^}]+)\}/g, '[@$1]');
330
- processed = processed.replace(/\\citet\{([^}]+)\}/g, '@$1');
331
- processed = processed.replace(/\\cite\{([^}]+)\}/g, '[@$1]');
332
-
333
- // Handle spaced citations (common issue)
334
- processed = processed.replace(/~\\cite/g, ' \\cite');
335
- processed = processed.replace(/~\[@/g, ' [@');
336
-
337
- // Count citations
338
- const citations = processed.match(/\[@[^\]]+\]/g) || [];
339
- this.stats.citationsFixed += citations.length;
340
-
341
- return processed;
342
- }
343
-
344
- phase3_FinalPolish(content) {
345
- let polished = content;
346
-
347
- // Fix math expressions
348
- polished = this.fixMathExpressions(polished);
349
-
350
- // Clean up whitespace and structure
351
- polished = this.finalCleanup(polished);
352
-
353
- return polished;
354
- }
355
-
356
- fixMathExpressions(content) {
357
- let fixed = content;
358
-
359
- // Fix common problematic patterns
360
- fixed = fixed.replace(/\$\{([^}]+)\}\$/g, '$$$1$$'); // ${...}$ -> $...$
361
- fixed = fixed.replace(/\$([^$]*)\\\$([^$]*)\$/g, '$$$1$2$$'); // $...\$...$ -> $...$
362
-
363
- // Fix pi expressions specifically
364
- fixed = fixed.replace(/\$\\pi_\$([0-9]+)\$/g, '$\\pi_$1$');
365
- fixed = fixed.replace(/\$\{\\pi_\}([0-9]+)\$/g, '$\\pi_$1$');
366
-
367
- // Fix malformed math delimiters
368
- fixed = fixed.replace(/\$\$\$+/g, '$$');
369
-
370
- this.stats.mathExpressionsFixed++;
371
-
372
- return fixed;
373
- }
374
-
375
- finalCleanup(content) {
376
- let cleaned = content;
377
-
378
- // Normalize whitespace
379
- cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
380
- cleaned = cleaned.replace(/[ \t]+$/gm, ''); // Trailing spaces
381
-
382
- // Fix MDX-incompatible angle bracket URLs
383
- cleaned = cleaned.replace(/\*\*<(https?:\/\/[^>]+)>\*\*/g, '**[$1]($1)**');
384
- cleaned = cleaned.replace(/<(https?:\/\/[^>]+)>/g, '[$1]($1)');
385
-
386
- // Ensure proper spacing around elements
387
- cleaned = cleaned.replace(/\n\n\n+/g, '\n\n');
388
-
389
- return cleaned.trim();
390
- }
391
-
392
- getStats() {
393
- return this.stats;
394
- }
395
-
396
- setDebugMode(enabled) {
397
- this.debugMode = enabled;
398
- }
399
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/scripts/latex-to-markdown/output/main.md CHANGED
The diff for this file is too large to render. See raw diff
 
app/scripts/latex-to-markdown/output/main.mdx CHANGED
The diff for this file is too large to render. See raw diff
 
app/scripts/latex-to-markdown/reference-preprocessor.mjs CHANGED
@@ -82,6 +82,54 @@ function createCleanMapping(references) {
82
  return mapping;
83
  }
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  /**
86
  * Apply mapping to LaTeX content
87
  * @param {string} content - Original LaTeX content
@@ -92,17 +140,19 @@ function applyMapping(content, mapping) {
92
  let cleanedContent = content;
93
  let changesCount = 0;
94
 
95
- // Apply mapping to all identifiers
 
 
 
 
 
 
 
 
 
 
96
  for (const [original, clean] of mapping) {
97
  if (original !== clean) {
98
- // Replace \label{original} with \label{clean}
99
- const labelRegex = new RegExp(`\\\\label\\{${escapeRegex(original)}\\}`, 'g');
100
- const labelMatches = cleanedContent.match(labelRegex);
101
- if (labelMatches) {
102
- cleanedContent = cleanedContent.replace(labelRegex, `\\label{${clean}}`);
103
- changesCount += labelMatches.length;
104
- }
105
-
106
  // Replace \ref{original} with \ref{clean}
107
  const refRegex = new RegExp(`\\\\ref\\{${escapeRegex(original)}\\}`, 'g');
108
  const refMatches = cleanedContent.match(refRegex);
@@ -110,10 +160,24 @@ function applyMapping(content, mapping) {
110
  cleanedContent = cleanedContent.replace(refRegex, `\\ref{${clean}}`);
111
  changesCount += refMatches.length;
112
  }
 
 
 
 
 
 
 
 
 
 
113
  }
114
  }
115
 
116
- return { content: cleanedContent, changesCount };
 
 
 
 
117
  }
118
 
119
  /**
@@ -145,23 +209,27 @@ export function preprocessLatexReferences(latexContent) {
145
  const result = applyMapping(latexContent, mapping);
146
 
147
  if (result.changesCount > 0) {
148
- console.log(` ✅ Cleaned ${result.changesCount} reference(s) for MDX compatibility`);
149
 
150
  // Show some examples of changes
151
  let exampleCount = 0;
152
  for (const [original, clean] of mapping) {
153
  if (original !== clean && exampleCount < 3) {
154
- console.log(` ${original} → ${clean}`);
155
  exampleCount++;
156
  }
157
  }
158
  if (mapping.size > 3) {
159
- console.log(` ... and ${mapping.size - 3} more`);
160
  }
161
  } else {
162
  console.log(' ℹ️ No reference cleanup needed');
163
  }
164
 
 
 
 
 
165
  return {
166
  content: result.content,
167
  changesCount: result.changesCount,
 
82
  return mapping;
83
  }
84
 
85
+ /**
86
+ * Convert labels to HTML anchor spans for better MDX compatibility
87
+ * @param {string} content - LaTeX content
88
+ * @param {Map} mapping - Identifier mapping (original -> clean)
89
+ * @returns {Object} - Result with content and count of conversions
90
+ */
91
+ function convertLabelsToAnchors(content, mapping) {
92
+ let processedContent = content;
93
+ let anchorsCreated = 0;
94
+
95
+ // Replace \label{...} with HTML anchor spans, but SKIP labels inside math environments
96
+ for (const [original, clean] of mapping) {
97
+ // Skip equation labels (they will be handled by the Lua filter)
98
+ if (original.startsWith('eq:')) {
99
+ continue;
100
+ }
101
+
102
+ const labelRegex = new RegExp(`\\\\label\\{${escapeRegex(original)}\\}`, 'g');
103
+ const labelMatches = processedContent.match(labelRegex);
104
+
105
+ if (labelMatches) {
106
+ // Replace \label{original} with HTML span anchor (invisible but accessible)
107
+ processedContent = processedContent.replace(labelRegex, `\n\n<span id="${clean}" style="position: absolute;"></span>\n\n`);
108
+ anchorsCreated += labelMatches.length;
109
+ }
110
+ }
111
+
112
+ return { content: processedContent, anchorsCreated };
113
+ }
114
+
115
+ /**
116
+ * Convert \highlight{...} commands to HTML spans with CSS class
117
+ * @param {string} content - LaTeX content
118
+ * @returns {Object} - Result with content and count of conversions
119
+ */
120
+ function convertHighlightCommands(content) {
121
+ let processedContent = content;
122
+ let highlightsConverted = 0;
123
+
124
+ // Replace \highlight{...} with <span class="highlight">...</span>
125
+ processedContent = processedContent.replace(/\\highlight\{([^}]+)\}/g, (match, text) => {
126
+ highlightsConverted++;
127
+ return `<span class="highlight">${text}</span>`;
128
+ });
129
+
130
+ return { content: processedContent, highlightsConverted };
131
+ }
132
+
133
  /**
134
  * Apply mapping to LaTeX content
135
  * @param {string} content - Original LaTeX content
 
140
  let cleanedContent = content;
141
  let changesCount = 0;
142
 
143
+ // First, convert labels to anchor spans
144
+ const anchorResult = convertLabelsToAnchors(cleanedContent, mapping);
145
+ cleanedContent = anchorResult.content;
146
+ const anchorsCreated = anchorResult.anchorsCreated;
147
+
148
+ // Convert \highlight{} commands to spans
149
+ const highlightResult = convertHighlightCommands(cleanedContent);
150
+ cleanedContent = highlightResult.content;
151
+ const highlightsConverted = highlightResult.highlightsConverted;
152
+
153
+ // Then apply mapping to remaining references and equation labels
154
  for (const [original, clean] of mapping) {
155
  if (original !== clean) {
 
 
 
 
 
 
 
 
156
  // Replace \ref{original} with \ref{clean}
157
  const refRegex = new RegExp(`\\\\ref\\{${escapeRegex(original)}\\}`, 'g');
158
  const refMatches = cleanedContent.match(refRegex);
 
160
  cleanedContent = cleanedContent.replace(refRegex, `\\ref{${clean}}`);
161
  changesCount += refMatches.length;
162
  }
163
+
164
+ // For equation labels, still clean the labels themselves (for the Lua filter)
165
+ if (original.startsWith('eq:')) {
166
+ const labelRegex = new RegExp(`\\\\label\\{${escapeRegex(original)}\\}`, 'g');
167
+ const labelMatches = cleanedContent.match(labelRegex);
168
+ if (labelMatches) {
169
+ cleanedContent = cleanedContent.replace(labelRegex, `\\label{${clean}}`);
170
+ changesCount += labelMatches.length;
171
+ }
172
+ }
173
  }
174
  }
175
 
176
+ return {
177
+ content: cleanedContent,
178
+ changesCount: changesCount + anchorsCreated,
179
+ highlightsConverted: highlightsConverted
180
+ };
181
  }
182
 
183
  /**
 
209
  const result = applyMapping(latexContent, mapping);
210
 
211
  if (result.changesCount > 0) {
212
+ console.log(` ✅ Processed ${result.changesCount} reference(s) and created anchor spans`);
213
 
214
  // Show some examples of changes
215
  let exampleCount = 0;
216
  for (const [original, clean] of mapping) {
217
  if (original !== clean && exampleCount < 3) {
218
+ console.log(` ${original} → ${clean} (span + refs)`);
219
  exampleCount++;
220
  }
221
  }
222
  if (mapping.size > 3) {
223
+ console.log(` ... and ${mapping.size - 3} more anchor spans created`);
224
  }
225
  } else {
226
  console.log(' ℹ️ No reference cleanup needed');
227
  }
228
 
229
+ if (result.highlightsConverted > 0) {
230
+ console.log(` ✨ Converted ${result.highlightsConverted} \\highlight{} command(s) to <span class="highlight">`);
231
+ }
232
+
233
  return {
234
  content: result.content,
235
  changesCount: result.changesCount,
app/src/content/article.mdx CHANGED
The diff for this file is too large to render. See raw diff
 
app/src/styles/_base.css CHANGED
@@ -109,4 +109,16 @@ html {
109
 
110
  [data-footnote-ref] {
111
  margin-left: 4px;
 
 
 
 
 
 
 
 
 
 
 
 
112
  }
 
109
 
110
  [data-footnote-ref] {
111
  margin-left: 4px;
112
+ }
113
+
114
+ .highlight {
115
+ /* background-color: color-mix(in srgb, var(--primary-color) 26%, transparent); */
116
+ background-color: var(--primary-color);
117
+ color: var(--page-bg);
118
+ padding: 3px 6px;
119
+ border-radius: 3px;
120
+ }
121
+
122
+ .highlight>* {
123
+ padding: 0;
124
  }
app/src/styles/_variables.css CHANGED
@@ -8,10 +8,10 @@
8
  --neutral-300: rgb(228, 228, 228);
9
  --neutral-200: rgb(245, 245, 245);
10
 
11
- --default-font-family: Source Sans Pro,ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";
12
 
13
  /* Brand (OKLCH base + derived states) */
14
- --primary-base: oklch(0.75 0.12 340);
15
  --primary-color: var(--primary-base);
16
  --primary-color-hover: oklch(from var(--primary-color) calc(l - 0.05) c h);
17
  --primary-color-active: oklch(from var(--primary-color) calc(l - 0.10) c h);
@@ -19,10 +19,10 @@
19
 
20
  /* Text & Surfaces */
21
  --page-bg: #ffffff;
22
- --text-color: rgba(0,0,0,.85);
23
- --transparent-page-contrast: rgba(255,255,255,.85);
24
- --muted-color: rgba(0,0,0,.6);
25
- --border-color: rgba(0,0,0,.1);
26
  --surface-bg: #fafafa;
27
  --code-bg: #f6f8fa;
28
 
@@ -52,8 +52,10 @@
52
  @custom-media --bp-content-collapse (max-width: 1100px);
53
 
54
  /* Layout */
55
- --content-padding-x: 16px; /* default page gutter */
56
- --block-spacing-y: var(--spacing-4); /* default vertical spacing between block components */
 
 
57
 
58
  /* Config */
59
  --palette-count: 8;
@@ -86,7 +88,7 @@
86
  /* Charts (global) */
87
  --axis-color: var(--muted-color);
88
  --tick-color: var(--text-color);
89
- --grid-color: rgba(0,0,0,.08);
90
  }
91
 
92
  /* ============================================================================ */
@@ -94,17 +96,17 @@
94
  /* ============================================================================ */
95
  [data-theme="dark"] {
96
  --page-bg: #0f1115;
97
- --text-color: rgba(255,255,255,.9);
98
- --muted-color: rgba(255,255,255,.7);
99
- --border-color: rgba(255,255,255,.15);
100
  --surface-bg: #12151b;
101
  --code-bg: #12151b;
102
- --transparent-page-contrast: rgba(0,0,0,.85);
103
-
104
  /* Charts (global) */
105
  --axis-color: var(--muted-color);
106
  --tick-color: var(--muted-color);
107
- --grid-color: rgba(255,255,255,.10);
108
 
109
  /* Primary (lower L in dark) */
110
  --primary-color: oklch(from var(--primary-base) calc(l - 0.08) c h);
 
8
  --neutral-300: rgb(228, 228, 228);
9
  --neutral-200: rgb(245, 245, 245);
10
 
11
+ --default-font-family: Source Sans Pro, ui-sans-serif, system-ui, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
12
 
13
  /* Brand (OKLCH base + derived states) */
14
+ --primary-base: oklch(0.74 0.12 60.17);
15
  --primary-color: var(--primary-base);
16
  --primary-color-hover: oklch(from var(--primary-color) calc(l - 0.05) c h);
17
  --primary-color-active: oklch(from var(--primary-color) calc(l - 0.10) c h);
 
19
 
20
  /* Text & Surfaces */
21
  --page-bg: #ffffff;
22
+ --text-color: rgba(0, 0, 0, .85);
23
+ --transparent-page-contrast: rgba(255, 255, 255, .85);
24
+ --muted-color: rgba(0, 0, 0, .6);
25
+ --border-color: rgba(0, 0, 0, .1);
26
  --surface-bg: #fafafa;
27
  --code-bg: #f6f8fa;
28
 
 
52
  @custom-media --bp-content-collapse (max-width: 1100px);
53
 
54
  /* Layout */
55
+ --content-padding-x: 16px;
56
+ /* default page gutter */
57
+ --block-spacing-y: var(--spacing-4);
58
+ /* default vertical spacing between block components */
59
 
60
  /* Config */
61
  --palette-count: 8;
 
88
  /* Charts (global) */
89
  --axis-color: var(--muted-color);
90
  --tick-color: var(--text-color);
91
+ --grid-color: rgba(0, 0, 0, .08);
92
  }
93
 
94
  /* ============================================================================ */
 
96
  /* ============================================================================ */
97
  [data-theme="dark"] {
98
  --page-bg: #0f1115;
99
+ --text-color: rgba(255, 255, 255, .9);
100
+ --muted-color: rgba(255, 255, 255, .7);
101
+ --border-color: rgba(255, 255, 255, .15);
102
  --surface-bg: #12151b;
103
  --code-bg: #12151b;
104
+ --transparent-page-contrast: rgba(0, 0, 0, .85);
105
+
106
  /* Charts (global) */
107
  --axis-color: var(--muted-color);
108
  --tick-color: var(--muted-color);
109
+ --grid-color: rgba(255, 255, 255, .10);
110
 
111
  /* Primary (lower L in dark) */
112
  --primary-color: oklch(from var(--primary-base) calc(l - 0.08) c h);