thibaud frere
committed on
Commit
·
5897d8c
1
Parent(s):
2903163
update links and highlights
Browse files
- app/scripts/latex-to-markdown/input/sections/01_introduction.tex +1 -1
- app/scripts/latex-to-markdown/input/sections/04_imitation_learning.tex +1 -1
- app/scripts/latex-to-markdown/latex-converter.mjs +1 -1
- app/scripts/latex-to-markdown/mdx-converter.mjs +52 -0
- app/scripts/latex-to-markdown/old-latex-converter/README.md +0 -107
- app/scripts/latex-to-markdown/old-latex-converter/bibliography-cleaner.mjs +0 -123
- app/scripts/latex-to-markdown/old-latex-converter/config.mjs +0 -59
- app/scripts/latex-to-markdown/old-latex-converter/converter.mjs +0 -456
- app/scripts/latex-to-markdown/old-latex-converter/image-transformer.mjs +0 -179
- app/scripts/latex-to-markdown/old-latex-converter/index.mjs +0 -75
- app/scripts/latex-to-markdown/old-latex-converter/preprocessor.mjs +0 -115
- app/scripts/latex-to-markdown/old-latex-converter/robust-preprocessor.mjs +0 -399
- app/scripts/latex-to-markdown/output/main.md +0 -0
- app/scripts/latex-to-markdown/output/main.mdx +0 -0
- app/scripts/latex-to-markdown/reference-preprocessor.mjs +81 -13
- app/src/content/article.mdx +0 -0
- app/src/styles/_base.css +12 -0
- app/src/styles/_variables.css +17 -15
app/scripts/latex-to-markdown/input/sections/01_introduction.tex
CHANGED
|
@@ -32,7 +32,7 @@ This tutorial is structured as follows:
|
|
| 32 |
\begin{itemize}
|
| 33 |
\item Section~\ref{sec:classical} reviews classical robotics foundations, introducing the limitations of dynamics-based approaches to robotics.
|
| 34 |
\item Section~\ref{sec:learning-rl} elaborates on the limitations of dynamics-based methods, and introduce RL as a practical approach to solve robotics problems, considering its upsides and potential limitations.
|
| 35 |
-
\item Section~\ref{sec:
|
| 36 |
\item Section~\ref{sec:learning-foundation} presents recent contributions on developing generalist models for robotics applications, by learning from large corpora of multi-task \& multi-robot data (\emph{robotics foundation models}).
|
| 37 |
% \item Lastly, Section~\ref{sec:extensions} covers emerging directions in robot learning research, introducing recent works in post-training techniques for robotics foundation models, as well as recent works in world models for robotics.
|
| 38 |
\end{itemize}
|
|
|
|
| 32 |
\begin{itemize}
|
| 33 |
\item Section~\ref{sec:classical} reviews classical robotics foundations, introducing the limitations of dynamics-based approaches to robotics.
|
| 34 |
\item Section~\ref{sec:learning-rl} elaborates on the limitations of dynamics-based methods, and introduce RL as a practical approach to solve robotics problems, considering its upsides and potential limitations.
|
| 35 |
+
\item Section~\ref{sec:robot-imitation-learning} further describes robot learning techniques that aim at solving single-tasks learning, leveraging BC techniques to autonomously reproduce specific expert demonstrations.
|
| 36 |
\item Section~\ref{sec:learning-foundation} presents recent contributions on developing generalist models for robotics applications, by learning from large corpora of multi-task \& multi-robot data (\emph{robotics foundation models}).
|
| 37 |
% \item Lastly, Section~\ref{sec:extensions} covers emerging directions in robot learning research, introducing recent works in post-training techniques for robotics foundation models, as well as recent works in world models for robotics.
|
| 38 |
\end{itemize}
|
app/scripts/latex-to-markdown/input/sections/04_imitation_learning.tex
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
\section{Robot (Imitation) Learning}
|
| 2 |
-
\label{sec:
|
| 3 |
|
| 4 |
\epigraph{\emph{The best material model for a cat is another, or preferably the same cat}}{Norbert Wiener}
|
| 5 |
|
|
|
|
| 1 |
\section{Robot (Imitation) Learning}
|
| 2 |
+
\label{sec:robot-imitation-learning}
|
| 3 |
|
| 4 |
\epigraph{\emph{The best material model for a cat is another, or preferably the same cat}}{Norbert Wiener}
|
| 5 |
|
app/scripts/latex-to-markdown/latex-converter.mjs
CHANGED
|
@@ -233,7 +233,7 @@ export function convertLatexToMarkdown(inputFile, outputDir) {
|
|
| 233 |
ensureDirectory(mediaDir);
|
| 234 |
const inputDir = dirname(inputFile);
|
| 235 |
const equationFilterPath = join(__dirname, 'filters', 'equation-ids.lua');
|
| 236 |
-
const pandocCommand = `pandoc "${preprocessedFile}" -f latex+latex_macros -t gfm+tex_math_dollars --shift-heading-level-by=1 --wrap=none ${bibOption} --extract-media="${mediaDir}" --resource-path="${inputDir}" --lua-filter="${equationFilterPath}" -o "${outputFile}"`;
|
| 237 |
|
| 238 |
console.log(` Running: ${pandocCommand}`);
|
| 239 |
execSync(pandocCommand, { stdio: 'pipe' });
|
|
|
|
| 233 |
ensureDirectory(mediaDir);
|
| 234 |
const inputDir = dirname(inputFile);
|
| 235 |
const equationFilterPath = join(__dirname, 'filters', 'equation-ids.lua');
|
| 236 |
+
const pandocCommand = `pandoc "${preprocessedFile}" -f latex+latex_macros -t gfm+tex_math_dollars+raw_html --shift-heading-level-by=1 --wrap=none ${bibOption} --extract-media="${mediaDir}" --resource-path="${inputDir}" --lua-filter="${equationFilterPath}" -o "${outputFile}"`;
|
| 237 |
|
| 238 |
console.log(` Running: ${pandocCommand}`);
|
| 239 |
execSync(pandocCommand, { stdio: 'pipe' });
|
app/scripts/latex-to-markdown/mdx-converter.mjs
CHANGED
|
@@ -323,6 +323,56 @@ function transformStyledSpans(content) {
|
|
| 323 |
* @param {string} content - MDX content
|
| 324 |
* @returns {string} - Content with transformed links
|
| 325 |
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
function transformReferenceLinks(content) {
|
| 327 |
console.log(' 🔗 Transforming reference links...');
|
| 328 |
|
|
@@ -543,6 +593,8 @@ function processMdxContent(content, latexContent = '') {
|
|
| 543 |
processedContent = transformImages(processedContent);
|
| 544 |
processedContent = transformStyledSpans(processedContent);
|
| 545 |
processedContent = transformReferenceLinks(processedContent);
|
|
|
|
|
|
|
| 546 |
processedContent = cleanFigcaptionNewlines(processedContent);
|
| 547 |
|
| 548 |
// Add component imports at the end
|
|
|
|
| 323 |
* @param {string} content - MDX content
|
| 324 |
* @returns {string} - Content with transformed links
|
| 325 |
*/
|
| 326 |
+
function fixHtmlEscaping(content) {
|
| 327 |
+
console.log(' 🔧 Fixing HTML escaping in spans...');
|
| 328 |
+
|
| 329 |
+
let fixedCount = 0;
|
| 330 |
+
|
| 331 |
+
// Fix escaped HTML in anchor spans with various escaping patterns
|
| 332 |
+
// Pattern 1: \<span id="..." style="..."\>\</span\>
|
| 333 |
+
content = content.replace(/\\<span id="([^"]*)" style="([^"]*)"\\>\\<\/span\\>/g, (match, id, style) => {
|
| 334 |
+
fixedCount++;
|
| 335 |
+
// Fix common style issues like "position- absolute;" -> "position: absolute;"
|
| 336 |
+
const cleanStyle = style.replace('position- absolute;', 'position: absolute;');
|
| 337 |
+
return `<span id="${id}" style="${cleanStyle}"></span>`;
|
| 338 |
+
});
|
| 339 |
+
|
| 340 |
+
// Pattern 2: \<span class="..."\>...\</span\>
|
| 341 |
+
content = content.replace(/\\<span class="([^"]*)"\\>([^\\]+)\\<\/span\\>/g, (match, className, text) => {
|
| 342 |
+
fixedCount++;
|
| 343 |
+
// Remove numbering like (1), (2), (3) from highlight spans
|
| 344 |
+
let cleanText = text;
|
| 345 |
+
if (className === 'highlight') {
|
| 346 |
+
cleanText = text.replace(/^\(\d+\)\s*/, '');
|
| 347 |
+
}
|
| 348 |
+
return `<span class="${className}">${cleanText}</span>`;
|
| 349 |
+
});
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
if (fixedCount > 0) {
|
| 353 |
+
console.log(` ✅ Fixed ${fixedCount} escaped span(s)`);
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
return content;
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
function cleanHighlightNumbering(content) {
|
| 360 |
+
console.log(' 🔢 Removing numbering from highlight spans...');
|
| 361 |
+
|
| 362 |
+
let cleanedCount = 0;
|
| 363 |
+
// Clean numbering from non-escaped highlight spans too
|
| 364 |
+
content = content.replace(/<span class="highlight">(\(\d+\)\s*)([^<]+)<\/span>/g, (match, numbering, text) => {
|
| 365 |
+
cleanedCount++;
|
| 366 |
+
return `<span class="highlight">${text}</span>`;
|
| 367 |
+
});
|
| 368 |
+
|
| 369 |
+
if (cleanedCount > 0) {
|
| 370 |
+
console.log(` ✅ Removed numbering from ${cleanedCount} highlight span(s)`);
|
| 371 |
+
}
|
| 372 |
+
|
| 373 |
+
return content;
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
function transformReferenceLinks(content) {
|
| 377 |
console.log(' 🔗 Transforming reference links...');
|
| 378 |
|
|
|
|
| 593 |
processedContent = transformImages(processedContent);
|
| 594 |
processedContent = transformStyledSpans(processedContent);
|
| 595 |
processedContent = transformReferenceLinks(processedContent);
|
| 596 |
+
processedContent = fixHtmlEscaping(processedContent);
|
| 597 |
+
processedContent = cleanHighlightNumbering(processedContent);
|
| 598 |
processedContent = cleanFigcaptionNewlines(processedContent);
|
| 599 |
|
| 600 |
// Add component imports at the end
|
app/scripts/latex-to-markdown/old-latex-converter/README.md
DELETED
|
@@ -1,107 +0,0 @@
|
|
| 1 |
-
# Convertisseur LaTeX vers Markdown
|
| 2 |
-
|
| 3 |
-
Conversion robuste de projets LaTeX complexes vers Markdown/MDX pour Astro.
|
| 4 |
-
|
| 5 |
-
## 🚀 Usage rapide
|
| 6 |
-
|
| 7 |
-
```bash
|
| 8 |
-
# Conversion standard
|
| 9 |
-
node scripts/latex-converter/index.mjs
|
| 10 |
-
|
| 11 |
-
# Avec nettoyage du dossier de sortie
|
| 12 |
-
node scripts/latex-converter/index.mjs --clean
|
| 13 |
-
|
| 14 |
-
# Chemins personnalisés
|
| 15 |
-
node scripts/latex-converter/index.mjs \
|
| 16 |
-
--input=../tools/latex-to-markdown/input \
|
| 17 |
-
--output=src/content \
|
| 18 |
-
--clean
|
| 19 |
-
```
|
| 20 |
-
|
| 21 |
-
## 📁 Architecture
|
| 22 |
-
|
| 23 |
-
```
|
| 24 |
-
scripts/latex-converter/
|
| 25 |
-
├── index.mjs # Point d'entrée principal
|
| 26 |
-
├── config.mjs # Configuration et mappings
|
| 27 |
-
├── preprocessor.mjs # Préprocesseur LaTeX
|
| 28 |
-
├── bibliography-cleaner.mjs # Nettoyeur de bibliographie
|
| 29 |
-
├── converter.mjs # Convertisseur principal
|
| 30 |
-
└── README.md # Documentation
|
| 31 |
-
```
|
| 32 |
-
|
| 33 |
-
## 🔧 Fonctionnalités
|
| 34 |
-
|
| 35 |
-
### ✅ Ce qui est géré
|
| 36 |
-
- **412+ commandes personnalisées** (math, text, projet-spécifique)
|
| 37 |
-
- **Environnements custom** (`tldr`, `callout`, `finding`)
|
| 38 |
-
- **41 figures** avec organisation par chapitre
|
| 39 |
-
- **2247 entrées bibliographiques** avec nettoyage automatique
|
| 40 |
-
- **Citations** et références croisées
|
| 41 |
-
- **Structure MDX** compatible Astro
|
| 42 |
-
|
| 43 |
-
### 🛠️ Transformations automatiques
|
| 44 |
-
|
| 45 |
-
#### Commandes LaTeX → Markdown
|
| 46 |
-
```latex
|
| 47 |
-
\lerobot → **LeRobot**
|
| 48 |
-
\lerobotdataset → `LeRobotDataset`
|
| 49 |
-
\huggingface → 🤗 **Hugging Face**
|
| 50 |
-
\eg → e.g.,
|
| 51 |
-
\X → \mathcal{X}
|
| 52 |
-
```
|
| 53 |
-
|
| 54 |
-
#### Environnements → Callouts
|
| 55 |
-
```latex
|
| 56 |
-
\begin{tldr}
|
| 57 |
-
Content here
|
| 58 |
-
\end{tldr}
|
| 59 |
-
```
|
| 60 |
-
→
|
| 61 |
-
```markdown
|
| 62 |
-
> **TL;DR**
|
| 63 |
-
> Content here
|
| 64 |
-
```
|
| 65 |
-
|
| 66 |
-
#### Bibliographie
|
| 67 |
-
- `{{Title}}` → `Title` (suppression doubles accolades)
|
| 68 |
-
- `\&` → `&` (déséchappement)
|
| 69 |
-
- Nettoyage général du formatting
|
| 70 |
-
|
| 71 |
-
## 📊 Statistiques exemple
|
| 72 |
-
|
| 73 |
-
```
|
| 74 |
-
⏱️ Time: 1.02s
|
| 75 |
-
📄 Files: 9 sections converties
|
| 76 |
-
🖼️ Figures: 41 images copiées
|
| 77 |
-
📚 Citations: Detection automatique
|
| 78 |
-
🔧 Commands replaced: 34 transformations
|
| 79 |
-
📦 Environments processed: 4 environnements
|
| 80 |
-
📚 Bibliography: 159 entries, 403 fixes
|
| 81 |
-
```
|
| 82 |
-
|
| 83 |
-
## 🎯 Résultat
|
| 84 |
-
|
| 85 |
-
Structure finale dans `src/content/`:
|
| 86 |
-
```
|
| 87 |
-
src/content/
|
| 88 |
-
├── article.mdx # Article principal avec imports
|
| 89 |
-
├── bibliography.bib # Bibliographie nettoyée
|
| 90 |
-
├── chapters/ # Sections converties
|
| 91 |
-
│ ├── 00_abstract.mdx
|
| 92 |
-
│ ├── 01_introduction.mdx
|
| 93 |
-
│ └── ...
|
| 94 |
-
└── assets/image/ # Figures organisées
|
| 95 |
-
├── ch1/
|
| 96 |
-
├── ch2/
|
| 97 |
-
└── ...
|
| 98 |
-
```
|
| 99 |
-
|
| 100 |
-
## ⚠️ Prérequis
|
| 101 |
-
|
| 102 |
-
- **Pandoc** installé (`brew install pandoc`)
|
| 103 |
-
- Node.js avec support ESM
|
| 104 |
-
|
| 105 |
-
## 🔍 Debugging
|
| 106 |
-
|
| 107 |
-
Les warnings sont normaux pour les sections avec math complexe non supporté par Pandoc. Le convertisseur continue et produit un résultat utilisable.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/scripts/latex-to-markdown/old-latex-converter/bibliography-cleaner.mjs
DELETED
|
@@ -1,123 +0,0 @@
|
|
| 1 |
-
/**
|
| 2 |
-
* Nettoyeur de bibliographie - Corrige les doubles accolades et problèmes de formatting
|
| 3 |
-
*/
|
| 4 |
-
|
| 5 |
-
export class BibliographyCleaner {
|
| 6 |
-
constructor() {
|
| 7 |
-
this.stats = {
|
| 8 |
-
entriesProcessed: 0,
|
| 9 |
-
doubleAccoladesFixed: 0,
|
| 10 |
-
escapedCharsFixed: 0,
|
| 11 |
-
mathExpressionsFixed: 0
|
| 12 |
-
};
|
| 13 |
-
}
|
| 14 |
-
|
| 15 |
-
cleanContent(content) {
|
| 16 |
-
let cleaned = content;
|
| 17 |
-
|
| 18 |
-
// Count entries
|
| 19 |
-
this.stats.entriesProcessed = (content.match(/@\w+\{/g) || []).length;
|
| 20 |
-
|
| 21 |
-
// Fix double accolades
|
| 22 |
-
cleaned = this.fixDoubleAccolades(cleaned);
|
| 23 |
-
|
| 24 |
-
// Fix escaped characters
|
| 25 |
-
cleaned = this.fixEscapedCharacters(cleaned);
|
| 26 |
-
|
| 27 |
-
// Fix malformed math expressions
|
| 28 |
-
cleaned = this.fixMathExpressions(cleaned);
|
| 29 |
-
|
| 30 |
-
// General cleanup
|
| 31 |
-
cleaned = this.generalCleanup(cleaned);
|
| 32 |
-
|
| 33 |
-
return cleaned;
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
-
fixDoubleAccolades(content) {
|
| 37 |
-
let fixed = content;
|
| 38 |
-
let fixCount = 0;
|
| 39 |
-
|
| 40 |
-
fixed = fixed.replace(/\{\{([^}]+)\}\}/g, (match, inner) => {
|
| 41 |
-
fixCount++;
|
| 42 |
-
|
| 43 |
-
// Keep accolades for important terms
|
| 44 |
-
if (/^[A-Z][A-Z0-9]*$/.test(inner) || // Acronyms like "API", "ML"
|
| 45 |
-
/^[A-Z][a-z]*(?:\s+[A-Z][a-z]*)*$/.test(inner) || // Proper nouns
|
| 46 |
-
inner.includes('++') || // Languages like "C++"
|
| 47 |
-
inner.includes('$') // Math
|
| 48 |
-
) {
|
| 49 |
-
return `{${inner}}`;
|
| 50 |
-
}
|
| 51 |
-
|
| 52 |
-
return inner;
|
| 53 |
-
});
|
| 54 |
-
|
| 55 |
-
this.stats.doubleAccoladesFixed = fixCount;
|
| 56 |
-
return fixed;
|
| 57 |
-
}
|
| 58 |
-
|
| 59 |
-
fixEscapedCharacters(content) {
|
| 60 |
-
let fixed = content;
|
| 61 |
-
let fixCount = 0;
|
| 62 |
-
|
| 63 |
-
const replacements = [
|
| 64 |
-
[/\\&/g, '&'],
|
| 65 |
-
[/\\\$/g, '$'],
|
| 66 |
-
[/\\%/g, '%'],
|
| 67 |
-
[/\\#/g, '#'],
|
| 68 |
-
[/\\_/g, '_']
|
| 69 |
-
];
|
| 70 |
-
|
| 71 |
-
for (const [pattern, replacement] of replacements) {
|
| 72 |
-
const matches = fixed.match(pattern);
|
| 73 |
-
if (matches) {
|
| 74 |
-
fixCount += matches.length;
|
| 75 |
-
fixed = fixed.replace(pattern, replacement);
|
| 76 |
-
}
|
| 77 |
-
}
|
| 78 |
-
|
| 79 |
-
this.stats.escapedCharsFixed = fixCount;
|
| 80 |
-
return fixed;
|
| 81 |
-
}
|
| 82 |
-
|
| 83 |
-
fixMathExpressions(content) {
|
| 84 |
-
let fixed = content;
|
| 85 |
-
let fixCount = 0;
|
| 86 |
-
|
| 87 |
-
// Fix specific problematic patterns
|
| 88 |
-
const mathFixes = [
|
| 89 |
-
// ${$\pi_$}0$ → $\pi_0$
|
| 90 |
-
[/\$\{\$\\pi_\$\}([0-9]+)\$/g, '$\\pi_$1$'],
|
| 91 |
-
// ${$something$}text$ → $something_text$
|
| 92 |
-
[/\$\{\$([^}]+)\$\}([^$]*)\$/g, '$$$1_$2$$'],
|
| 93 |
-
// Fix other malformed patterns
|
| 94 |
-
[/\$\{([^}]+)\}\$/g, '$$$1$$'],
|
| 95 |
-
[/\$([^$]*)\\\$([^$]*)\$/g, '$$$1$2$$']
|
| 96 |
-
];
|
| 97 |
-
|
| 98 |
-
for (const [pattern, replacement] of mathFixes) {
|
| 99 |
-
const matches = fixed.match(pattern);
|
| 100 |
-
if (matches) {
|
| 101 |
-
fixCount += matches.length;
|
| 102 |
-
fixed = fixed.replace(pattern, replacement);
|
| 103 |
-
}
|
| 104 |
-
}
|
| 105 |
-
|
| 106 |
-
this.stats.mathExpressionsFixed = fixCount;
|
| 107 |
-
return fixed;
|
| 108 |
-
}
|
| 109 |
-
|
| 110 |
-
generalCleanup(content) {
|
| 111 |
-
let cleaned = content;
|
| 112 |
-
|
| 113 |
-
// Normalize whitespace
|
| 114 |
-
cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
|
| 115 |
-
cleaned = cleaned.trim() + '\n';
|
| 116 |
-
|
| 117 |
-
return cleaned;
|
| 118 |
-
}
|
| 119 |
-
|
| 120 |
-
getStats() {
|
| 121 |
-
return this.stats;
|
| 122 |
-
}
|
| 123 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/scripts/latex-to-markdown/old-latex-converter/config.mjs
DELETED
|
@@ -1,59 +0,0 @@
|
|
| 1 |
-
/**
|
| 2 |
-
* Configuration et mappings pour la conversion LaTeX vers Markdown
|
| 3 |
-
*/
|
| 4 |
-
|
| 5 |
-
export const COMMAND_MAPPINGS = {
|
| 6 |
-
// Math shortcuts
|
| 7 |
-
'X': '\\mathcal{X}',
|
| 8 |
-
'Z': '\\mathcal{Z}',
|
| 9 |
-
'G': '\\mathcal{G}',
|
| 10 |
-
'D': '\\mathcal{D}',
|
| 11 |
-
'F': '\\mathcal{F}',
|
| 12 |
-
'R': '\\mathcal{R}',
|
| 13 |
-
|
| 14 |
-
// Text commands
|
| 15 |
-
'eg': 'e.g.,',
|
| 16 |
-
'ie': 'i.e.,',
|
| 17 |
-
'versus': 'vs.',
|
| 18 |
-
'wrt': 'w.r.t.',
|
| 19 |
-
'etc': 'etc.',
|
| 20 |
-
|
| 21 |
-
// Project-specific
|
| 22 |
-
'lerobot': '**LeRobot**',
|
| 23 |
-
'lerobotdataset': '`LeRobotDataset`',
|
| 24 |
-
'huggingface': '🤗 **Hugging Face**',
|
| 25 |
-
|
| 26 |
-
// Functions
|
| 27 |
-
'qfunction': 'Q-function',
|
| 28 |
-
'qopt': 'Q^*'
|
| 29 |
-
};
|
| 30 |
-
|
| 31 |
-
export const ENVIRONMENT_MAPPINGS = {
|
| 32 |
-
'tldr': {
|
| 33 |
-
start: '> **TL;DR**\n> ',
|
| 34 |
-
end: '\n',
|
| 35 |
-
type: 'callout'
|
| 36 |
-
},
|
| 37 |
-
'callout': {
|
| 38 |
-
start: '> **Note**\n> ',
|
| 39 |
-
end: '\n',
|
| 40 |
-
type: 'callout'
|
| 41 |
-
},
|
| 42 |
-
'finding': {
|
| 43 |
-
start: '> **🔍 Finding**: ',
|
| 44 |
-
end: '\n',
|
| 45 |
-
type: 'finding'
|
| 46 |
-
}
|
| 47 |
-
};
|
| 48 |
-
|
| 49 |
-
export const PANDOC_OPTIONS = [
|
| 50 |
-
'--from=latex',
|
| 51 |
-
'--to=markdown',
|
| 52 |
-
'--wrap=preserve',
|
| 53 |
-
'--markdown-headings=atx'
|
| 54 |
-
];
|
| 55 |
-
|
| 56 |
-
export const DEFAULT_PATHS = {
|
| 57 |
-
input: '../tools/latex-to-markdown/input',
|
| 58 |
-
output: 'src/content'
|
| 59 |
-
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/scripts/latex-to-markdown/old-latex-converter/converter.mjs
DELETED
|
@@ -1,456 +0,0 @@
|
|
| 1 |
-
/**
|
| 2 |
-
* Convertisseur principal LaTeX vers Markdown
|
| 3 |
-
*/
|
| 4 |
-
|
| 5 |
-
import { spawn } from 'node:child_process';
|
| 6 |
-
import { promises as fs } from 'node:fs';
|
| 7 |
-
import { resolve, dirname, basename, join } from 'node:path';
|
| 8 |
-
|
| 9 |
-
import { LaTeXPreprocessor } from './preprocessor.mjs';
|
| 10 |
-
import { RobustLaTeXPreprocessor } from './robust-preprocessor.mjs';
|
| 11 |
-
import { BibliographyCleaner } from './bibliography-cleaner.mjs';
|
| 12 |
-
import { DEFAULT_PATHS, PANDOC_OPTIONS } from './config.mjs';
|
| 13 |
-
|
| 14 |
-
export class LaTeXConverter {
|
| 15 |
-
constructor() {
|
| 16 |
-
this.preprocessor = new LaTeXPreprocessor();
|
| 17 |
-
this.robustPreprocessor = new RobustLaTeXPreprocessor();
|
| 18 |
-
this.bibCleaner = new BibliographyCleaner();
|
| 19 |
-
this.stats = {
|
| 20 |
-
totalFiles: 0,
|
| 21 |
-
totalFigures: 0,
|
| 22 |
-
totalCitations: 0,
|
| 23 |
-
conversionTime: 0
|
| 24 |
-
};
|
| 25 |
-
this.warnings = [];
|
| 26 |
-
this.errors = [];
|
| 27 |
-
}
|
| 28 |
-
|
| 29 |
-
async convert(inputDir, outputDir, options = {}) {
|
| 30 |
-
const startTime = Date.now();
|
| 31 |
-
|
| 32 |
-
console.log('🚀 LaTeX to Markdown Converter');
|
| 33 |
-
console.log(`📁 Input: ${inputDir}`);
|
| 34 |
-
console.log(`📁 Output: ${outputDir}`);
|
| 35 |
-
|
| 36 |
-
try {
|
| 37 |
-
// Setup
|
| 38 |
-
await this.setupOutput(outputDir, options.clean);
|
| 39 |
-
|
| 40 |
-
// Convert sections
|
| 41 |
-
await this.convertSections(inputDir, outputDir);
|
| 42 |
-
|
| 43 |
-
// Handle assets
|
| 44 |
-
await this.handleAssets(inputDir, outputDir);
|
| 45 |
-
|
| 46 |
-
// Create main article
|
| 47 |
-
await this.createMainArticle(outputDir);
|
| 48 |
-
|
| 49 |
-
// Generate report
|
| 50 |
-
this.stats.conversionTime = Date.now() - startTime;
|
| 51 |
-
this.generateReport();
|
| 52 |
-
|
| 53 |
-
console.log('🎉 Conversion completed successfully!');
|
| 54 |
-
return true;
|
| 55 |
-
|
| 56 |
-
} catch (error) {
|
| 57 |
-
this.errors.push(`Conversion failed: ${error.message}`);
|
| 58 |
-
throw error;
|
| 59 |
-
}
|
| 60 |
-
}
|
| 61 |
-
|
| 62 |
-
async setupOutput(outputDir, clean = false) {
|
| 63 |
-
if (clean) {
|
| 64 |
-
console.log('🧹 Cleaning output directory...');
|
| 65 |
-
await fs.rm(outputDir, { recursive: true, force: true });
|
| 66 |
-
}
|
| 67 |
-
|
| 68 |
-
await fs.mkdir(outputDir, { recursive: true });
|
| 69 |
-
await fs.mkdir(join(outputDir, 'chapters'), { recursive: true });
|
| 70 |
-
await fs.mkdir(join(outputDir, 'assets', 'image'), { recursive: true });
|
| 71 |
-
}
|
| 72 |
-
|
| 73 |
-
async convertSections(inputDir, outputDir) {
|
| 74 |
-
console.log('\n📄 Converting sections...');
|
| 75 |
-
|
| 76 |
-
const sectionsDir = join(inputDir, 'sections');
|
| 77 |
-
const outputChaptersDir = join(outputDir, 'chapters');
|
| 78 |
-
|
| 79 |
-
try {
|
| 80 |
-
const files = await fs.readdir(sectionsDir);
|
| 81 |
-
const texFiles = files.filter(f => f.endsWith('.tex'));
|
| 82 |
-
|
| 83 |
-
for (const file of texFiles) {
|
| 84 |
-
const inputPath = join(sectionsDir, file);
|
| 85 |
-
const outputPath = join(outputChaptersDir, file.replace('.tex', '.mdx'));
|
| 86 |
-
|
| 87 |
-
console.log(` Converting ${file}...`);
|
| 88 |
-
await this.convertSingleFile(inputPath, outputPath);
|
| 89 |
-
}
|
| 90 |
-
|
| 91 |
-
this.stats.totalFiles = texFiles.length;
|
| 92 |
-
|
| 93 |
-
} catch (error) {
|
| 94 |
-
this.errors.push(`Section conversion failed: ${error.message}`);
|
| 95 |
-
}
|
| 96 |
-
}
|
| 97 |
-
|
| 98 |
-
async convertSingleFile(inputPath, outputPath) {
|
| 99 |
-
try {
|
| 100 |
-
// Read and preprocess with robust preprocessor
|
| 101 |
-
let content = await fs.readFile(inputPath, 'utf-8');
|
| 102 |
-
content = this.robustPreprocessor.preprocessContent(content, basename(inputPath));
|
| 103 |
-
|
| 104 |
-
// Create temp file for Pandoc
|
| 105 |
-
const tempPath = inputPath + '.temp';
|
| 106 |
-
await fs.writeFile(tempPath, content);
|
| 107 |
-
|
| 108 |
-
// Convert with Pandoc
|
| 109 |
-
const pandocArgs = [tempPath, '-o', outputPath, ...PANDOC_OPTIONS];
|
| 110 |
-
await this.runPandoc(pandocArgs);
|
| 111 |
-
|
| 112 |
-
// Cleanup
|
| 113 |
-
await fs.unlink(tempPath);
|
| 114 |
-
|
| 115 |
-
// Post-process
|
| 116 |
-
await this.postProcessFile(outputPath);
|
| 117 |
-
|
| 118 |
-
} catch (error) {
|
| 119 |
-
this.warnings.push(`Failed to convert ${basename(inputPath)}: ${error.message}`);
|
| 120 |
-
}
|
| 121 |
-
}
|
| 122 |
-
|
| 123 |
-
async runPandoc(args) {
|
| 124 |
-
return new Promise((resolve, reject) => {
|
| 125 |
-
const child = spawn('pandoc', args, {
|
| 126 |
-
stdio: ['pipe', 'pipe', 'pipe'],
|
| 127 |
-
shell: false
|
| 128 |
-
});
|
| 129 |
-
|
| 130 |
-
let stderr = '';
|
| 131 |
-
child.stderr.on('data', (data) => {
|
| 132 |
-
stderr += data.toString();
|
| 133 |
-
});
|
| 134 |
-
|
| 135 |
-
child.on('error', reject);
|
| 136 |
-
child.on('exit', (code) => {
|
| 137 |
-
if (code === 0) {
|
| 138 |
-
resolve();
|
| 139 |
-
} else {
|
| 140 |
-
reject(new Error(`Pandoc failed: ${stderr}`));
|
| 141 |
-
}
|
| 142 |
-
});
|
| 143 |
-
});
|
| 144 |
-
}
|
| 145 |
-
|
| 146 |
-
fixMalformedMath(content) {
|
| 147 |
-
let fixed = content;
|
| 148 |
-
|
| 149 |
-
// Fix problematic expressions like ${$\pi_$}0$
|
| 150 |
-
fixed = fixed.replace(/\$\{\$([^$}]+)\$\}([^$]*)\$/g, '$$$1_{$2}$$');
|
| 151 |
-
|
| 152 |
-
// Fix nested math delimiters
|
| 153 |
-
fixed = fixed.replace(/\$\$([^$]*)\$([^$]*)\$([^$]*)\$\$/g, '$$$1 $2 $3$$');
|
| 154 |
-
|
| 155 |
-
// Fix incomplete math expressions
|
| 156 |
-
fixed = fixed.replace(/\$([^$]*)\{([^}]*)\$([^$]*)\$/g, '$$$1\\{$2\\}$3$$');
|
| 157 |
-
|
| 158 |
-
// Fix math with unescaped braces
|
| 159 |
-
fixed = fixed.replace(/\$([^$]*)\{([^}]*)\}([^$]*)\$/g, '$$$1\\{$2\\}$3$$');
|
| 160 |
-
|
| 161 |
-
// Fix common pi expressions
|
| 162 |
-
fixed = fixed.replace(/\$\\pi_\$([0-9]+)\$/g, '$\\pi_$1$');
|
| 163 |
-
fixed = fixed.replace(/\$\{\\pi_\}([0-9]+)\$/g, '$\\pi_$1$');
|
| 164 |
-
|
| 165 |
-
// Fix doubled dollar signs (but preserve display math)
|
| 166 |
-
fixed = fixed.replace(/\$\$\$+/g, '$$');
|
| 167 |
-
|
| 168 |
-
// Ensure proper spacing around math
|
| 169 |
-
fixed = fixed.replace(/([a-zA-Z])\$([^$]+)\$([a-zA-Z])/g, '$1 $$$2$$ $3');
|
| 170 |
-
|
| 171 |
-
return fixed;
|
| 172 |
-
}
|
| 173 |
-
|
| 174 |
-
fixMDXUrls(content) {
|
| 175 |
-
let fixed = content;
|
| 176 |
-
|
| 177 |
-
// Fix all escaped markdown that should be unescaped for MDX
|
| 178 |
-
fixed = fixed.replace(/\\\*/g, '*');
|
| 179 |
-
fixed = fixed.replace(/\\\[/g, '[');
|
| 180 |
-
fixed = fixed.replace(/\\\]/g, ']');
|
| 181 |
-
fixed = fixed.replace(/\\\(/g, '(');
|
| 182 |
-
fixed = fixed.replace(/\\\)/g, ')');
|
| 183 |
-
fixed = fixed.replace(/\\>/g, '>');
|
| 184 |
-
fixed = fixed.replace(/\\!/g, '!');
|
| 185 |
-
|
| 186 |
-
// Fix angle bracket URLs that are MDX-incompatible
|
| 187 |
-
fixed = fixed.replace(/\*\*<(https?:\/\/[^>]+)>\*\*/g, '**[$1]($1)**');
|
| 188 |
-
fixed = fixed.replace(/<(https?:\/\/[^>]+)>/g, '[$1]($1)');
|
| 189 |
-
|
| 190 |
-
// Fix malformed math expressions with escaped braces
|
| 191 |
-
fixed = fixed.replace(/\\\{/g, '{');
|
| 192 |
-
fixed = fixed.replace(/\\\}/g, '}');
|
| 193 |
-
|
| 194 |
-
// Escape all braces in math expressions for MDX compatibility
|
| 195 |
-
fixed = fixed.replace(/\$([^$]*)\$/g, (match, mathContent) => {
|
| 196 |
-
const escaped = mathContent.replace(/\{/g, '\\{').replace(/\}/g, '\\}');
|
| 197 |
-
return `$${escaped}$`;
|
| 198 |
-
});
|
| 199 |
-
|
| 200 |
-
fixed = fixed.replace(/\$\$([^$]*)\$\$/g, (match, mathContent) => {
|
| 201 |
-
const escaped = mathContent.replace(/\{/g, '\\{').replace(/\}/g, '\\}');
|
| 202 |
-
return `$$${escaped}$$`;
|
| 203 |
-
});
|
| 204 |
-
|
| 205 |
-
// Fix Section references that are malformed
|
| 206 |
-
fixed = fixed.replace(/Section\s+([a-zA-Z-]+:[a-zA-Z0-9-]+)\\/g, 'the referenced figure');
|
| 207 |
-
fixed = fixed.replace(/Figure\s+Section\s+([a-zA-Z-]+:[a-zA-Z0-9-]+)\\/g, 'the referenced figure');
|
| 208 |
-
|
| 209 |
-
return fixed;
|
| 210 |
-
}
|
| 211 |
-
|
| 212 |
-
async postProcessFile(filePath) {
|
| 213 |
-
try {
|
| 214 |
-
let content = await fs.readFile(filePath, 'utf-8');
|
| 215 |
-
|
| 216 |
-
// Fix common issues
|
| 217 |
-
content = content.replace(/\\\\#/g, '#');
|
| 218 |
-
content = content.replace(/\\\\!/g, '!');
|
| 219 |
-
content = content.replace(/\\\\\*/g, '*');
|
| 220 |
-
|
| 221 |
-
// Fix citations
|
| 222 |
-
content = content.replace(/\\citep\{([^}]+)\}/g, '[@$1]');
|
| 223 |
-
content = content.replace(/\\citet\{([^}]+)\}/g, '@$1');
|
| 224 |
-
content = content.replace(/\\cite\{([^}]+)\}/g, '[@$1]');
|
| 225 |
-
|
| 226 |
-
// Remove section labels from headers
|
| 227 |
-
content = content.replace(/^(#{1,6}.*?)\s*\{#[^}]+\}/gm, '$1');
|
| 228 |
-
|
| 229 |
-
// Fix complex LaTeX references like [\[sec:xxx\]](#sec:xxx){reference-type="ref" reference="sec:xxx"}
|
| 230 |
-
content = content.replace(/\[\\?\[([^\]]+)\\?\]\]\(#[^)]+\)\{[^}]*reference[^}]*\}/g, 'Section $1');
|
| 231 |
-
|
| 232 |
-
// Fix simple references [\[ref\]](#ref)
|
| 233 |
-
content = content.replace(/\[\\?\[([^\]]+)\\?\]\]\(#[^)]+\)/g, '$1');
|
| 234 |
-
|
| 235 |
-
// Fix remaining malformed references like "Section Section sec:classical\"
|
| 236 |
-
content = content.replace(/Section\s+Section\s+([^\\]+)\\/g, 'Section $1');
|
| 237 |
-
content = content.replace(/Section\s+Section\s+([^\\]+)/g, 'Section $1');
|
| 238 |
-
|
| 239 |
-
// Remove remaining LaTeX labels and references
|
| 240 |
-
content = content.replace(/\\label\{[^}]+\}/g, '');
|
| 241 |
-
content = content.replace(/\\ref\{[^}]+\}/g, '[Reference]');
|
| 242 |
-
|
| 243 |
-
// Clean up section references with colons (be more specific)
|
| 244 |
-
content = content.replace(/Section\s+sec:([a-zA-Z-]+)/g, 'the following section');
|
| 245 |
-
|
| 246 |
-
// Fix broken section references that got mangled
|
| 247 |
-
content = content.replace(/Section\s+secs[a-zA-Z]*\s+/g, 'The following section ');
|
| 248 |
-
content = content.replace(/Section\s+sec[a-zA-Z]*\s+/g, 'The following section ');
|
| 249 |
-
|
| 250 |
-
// Count citations
|
| 251 |
-
const citations = content.match(/\[@[^\]]+\]/g) || [];
|
| 252 |
-
this.stats.totalCitations += citations.length;
|
| 253 |
-
|
| 254 |
-
// Fix malformed math expressions
|
| 255 |
-
content = this.fixMalformedMath(content);
|
| 256 |
-
|
| 257 |
-
// Fix MDX-incompatible URLs (post-pandoc)
|
| 258 |
-
content = this.fixMDXUrls(content);
|
| 259 |
-
|
| 260 |
-
// Final cleanup
|
| 261 |
-
content = content.replace(/\n{3,}/g, '\n\n');
|
| 262 |
-
content = content.replace(/\\texttt\{([^}]+)\}/g, '`$1`');
|
| 263 |
-
content = content.replace(/\\textbf\{([^}]+)\}/g, '**$1**');
|
| 264 |
-
content = content.replace(/\\emph\{([^}]+)\}/g, '*$1*');
|
| 265 |
-
content = content.trim();
|
| 266 |
-
|
| 267 |
-
await fs.writeFile(filePath, content);
|
| 268 |
-
|
| 269 |
-
} catch (error) {
|
| 270 |
-
this.warnings.push(`Post-processing failed for ${basename(filePath)}: ${error.message}`);
|
| 271 |
-
}
|
| 272 |
-
}
|
| 273 |
-
|
| 274 |
-
async handleAssets(inputDir, outputDir) {
|
| 275 |
-
console.log('\n🖼️ Handling assets...');
|
| 276 |
-
|
| 277 |
-
// Copy figures
|
| 278 |
-
try {
|
| 279 |
-
const figuresInputDir = join(inputDir, 'figures');
|
| 280 |
-
const assetsOutputDir = join(outputDir, 'assets', 'image');
|
| 281 |
-
|
| 282 |
-
await this.copyDirectoryRecursive(figuresInputDir, assetsOutputDir);
|
| 283 |
-
this.stats.totalFigures = await this.countFiles(assetsOutputDir, /\.(png|jpg|jpeg|pdf|svg)$/i);
|
| 284 |
-
|
| 285 |
-
console.log(` 📊 Copied ${this.stats.totalFigures} figures`);
|
| 286 |
-
} catch (error) {
|
| 287 |
-
this.warnings.push(`Could not copy figures: ${error.message}`);
|
| 288 |
-
}
|
| 289 |
-
|
| 290 |
-
// Handle bibliography
|
| 291 |
-
try {
|
| 292 |
-
const bibPath = join(inputDir, 'main.bib');
|
| 293 |
-
const outputBibPath = join(outputDir, 'bibliography.bib');
|
| 294 |
-
|
| 295 |
-
// Copy and clean bibliography
|
| 296 |
-
let bibContent = await fs.readFile(bibPath, 'utf-8');
|
| 297 |
-
bibContent = this.bibCleaner.cleanContent(bibContent);
|
| 298 |
-
await fs.writeFile(outputBibPath, bibContent);
|
| 299 |
-
|
| 300 |
-
const bibStats = this.bibCleaner.getStats();
|
| 301 |
-
console.log(` 📚 Bibliography: ${bibStats.entriesProcessed} entries, ${bibStats.doubleAccoladesFixed} fixes, ${bibStats.mathExpressionsFixed} math fixes`);
|
| 302 |
-
|
| 303 |
-
} catch (error) {
|
| 304 |
-
this.warnings.push(`Could not handle bibliography: ${error.message}`);
|
| 305 |
-
}
|
| 306 |
-
}
|
| 307 |
-
|
| 308 |
-
async copyDirectoryRecursive(src, dest) {
|
| 309 |
-
await fs.mkdir(dest, { recursive: true });
|
| 310 |
-
const entries = await fs.readdir(src, { withFileTypes: true });
|
| 311 |
-
|
| 312 |
-
for (const entry of entries) {
|
| 313 |
-
const srcPath = join(src, entry.name);
|
| 314 |
-
const destPath = join(dest, entry.name);
|
| 315 |
-
|
| 316 |
-
if (entry.isDirectory()) {
|
| 317 |
-
await this.copyDirectoryRecursive(srcPath, destPath);
|
| 318 |
-
} else {
|
| 319 |
-
await fs.copyFile(srcPath, destPath);
|
| 320 |
-
}
|
| 321 |
-
}
|
| 322 |
-
}
|
| 323 |
-
|
| 324 |
-
async countFiles(dir, pattern) {
|
| 325 |
-
let count = 0;
|
| 326 |
-
try {
|
| 327 |
-
const entries = await fs.readdir(dir, { withFileTypes: true });
|
| 328 |
-
|
| 329 |
-
for (const entry of entries) {
|
| 330 |
-
if (entry.isDirectory()) {
|
| 331 |
-
count += await this.countFiles(join(dir, entry.name), pattern);
|
| 332 |
-
} else if (pattern.test(entry.name)) {
|
| 333 |
-
count++;
|
| 334 |
-
}
|
| 335 |
-
}
|
| 336 |
-
} catch {
|
| 337 |
-
// Directory doesn't exist
|
| 338 |
-
}
|
| 339 |
-
|
| 340 |
-
return count;
|
| 341 |
-
}
|
| 342 |
-
|
| 343 |
-
async createMainArticle(outputDir) {
|
| 344 |
-
console.log('\n📝 Creating main article...');
|
| 345 |
-
|
| 346 |
-
try {
|
| 347 |
-
const chaptersDir = join(outputDir, 'chapters');
|
| 348 |
-
const files = await fs.readdir(chaptersDir);
|
| 349 |
-
const mdxFiles = files.filter(f => f.endsWith('.mdx')).sort();
|
| 350 |
-
|
| 351 |
-
const frontmatter = this.generateFrontmatter();
|
| 352 |
-
const { imports, components } = this.generateChapterImports(mdxFiles);
|
| 353 |
-
|
| 354 |
-
const articleContent = frontmatter + imports + '\n\n' + components;
|
| 355 |
-
|
| 356 |
-
const articlePath = join(outputDir, 'article.mdx');
|
| 357 |
-
await fs.writeFile(articlePath, articleContent);
|
| 358 |
-
|
| 359 |
-
console.log(` 📄 Created article.mdx with ${mdxFiles.length} chapters`);
|
| 360 |
-
|
| 361 |
-
} catch (error) {
|
| 362 |
-
this.errors.push(`Failed to create main article: ${error.message}`);
|
| 363 |
-
}
|
| 364 |
-
}
|
| 365 |
-
|
| 366 |
-
generateFrontmatter() {
|
| 367 |
-
const now = new Date().toISOString().split('T')[0];
|
| 368 |
-
|
| 369 |
-
return `---
|
| 370 |
-
title: "Robot Learning: A Tutorial"
|
| 371 |
-
subtitle: "From Classical Robotics to Foundation Models"
|
| 372 |
-
description: "A comprehensive guide to modern robot learning techniques"
|
| 373 |
-
date: "${now}"
|
| 374 |
-
authors:
|
| 375 |
-
- name: "Francesco Capuano"
|
| 376 |
-
affiliations: [1, 2]
|
| 377 |
-
- name: "Adil Zouitine"
|
| 378 |
-
affiliations: [2]
|
| 379 |
-
- name: "Pepijn Kooijmans"
|
| 380 |
-
affiliations: [2]
|
| 381 |
-
- name: "Thomas Wolf"
|
| 382 |
-
affiliations: [2]
|
| 383 |
-
- name: "Michel Aractingi"
|
| 384 |
-
affiliations: [2]
|
| 385 |
-
affiliations:
|
| 386 |
-
- name: "École Normale Supérieure Paris-Saclay"
|
| 387 |
-
url: "https://ens-paris-saclay.fr"
|
| 388 |
-
- name: "Hugging Face"
|
| 389 |
-
url: "https://huggingface.co"
|
| 390 |
-
tags:
|
| 391 |
-
- robotics
|
| 392 |
-
- machine-learning
|
| 393 |
-
- tutorial
|
| 394 |
-
bibliography: bibliography.bib
|
| 395 |
-
converted_from: "LaTeX"
|
| 396 |
-
---
|
| 397 |
-
|
| 398 |
-
`;
|
| 399 |
-
}
|
| 400 |
-
|
| 401 |
-
generateChapterImports(mdxFiles) {
|
| 402 |
-
let imports = '';
|
| 403 |
-
let components = '';
|
| 404 |
-
|
| 405 |
-
mdxFiles.forEach(file => {
|
| 406 |
-
const sectionName = basename(file, '.mdx');
|
| 407 |
-
const componentName = this.formatComponentName(sectionName);
|
| 408 |
-
|
| 409 |
-
imports += `import ${componentName} from "./chapters/${sectionName}.mdx";\n`;
|
| 410 |
-
components += `<${componentName} />\n\n`;
|
| 411 |
-
});
|
| 412 |
-
|
| 413 |
-
return { imports, components };
|
| 414 |
-
}
|
| 415 |
-
|
| 416 |
-
formatComponentName(sectionName) {
|
| 417 |
-
let componentName = sectionName
|
| 418 |
-
.split(/[_-]/)
|
| 419 |
-
.map(part => part.charAt(0).toUpperCase() + part.slice(1))
|
| 420 |
-
.join('');
|
| 421 |
-
|
| 422 |
-
if (/^\d/.test(componentName)) {
|
| 423 |
-
componentName = 'Chapter' + componentName;
|
| 424 |
-
}
|
| 425 |
-
|
| 426 |
-
if (componentName === 'AForword') componentName = 'Foreword';
|
| 427 |
-
if (componentName === 'Chapter00Abstract') componentName = 'Abstract';
|
| 428 |
-
|
| 429 |
-
return componentName;
|
| 430 |
-
}
|
| 431 |
-
|
| 432 |
-
generateReport() {
|
| 433 |
-
console.log('\n📊 Conversion Report:');
|
| 434 |
-
console.log('=====================');
|
| 435 |
-
console.log(`⏱️ Time: ${(this.stats.conversionTime / 1000).toFixed(2)}s`);
|
| 436 |
-
console.log(`📄 Files: ${this.stats.totalFiles}`);
|
| 437 |
-
console.log(`🖼️ Figures: ${this.stats.totalFigures}`);
|
| 438 |
-
console.log(`📚 Citations: ${this.stats.totalCitations}`);
|
| 439 |
-
console.log(`⚠️ Warnings: ${this.warnings.length}`);
|
| 440 |
-
console.log(`❌ Errors: ${this.errors.length}`);
|
| 441 |
-
|
| 442 |
-
const robustStats = this.robustPreprocessor.getStats();
|
| 443 |
-
console.log(`🔧 Commands replaced: ${robustStats.commandsReplaced}`);
|
| 444 |
-
console.log(`📦 Environments processed: ${robustStats.environmentsProcessed}`);
|
| 445 |
-
console.log(`🖼️ Figures processed: ${robustStats.figuresProcessed}`);
|
| 446 |
-
console.log(`📐 Math expressions fixed: ${robustStats.mathExpressionsFixed}`);
|
| 447 |
-
|
| 448 |
-
if (this.warnings.length > 0 && this.warnings.length <= 3) {
|
| 449 |
-
console.log('\n⚠️ Warnings:');
|
| 450 |
-
this.warnings.forEach(w => console.log(` - ${w}`));
|
| 451 |
-
} else if (this.warnings.length > 3) {
|
| 452 |
-
console.log(`\n⚠️ ${this.warnings.length} warnings:`);
|
| 453 |
-
this.warnings.forEach(w => console.log(` - ${w.substring(0, 150)}...`));
|
| 454 |
-
}
|
| 455 |
-
}
|
| 456 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/scripts/latex-to-markdown/old-latex-converter/image-transformer.mjs
DELETED
|
@@ -1,179 +0,0 @@
|
|
| 1 |
-
/**
|
| 2 |
-
* Transformateur d'images : Markdown → ResponsiveImage Astro
|
| 3 |
-
* Convertit les images markdown en composants ResponsiveImage optimisés
|
| 4 |
-
*/
|
| 5 |
-
|
| 6 |
-
import { promises as fs } from 'node:fs';
|
| 7 |
-
import { dirname, basename, extname, resolve, relative } from 'node:path';
|
| 8 |
-
|
| 9 |
-
/**
 * Rewrites Markdown image syntax in chapter `.mdx` files into
 * `<ResponsiveImage />` components backed by ES-module image imports.
 */
export class ImageTransformer {
  constructor() {
    this.stats = {
      filesProcessed: 0,
      imagesTransformed: 0,
      importsAdded: 0
    };
  }

  /**
   * Transform every `.mdx` file in `<contentDir>/chapters` and print a summary.
   * Errors are logged, not thrown.
   *
   * @param {string} contentDir - Content root containing `chapters/`.
   * @returns {Promise<void>}
   */
  async transformImagesInDirectory(contentDir) {
    const chaptersDir = resolve(contentDir, 'chapters');

    try {
      const files = await fs.readdir(chaptersDir);
      const mdxFiles = files.filter(file => file.endsWith('.mdx'));

      for (const file of mdxFiles) {
        const filePath = resolve(chaptersDir, file);
        await this.transformImagesInFile(filePath, contentDir);
        this.stats.filesProcessed++;
      }

      console.log(`📸 Image transformation completed:`);
      console.log(` 📄 Files processed: ${this.stats.filesProcessed}`);
      console.log(` 🖼️ Images transformed: ${this.stats.imagesTransformed}`);
      console.log(` 📦 Imports added: ${this.stats.importsAdded}`);

    } catch (error) {
      console.error('Error transforming images:', error.message);
    }
  }

  /**
   * Rewrite one file in place. Files without local images are left untouched.
   *
   * @param {string} filePath - Path of the `.mdx` file.
   * @param {string} contentDir - Content root that image paths are relative to.
   * @returns {Promise<void>}
   */
  async transformImagesInFile(filePath, contentDir) {
    try {
      const content = await fs.readFile(filePath, 'utf-8');

      const imageInfo = this.extractImageInfo(content);
      if (imageInfo.length === 0) {
        return; // No images to transform
      }

      const imports = this.generateImports(imageInfo, filePath, contentDir);
      const transformedContent = this.transformImageReferences(content, imageInfo);
      const finalContent = this.addImportsToFile(transformedContent, imports);

      await fs.writeFile(filePath, finalContent);

      this.stats.imagesTransformed += imageInfo.length;
      this.stats.importsAdded += imports.length;

    } catch (error) {
      console.error(`Error processing ${filePath}:`, error.message);
    }
  }

  /**
   * Find Markdown images that point at local files.
   *
   * @param {string} content - Markdown/MDX text.
   * @returns {Array<{fullMatch: string, alt: string, src: string, id: (string|null), variableName: string}>}
   */
  extractImageInfo(content) {
    // Alt text may contain one level of nested [brackets]. The optional
    // trailing "#id" must sit on the same line as the image: allowing
    // newlines here would swallow the marker of a following "## heading".
    const imageRegex = /!\[([^\]]*(?:\[[^\]]*\][^\]]*)*)\]\(([^)]+)\)(?:[ \t]*(#[^\s]+))?/g;
    const images = [];

    for (const [fullMatch, alt, src, id] of content.matchAll(imageRegex)) {
      // Skip external URLs (http://, https://, protocol-relative //).
      if (/^(?:https?:)?\/\//i.test(src)) continue;

      images.push({
        fullMatch,
        alt: alt || 'Figure',
        src,
        id: id ? id.substring(1) : null, // Remove # from id
        variableName: this.generateVariableName(src)
      });
    }

    return images;
  }

  /**
   * Derive an import binding name from an image path.
   * `assets/image/ch4/ch4-bc-trajectories.png` → `Ch4BcTrajectories`;
   * names starting with a digit get a `Fig` prefix.
   *
   * @param {string} imagePath
   * @returns {string} A valid JavaScript identifier.
   */
  generateVariableName(imagePath) {
    const filename = basename(imagePath, extname(imagePath));

    const name = filename
      .replace(/[-_]/g, ' ')
      .replace(/\b\w/g, l => l.toUpperCase())
      .replace(/\s/g, '')
      .replace(/[^A-Za-z0-9_$]/g, '') // drop anything that cannot appear in an identifier
      .replace(/^\d+/, 'Fig$&'); // Prefix with Fig if starts with number

    return name || 'Image';
  }

  /**
   * Build the import statements needed by the transformed file: the
   * component itself plus one import per distinct image.
   *
   * @param {Array<{src: string, variableName: string}>} imageInfo
   * @param {string} filePath - File that will contain the imports.
   * @param {string} contentDir - Root that `src` values are relative to.
   * @returns {string[]} Import statements without trailing newlines.
   */
  generateImports(imageInfo, filePath, contentDir) {
    // A Set keeps insertion order and drops exact duplicates, so an image
    // referenced twice is imported once (a repeated binding is a SyntaxError).
    const imports = new Set([
      "import ResponsiveImage from '../../components/ResponsiveImage.astro'"
    ]);

    for (const image of imageInfo) {
      const relativePath = this.getRelativeImagePath(image.src, filePath, contentDir);
      imports.add(`import ${image.variableName} from '${relativePath}'`);
    }

    return [...imports];
  }

  /**
   * Express an image path (relative to `contentDir`) relative to the
   * directory of `filePath`, e.g. from `chapters/04_imitation_learning.mdx`
   * to `../assets/image/ch4/ch4-bc-trajectories.png`.
   *
   * @returns {string} Path starting with `.`.
   */
  getRelativeImagePath(imageSrc, filePath, contentDir) {
    const chapterDir = dirname(filePath);
    const imageAbsolutePath = resolve(contentDir, imageSrc);
    const relativePath = relative(chapterDir, imageAbsolutePath);

    return relativePath.startsWith('.') ? relativePath : `./${relativePath}`;
  }

  /**
   * Replace each collected Markdown image with its component tag.
   *
   * @param {string} content
   * @param {Array<object>} imageInfo - Result of {@link extractImageInfo}.
   * @returns {string}
   */
  transformImageReferences(content, imageInfo) {
    let transformed = content;

    for (const image of imageInfo) {
      const componentTag = this.generateResponsiveImageTag(image);
      // Function replacer: a plain string would have "$$", "$&", "$1"…
      // interpreted as replacement patterns, corrupting math in alt text.
      transformed = transformed.replace(image.fullMatch, () => componentTag);
    }

    return transformed;
  }

  /**
   * Render one `<ResponsiveImage />` tag.
   *
   * @param {{variableName: string, alt: string, id: (string|null)}} image
   * @returns {string}
   */
  generateResponsiveImageTag(image) {
    // Double quotes would terminate the attribute early; encode them.
    const escapeAttr = (value) => String(value).replace(/"/g, '&quot;');

    const props = [
      `src={${image.variableName}}`,
      `alt="${escapeAttr(image.alt)}"`
    ];

    if (image.id) {
      props.push(`id="${escapeAttr(image.id)}"`);
    }

    return `<ResponsiveImage ${props.join(' ')} />`;
  }

  /**
   * Insert import statements after any leading YAML frontmatter, existing
   * imports and blank lines.
   *
   * @param {string} content
   * @param {string[]} imports
   * @returns {string}
   */
  addImportsToFile(content, imports) {
    if (imports.length === 0) {
      return content;
    }

    const lines = content.split('\n');
    let insertIndex = 0;

    // Frontmatter must stay first in the file: start after its closing fence.
    if (lines[0].trim() === '---') {
      const closing = lines.findIndex((line, i) => i > 0 && line.trim() === '---');
      if (closing !== -1) insertIndex = closing + 1;
    }

    // Skip existing imports
    while (insertIndex < lines.length &&
      (lines[insertIndex].startsWith('import ') ||
        lines[insertIndex].trim() === '')) {
      insertIndex++;
    }

    const importBlock = imports.join('\n') + '\n\n';
    lines.splice(insertIndex, 0, importBlock);

    return lines.join('\n');
  }

  /** @returns {{filesProcessed: number, imagesTransformed: number, importsAdded: number}} Live counters. */
  getStats() {
    return this.stats;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/scripts/latex-to-markdown/old-latex-converter/index.mjs
DELETED
|
@@ -1,75 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env node
|
| 2 |
-
/**
|
| 3 |
-
* Point d'entrée principal pour la conversion LaTeX vers Markdown
|
| 4 |
-
*
|
| 5 |
-
* Usage: node scripts/latex-converter/index.mjs [--input=path] [--output=path] [--clean]
|
| 6 |
-
*/
|
| 7 |
-
|
| 8 |
-
import { resolve } from 'node:path';
|
| 9 |
-
import { spawn } from 'node:child_process';
|
| 10 |
-
import process from 'node:process';
|
| 11 |
-
|
| 12 |
-
import { LaTeXConverter } from './converter.mjs';
|
| 13 |
-
import { ImageTransformer } from './image-transformer.mjs';
|
| 14 |
-
import { DEFAULT_PATHS } from './config.mjs';
|
| 15 |
-
|
| 16 |
-
/**
 * Parse `--key` and `--key=value` command-line flags.
 *
 * The first two entries of `argv` (runtime and script path) are ignored, as
 * are arguments that do not start with `--`. Only the first `=` separates
 * key from value, so values may themselves contain `=`.
 *
 * @param {string[]} argv - Typically `process.argv`.
 * @returns {Object<string, (string|true)>} `true` for bare flags, otherwise the raw string value.
 */
function parseArgs(argv) {
  const out = {};
  for (const arg of argv.slice(2)) {
    if (!arg.startsWith('--')) continue;
    const body = arg.slice(2);
    const eq = body.indexOf('=');
    if (eq === -1) {
      out[body] = true;
    } else {
      out[body.slice(0, eq)] = body.slice(eq + 1);
    }
  }
  return out;
}
|
| 25 |
-
|
| 26 |
-
/**
 * Probe for a usable `pandoc` executable by running `pandoc --version`.
 *
 * @returns {Promise<boolean>} `true` when the process exits with code 0;
 *   `false` when it exits otherwise, cannot be started, or spawning throws.
 */
async function checkPandoc() {
  try {
    const probe = spawn('pandoc', ['--version'], { stdio: 'pipe' });
    return new Promise((settle) => {
      probe.on('exit', (exitCode) => settle(exitCode === 0));
      // 'error' fires when the binary is missing or cannot be executed.
      probe.on('error', () => settle(false));
    });
  } catch {
    return false;
  }
}
|
| 37 |
-
|
| 38 |
-
/**
 * CLI entry point: verify Pandoc is available, resolve the input and output
 * directories (flags `--input`, `--output`, falling back to DEFAULT_PATHS),
 * run the LaTeX conversion, then rewrite images into ResponsiveImage
 * components. Exits with status 1 when Pandoc is missing or a step throws.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv);
  const cwd = process.cwd();

  // Pandoc is a hard requirement; print install hints and stop without it.
  if (!(await checkPandoc())) {
    console.error("❌ Pandoc n'est pas installé.");
    console.error(' macOS: brew install pandoc');
    console.error(' Ubuntu: apt-get install pandoc');
    process.exit(1);
  }

  // Paths are resolved against the current working directory.
  const inputDir = resolve(cwd, args.input || DEFAULT_PATHS.input);
  const outputDir = resolve(cwd, args.output || DEFAULT_PATHS.output);

  try {
    const options = { clean: args.clean || false };
    await new LaTeXConverter().convert(inputDir, outputDir, options);

    // Transform images to ResponsiveImage components
    console.log('\n📸 Transforming images to ResponsiveImage components...');
    await new ImageTransformer().transformImagesInDirectory(outputDir);
  } catch (error) {
    console.error('❌ Conversion échouée:', error.message);
    process.exit(1);
  }
}

// Last-resort handler for anything main() did not catch itself.
main().catch((fatal) => {
  console.error('❌ Erreur fatale:', fatal);
  process.exit(1);
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/scripts/latex-to-markdown/old-latex-converter/preprocessor.mjs
DELETED
|
@@ -1,115 +0,0 @@
|
|
| 1 |
-
/**
|
| 2 |
-
* Préprocesseur LaTeX - Nettoie et simplifie le contenu LaTeX
|
| 3 |
-
*/
|
| 4 |
-
|
| 5 |
-
import { COMMAND_MAPPINGS, ENVIRONMENT_MAPPINGS } from './config.mjs';
|
| 6 |
-
|
| 7 |
-
/**
 * LaTeX preprocessor: strips comments, expands project-specific commands,
 * and converts custom environments and figures to Markdown before the
 * content is handed to the next conversion stage.
 */
export class LaTeXPreprocessor {
  constructor() {
    this.stats = {
      commandsReplaced: 0,
      environmentsProcessed: 0,
      figuresFixed: 0
    };
  }

  /**
   * Run every preprocessing step in order.
   *
   * @param {string} content - Raw LaTeX source.
   * @returns {string} Preprocessed text, trimmed, with runs of 3+ newlines collapsed to 2.
   */
  preprocessContent(content) {
    let processed = this.stripComments(content);

    // Apply command mappings
    processed = this.applyCommandMappings(processed);

    // Process custom environments
    processed = this.processCustomEnvironments(processed);

    // Fix figures
    processed = this.fixFigures(processed);

    // General cleanup
    processed = processed.replace(/\n{3,}/g, '\n\n');
    return processed.trim();
  }

  /**
   * Remove LaTeX comments: everything from the first unescaped `%` to the
   * end of each line. An escaped percent sign (`\%`) is ordinary text and is
   * kept, so "50\% of runs" is no longer cut off after "50\".
   *
   * @param {string} content
   * @returns {string}
   */
  stripComments(content) {
    // Group 1 walks the line as plain characters or backslash-escape pairs,
    // so the `%` that follows it is guaranteed to be unescaped.
    return content.replace(/^((?:[^%\\\r\n]|\\.)*)%.*$/gm, '$1');
  }

  /**
   * Replace each `\command` listed in COMMAND_MAPPINGS with its mapped text.
   * A command only matches when it is not followed by another letter, so
   * `\foo` does not match inside `\foobar`.
   *
   * @param {string} content
   * @returns {string}
   */
  applyCommandMappings(content) {
    let processed = content;

    for (const [command, replacement] of Object.entries(COMMAND_MAPPINGS)) {
      const escaped = command.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const regex = new RegExp(`\\\\${escaped}(?![a-zA-Z])`, 'g');
      const matches = processed.match(regex);
      if (matches) {
        this.stats.commandsReplaced += matches.length;
        // Insert string replacements verbatim: passed as a plain string,
        // "$$" or "$&" inside the mapped text would be treated as patterns.
        const replacer = typeof replacement === 'function' ? replacement : () => replacement;
        processed = processed.replace(regex, replacer);
      }
    }

    return processed;
  }

  /**
   * Convert the `tldr` and `callout` environments and the `\finding` command
   * into Markdown blockquotes.
   *
   * @param {string} content
   * @returns {string}
   */
  processCustomEnvironments(content) {
    // Every line of the body needs its own "> " prefix, otherwise only the
    // first line stays inside the blockquote.
    const quote = (body) => body.trim().split('\n').map((line) => `> ${line}`).join('\n');

    let processed = content;

    // Convert tldr environment
    processed = processed.replace(
      /\\begin\{tldr\}(.*?)\\end\{tldr\}/gs,
      (match, body) => {
        this.stats.environmentsProcessed++;
        return `> **TL;DR**\n${quote(body)}\n`;
      }
    );

    // Convert callout environment
    processed = processed.replace(
      /\\begin\{callout\}\{([^}]*)\}(.*?)\\end\{callout\}/gs,
      (match, title, body) => {
        this.stats.environmentsProcessed++;
        return `> **${title}**\n${quote(body)}\n`;
      }
    );

    // Convert finding command
    processed = processed.replace(
      /\\finding\{([^}]*)\}\{([^}]*)\}/g,
      (match, number, body) => {
        this.stats.environmentsProcessed++;
        return `> **🔍 Finding ${number}**: ${body}\n`;
      }
    );

    return processed;
  }

  /**
   * Convert `figure` environments and bare `\includegraphics` commands to
   * Markdown images. Paths under `figures/` are rewritten to `assets/image/`.
   *
   * A figure is converted when it contains both an image and a caption; the
   * `\label`, if any, is found anywhere inside the environment and emitted
   * as a `{#fig-<label>}` attribute. Figures that cannot be parsed are left
   * in place and only their `\includegraphics` is converted.
   *
   * @param {string} content
   * @returns {string}
   */
  fixFigures(content) {
    const toAssetPath = (imagePath) => imagePath.replace(/^figures\//, 'assets/image/');

    let fixed = content.replace(
      /\\begin\{figure\}([\s\S]*?)\\end\{figure\}/g,
      (match, body) => {
        const image = body.match(/\\includegraphics(?:\[[^\]]*\])?\{([^}]+)\}/);
        const caption = body.match(/\\caption\{([^}]+)\}/);
        if (!image || !caption) return match;

        const label = body.match(/\\label\{([^}]+)\}/);
        this.stats.figuresFixed++;
        const labelAttr = label ? ` {#fig-${label[1]}}` : '';
        return `\n![${caption[1]}](${toAssetPath(image[1])})${labelAttr}\n\n*${caption[1]}*\n`;
      }
    );

    // Fix simple includegraphics
    fixed = fixed.replace(
      /\\includegraphics(?:\[[^\]]*\])?\{([^}]+)\}/g,
      (match, imagePath) => {
        this.stats.figuresFixed++;
        return `![Figure](${toAssetPath(imagePath)})`;
      }
    );

    return fixed;
  }

  /** @returns {{commandsReplaced: number, environmentsProcessed: number, figuresFixed: number}} Live counters. */
  getStats() {
    return this.stats;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/scripts/latex-to-markdown/old-latex-converter/robust-preprocessor.mjs
DELETED
|
@@ -1,399 +0,0 @@
|
|
| 1 |
-
/**
|
| 2 |
-
* Préprocesseur LaTeX Ultra-Robuste
|
| 3 |
-
* Gère les cas complexes qui font planter Pandoc
|
| 4 |
-
*/
|
| 5 |
-
|
| 6 |
-
export class RobustLaTeXPreprocessor {
|
| 7 |
-
constructor() {
|
| 8 |
-
this.stats = {
|
| 9 |
-
figuresProcessed: 0,
|
| 10 |
-
citationsFixed: 0,
|
| 11 |
-
mathExpressionsFixed: 0,
|
| 12 |
-
environmentsProcessed: 0,
|
| 13 |
-
commandsReplaced: 0
|
| 14 |
-
};
|
| 15 |
-
this.debugMode = false;
|
| 16 |
-
}
|
| 17 |
-
|
| 18 |
-
preprocessContent(content, filename = 'unknown') {
|
| 19 |
-
if (this.debugMode) {
|
| 20 |
-
console.log(` 🔍 [DEBUG] Processing ${filename}...`);
|
| 21 |
-
}
|
| 22 |
-
|
| 23 |
-
let processed = content;
|
| 24 |
-
|
| 25 |
-
// Phase 1: Structure cleanup (most important first)
|
| 26 |
-
processed = this.phase1_StructureCleanup(processed);
|
| 27 |
-
|
| 28 |
-
// Phase 2: Content transformation
|
| 29 |
-
processed = this.phase2_ContentTransformation(processed);
|
| 30 |
-
|
| 31 |
-
// Phase 3: Final polish
|
| 32 |
-
processed = this.phase3_FinalPolish(processed);
|
| 33 |
-
|
| 34 |
-
return processed;
|
| 35 |
-
}
|
| 36 |
-
|
| 37 |
-
phase1_StructureCleanup(content) {
|
| 38 |
-
let cleaned = content;
|
| 39 |
-
|
| 40 |
-
// Remove comments (but preserve structure)
|
| 41 |
-
cleaned = cleaned.replace(/%.*$/gm, '');
|
| 42 |
-
|
| 43 |
-
// Fix broken line breaks that split words
|
| 44 |
-
cleaned = this.fixBrokenLineBreaks(cleaned);
|
| 45 |
-
|
| 46 |
-
// Fix broken equation environments
|
| 47 |
-
cleaned = this.fixBrokenEquations(cleaned);
|
| 48 |
-
|
| 49 |
-
// Fix broken figure environments BEFORE processing
|
| 50 |
-
cleaned = this.fixComplexFigures(cleaned);
|
| 51 |
-
|
| 52 |
-
// Handle problematic environments early
|
| 53 |
-
cleaned = this.handleProblematicEnvironments(cleaned);
|
| 54 |
-
|
| 55 |
-
return cleaned;
|
| 56 |
-
}
|
| 57 |
-
|
| 58 |
-
fixBrokenLineBreaks(content) {
|
| 59 |
-
let fixed = content;
|
| 60 |
-
|
| 61 |
-
// Fix hyphenated words broken across lines
|
| 62 |
-
// "length-\nT\nT" → "length-T"
|
| 63 |
-
fixed = fixed.replace(/([a-zA-Z])-\s*\n\s*([A-Z])\s*\n\s*\2/g, '$1-$2');
|
| 64 |
-
|
| 65 |
-
// Fix broken compound words
|
| 66 |
-
// "some-\nword" → "some-word"
|
| 67 |
-
fixed = fixed.replace(/([a-zA-Z])-\s*\n\s*([a-z])/g, '$1-$2');
|
| 68 |
-
|
| 69 |
-
// Fix sentences that got broken inappropriately
|
| 70 |
-
// "word.Sentence" → "word. Sentence"
|
| 71 |
-
fixed = fixed.replace(/([a-z])\.([A-Z])/g, '$1. $2');
|
| 72 |
-
|
| 73 |
-
return fixed;
|
| 74 |
-
}
|
| 75 |
-
|
| 76 |
-
fixBrokenEquations(content) {
|
| 77 |
-
let fixed = content;
|
| 78 |
-
|
| 79 |
-
// Fix mixed equation environments
|
| 80 |
-
// "\end{equation}$" → "$$"
|
| 81 |
-
fixed = fixed.replace(/\\end\{equation\}\$/g, '$$');
|
| 82 |
-
fixed = fixed.replace(/\$\\begin\{equation\}/g, '$$');
|
| 83 |
-
|
| 84 |
-
// Fix broken align environments
|
| 85 |
-
fixed = fixed.replace(/([^$])\s*&=\s*/g, '$1 &= ');
|
| 86 |
-
|
| 87 |
-
// Fix multiline math that lost structure
|
| 88 |
-
fixed = fixed.replace(/\$([^$]*?)&=([^$]*?)\$/g, '$$\\begin{align}\n$1 &= $2\n\\end{align}$$');
|
| 89 |
-
|
| 90 |
-
return fixed;
|
| 91 |
-
}
|
| 92 |
-
|
| 93 |
-
fixComplexFigures(content) {
|
| 94 |
-
let fixed = content;
|
| 95 |
-
|
| 96 |
-
// Strategy: Convert complex figures to simple markdown BEFORE Pandoc sees them
|
| 97 |
-
const figurePattern = /\\begin\{figure\*?\}([\s\S]*?)\\end\{figure\*?\}/g;
|
| 98 |
-
const wrapfigurePattern = /\\begin\{wrapfigure\}(?:\[[^\]]*\])?\{[^}]*\}\{[^}]*\}([\s\S]*?)\\end\{wrapfigure\}/g;
|
| 99 |
-
|
| 100 |
-
fixed = fixed.replace(figurePattern, (match, figureContent) => {
|
| 101 |
-
this.stats.figuresProcessed++;
|
| 102 |
-
|
| 103 |
-
// Extract components safely
|
| 104 |
-
const imageMatch = figureContent.match(/\\includegraphics(?:\[[^\]]*\])?\{([^}]+)\}/);
|
| 105 |
-
const captionMatch = figureContent.match(/\\caption\{([\s\S]*?)\}(?=\s*(?:\\label|\\end|\}|$))/);
|
| 106 |
-
const labelMatch = figureContent.match(/\\label\{([^}]+)\}/);
|
| 107 |
-
|
| 108 |
-
if (!imageMatch) {
|
| 109 |
-
return match; // Keep original if we can't parse it
|
| 110 |
-
}
|
| 111 |
-
|
| 112 |
-
const imagePath = imageMatch[1].replace(/^figures\//, 'assets/image/');
|
| 113 |
-
let caption = captionMatch ? captionMatch[1].trim() : 'Figure';
|
| 114 |
-
const label = labelMatch ? labelMatch[1] : '';
|
| 115 |
-
|
| 116 |
-
// Clean caption thoroughly
|
| 117 |
-
caption = this.cleanCaption(caption);
|
| 118 |
-
|
| 119 |
-
// Generate clean markdown
|
| 120 |
-
const labelAttr = label ? ` {#fig-${label}}` : '';
|
| 121 |
-
|
| 122 |
-
return `\n\n${labelAttr}\n\n*${caption}*\n\n`;
|
| 123 |
-
});
|
| 124 |
-
|
| 125 |
-
// Also handle wrapfigure environments
|
| 126 |
-
fixed = fixed.replace(wrapfigurePattern, (match, figureContent) => {
|
| 127 |
-
this.stats.figuresProcessed++;
|
| 128 |
-
|
| 129 |
-
// Extract components safely
|
| 130 |
-
const imageMatch = figureContent.match(/\\includegraphics(?:\[[^\]]*\])?\{([^}]+)\}/);
|
| 131 |
-
const captionMatch = figureContent.match(/\\caption\{([\s\S]*?)\}(?=\s*(?:\\label|\\end|\}|$))/);
|
| 132 |
-
const labelMatch = figureContent.match(/\\label\{([^}]+)\}/);
|
| 133 |
-
|
| 134 |
-
if (!imageMatch) {
|
| 135 |
-
return match; // Keep original if we can't parse it
|
| 136 |
-
}
|
| 137 |
-
|
| 138 |
-
const imagePath = imageMatch[1].replace(/^figures\//, 'assets/image/');
|
| 139 |
-
let caption = captionMatch ? captionMatch[1].trim() : 'Figure';
|
| 140 |
-
const label = labelMatch ? labelMatch[1] : '';
|
| 141 |
-
|
| 142 |
-
// Clean caption thoroughly
|
| 143 |
-
caption = this.cleanCaption(caption);
|
| 144 |
-
|
| 145 |
-
// Generate clean markdown (simpler for wrapfigure)
|
| 146 |
-
const labelAttr = label ? ` {#fig-${label}}` : '';
|
| 147 |
-
|
| 148 |
-
return `\n\n${labelAttr}\n\n`;
|
| 149 |
-
});
|
| 150 |
-
|
| 151 |
-
return fixed;
|
| 152 |
-
}
|
| 153 |
-
|
| 154 |
-
cleanCaption(caption) {
|
| 155 |
-
let cleaned = caption;
|
| 156 |
-
|
| 157 |
-
// Handle citations in captions properly
|
| 158 |
-
cleaned = cleaned.replace(/~\\cite[tp]?\{([^}]+)\}/g, ' [@$1]');
|
| 159 |
-
cleaned = cleaned.replace(/\\cite[tp]?\{([^}]+)\}/g, '[@$1]');
|
| 160 |
-
|
| 161 |
-
// Remove problematic LaTeX commands
|
| 162 |
-
cleaned = cleaned.replace(/\\textit\{([^}]+)\}/g, '*$1*');
|
| 163 |
-
cleaned = cleaned.replace(/\\textbf\{([^}]+)\}/g, '**$1**');
|
| 164 |
-
cleaned = cleaned.replace(/\\emph\{([^}]+)\}/g, '*$1*');
|
| 165 |
-
|
| 166 |
-
// Fix \textsc with complex content
|
| 167 |
-
cleaned = cleaned.replace(/\\textsc\{([^}]*\([^)]*\)[^}]*)\}/g, '**$1**');
|
| 168 |
-
|
| 169 |
-
// Handle nested braces safely
|
| 170 |
-
let depth = 0;
|
| 171 |
-
let result = '';
|
| 172 |
-
for (let i = 0; i < cleaned.length; i++) {
|
| 173 |
-
const char = cleaned[i];
|
| 174 |
-
if (char === '{') {
|
| 175 |
-
depth++;
|
| 176 |
-
if (depth === 1) continue; // Skip opening brace
|
| 177 |
-
} else if (char === '}') {
|
| 178 |
-
depth--;
|
| 179 |
-
if (depth === 0) continue; // Skip closing brace
|
| 180 |
-
} else {
|
| 181 |
-
result += char;
|
| 182 |
-
}
|
| 183 |
-
}
|
| 184 |
-
|
| 185 |
-
return result.trim();
|
| 186 |
-
}
|
| 187 |
-
|
| 188 |
-
handleProblematicEnvironments(content) {
|
| 189 |
-
let fixed = content;
|
| 190 |
-
|
| 191 |
-
// Handle algorithm environments
|
| 192 |
-
fixed = fixed.replace(/\\begin\{algorithm\}([\s\S]*?)\\end\{algorithm\}/g, (match, algContent) => {
|
| 193 |
-
return '\n```\nAlgorithm:\n' + algContent.replace(/\\[a-zA-Z]+/g, '') + '\n```\n';
|
| 194 |
-
});
|
| 195 |
-
|
| 196 |
-
// Handle complex math environments
|
| 197 |
-
fixed = fixed.replace(/\\begin\{align\*?\}([\s\S]*?)\\end\{align\*?\}/g, (match, mathContent) => {
|
| 198 |
-
const cleaned = mathContent.replace(/\\&/g, '').replace(/\\\\/g, '\n');
|
| 199 |
-
return '\n$$\n' + cleaned + '\n$$\n';
|
| 200 |
-
});
|
| 201 |
-
|
| 202 |
-
return fixed;
|
| 203 |
-
}
|
| 204 |
-
|
| 205 |
-
phase2_ContentTransformation(content) {
|
| 206 |
-
let transformed = content;
|
| 207 |
-
|
| 208 |
-
// Apply command mappings (safer order)
|
| 209 |
-
transformed = this.applyCommandMappings(transformed);
|
| 210 |
-
|
| 211 |
-
// Process custom environments
|
| 212 |
-
transformed = this.processCustomEnvironments(transformed);
|
| 213 |
-
|
| 214 |
-
// Handle remaining citations
|
| 215 |
-
transformed = this.processCitations(transformed);
|
| 216 |
-
|
| 217 |
-
return transformed;
|
| 218 |
-
}
|
| 219 |
-
|
| 220 |
-
applyCommandMappings(content) {
|
| 221 |
-
let processed = content;
|
| 222 |
-
|
| 223 |
-
// Safe command replacements (most common first)
|
| 224 |
-
const safeCommands = {
|
| 225 |
-
'eg': 'e.g.,',
|
| 226 |
-
'ie': 'i.e.,',
|
| 227 |
-
'versus': 'vs.',
|
| 228 |
-
'wrt': 'w.r.t.',
|
| 229 |
-
'etc': 'etc.',
|
| 230 |
-
'lerobot': '**LeRobot**',
|
| 231 |
-
'lerobotdataset': '`LeRobotDataset`',
|
| 232 |
-
'huggingface': '🤗 **Hugging Face**',
|
| 233 |
-
'qfunction': 'Q-function',
|
| 234 |
-
'qopt': 'Q^*',
|
| 235 |
-
// Robotics-specific commands from handles.tex
|
| 236 |
-
'actionchunk': '\\mathbf{A}',
|
| 237 |
-
'actionexpert': '\\mathbf{v}_\\theta',
|
| 238 |
-
'pizero': '\\pi_0',
|
| 239 |
-
'statespace': '\\mathcal{S}',
|
| 240 |
-
'actionspace': '\\mathcal{A}',
|
| 241 |
-
'obsspace': '\\mathcal{O}',
|
| 242 |
-
'dynamics': '\\mathcal{D}',
|
| 243 |
-
'stateplusone': 's_{t+1}',
|
| 244 |
-
'state': 's_t',
|
| 245 |
-
'action': 'a_t',
|
| 246 |
-
'transition': '(s_t, a_t, s_{t+1})',
|
| 247 |
-
'sars': '(s_t, a_t, r_t, s_{t+1})',
|
| 248 |
-
'transitiongiven': '(s_{t+1} | s_t, a_t)',
|
| 249 |
-
'transitionprob': '\\mathbb{P}(s_{t+1} | s_t, a_t)',
|
| 250 |
-
'trajectory': '(s_0, a_0, r_0, s_1, a_1, r_1, \\dots, s_{T-1}, a_{T-1}, r_{T-1}, s_T)',
|
| 251 |
-
'Jpi': 'J(\\pi_\\theta)',
|
| 252 |
-
'supp': '\\text{supp}',
|
| 253 |
-
'DKL': '\\text{D}_{\\text{KL}}',
|
| 254 |
-
'FK': '\\text{FK}',
|
| 255 |
-
'targetvel': '\\dot{p}^*',
|
| 256 |
-
'targetpos': 'p^*'
|
| 257 |
-
};
|
| 258 |
-
|
| 259 |
-
for (const [command, replacement] of Object.entries(safeCommands)) {
|
| 260 |
-
const regex = new RegExp(`\\\\${command}(?![a-zA-Z])`, 'g');
|
| 261 |
-
const matches = processed.match(regex);
|
| 262 |
-
if (matches) {
|
| 263 |
-
this.stats.commandsReplaced += matches.length;
|
| 264 |
-
processed = processed.replace(regex, replacement);
|
| 265 |
-
}
|
| 266 |
-
}
|
| 267 |
-
|
| 268 |
-
// Math commands (more careful)
|
| 269 |
-
const mathCommands = ['X', 'Z', 'G', 'D', 'F', 'R', 'S', 'T', 'U', 'Y'];
|
| 270 |
-
mathCommands.forEach(letter => {
|
| 271 |
-
const regex = new RegExp(`\\\\${letter}(?![a-zA-Z])`, 'g');
|
| 272 |
-
processed = processed.replace(regex, `\\mathcal{${letter}}`);
|
| 273 |
-
});
|
| 274 |
-
|
| 275 |
-
// Handle commands with subscripts (like \actionchunk_t)
|
| 276 |
-
processed = processed.replace(/\\actionchunk_t/g, '\\mathbf{A}_t');
|
| 277 |
-
processed = processed.replace(/\\actionexpert_([a-zA-Z0-9]+)/g, '\\mathbf{v}_{\\theta_$1}');
|
| 278 |
-
processed = processed.replace(/\\state_([a-zA-Z0-9]+)/g, 's_{$1}');
|
| 279 |
-
processed = processed.replace(/\\action_([a-zA-Z0-9]+)/g, 'a_{$1}');
|
| 280 |
-
|
| 281 |
-
// Fix problematic \textsc commands with complex content
|
| 282 |
-
processed = processed.replace(/\\textsc\{([^{}]*\([^)]*\)[^{}]*)\}/g, '**$1**');
|
| 283 |
-
processed = processed.replace(/\\textsc\{([^}]+)\}/g, '**$1**');
|
| 284 |
-
|
| 285 |
-
// Fix \url commands to make them MDX-compatible
|
| 286 |
-
processed = processed.replace(/\\textbf\{\\url\{([^}]+)\}\}/g, '**[$1]($1)**');
|
| 287 |
-
processed = processed.replace(/\\url\{([^}]+)\}/g, '[$1]($1)');
|
| 288 |
-
|
| 289 |
-
return processed;
|
| 290 |
-
}
|
| 291 |
-
|
| 292 |
-
processCustomEnvironments(content) {
|
| 293 |
-
let processed = content;
|
| 294 |
-
|
| 295 |
-
// TL;DR environment
|
| 296 |
-
processed = processed.replace(
|
| 297 |
-
/\\begin\{tldr\}([\s\S]*?)\\end\{tldr\}/g,
|
| 298 |
-
(match, content) => {
|
| 299 |
-
this.stats.environmentsProcessed++;
|
| 300 |
-
return `\n> **TL;DR**\n> ${content.trim()}\n\n`;
|
| 301 |
-
}
|
| 302 |
-
);
|
| 303 |
-
|
| 304 |
-
// Callout environment
|
| 305 |
-
processed = processed.replace(
|
| 306 |
-
/\\begin\{callout\}\{([^}]*)\}([\s\S]*?)\\end\{callout\}/g,
|
| 307 |
-
(match, title, content) => {
|
| 308 |
-
this.stats.environmentsProcessed++;
|
| 309 |
-
return `\n> **${title}**\n> ${content.trim()}\n\n`;
|
| 310 |
-
}
|
| 311 |
-
);
|
| 312 |
-
|
| 313 |
-
// Finding command
|
| 314 |
-
processed = processed.replace(
|
| 315 |
-
/\\finding\{([^}]*)\}\{([^}]*)\}/g,
|
| 316 |
-
(match, number, content) => {
|
| 317 |
-
this.stats.environmentsProcessed++;
|
| 318 |
-
return `\n> **🔍 Finding ${number}**: ${content}\n\n`;
|
| 319 |
-
}
|
| 320 |
-
);
|
| 321 |
-
|
| 322 |
-
return processed;
|
| 323 |
-
}
|
| 324 |
-
|
| 325 |
-
processCitations(content) {
|
| 326 |
-
let processed = content;
|
| 327 |
-
|
| 328 |
-
// Handle different citation types
|
| 329 |
-
processed = processed.replace(/\\citep\{([^}]+)\}/g, '[@$1]');
|
| 330 |
-
processed = processed.replace(/\\citet\{([^}]+)\}/g, '@$1');
|
| 331 |
-
processed = processed.replace(/\\cite\{([^}]+)\}/g, '[@$1]');
|
| 332 |
-
|
| 333 |
-
// Handle spaced citations (common issue)
|
| 334 |
-
processed = processed.replace(/~\\cite/g, ' \\cite');
|
| 335 |
-
processed = processed.replace(/~\[@/g, ' [@');
|
| 336 |
-
|
| 337 |
-
// Count citations
|
| 338 |
-
const citations = processed.match(/\[@[^\]]+\]/g) || [];
|
| 339 |
-
this.stats.citationsFixed += citations.length;
|
| 340 |
-
|
| 341 |
-
return processed;
|
| 342 |
-
}
|
| 343 |
-
|
| 344 |
-
phase3_FinalPolish(content) {
|
| 345 |
-
let polished = content;
|
| 346 |
-
|
| 347 |
-
// Fix math expressions
|
| 348 |
-
polished = this.fixMathExpressions(polished);
|
| 349 |
-
|
| 350 |
-
// Clean up whitespace and structure
|
| 351 |
-
polished = this.finalCleanup(polished);
|
| 352 |
-
|
| 353 |
-
return polished;
|
| 354 |
-
}
|
| 355 |
-
|
| 356 |
-
fixMathExpressions(content) {
|
| 357 |
-
let fixed = content;
|
| 358 |
-
|
| 359 |
-
// Fix common problematic patterns
|
| 360 |
-
fixed = fixed.replace(/\$\{([^}]+)\}\$/g, '$$$1$$'); // ${...}$ -> $...$
|
| 361 |
-
fixed = fixed.replace(/\$([^$]*)\\\$([^$]*)\$/g, '$$$1$2$$'); // $...\$...$ -> $...$
|
| 362 |
-
|
| 363 |
-
// Fix pi expressions specifically
|
| 364 |
-
fixed = fixed.replace(/\$\\pi_\$([0-9]+)\$/g, '$\\pi_$1$');
|
| 365 |
-
fixed = fixed.replace(/\$\{\\pi_\}([0-9]+)\$/g, '$\\pi_$1$');
|
| 366 |
-
|
| 367 |
-
// Fix malformed math delimiters
|
| 368 |
-
fixed = fixed.replace(/\$\$\$+/g, '$$');
|
| 369 |
-
|
| 370 |
-
this.stats.mathExpressionsFixed++;
|
| 371 |
-
|
| 372 |
-
return fixed;
|
| 373 |
-
}
|
| 374 |
-
|
| 375 |
-
finalCleanup(content) {
|
| 376 |
-
let cleaned = content;
|
| 377 |
-
|
| 378 |
-
// Normalize whitespace
|
| 379 |
-
cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
|
| 380 |
-
cleaned = cleaned.replace(/[ \t]+$/gm, ''); // Trailing spaces
|
| 381 |
-
|
| 382 |
-
// Fix MDX-incompatible angle bracket URLs
|
| 383 |
-
cleaned = cleaned.replace(/\*\*<(https?:\/\/[^>]+)>\*\*/g, '**[$1]($1)**');
|
| 384 |
-
cleaned = cleaned.replace(/<(https?:\/\/[^>]+)>/g, '[$1]($1)');
|
| 385 |
-
|
| 386 |
-
// Ensure proper spacing around elements
|
| 387 |
-
cleaned = cleaned.replace(/\n\n\n+/g, '\n\n');
|
| 388 |
-
|
| 389 |
-
return cleaned.trim();
|
| 390 |
-
}
|
| 391 |
-
|
| 392 |
-
getStats() {
|
| 393 |
-
return this.stats;
|
| 394 |
-
}
|
| 395 |
-
|
| 396 |
-
setDebugMode(enabled) {
|
| 397 |
-
this.debugMode = enabled;
|
| 398 |
-
}
|
| 399 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/scripts/latex-to-markdown/output/main.md
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app/scripts/latex-to-markdown/output/main.mdx
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app/scripts/latex-to-markdown/reference-preprocessor.mjs
CHANGED
|
@@ -82,6 +82,54 @@ function createCleanMapping(references) {
|
|
| 82 |
return mapping;
|
| 83 |
}
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
/**
|
| 86 |
* Apply mapping to LaTeX content
|
| 87 |
* @param {string} content - Original LaTeX content
|
|
@@ -92,17 +140,19 @@ function applyMapping(content, mapping) {
|
|
| 92 |
let cleanedContent = content;
|
| 93 |
let changesCount = 0;
|
| 94 |
|
| 95 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
for (const [original, clean] of mapping) {
|
| 97 |
if (original !== clean) {
|
| 98 |
-
// Replace \label{original} with \label{clean}
|
| 99 |
-
const labelRegex = new RegExp(`\\\\label\\{${escapeRegex(original)}\\}`, 'g');
|
| 100 |
-
const labelMatches = cleanedContent.match(labelRegex);
|
| 101 |
-
if (labelMatches) {
|
| 102 |
-
cleanedContent = cleanedContent.replace(labelRegex, `\\label{${clean}}`);
|
| 103 |
-
changesCount += labelMatches.length;
|
| 104 |
-
}
|
| 105 |
-
|
| 106 |
// Replace \ref{original} with \ref{clean}
|
| 107 |
const refRegex = new RegExp(`\\\\ref\\{${escapeRegex(original)}\\}`, 'g');
|
| 108 |
const refMatches = cleanedContent.match(refRegex);
|
|
@@ -110,10 +160,24 @@ function applyMapping(content, mapping) {
|
|
| 110 |
cleanedContent = cleanedContent.replace(refRegex, `\\ref{${clean}}`);
|
| 111 |
changesCount += refMatches.length;
|
| 112 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
}
|
| 114 |
}
|
| 115 |
|
| 116 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
}
|
| 118 |
|
| 119 |
/**
|
|
@@ -145,23 +209,27 @@ export function preprocessLatexReferences(latexContent) {
|
|
| 145 |
const result = applyMapping(latexContent, mapping);
|
| 146 |
|
| 147 |
if (result.changesCount > 0) {
|
| 148 |
-
console.log(` ✅
|
| 149 |
|
| 150 |
// Show some examples of changes
|
| 151 |
let exampleCount = 0;
|
| 152 |
for (const [original, clean] of mapping) {
|
| 153 |
if (original !== clean && exampleCount < 3) {
|
| 154 |
-
console.log(` ${original} → ${clean}`);
|
| 155 |
exampleCount++;
|
| 156 |
}
|
| 157 |
}
|
| 158 |
if (mapping.size > 3) {
|
| 159 |
-
console.log(` ... and ${mapping.size - 3} more`);
|
| 160 |
}
|
| 161 |
} else {
|
| 162 |
console.log(' ℹ️ No reference cleanup needed');
|
| 163 |
}
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
return {
|
| 166 |
content: result.content,
|
| 167 |
changesCount: result.changesCount,
|
|
|
|
| 82 |
return mapping;
|
| 83 |
}
|
| 84 |
|
| 85 |
+
/**
 * Swap `\label{...}` commands for empty HTML anchor spans.
 *
 * For every mapping entry whose original identifier does not start with
 * `eq:`, each `\label{original}` in the content is replaced by
 * `<span id="clean" style="position: absolute;"></span>` surrounded by blank
 * lines. Labels starting with `eq:` are left in place (the original author
 * notes that a Lua filter handles them). The skip is decided by the `eq:`
 * prefix alone, not by where the label sits in the document.
 *
 * @param {string} content - LaTeX content
 * @param {Map} mapping - Identifier mapping (original -> clean)
 * @returns {Object} - `{ content, anchorsCreated }`: the rewritten text and
 *   the number of labels that were replaced
 */
function convertLabelsToAnchors(content, mapping) {
    let anchorsCreated = 0;
    let processedContent = content;

    for (const [original, clean] of mapping) {
        // Equation labels stay as \label{...}
        if (original.startsWith('eq:')) {
            continue;
        }

        const labelRegex = new RegExp(`\\\\label\\{${escapeRegex(original)}\\}`, 'g');
        const occurrences = processedContent.match(labelRegex);
        if (!occurrences) {
            continue;
        }

        // Empty span: invisible in the page but addressable through its id
        const anchor = `\n\n<span id="${clean}" style="position: absolute;"></span>\n\n`;
        processedContent = processedContent.replace(labelRegex, anchor);
        anchorsCreated += occurrences.length;
    }

    return { content: processedContent, anchorsCreated };
}
|
| 114 |
+
|
| 115 |
+
/**
 * Convert \highlight{...} commands to HTML spans with CSS class.
 *
 * The argument is located by brace matching rather than by "everything up to
 * the first }", so arguments that contain other commands with their own
 * braces (e.g. `\highlight{\textbf{x} y}`) are converted as a whole.
 * Backslash-escaped characters such as `\{`, `\}` and `\%` never count as
 * braces. Nested \highlight commands inside an argument are converted too.
 *
 * Left untouched: `\highlight{}` with an empty argument, and a
 * `\highlight{` whose closing brace cannot be found.
 *
 * @param {string} content - LaTeX content
 * @returns {Object} - `{ content, highlightsConverted }`: the rewritten text
 *   and the number of \highlight commands that were converted
 */
function convertHighlightCommands(content) {
    const COMMAND = '\\highlight{';
    let processedContent = '';
    let highlightsConverted = 0;
    let cursor = 0;

    while (cursor < content.length) {
        const commandStart = content.indexOf(COMMAND, cursor);
        if (commandStart === -1) {
            break;
        }

        const bodyStart = commandStart + COMMAND.length;
        const bodyEnd = findHighlightClosingBrace(content, bodyStart);

        if (bodyEnd === -1 || bodyEnd === bodyStart) {
            // Unbalanced or empty argument: keep the command text verbatim
            // and resume scanning right after it.
            processedContent += content.slice(cursor, bodyStart);
            cursor = bodyStart;
            continue;
        }

        // Convert highlights nested inside the argument as well
        const inner = convertHighlightCommands(content.slice(bodyStart, bodyEnd));

        processedContent += content.slice(cursor, commandStart);
        processedContent += `<span class="highlight">${inner.content}</span>`;
        highlightsConverted += 1 + inner.highlightsConverted;
        cursor = bodyEnd + 1;
    }

    processedContent += content.slice(cursor);

    return { content: processedContent, highlightsConverted };
}

/**
 * Find the `}` that closes a group whose `{` sits just before `from`.
 *
 * @param {string} text - Text to scan
 * @param {number} from - Index of the first character inside the group
 * @returns {number} Index of the matching `}`, or -1 when there is none
 */
function findHighlightClosingBrace(text, from) {
    let depth = 1;

    for (let i = from; i < text.length; i++) {
        const ch = text[i];

        if (ch === '\\') {
            // Skip the escaped character so "\{" / "\}" are not counted
            i++;
        } else if (ch === '{') {
            depth++;
        } else if (ch === '}') {
            depth--;
            if (depth === 0) {
                return i;
            }
        }
    }

    return -1;
}
|
| 132 |
+
|
| 133 |
/**
|
| 134 |
* Apply mapping to LaTeX content
|
| 135 |
* @param {string} content - Original LaTeX content
|
|
|
|
| 140 |
let cleanedContent = content;
|
| 141 |
let changesCount = 0;
|
| 142 |
|
| 143 |
+
// First, convert labels to anchor spans
|
| 144 |
+
const anchorResult = convertLabelsToAnchors(cleanedContent, mapping);
|
| 145 |
+
cleanedContent = anchorResult.content;
|
| 146 |
+
const anchorsCreated = anchorResult.anchorsCreated;
|
| 147 |
+
|
| 148 |
+
// Convert \highlight{} commands to spans
|
| 149 |
+
const highlightResult = convertHighlightCommands(cleanedContent);
|
| 150 |
+
cleanedContent = highlightResult.content;
|
| 151 |
+
const highlightsConverted = highlightResult.highlightsConverted;
|
| 152 |
+
|
| 153 |
+
// Then apply mapping to remaining references and equation labels
|
| 154 |
for (const [original, clean] of mapping) {
|
| 155 |
if (original !== clean) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
// Replace \ref{original} with \ref{clean}
|
| 157 |
const refRegex = new RegExp(`\\\\ref\\{${escapeRegex(original)}\\}`, 'g');
|
| 158 |
const refMatches = cleanedContent.match(refRegex);
|
|
|
|
| 160 |
cleanedContent = cleanedContent.replace(refRegex, `\\ref{${clean}}`);
|
| 161 |
changesCount += refMatches.length;
|
| 162 |
}
|
| 163 |
+
|
| 164 |
+
// For equation labels, still clean the labels themselves (for the Lua filter)
|
| 165 |
+
if (original.startsWith('eq:')) {
|
| 166 |
+
const labelRegex = new RegExp(`\\\\label\\{${escapeRegex(original)}\\}`, 'g');
|
| 167 |
+
const labelMatches = cleanedContent.match(labelRegex);
|
| 168 |
+
if (labelMatches) {
|
| 169 |
+
cleanedContent = cleanedContent.replace(labelRegex, `\\label{${clean}}`);
|
| 170 |
+
changesCount += labelMatches.length;
|
| 171 |
+
}
|
| 172 |
+
}
|
| 173 |
}
|
| 174 |
}
|
| 175 |
|
| 176 |
+
return {
|
| 177 |
+
content: cleanedContent,
|
| 178 |
+
changesCount: changesCount + anchorsCreated,
|
| 179 |
+
highlightsConverted: highlightsConverted
|
| 180 |
+
};
|
| 181 |
}
|
| 182 |
|
| 183 |
/**
|
|
|
|
| 209 |
const result = applyMapping(latexContent, mapping);
|
| 210 |
|
| 211 |
if (result.changesCount > 0) {
|
| 212 |
+
console.log(` ✅ Processed ${result.changesCount} reference(s) and created anchor spans`);
|
| 213 |
|
| 214 |
// Show some examples of changes
|
| 215 |
let exampleCount = 0;
|
| 216 |
for (const [original, clean] of mapping) {
|
| 217 |
if (original !== clean && exampleCount < 3) {
|
| 218 |
+
console.log(` ${original} → ${clean} (span + refs)`);
|
| 219 |
exampleCount++;
|
| 220 |
}
|
| 221 |
}
|
| 222 |
if (mapping.size > 3) {
|
| 223 |
+
console.log(` ... and ${mapping.size - 3} more anchor spans created`);
|
| 224 |
}
|
| 225 |
} else {
|
| 226 |
console.log(' ℹ️ No reference cleanup needed');
|
| 227 |
}
|
| 228 |
|
| 229 |
+
if (result.highlightsConverted > 0) {
|
| 230 |
+
console.log(` ✨ Converted ${result.highlightsConverted} \\highlight{} command(s) to <span class="highlight">`);
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
return {
|
| 234 |
content: result.content,
|
| 235 |
changesCount: result.changesCount,
|
app/src/content/article.mdx
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app/src/styles/_base.css
CHANGED
|
@@ -109,4 +109,16 @@ html {
|
|
| 109 |
|
| 110 |
[data-footnote-ref] {
|
| 111 |
margin-left: 4px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
}
|
|
|
|
| 109 |
|
| 110 |
[data-footnote-ref] {
|
| 111 |
margin-left: 4px;
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
.highlight {
|
| 115 |
+
/* background-color: color-mix(in srgb, var(--primary-color) 26%, transparent); */
|
| 116 |
+
background-color: var(--primary-color);
|
| 117 |
+
color: var(--page-bg);
|
| 118 |
+
padding: 3px 6px;
|
| 119 |
+
border-radius: 3px;
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
.highlight>* {
|
| 123 |
+
padding: 0;
|
| 124 |
}
|
app/src/styles/_variables.css
CHANGED
|
@@ -8,10 +8,10 @@
|
|
| 8 |
--neutral-300: rgb(228, 228, 228);
|
| 9 |
--neutral-200: rgb(245, 245, 245);
|
| 10 |
|
| 11 |
-
--default-font-family: Source Sans Pro,ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";
|
| 12 |
|
| 13 |
/* Brand (OKLCH base + derived states) */
|
| 14 |
-
--primary-base: oklch(0.
|
| 15 |
--primary-color: var(--primary-base);
|
| 16 |
--primary-color-hover: oklch(from var(--primary-color) calc(l - 0.05) c h);
|
| 17 |
--primary-color-active: oklch(from var(--primary-color) calc(l - 0.10) c h);
|
|
@@ -19,10 +19,10 @@
|
|
| 19 |
|
| 20 |
/* Text & Surfaces */
|
| 21 |
--page-bg: #ffffff;
|
| 22 |
-
--text-color: rgba(0,0,0
|
| 23 |
-
--transparent-page-contrast: rgba(255,255,255
|
| 24 |
-
--muted-color: rgba(0,0,0
|
| 25 |
-
--border-color: rgba(0,0,0
|
| 26 |
--surface-bg: #fafafa;
|
| 27 |
--code-bg: #f6f8fa;
|
| 28 |
|
|
@@ -52,8 +52,10 @@
|
|
| 52 |
@custom-media --bp-content-collapse (max-width: 1100px);
|
| 53 |
|
| 54 |
/* Layout */
|
| 55 |
-
--content-padding-x: 16px;
|
| 56 |
-
|
|
|
|
|
|
|
| 57 |
|
| 58 |
/* Config */
|
| 59 |
--palette-count: 8;
|
|
@@ -86,7 +88,7 @@
|
|
| 86 |
/* Charts (global) */
|
| 87 |
--axis-color: var(--muted-color);
|
| 88 |
--tick-color: var(--text-color);
|
| 89 |
-
--grid-color: rgba(0,0,0
|
| 90 |
}
|
| 91 |
|
| 92 |
/* ============================================================================ */
|
|
@@ -94,17 +96,17 @@
|
|
| 94 |
/* ============================================================================ */
|
| 95 |
[data-theme="dark"] {
|
| 96 |
--page-bg: #0f1115;
|
| 97 |
-
--text-color: rgba(255,255,255
|
| 98 |
-
--muted-color: rgba(255,255,255
|
| 99 |
-
--border-color: rgba(255,255,255
|
| 100 |
--surface-bg: #12151b;
|
| 101 |
--code-bg: #12151b;
|
| 102 |
-
--transparent-page-contrast: rgba(0,0,0
|
| 103 |
-
|
| 104 |
/* Charts (global) */
|
| 105 |
--axis-color: var(--muted-color);
|
| 106 |
--tick-color: var(--muted-color);
|
| 107 |
-
--grid-color: rgba(255,255,255
|
| 108 |
|
| 109 |
/* Primary (lower L in dark) */
|
| 110 |
--primary-color: oklch(from var(--primary-base) calc(l - 0.08) c h);
|
|
|
|
| 8 |
--neutral-300: rgb(228, 228, 228);
|
| 9 |
--neutral-200: rgb(245, 245, 245);
|
| 10 |
|
| 11 |
+
--default-font-family: Source Sans Pro, ui-sans-serif, system-ui, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
|
| 12 |
|
| 13 |
/* Brand (OKLCH base + derived states) */
|
| 14 |
+
--primary-base: oklch(0.74 0.12 60.17);
|
| 15 |
--primary-color: var(--primary-base);
|
| 16 |
--primary-color-hover: oklch(from var(--primary-color) calc(l - 0.05) c h);
|
| 17 |
--primary-color-active: oklch(from var(--primary-color) calc(l - 0.10) c h);
|
|
|
|
| 19 |
|
| 20 |
/* Text & Surfaces */
|
| 21 |
--page-bg: #ffffff;
|
| 22 |
+
--text-color: rgba(0, 0, 0, .85);
|
| 23 |
+
--transparent-page-contrast: rgba(255, 255, 255, .85);
|
| 24 |
+
--muted-color: rgba(0, 0, 0, .6);
|
| 25 |
+
--border-color: rgba(0, 0, 0, .1);
|
| 26 |
--surface-bg: #fafafa;
|
| 27 |
--code-bg: #f6f8fa;
|
| 28 |
|
|
|
|
| 52 |
@custom-media --bp-content-collapse (max-width: 1100px);
|
| 53 |
|
| 54 |
/* Layout */
|
| 55 |
+
--content-padding-x: 16px;
|
| 56 |
+
/* default page gutter */
|
| 57 |
+
--block-spacing-y: var(--spacing-4);
|
| 58 |
+
/* default vertical spacing between block components */
|
| 59 |
|
| 60 |
/* Config */
|
| 61 |
--palette-count: 8;
|
|
|
|
| 88 |
/* Charts (global) */
|
| 89 |
--axis-color: var(--muted-color);
|
| 90 |
--tick-color: var(--text-color);
|
| 91 |
+
--grid-color: rgba(0, 0, 0, .08);
|
| 92 |
}
|
| 93 |
|
| 94 |
/* ============================================================================ */
|
|
|
|
| 96 |
/* ============================================================================ */
|
| 97 |
[data-theme="dark"] {
|
| 98 |
--page-bg: #0f1115;
|
| 99 |
+
--text-color: rgba(255, 255, 255, .9);
|
| 100 |
+
--muted-color: rgba(255, 255, 255, .7);
|
| 101 |
+
--border-color: rgba(255, 255, 255, .15);
|
| 102 |
--surface-bg: #12151b;
|
| 103 |
--code-bg: #12151b;
|
| 104 |
+
--transparent-page-contrast: rgba(0, 0, 0, .85);
|
| 105 |
+
|
| 106 |
/* Charts (global) */
|
| 107 |
--axis-color: var(--muted-color);
|
| 108 |
--tick-color: var(--muted-color);
|
| 109 |
+
--grid-color: rgba(255, 255, 255, .10);
|
| 110 |
|
| 111 |
/* Primary (lower L in dark) */
|
| 112 |
--primary-color: oklch(from var(--primary-base) calc(l - 0.08) c h);
|