Fix exporter

#13
by lewtun HF Staff - opened
app/scripts/README-TXT-EXPORT.md CHANGED
@@ -5,13 +5,23 @@ This script exports the article to a simple text format suitable for book publis
5
  ## Usage
6
 
7
  ```bash
8
- npm run export:txt
9
  ```
10
 
11
  Or with custom filename:
12
 
13
  ```bash
14
- node scripts/export-txt.mjs --filename=my-article
 
 
 
 
 
 
 
 
 
 
15
  ```
16
 
17
  ## Output
@@ -65,7 +75,7 @@ Example:
65
  ```
66
  Example:
67
  ```
68
- Use the <ic>npm install</ic> command to install dependencies.
69
  ```
70
 
71
  #### LaTeX Formulas
 
5
  ## Usage
6
 
7
  ```bash
8
+ yarn export:txt
9
  ```
10
 
11
  Or with custom filename:
12
 
13
  ```bash
14
+ yarn export:txt -- --filename=my-article
15
+ ```
16
+
17
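+ By default, the contents of `<c>...</c>` code blocks are hard-wrapped to 80 characters per line (the `<c>` and `</c>` tags count toward that limit). Use `--wrap-code` to disable wrapping or `--code-width` to change the width: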
18
+
19
+ ```bash
20
+ # Disable wrapping
21
+ yarn export:txt -- --wrap-code=false
22
+
23
+ # Change wrap width
24
+ yarn export:txt -- --code-width=100
25
  ```
26
 
27
  ## Output
 
75
  ```
76
  Example:
77
  ```
78
+ Use the <ic>yarn install</ic> command to install dependencies.
79
  ```
80
 
81
  #### LaTeX Formulas
app/scripts/export-docx.mjs CHANGED
@@ -14,7 +14,7 @@
14
  * npm run export:docx
15
  */
16
 
17
- import { Document, Packer, Paragraph, TextRun, HeadingLevel, AlignmentType } from 'docx';
18
  import { promises as fs } from 'node:fs';
19
  import { resolve } from 'node:path';
20
  import process from 'node:process';
@@ -93,6 +93,21 @@ function parseInlineFormatting(text) {
93
  return runs.length > 0 ? runs : [new TextRun(text)];
94
  }
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  async function convertTxtToDocx(txtPath, outputPath) {
97
  console.log(`📖 Reading TXT file: ${txtPath}`);
98
  const content = await fs.readFile(txtPath, 'utf-8');
@@ -112,12 +127,25 @@ async function convertTxtToDocx(txtPath, outputPath) {
112
  }
113
 
114
  // Handle code blocks <c>...</c>
115
- if (line.trim().startsWith('<c>')) {
116
  inCodeBlock = true;
117
  codeLines = [];
118
- const firstLine = line.replace(/^<c>\s*/, '');
119
- if (firstLine && !firstLine.startsWith('</c>')) {
120
- codeLines.push(firstLine);
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  }
122
  continue;
123
  }
@@ -128,13 +156,7 @@ async function convertTxtToDocx(txtPath, outputPath) {
128
 
129
  // Add code block as paragraph(s)
130
  if (codeLines.length > 0) {
131
- paragraphs.push(new Paragraph({
132
- text: codeLines.join('\n'),
133
- font: 'Courier New',
134
- size: 20,
135
- shading: { fill: 'F5F5F5', type: 'clear' },
136
- spacing: { before: 200, after: 200 }
137
- }));
138
  }
139
 
140
  inCodeBlock = false;
 
14
  * npm run export:docx
15
  */
16
 
17
+ import { Document, Packer, Paragraph, TextRun, HeadingLevel, AlignmentType, LineRuleType } from 'docx';
18
  import { promises as fs } from 'node:fs';
19
  import { resolve } from 'node:path';
20
  import process from 'node:process';
 
93
  return runs.length > 0 ? runs : [new TextRun(text)];
94
  }
95
 
96
+ function codeBlockToParagraph(codeLines) {
97
+ const codeRuns = codeLines.map((codeLine, idx) => new TextRun({
98
+ break: idx === 0 ? 0 : 1,
99
+ children: [codeLine],
100
+ font: 'Courier New',
101
+ size: 20
102
+ }));
103
+
104
+ return new Paragraph({
105
+ children: codeRuns,
106
+ shading: { fill: 'F5F5F5', type: 'clear' },
107
+ spacing: { before: 200, after: 200, line: 240, lineRule: LineRuleType.AUTO }
108
+ });
109
+ }
110
+
111
  async function convertTxtToDocx(txtPath, outputPath) {
112
  console.log(`📖 Reading TXT file: ${txtPath}`);
113
  const content = await fs.readFile(txtPath, 'utf-8');
 
127
  }
128
 
129
  // Handle code blocks <c>...</c>
130
+ if (/^\s*<c>/.test(line)) {
131
  inCodeBlock = true;
132
  codeLines = [];
133
+
134
+ const firstLine = line.replace(/^\s*<c>/, '');
135
+ if (firstLine) {
136
+ // Handle single-line code blocks like: <c>code...</c>
137
+ if (firstLine.trim().endsWith('</c>')) {
138
+ const singleLine = firstLine.replace(/<\/c>\s*$/, '');
139
+ codeLines.push(singleLine);
140
+ paragraphs.push(codeBlockToParagraph(codeLines));
141
+ inCodeBlock = false;
142
+ codeLines = [];
143
+ continue;
144
+ }
145
+
146
+ if (!firstLine.startsWith('</c>')) {
147
+ codeLines.push(firstLine);
148
+ }
149
  }
150
  continue;
151
  }
 
156
 
157
  // Add code block as paragraph(s)
158
  if (codeLines.length > 0) {
159
+ paragraphs.push(codeBlockToParagraph(codeLines));
 
 
 
 
 
 
160
  }
161
 
162
  inCodeBlock = false;
app/scripts/export-txt.mjs CHANGED
@@ -35,16 +35,23 @@ async function run(command, args = [], options = {}) {
35
  });
36
  }
37
 
38
- async function waitForServer(url, timeoutMs = 60000) {
 
39
  const start = Date.now();
40
  while (Date.now() - start < timeoutMs) {
41
  try {
 
 
 
 
 
42
  const res = await fetch(url);
43
  if (res.ok) return;
44
  } catch { }
45
  await delay(500);
46
  }
47
- throw new Error(`Server did not start in time: ${url}`);
 
48
  }
49
 
50
  function parseArgs(argv) {
@@ -57,6 +64,15 @@ function parseArgs(argv) {
57
  return out;
58
  }
59
 
 
 
 
 
 
 
 
 
 
60
  function slugify(text) {
61
  return String(text || '')
62
  .normalize('NFKD')
@@ -99,6 +115,82 @@ function headingToMarkdown(level, text) {
99
  return `${hashes} ${text}`;
100
  }
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  /**
103
  * Extract and convert article content to TXT format
104
  */
@@ -156,8 +248,12 @@ async function extractArticleContent(page) {
156
  const main = document.querySelector('main');
157
  if (!main) return 'Error: main element not found';
158
 
159
- // Helper: get all visual elements in DOM order (same as screenshot script)
160
- const allVisualElements = Array.from(main.querySelectorAll('.html-embed, .table-scroll > table, .image-wrapper, figure, .katex-display'));
 
 
 
 
161
  const elementIndexMap = new Map();
162
 
163
  // Pre-process: assign global indices to visual elements
@@ -168,7 +264,41 @@ async function extractArticleContent(page) {
168
  // Walk through all child nodes
169
  const processNode = (node) => {
170
  const tag = node.tagName?.toLowerCase();
171
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  // Headings
173
  if (/^h[1-6]$/.test(tag)) {
174
  const level = parseInt(tag[1]);
@@ -291,9 +421,20 @@ async function extractArticleContent(page) {
291
 
292
  // Figures (images, embeds)
293
  if (tag === 'figure') {
 
 
 
 
 
 
 
294
  const img = node.querySelector('img');
295
- const htmlEmbed = node.querySelector('.html-embed, .html-embed--screenshot');
296
- const imageWrapper = node.querySelector('.image-wrapper');
 
 
 
 
297
  const caption = node.querySelector('figcaption');
298
 
299
  // Skip if it's not really a figure (no img, no embed, no caption)
@@ -358,14 +499,15 @@ async function extractArticleContent(page) {
358
 
359
  // Notes (Note component and Sidenote)
360
  if (node.classList?.contains('note') || node.classList?.contains('sidenote')) {
361
- const title = node.querySelector('.note__title, .note-title')?.textContent || '';
362
- const content = cleanText(node.textContent);
363
-
364
- if (title) {
365
- output.push(`<n>${title} | ${content}</n>\n\n`);
366
- } else {
367
- output.push(`<n>${content}</n>\n\n`);
368
- }
 
369
  return;
370
  }
371
 
@@ -418,6 +560,22 @@ async function main() {
418
  console.log('> Starting Astro preview…');
419
  // Capture stdout to detect the actual port used
420
  let capturedPort = 8080;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  const preview = spawn('npm', ['run', 'preview'], {
422
  cwd,
423
  stdio: ['ignore', 'pipe', 'pipe'],
@@ -428,27 +586,28 @@ async function main() {
428
  preview.stdout.on('data', (data) => {
429
  const output = data.toString();
430
  process.stdout.write(output);
431
- const match = output.match(/http:\/\/localhost:(\d+)/);
432
- if (match) {
433
- capturedPort = parseInt(match[1]);
434
- }
435
  });
436
 
437
  preview.stderr.on('data', (data) => {
438
- process.stderr.write(data);
 
 
439
  });
440
 
441
  const previewExit = new Promise((resolvePreview) => {
442
  preview.on('close', (code, signal) => resolvePreview({ code, signal }));
443
  });
444
 
445
- // Wait a bit for the server to start and output the port
446
- await delay(3000);
447
- const baseUrl = `http://localhost:${capturedPort}/`;
 
448
 
449
  try {
450
- await waitForServer(baseUrl, 60000);
451
- console.log('> Server ready, extracting content…');
 
452
 
453
  const browser = await chromium.launch({ headless: true });
454
  try {
@@ -479,10 +638,13 @@ async function main() {
479
 
480
  console.log('> Extracting article content…');
481
  const txtContent = await extractArticleContent(page);
 
 
 
482
 
483
  // Write output
484
  const outPath = resolve(cwd, 'dist', `${outFileBase}.txt`);
485
- await fs.writeFile(outPath, txtContent, 'utf-8');
486
  console.log(`✅ TXT exported: ${outPath}`);
487
 
488
  // Copy to public folder
 
35
  });
36
  }
37
 
38
+ async function waitForServer(urlOrFn, timeoutMs = 60000) {
39
+ const getUrl = typeof urlOrFn === 'function' ? urlOrFn : () => urlOrFn;
40
  const start = Date.now();
41
  while (Date.now() - start < timeoutMs) {
42
  try {
43
+ const url = getUrl();
44
+ if (!url) {
45
+ await delay(200);
46
+ continue;
47
+ }
48
  const res = await fetch(url);
49
  if (res.ok) return;
50
  } catch { }
51
  await delay(500);
52
  }
53
+ const lastUrl = getUrl();
54
+ throw new Error(`Server did not start in time: ${lastUrl || '(unknown url)'}`);
55
  }
56
 
57
  function parseArgs(argv) {
 
64
  return out;
65
  }
66
 
67
+ function parseBoolean(value, defaultValue) {
68
+ if (value === undefined) return defaultValue;
69
+ if (value === true) return true;
70
+ const v = String(value).trim().toLowerCase();
71
+ if (['1', 'true', 'yes', 'y', 'on'].includes(v)) return true;
72
+ if (['0', 'false', 'no', 'n', 'off'].includes(v)) return false;
73
+ return defaultValue;
74
+ }
75
+
76
  function slugify(text) {
77
  return String(text || '')
78
  .normalize('NFKD')
 
115
  return `${hashes} ${text}`;
116
  }
117
 
118
+ function wrapLineWithIndent(line, maxWidth) {
119
+ if (line.length <= maxWidth) return [line];
120
+
121
+ const indentMatch = line.match(/^\s*/);
122
+ const indent = indentMatch ? indentMatch[0] : '';
123
+ const indentLen = indent.length;
124
+ const available = Math.max(10, maxWidth - indentLen);
125
+
126
+ let rest = line.slice(indentLen);
127
+ const out = [];
128
+
129
+ while (rest.length > available) {
130
+ let breakPos = -1;
131
+ for (let i = available; i >= 1; i--) {
132
+ if (/\s/.test(rest[i - 1])) {
133
+ breakPos = i;
134
+ break;
135
+ }
136
+ }
137
+ if (breakPos === -1) breakPos = available;
138
+
139
+ const chunk = rest.slice(0, breakPos).replace(/\s+$/g, '');
140
+ out.push(indent + chunk);
141
+ rest = rest.slice(breakPos).replace(/^\s+/g, '');
142
+ }
143
+
144
+ out.push(indent + rest);
145
+ return out;
146
+ }
147
+
148
+ function wrapCodeTextAccountingForTags(codeText, maxWidth) {
149
+ const width = Number(maxWidth);
150
+ if (!Number.isFinite(width) || width <= 0) return String(codeText || '');
151
+
152
+ const baseLines = String(codeText || '').split('\n');
153
+ const wrappedLines = [];
154
+ for (const line of baseLines) wrappedLines.push(...wrapLineWithIndent(line, width));
155
+
156
+ if (wrappedLines.length === 0) return '';
157
+
158
+ // If the whole block is a single line, account for both "<c>" and "</c>" on that line.
159
+ if (wrappedLines.length === 1) {
160
+ const maxInner = width - '<c>'.length - '</c>'.length;
161
+ if (wrappedLines[0].length > maxInner && maxInner > 0) {
162
+ return wrapLineWithIndent(wrappedLines[0], maxInner).join('\n');
163
+ }
164
+ return wrappedLines[0];
165
+ }
166
+
167
+ // Otherwise, "<c>" applies to the first line and "</c>" to the last line only.
168
+ const firstMaxInner = width - '<c>'.length;
169
+ if (firstMaxInner > 0 && wrappedLines[0].length > firstMaxInner) {
170
+ const rewrappedFirst = wrapLineWithIndent(wrappedLines[0], firstMaxInner);
171
+ wrappedLines.splice(0, 1, ...rewrappedFirst);
172
+ }
173
+
174
+ const lastMaxInner = width - '</c>'.length;
175
+ const lastIdx = wrappedLines.length - 1;
176
+ if (lastMaxInner > 0 && wrappedLines[lastIdx].length > lastMaxInner) {
177
+ const rewrappedLast = wrapLineWithIndent(wrappedLines[lastIdx], lastMaxInner);
178
+ wrappedLines.splice(lastIdx, 1, ...rewrappedLast);
179
+ }
180
+
181
+ return wrappedLines.join('\n');
182
+ }
183
+
184
+ function wrapCodeBlocksInTxt(txt, maxWidth = 80) {
185
+ const width = Number(maxWidth);
186
+ if (!Number.isFinite(width) || width <= 0) return txt;
187
+
188
+ return String(txt || '').replace(/<c>([\s\S]*?)<\/c>/g, (_m, inner) => {
189
+ const wrappedInner = wrapCodeTextAccountingForTags(inner, width);
190
+ return `<c>${wrappedInner}</c>`;
191
+ });
192
+ }
193
+
194
  /**
195
  * Extract and convert article content to TXT format
196
  */
 
248
  const main = document.querySelector('main');
249
  if (!main) return 'Error: main element not found';
250
 
251
+ // Helper: get all visual elements in DOM order (match screenshot-elements.mjs)
252
+ // NOTE: Don't include generic `figure` here. The screenshot script indexes only
253
+ // `.html-embed`, `.table-scroll > table`, `.image-wrapper`, and `.katex-display`.
254
+ const allVisualElements = Array.from(
255
+ document.querySelectorAll('.html-embed, .table-scroll > table, .image-wrapper, .katex-display'),
256
+ );
257
  const elementIndexMap = new Map();
258
 
259
  // Pre-process: assign global indices to visual elements
 
264
  // Walk through all child nodes
265
  const processNode = (node) => {
266
  const tag = node.tagName?.toLowerCase();
267
+
268
+ // Images (Image.astro renders a `.image-wrapper` container; it may or may not contain a <figure>)
269
+ if (node.classList?.contains('image-wrapper')) {
270
+ const globalIndex = elementIndexMap.get(node);
271
+ if (!globalIndex) return;
272
+
273
+ const img = node.querySelector('img');
274
+ const figure = node.querySelector('figure');
275
+ const caption = figure?.querySelector('figcaption') || node.querySelector('figcaption');
276
+
277
+ let name = '';
278
+ let anchor = '';
279
+ let description = '';
280
+
281
+ // Prefer an explicit figure ID (used for cross-references), otherwise wrapper ID.
282
+ if (figure?.id) anchor = figure.id;
283
+ else if (node.id) anchor = node.id;
284
+
285
+ if (caption) {
286
+ const captionText = stripHtml(caption.innerHTML);
287
+ const parsed = parseCaptionText(captionText, 'Figure');
288
+ name = parsed.name;
289
+ description = parsed.description;
290
+ }
291
+
292
+ if (!description && img?.alt) description = img.alt;
293
+ if (!name) name = `image-${globalIndex}`;
294
+
295
+ const parts = [name];
296
+ if (anchor) parts.push(anchor);
297
+ if (description) parts.push(description);
298
+ output.push(`<f>${parts.join(' | ')}</f>\n\n`);
299
+ return;
300
+ }
301
+
302
  // Headings
303
  if (/^h[1-6]$/.test(tag)) {
304
  const level = parseInt(tag[1]);
 
421
 
422
  // Figures (images, embeds)
423
  if (tag === 'figure') {
424
+ // If this <figure> is inside an `.image-wrapper`, the wrapper handler above will emit
425
+ // a single <f> tag for the whole image. Avoid double-emitting.
426
+ if (node.closest?.('.image-wrapper')) {
427
+ const isHtmlEmbedFigure = node.matches?.('.html-embed, .html-embed--screenshot');
428
+ if (!isHtmlEmbedFigure) return;
429
+ }
430
+
431
  const img = node.querySelector('img');
432
+ const htmlEmbed =
433
+ node.matches?.('.html-embed, .html-embed--screenshot')
434
+ ? node
435
+ : node.querySelector('.html-embed, .html-embed--screenshot');
436
+ // Images are wrapped in an ancestor `.image-wrapper` container in this codebase (closest() only matches the node itself or its ancestors, never siblings)
437
+ const imageWrapper = node.closest?.('.image-wrapper') || null;
438
  const caption = node.querySelector('figcaption');
439
 
440
  // Skip if it's not really a figure (no img, no embed, no caption)
 
499
 
500
  // Notes (Note component and Sidenote)
501
  if (node.classList?.contains('note') || node.classList?.contains('sidenote')) {
502
+ // For Note.astro, avoid duplicating the header/title in exported text by
503
+ // extracting only the body content.
504
+ const contentNode =
505
+ node.classList?.contains('note')
506
+ ? (node.querySelector('.note__content, .note-content') ?? node)
507
+ : node;
508
+ const content = cleanText(contentNode.textContent);
509
+
510
+ if (content) output.push(`<n>${content}</n>\n\n`);
511
  return;
512
  }
513
 
 
560
  console.log('> Starting Astro preview…');
561
  // Capture stdout to detect the actual port used
562
  let capturedPort = 8080;
563
+ let sawPreviewUrl = false;
564
+
565
+ const maybeCapturePort = (output) => {
566
+ const match = output.match(/http:\/\/localhost:(\d+)\//);
567
+ if (match) {
568
+ capturedPort = parseInt(match[1]);
569
+ sawPreviewUrl = true;
570
+ }
571
+ };
572
+
573
+ const previewPortEnv = process.env.PREVIEW_PORT ? Number(process.env.PREVIEW_PORT) : null;
574
+ if (previewPortEnv) {
575
+ capturedPort = previewPortEnv;
576
+ sawPreviewUrl = true;
577
+ }
578
+
579
  const preview = spawn('npm', ['run', 'preview'], {
580
  cwd,
581
  stdio: ['ignore', 'pipe', 'pipe'],
 
586
  preview.stdout.on('data', (data) => {
587
  const output = data.toString();
588
  process.stdout.write(output);
589
+ maybeCapturePort(output);
 
 
 
590
  });
591
 
592
  preview.stderr.on('data', (data) => {
593
+ const output = data.toString();
594
+ process.stderr.write(output);
595
+ maybeCapturePort(output);
596
  });
597
 
598
  const previewExit = new Promise((resolvePreview) => {
599
  preview.on('close', (code, signal) => resolvePreview({ code, signal }));
600
  });
601
 
602
+ const getBaseUrl = () => {
603
+ if (!sawPreviewUrl) return null;
604
+ return `http://localhost:${capturedPort}/`;
605
+ };
606
 
607
  try {
608
+ await waitForServer(getBaseUrl, 60000);
609
+ const baseUrl = getBaseUrl();
610
+ console.log(`> Server ready (${baseUrl}), extracting content…`);
611
 
612
  const browser = await chromium.launch({ headless: true });
613
  try {
 
638
 
639
  console.log('> Extracting article content…');
640
  const txtContent = await extractArticleContent(page);
641
+ const shouldWrapCode = parseBoolean(args['wrap-code'], true);
642
+ const codeWidth = Number(args['code-width'] ?? 80);
643
+ const finalTxtContent = shouldWrapCode ? wrapCodeBlocksInTxt(txtContent, codeWidth) : txtContent;
644
 
645
  // Write output
646
  const outPath = resolve(cwd, 'dist', `${outFileBase}.txt`);
647
+ await fs.writeFile(outPath, finalTxtContent, 'utf-8');
648
  console.log(`✅ TXT exported: ${outPath}`);
649
 
650
  // Copy to public folder
app/scripts/screenshot-elements.mjs CHANGED
@@ -1,11 +1,15 @@
1
  import { chromium } from 'playwright';
2
  import { mkdir } from 'fs/promises';
3
- import { join } from 'path';
 
 
 
 
4
 
5
- const URL = 'http://localhost:4321/?viz=true';
6
  const OUTPUT_DIR = './screenshots';
7
  const DEVICE_SCALE_FACTOR = 4; // 4x for high-quality print
8
  const BASE_VIEWPORT = { width: 1200, height: 800 };
 
9
 
10
  const slugify = (value) =>
11
  String(value || '')
@@ -14,9 +18,107 @@ const slugify = (value) =>
14
  .replace(/[^a-z0-9]+/g, '-')
15
  .replace(/^-+|-+$/g, '');
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  async function main() {
18
  await mkdir(OUTPUT_DIR, { recursive: true });
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  console.log('🚀 Launching browser...');
21
  const browser = await chromium.launch({ headless: true });
22
  const context = await browser.newContext({
@@ -25,8 +127,15 @@ async function main() {
25
  });
26
  const page = await context.newPage();
27
 
28
- console.log(`📄 Navigating to ${URL}...`);
29
- await page.goto(URL, { waitUntil: 'domcontentloaded', timeout: 60000 });
 
 
 
 
 
 
 
30
  await page.waitForTimeout(3000);
31
 
32
  let totalCount = 0;
@@ -47,12 +156,10 @@ async function main() {
47
  return 'unknown';
48
  });
49
 
50
- if (type !== 'katex') {
51
- const isVisible = await element.isVisible();
52
- if (!isVisible) {
53
- console.log(` ⏭️ Skipping hidden ${type} ${i + 1}`);
54
- continue;
55
- }
56
  }
57
 
58
  const label = await element.evaluate((el) => {
@@ -205,7 +312,8 @@ async function main() {
205
 
206
  await page.locator(cloneSelector).screenshot({
207
  path: filepath,
208
- type: 'png'
 
209
  });
210
 
211
  await page.evaluate((selector) => {
@@ -262,7 +370,8 @@ async function main() {
262
 
263
  await page.locator(cloneSelector).screenshot({
264
  path: filepath,
265
- type: 'png'
 
266
  });
267
 
268
  await page.evaluate((selector) => {
@@ -272,7 +381,8 @@ async function main() {
272
  } else {
273
  await element.screenshot({
274
  path: filepath,
275
- type: 'png'
 
276
  });
277
  }
278
 
@@ -308,7 +418,7 @@ async function main() {
308
  });
309
 
310
  await page.waitForTimeout(150);
311
- await element.screenshot({ path: openFilepath, type: 'png' });
312
  console.log(` ✅ ${openFilename}`);
313
 
314
  await selectHandle.evaluate((el) => {
@@ -359,6 +469,12 @@ async function main() {
359
 
360
  await browser.close();
361
  console.log(`\n🎉 Done! Captured ${totalCount} screenshots in ${OUTPUT_DIR}/`);
 
 
 
 
 
 
362
  }
363
 
364
  main().catch(console.error);
 
1
  import { chromium } from 'playwright';
2
  import { mkdir } from 'fs/promises';
3
+ import { join, resolve } from 'path';
4
+ import { spawn } from 'node:child_process';
5
+ import { setTimeout as delay } from 'node:timers/promises';
6
+ import { promises as fs } from 'node:fs';
7
+ import net from 'node:net';
8
 
 
9
  const OUTPUT_DIR = './screenshots';
10
  const DEVICE_SCALE_FACTOR = 4; // 4x for high-quality print
11
  const BASE_VIEWPORT = { width: 1200, height: 800 };
12
+ const SCREENSHOT_TIMEOUT_MS = Number(process.env.SCREENSHOT_TIMEOUT_MS || 15000);
13
 
14
  const slugify = (value) =>
15
  String(value || '')
 
18
  .replace(/[^a-z0-9]+/g, '-')
19
  .replace(/^-+|-+$/g, '');
20
 
21
+ async function run(command, args = [], options = {}) {
22
+ return new Promise((resolvePromise, reject) => {
23
+ const child = spawn(command, args, { stdio: 'inherit', shell: false, ...options });
24
+ child.on('error', reject);
25
+ child.on('exit', (code) => {
26
+ if (code === 0) resolvePromise(undefined);
27
+ else reject(new Error(`${command} ${args.join(' ')} exited with code ${code}`));
28
+ });
29
+ });
30
+ }
31
+
32
+ async function waitForServer(url, timeoutMs = 60000) {
33
+ const start = Date.now();
34
+ while (Date.now() - start < timeoutMs) {
35
+ try {
36
+ const res = await fetch(url);
37
+ if (res.ok) return;
38
+ } catch { }
39
+ await delay(500);
40
+ }
41
+ throw new Error(`Server did not start in time: ${url}`);
42
+ }
43
+
44
+ async function getFreePort(preferredPort) {
45
+ const tryListen = (port) =>
46
+ new Promise((resolvePromise, reject) => {
47
+ const server = net.createServer();
48
+ server.unref();
49
+ server.on('error', reject);
50
+ server.listen(port, () => {
51
+ const addr = server.address();
52
+ const resolvedPort = typeof addr === 'object' && addr ? addr.port : port;
53
+ server.close(() => resolvePromise(resolvedPort));
54
+ });
55
+ });
56
+
57
+ if (typeof preferredPort === 'number' && Number.isFinite(preferredPort) && preferredPort > 0) {
58
+ try {
59
+ return await tryListen(preferredPort);
60
+ } catch { }
61
+ }
62
+
63
+ return await tryListen(0);
64
+ }
65
+
66
  async function main() {
67
  await mkdir(OUTPUT_DIR, { recursive: true });
68
 
69
+ const cwd = process.cwd();
70
+ const distDir = resolve(cwd, 'dist');
71
+ let hasDist = false;
72
+ try {
73
+ const st = await fs.stat(distDir);
74
+ hasDist = st && st.isDirectory();
75
+ } catch { }
76
+ if (!hasDist) {
77
+ console.log('> Building Astro site…');
78
+ await run('npm', ['run', 'build'], { cwd });
79
+ } else {
80
+ console.log('> Skipping build (dist/ exists)…');
81
+ }
82
+
83
+ const startPreview =
84
+ !process.env.SCREENSHOT_BASE_URL &&
85
+ String(process.env.SCREENSHOT_START_PREVIEW || 'true').toLowerCase() !== 'false';
86
+
87
+ const preferredPort = process.env.PREVIEW_PORT ? Number(process.env.PREVIEW_PORT) : undefined;
88
+ const previewPort = startPreview ? await getFreePort(preferredPort) : undefined;
89
+ const baseUrl = process.env.SCREENSHOT_BASE_URL || `http://localhost:${previewPort}/`;
90
+ const url = `${baseUrl.replace(/\/?$/, '/')}` + '?viz=true';
91
+
92
+ let preview = null;
93
+ if (startPreview) {
94
+ console.log(`> Starting Astro preview (port ${previewPort})…`);
95
+ const astroBin = resolve(
96
+ cwd,
97
+ 'node_modules',
98
+ '.bin',
99
+ process.platform === 'win32' ? 'astro.cmd' : 'astro',
100
+ );
101
+ preview = spawn(astroBin, ['preview', '--host', '--port', String(previewPort)], {
102
+ cwd,
103
+ stdio: 'inherit',
104
+ detached: true,
105
+ });
106
+ } else {
107
+ console.log(`> Using existing server: ${baseUrl}`);
108
+ }
109
+
110
+ try {
111
+ await waitForServer(baseUrl, 60000);
112
+ } catch (err) {
113
+ if (preview) {
114
+ try {
115
+ // Ensure we don't leave the preview process behind if startup failed
116
+ process.kill(-preview.pid, 'SIGTERM');
117
+ } catch { }
118
+ }
119
+ throw err;
120
+ }
121
+
122
  console.log('🚀 Launching browser...');
123
  const browser = await chromium.launch({ headless: true });
124
  const context = await browser.newContext({
 
127
  });
128
  const page = await context.newPage();
129
 
130
+ console.log(`📄 Navigating to ${url}...`);
131
+ const resp = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
132
+ if (!resp || !resp.ok()) {
133
+ const status = resp ? `${resp.status()} ${resp.statusText()}` : 'NO_RESPONSE';
134
+ throw new Error(
135
+ `Failed to load ${url} (${status}). ` +
136
+ `If you are running the site elsewhere, set SCREENSHOT_BASE_URL (e.g. http://localhost:4322/).`,
137
+ );
138
+ }
139
  await page.waitForTimeout(3000);
140
 
141
  let totalCount = 0;
 
156
  return 'unknown';
157
  });
158
 
159
+ const isVisible = await element.isVisible();
160
+ if (!isVisible) {
161
+ console.log(` ⏭️ Skipping hidden ${type} ${i + 1}`);
162
+ continue;
 
 
163
  }
164
 
165
  const label = await element.evaluate((el) => {
 
312
 
313
  await page.locator(cloneSelector).screenshot({
314
  path: filepath,
315
+ type: 'png',
316
+ timeout: SCREENSHOT_TIMEOUT_MS
317
  });
318
 
319
  await page.evaluate((selector) => {
 
370
 
371
  await page.locator(cloneSelector).screenshot({
372
  path: filepath,
373
+ type: 'png',
374
+ timeout: SCREENSHOT_TIMEOUT_MS
375
  });
376
 
377
  await page.evaluate((selector) => {
 
381
  } else {
382
  await element.screenshot({
383
  path: filepath,
384
+ type: 'png',
385
+ timeout: SCREENSHOT_TIMEOUT_MS
386
  });
387
  }
388
 
 
418
  });
419
 
420
  await page.waitForTimeout(150);
421
+ await element.screenshot({ path: openFilepath, type: 'png', timeout: SCREENSHOT_TIMEOUT_MS });
422
  console.log(` ✅ ${openFilename}`);
423
 
424
  await selectHandle.evaluate((el) => {
 
469
 
470
  await browser.close();
471
  console.log(`\n🎉 Done! Captured ${totalCount} screenshots in ${OUTPUT_DIR}/`);
472
+
473
+ if (preview) {
474
+ try {
475
+ process.kill(-preview.pid, 'SIGTERM');
476
+ } catch { }
477
+ }
478
  }
479
 
480
  main().catch(console.error);