// bibliopagebench/scripts/make_manifest.js
// Author: vojtam
// Last commit: feat: add qwen3-vl-32b-instruct results (7cb79aa)
import fs from 'node:fs/promises';
import path from 'node:path';
// ---- Inputs (relative paths, resolved against the working directory) ----

// Full-size page images; one manifest entry is produced per image found here.
const IMAGES_DIR = 'static/images/full';
// Ground-truth annotations, read as `<id>.json` for each image.
const GT_DIR = 'data/GT/test';
// Predictions root: every entry inside is used as a model name and is expected
// to contain `<id>.json` for each image.
const PRED_DIR = 'data/predictions/';
// OCR root: every entry inside is used as a model name and is expected to
// contain `<id>.txt` for each image.
const OCR_DIR = 'data/ocr/';

// ---- Outputs ----

// Thumbnail directory; the script creates it if it does not exist.
const THUMBS_DIR = 'static/images/thumbs';
// Destination of the generated manifest JSON.
const OUT = 'static/data/manifest.json';
/**
 * Lists the entries of `dir` that are directories (symlinks are followed).
 *
 * Each directory is treated as one model. Plain files that happen to live next
 * to the model folders (e.g. `.DS_Store`, a README) are skipped; otherwise they
 * would be used as path components and make every later read fail.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Directory names, in `readdir` order.
 */
async function listModelDirs(dir) {
  const names = await fs.readdir(dir);
  const stats = await Promise.all(names.map((name) => fs.stat(path.join(dir, name))));
  return names.filter((_, index) => stats[index].isDirectory());
}

/**
 * Reads a UTF-8 text file.
 *
 * @param {string} filePath
 * @returns {Promise<string>}
 */
const readText = (filePath) => fs.readFile(filePath, 'utf8');

/**
 * Reads and parses a JSON file.
 *
 * @param {string} filePath
 * @returns {Promise<unknown>} The parsed value.
 * @throws The original parse error, with the offending path prepended to its
 *   message so a malformed file can be located.
 */
async function readJson(filePath) {
  const text = await readText(filePath);
  try {
    return JSON.parse(text);
  } catch (err) {
    err.message = `${filePath}: ${err.message}`;
    throw err;
  }
}

/**
 * Loads `<baseDir>/<model>/<fileName>` for every model, in parallel.
 *
 * @param {string} baseDir - Root directory holding one folder per model.
 * @param {string[]} models - Model folder names.
 * @param {string} fileName - File to load from each model folder.
 * @param {(filePath: string) => Promise<unknown>} load - Loader for one file.
 * @returns {Promise<Record<string, unknown>>} Loaded values keyed by model name.
 */
async function loadPerModel(baseDir, models, fileName, load) {
  const entries = await Promise.all(
    models.map(async (model) => [model, await load(path.join(baseDir, model, fileName))]),
  );
  return Object.fromEntries(entries);
}

const ocrModels = await listModelDirs(OCR_DIR);
const predModels = await listModelDirs(PRED_DIR);
await fs.mkdir(THUMBS_DIR, { recursive: true });

const files = (await fs.readdir(IMAGES_DIR)).filter((f) => /\.(jpe?g|png|webp)$/i.test(f));

// NOTE(review): PRED_DIR ends with '/', so this evaluates to '' for every
// entry. The value is kept as-is because manifest consumers are not visible
// here; confirm what `model` is meant to hold before changing it.
const model = PRED_DIR.split('/').pop();

const manifest = [];
for (const file of files) {
  const id = path.parse(file).name;
  // Thumbnails are always referenced as .jpg, whatever the source extension.
  const thumbName = file.replace(/\.(\w+)$/, '.jpg');

  // A missing or malformed file rejects here and aborts the whole run.
  const [groundTruth, predictions, ocr] = await Promise.all([
    readJson(path.join(GT_DIR, `${id}.json`)),
    loadPerModel(PRED_DIR, predModels, `${id}.json`, readJson),
    loadPerModel(OCR_DIR, ocrModels, `${id}.txt`, readText),
  ]);

  manifest.push({
    id,
    file,
    model,
    thumb: `/images/thumbs/${thumbName}`,
    full: `/images/full/${file}`,
    ground_truth: groundTruth,
    predictions,
    ocr,
  });
}

// Make sure the output directory exists so the write works on a fresh checkout.
await fs.mkdir(path.dirname(OUT), { recursive: true });
await fs.writeFile(OUT, JSON.stringify(manifest), 'utf8');
console.log(`Wrote ${manifest.length} entries to ${OUT}`);