File size: 4,162 Bytes
fad3187 2739b3a fad3187 2739b3a fad3187 2739b3a fad3187 2739b3a fad3187 2739b3a fad3187 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
#!/usr/bin/env node
// scripts/cache_report.mjs
// Summarize cache status (questions/generations/verifications/rewards).
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
// ES modules do not provide __filename/__dirname, so derive them from the
// module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The project root is the parent of the directory that contains this script.
const PROJECT_ROOT = path.join(__dirname, '..');

// Both built-in cache locations live under <project root>/data.
const [DEFAULT_CACHE_DIR, INSTRUCT_CACHE_DIR] = ['cache', 'cache_instruct'].map(
  (dirName) => path.join(PROJECT_ROOT, 'data', dirName),
);
// Report mode taken from the CACHE_REPORT_MODE environment variable
// (matched case-insensitively):
//   "thinking" / "default" -> 'thinking'
//   "instruct"             -> 'instruct'
//   "both" / "all", unset, empty, or any unrecognized value -> 'both'
const MODE = (() => {
  const requested = String(process.env.CACHE_REPORT_MODE || '').toLowerCase();
  switch (requested) {
    case 'thinking':
    case 'default':
      return 'thinking';
    case 'instruct':
      return 'instruct';
    case 'both':
    case 'all':
    default:
      return 'both';
  }
})();
// Optional cache directory override from PIPELINE_CACHE_DIR. An absolute path
// is used exactly as given; a relative path is joined onto PROJECT_ROOT.
// null when the variable is unset or empty.
const customDir = (() => {
  const override = process.env.PIPELINE_CACHE_DIR;
  if (!override) {
    return null;
  }
  if (path.isAbsolute(override)) {
    return override;
  }
  return path.join(PROJECT_ROOT, override);
})();
// List of { label, dir } entries the report iterates over. A custom directory
// takes precedence over MODE; otherwise MODE selects one or both of the
// built-in cache directories.
const CACHE_DIRS = (() => {
  if (customDir) {
    return [{ label: 'custom', dir: customDir }];
  }

  const thinkingEntry = { label: 'thinking (default)', dir: DEFAULT_CACHE_DIR };
  const instructEntry = { label: 'instruct', dir: INSTRUCT_CACHE_DIR };

  switch (MODE) {
    case 'thinking':
      return [thinkingEntry];
    case 'instruct':
      return [instructEntry];
    default:
      return [thinkingEntry, instructEntry];
  }
})();
// JSONL file name for each record category summarized by the report.
// Frozen so the shared lookup table cannot be modified by accident.
const FILES = Object.freeze({
  questions: 'questions.jsonl',
  generations: 'generations.jsonl',
  verifications: 'verifications.jsonl',
  rewards: 'rewards.jsonl',
});
/**
 * Read a JSON Lines file and return its parsed records.
 *
 * Lines are trimmed before parsing. Blank lines, lines that are not valid
 * JSON, and lines whose parsed value is falsy (e.g. `null`, `0`, `false`,
 * `""`) are skipped.
 *
 * @param {string} cacheDir - Directory containing the file.
 * @param {string} fileName - Name of the file inside `cacheDir`.
 * @returns {Promise<Array<*>>} Parsed values in file order; an empty array
 *   when the file does not exist.
 * @throws Rethrows any read error other than ENOENT.
 */
async function readJsonl(cacheDir, fileName) {
  const target = path.join(cacheDir, fileName);

  let contents;
  try {
    contents = await fs.readFile(target, 'utf8');
  } catch (err) {
    // A missing file simply means nothing has been cached yet.
    if (err.code === 'ENOENT') return [];
    throw err;
  }

  const records = [];
  for (const rawLine of contents.split('\n')) {
    const line = rawLine.trim();
    if (!line) continue;

    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Best-effort read: a malformed line is skipped rather than fatal.
      continue;
    }

    if (parsed) records.push(parsed);
  }
  return records;
}
/**
 * Return the distinct values of an iterable, keeping first-seen order.
 *
 * @param {Iterable<*>} arr - Values to de-duplicate.
 * @returns {Array<*>} New array with duplicates removed (Set equality).
 */
function uniq(arr) {
  const seen = new Set(arr);
  return Array.from(seen);
}
/**
 * Print a summary table for every directory in CACHE_DIRS.
 *
 * A banner line states the active mode (or the custom directory override).
 * For each cache directory the four JSONL files named in FILES are then
 * loaded and their record counts written to stdout.
 *
 * @returns {Promise<void>} Resolves once all reports have been printed.
 * @throws Propagates any read error that readJsonl does not handle.
 */
async function main() {
  if (customDir) {
    console.log(`CACHE_REPORT_MODE=custom (PIPELINE_CACHE_DIR=${customDir})`);
  } else {
    console.log(`CACHE_REPORT_MODE=${MODE}`);
  }

  for (const { label, dir } of CACHE_DIRS) {
    // The four files are independent of each other, so load them concurrently
    // instead of awaiting each read in turn.
    const [questions, generations, verifications, rewards] = await Promise.all([
      readJsonl(dir, FILES.questions),
      readJsonl(dir, FILES.generations),
      readJsonl(dir, FILES.verifications),
      readJsonl(dir, FILES.rewards),
    ]);

    // Distinct chunk ids across all four files; records without a truthy
    // chunk_id do not contribute.
    const chunkIds = uniq(
      [...questions, ...generations, ...verifications, ...rewards]
        .map((record) => record.chunk_id)
        .filter(Boolean),
    );

    // A question record counts as the length of its `questions` array, else
    // the length of its `question_ids` array, else as a single question.
    const totalQuestions = questions.reduce((total, record) => {
      if (Array.isArray(record.questions)) return total + record.questions.length;
      if (Array.isArray(record.question_ids)) return total + record.question_ids.length;
      return total + 1;
    }, 0);

    // Only records whose `ok` field is strictly `true` count as passed.
    const passedVerifications = verifications.filter((v) => v.ok === true).length;
    const passedRewards = rewards.filter((r) => r.ok === true).length;

    console.log(`\n== ${label} cache ==`);
    const rows = [
      ['Cache dir', dir],
      ['Unique chunks', chunkIds.length],
      ['Question records', questions.length],
      ['Questions total', totalQuestions],
      ['Generation records', generations.length],
      ['Verification records', verifications.length],
      ['Verifications ok', passedVerifications],
      ['Reward records', rewards.length],
      ['Rewards ok', passedRewards],
    ];

    // Align values in one column: "<key>:" padded to the longest key plus the
    // colon and two spaces.
    const labelWidth = Math.max(...rows.map(([key]) => key.length)) + 3;
    for (const [key, val] of rows) {
      console.log(`${`${key}:`.padEnd(labelWidth)}${val}`);
    }
  }
}
// Script entry point. On failure, log the error and mark the process as
// failed via process.exitCode rather than calling process.exit(), which can
// terminate the process before pending stderr output has been written.
main().catch((err) => {
  console.error('Cache report error:', err);
  process.exitCode = 1;
});
|