#!/usr/bin/env node

import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';

// ES modules do not provide __filename/__dirname, so derive them from the
// module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The project root is the parent of the directory containing this script.
const PROJECT_ROOT = path.join(__dirname, '..');

// Built-in cache locations, both under <PROJECT_ROOT>/data.
const DEFAULT_CACHE_DIR = path.join(PROJECT_ROOT, 'data', 'cache');
const INSTRUCT_CACHE_DIR = path.join(PROJECT_ROOT, 'data', 'cache_instruct');

/**
 * Map a raw CACHE_REPORT_MODE value onto one of the three report modes.
 *
 * Matching is case-insensitive:
 *   - 'thinking' or 'default' -> 'thinking'
 *   - 'instruct'              -> 'instruct'
 *   - 'both' or 'all'         -> 'both'
 * A missing, empty, or unrecognised value yields 'both'.
 *
 * @param {string | undefined} raw - Value read from the environment.
 * @returns {'thinking' | 'instruct' | 'both'} The resolved mode.
 */
function resolveReportMode(raw) {
  if (!raw) return 'both';

  const aliases = new Map([
    ['thinking', 'thinking'],
    ['default', 'thinking'],
    ['instruct', 'instruct'],
    ['both', 'both'],
    ['all', 'both'],
  ]);

  return aliases.get(String(raw).toLowerCase()) ?? 'both';
}

// Report mode selected through the CACHE_REPORT_MODE environment variable.
const MODE = resolveReportMode(process.env.CACHE_REPORT_MODE);

// Optional cache directory override from PIPELINE_CACHE_DIR. An absolute path
// is used unchanged; a relative one is joined onto PROJECT_ROOT. The value is
// null when the variable is unset or empty.
const customDir = (() => {
  const override = process.env.PIPELINE_CACHE_DIR;
  if (!override) return null;
  return path.isAbsolute(override)
    ? override
    : path.join(PROJECT_ROOT, override);
})();

// List of { label, dir } descriptors for the caches to report on.
// A custom directory (PIPELINE_CACHE_DIR) takes precedence over MODE; otherwise
// MODE picks the thinking cache, the instruct cache, or both (in that order).
const CACHE_DIRS = (() => {
  if (customDir) {
    return [{ label: 'custom', dir: customDir }];
  }

  const thinking = { label: 'thinking (default)', dir: DEFAULT_CACHE_DIR };
  const instruct = { label: 'instruct', dir: INSTRUCT_CACHE_DIR };

  switch (MODE) {
    case 'thinking':
      return [thinking];
    case 'instruct':
      return [instruct];
    default:
      return [thinking, instruct];
  }
})();

// File names of the JSONL files read from each cache directory.
// Frozen because it is a shared lookup table that nothing should mutate.
const FILES = Object.freeze({
  questions: 'questions.jsonl',
  generations: 'generations.jsonl',
  verifications: 'verifications.jsonl',
  rewards: 'rewards.jsonl',
});

/**
 * Read a JSON Lines file and return its parsed records.
 *
 * Each line is trimmed; blank lines are skipped. A line that is not valid JSON
 * is skipped silently, and so is a line whose parsed value is falsy (`null`,
 * `0`, `false`, `""`), so every returned element is truthy.
 *
 * @param {string} cacheDir - Directory containing the file.
 * @param {string} fileName - Name of the file inside `cacheDir`.
 * @returns {Promise<Array<*>>} Parsed records in file order; an empty array
 *   when the file does not exist.
 * @throws Any read error other than ENOENT is rethrown unchanged.
 */
async function readJsonl(cacheDir, fileName) {
  const filePath = path.join(cacheDir, fileName);

  let text;
  try {
    text = await fs.readFile(filePath, 'utf8');
  } catch (err) {
    // A cache file that was never written is treated as an empty cache.
    if (err.code === 'ENOENT') return [];
    throw err;
  }

  const records = [];
  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    if (!line) continue;

    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Best effort: a malformed line must not abort the whole report.
      continue;
    }

    if (parsed) records.push(parsed);
  }
  return records;
}

/**
 * Return a new array holding the distinct values of `arr`, in order of first
 * appearance (values are compared the way `Set` compares them).
 *
 * @param {Iterable<*>} arr - Values to de-duplicate.
 * @returns {Array<*>} The distinct values.
 */
function uniq(arr) {
  return Array.from(new Set(arr));
}

/**
 * Count the questions held by a list of question-cache records.
 *
 * A record contributes the length of its `questions` array if it has one,
 * otherwise the length of its `question_ids` array, otherwise 1.
 *
 * @param {Array<object>} records - Parsed records from the questions file.
 * @returns {number} Total number of questions.
 */
function countQuestions(records) {
  return records.reduce((total, record) => {
    if (Array.isArray(record.questions)) return total + record.questions.length;
    if (Array.isArray(record.question_ids)) return total + record.question_ids.length;
    return total + 1;
  }, 0);
}

/**
 * Render `[key, value]` pairs as "key:  value" lines whose values line up.
 *
 * @param {Array<[string, *]>} rows - Label/value pairs.
 * @returns {string[]} One formatted line per row, in the same order.
 */
function formatReportRows(rows) {
  const labelWidth = Math.max(...rows.map(([key]) => key.length)) + 2;
  // +1 accounts for the colon that follows each key.
  return rows.map(([key, val]) => `${`${key}:`.padEnd(labelWidth + 1)}${val}`);
}

/**
 * Print a summary for every directory in CACHE_DIRS.
 *
 * Prints the active mode first, then, per cache directory, a table with the
 * number of unique chunk ids, record counts for each JSONL file, the total
 * question count, and how many verification/reward records have `ok === true`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  if (customDir) {
    console.log(`CACHE_REPORT_MODE=custom (PIPELINE_CACHE_DIR=${customDir})`);
  } else {
    console.log(`CACHE_REPORT_MODE=${MODE}`);
  }

  for (const { label, dir } of CACHE_DIRS) {
    // The four files are independent, so read them concurrently.
    const [questions, generations, verifications, rewards] = await Promise.all([
      readJsonl(dir, FILES.questions),
      readJsonl(dir, FILES.generations),
      readJsonl(dir, FILES.verifications),
      readJsonl(dir, FILES.rewards),
    ]);

    // Distinct chunk ids seen in any file; records without one are ignored.
    const chunkIds = uniq(
      [questions, generations, verifications, rewards]
        .flatMap((records) => records.map((r) => r.chunk_id))
        .filter(Boolean),
    );

    const passedVerifications = verifications.filter((v) => v.ok === true).length;
    const passedRewards = rewards.filter((r) => r.ok === true).length;

    console.log(`\n== ${label} cache ==`);
    const rows = [
      ['Cache dir', dir],
      ['Unique chunks', chunkIds.length],
      ['Question records', questions.length],
      ['Questions total', countQuestions(questions)],
      ['Generation records', generations.length],
      ['Verification records', verifications.length],
      ['Verifications ok', passedVerifications],
      ['Reward records', rewards.length],
      ['Rewards ok', passedRewards],
    ];

    for (const line of formatReportRows(rows)) {
      console.log(line);
    }
  }
}

// Script entry point: run the report; on any failure log the error and exit
// with a non-zero status.
main().catch((err) => {
  console.error('Cache report error:', err);
  process.exit(1);
});