#!/usr/bin/env node
/**
 * Question-generation CLI.
 *
 * Reads chunk records from a JSONL file, asks the question generator for up
 * to `--max` questions per chunk, and writes one seed record per question to
 * an output JSONL file.
 *
 * Usage:
 *   node src/question/question_cli.mjs --input chunks.jsonl --output seed_questions.jsonl [--max 5]
 */
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';

import { loadProviderFor } from '../providers/provider.mjs';
import { runQuestionGenerator } from './question_core.mjs';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/** Questions per chunk when `--max` is absent or given without a value. */
const DEFAULT_MAX_QUESTIONS = 5;

/**
 * Parse command-line arguments.
 *
 * Recognised flags: `--input`/`-i`, `--output`/`-o`, `--max`/`-m`.
 * Unrecognised arguments are ignored. Prints a message and exits the process
 * with status 1 when `--input` is missing or `--max` is not a non-negative
 * integer.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{ input: string, output: string, max: number }} Parsed options;
 *   `output` defaults to `test_samples/seed_questions.generated.jsonl` two
 *   directories above this file.
 */
function parseArgs(argv) {
  const opts = {
    input: null,
    output: null,
    max: DEFAULT_MAX_QUESTIONS,
  };

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--input' || a === '-i') {
      opts.input = argv[++i];
    } else if (a === '--output' || a === '-o') {
      opts.output = argv[++i];
    } else if (a === '--max' || a === '-m') {
      const raw = argv[++i];
      // A missing or empty value keeps the default; anything else must be a
      // usable count, otherwise NaN would silently reach the generator.
      const max = raw ? Number(raw) : DEFAULT_MAX_QUESTIONS;
      if (!Number.isInteger(max) || max < 0) {
        console.error(`Invalid value for --max: "${raw}" (expected a non-negative integer)`);
        process.exit(1);
      }
      opts.max = max;
    }
  }

  if (!opts.input) {
    console.error(
      'Usage: node src/question/question_cli.mjs --input chunks.jsonl --output seed_questions.jsonl [--max 5]',
    );
    process.exit(1);
  }

  if (!opts.output) {
    opts.output = path.join(__dirname, '..', '..', 'test_samples', 'seed_questions.generated.jsonl');
  }

  return opts;
}

/**
 * Turn one non-empty, trimmed input line into a record object.
 *
 * - A JSON object (or array) is returned as parsed.
 * - A JSON string becomes `{ content: <that string> }`.
 * - Anything else (unparsable text, or a bare JSON scalar such as `null` or
 *   `42`) is treated as raw text: `{ content: <the line> }`.
 *
 * @param {string} trimmed - A single trimmed, non-empty line.
 * @returns {object} A record that is never `null` or a primitive.
 */
function parseJsonlLine(trimmed) {
  let parsed;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    // treat raw string as content
    return { content: trimmed };
  }

  if (typeof parsed === 'string') {
    return { content: parsed };
  }
  if (parsed !== null && typeof parsed === 'object') {
    return parsed;
  }
  // Bare scalar: property access on it would be useless (or throw for null),
  // so handle it the same way as an unparsable line.
  return { content: trimmed };
}

/**
 * Read a JSONL file and yield one record per non-blank line.
 *
 * The whole file is read into memory before iteration starts.
 *
 * @param {string} filePath - Path of the JSONL file to read.
 * @yields {object} One record per non-blank line (see `parseJsonlLine`).
 */
async function* readJsonl(filePath) {
  const txt = await fs.readFile(filePath, 'utf8');

  for (const line of txt.split('\n')) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    yield parseJsonlLine(trimmed);
  }
}

/**
 * CLI entry point: generate questions for every chunk in the input file and
 * write the resulting seed records to the output file.
 *
 * The output directory is created if needed and the output file is written
 * once, after all chunks have been handled.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  const provider = loadProviderFor('question');

  console.log('🧩 Question generation');
  console.log(` Input chunks: ${args.input}`);
  console.log(` Output seeds: ${args.output}`);
  console.log(` Max questions/chunk: ${args.max}`);
  console.log('');

  const outDir = path.dirname(args.output);
  await fs.mkdir(outDir, { recursive: true });

  let chunkCount = 0;
  let skippedCount = 0;
  let questionCount = 0;
  const outputLines = [];

  for await (const rec of readJsonl(args.input)) {
    chunkCount += 1;
    // Fall back to the 1-based position in the input when no id is present.
    const id = rec.id ?? rec.doc_id ?? chunkCount;
    const contextText = rec.content ?? rec.text ?? '';

    if (typeof contextText !== 'string') {
      // Calling .trim() on a non-string would throw; skip the chunk instead.
      console.warn(`⚠ [${chunkCount}] skipping chunk id=${id}: content is not a string`);
      skippedCount += 1;
      continue;
    }
    if (!contextText.trim()) {
      skippedCount += 1;
      continue;
    }

    console.log(`→ [${chunkCount}] generating questions for chunk id=${id}…`);

    const result = await runQuestionGenerator(contextText, provider, {
      maxQuestions: args.max,
    });
    // Tolerate a missing or malformed result rather than crashing the run.
    const questions = Array.isArray(result?.questions) ? result.questions : [];

    for (const q of questions) {
      questionCount += 1;
      const seed = {
        question: q,
        source_id: id,
        // keep some trace of where it came from
        meta: {
          doc_id: rec.doc_id ?? null,
          chunk_index: rec.chunk_index ?? null,
        },
      };
      outputLines.push(`${JSON.stringify(seed)}\n`);
    }
  }

  await fs.writeFile(args.output, outputLines.join(''), 'utf8');

  console.log('');
  console.log('✅ Done');
  console.log(` Chunks processed: ${chunkCount}`);
  if (skippedCount > 0) {
    console.log(` Chunks skipped: ${skippedCount}`);
  }
  console.log(` Questions generated: ${questionCount}`);
  console.log(` Seeds JSONL: ${args.output}`);
}

main().catch((err) => {
  console.error('❌ Question generation error:', err);
  process.exit(1);
});