tightened up question limits
Browse files- AGENTS.md +36 -0
- src/core/llm_stage.mjs +70 -0
- src/pipeline/batch.mjs +66 -19
- src/pipeline/pipeline_cli.js +17 -1
- src/pipeline/pipeline_spec.mjs +52 -0
- src/pipeline/step.mjs +21 -2
- src/question/question_core.mjs +199 -95
- src/retrieval/jsonl_chunks.mjs +105 -0
- src/retrieval/retrieval.mjs +8 -3
- tests/llm_stage.test.mjs +58 -0
- tests/ollama_provider_reasoning.test.mjs +54 -0
- tests/pipeline_behaviour.test.mjs +182 -0
AGENTS.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Repository Guidelines
|
| 2 |
+
|
| 3 |
+
## Project Structure & Module Organization
|
| 4 |
+
- Core pipeline lives in `src/`, with stage logic in `src/generator`, `src/verifier`, `src/reward`, `src/question`, retrieval helpers in `src/retrieval`, and the CLI entrypoint at `src/pipeline/pipeline_cli.js`.
|
| 5 |
+
- Prompts are in `prompts/`; tweak these before changing stage behaviour.
|
| 6 |
+
- Tests sit in `tests/` (Vitest), with sample seeds in `test_samples/`; pipeline outputs write to `gold/`.
|
| 7 |
+
- Config baselines (models, limits) are in `configs/pipeline.json`; run scripts live at the repo root (`run.sh`, `try_prompt.sh`).
|
| 8 |
+
|
| 9 |
+
## Build, Test, and Development Commands
|
| 10 |
+
- `npm install` – install dependencies.
|
| 11 |
+
- `npm run pipeline -- --limit 20 --verbose` – run the default pipeline using static seeds.
|
| 12 |
+
- `PIPELINE_SEED_MODE=question-first npm run pipeline -- --limit 20 --verbose` – enable question-first seeding.
|
| 13 |
+
- `npm test` – run all unit tests (mocked by default).
|
| 14 |
+
- `REAL_ES=1 npm test` – exercise retrieval against a live Elasticsearch + embedding endpoint.
|
| 15 |
+
|
| 16 |
+
## Coding Style & Naming Conventions
|
| 17 |
+
- ECMAScript modules (`type: "module"`); prefer `.mjs` for shared code.
|
| 18 |
+
- Two-space indentation, single quotes unless template strings add clarity, and keep functions small and pure where possible (CLI glue stays in `pipeline_cli.js`).
|
| 19 |
+
- Use descriptive, lower_snake or camelCase for variables; exported helpers use camelCase.
|
| 20 |
+
- Keep prompts and stage logic separate; place reusable utilities in `src/pipeline/util.mjs`.
|
| 21 |
+
|
| 22 |
+
## Testing Guidelines
|
| 23 |
+
- Vitest is the test runner; add new tests under `tests/` with `.test.mjs` suffix.
|
| 24 |
+
- Mirror stage names in test files (e.g., `generator_core.test.mjs`), and include both happy-path and malformed-input cases.
|
| 25 |
+
- For retrieval, default mocks cover most cases; only opt into `REAL_ES=1` when you have a running distill-rag stack.
|
| 26 |
+
- Aim to keep tests deterministic—mock providers and network calls unless explicitly validating integrations.
|
| 27 |
+
|
| 28 |
+
## Commit & Pull Request Guidelines
|
| 29 |
+
- Follow the existing history: short, present-tense summaries (e.g., `add generator test`, `maintain chunk ordering`); include scoped prefixes only when they improve clarity.
|
| 30 |
+
- Keep commits focused (one concern each) and ensure `npm test` passes before pushing.
|
| 31 |
+
- PRs should state the goal, main changes, test evidence, and any required `.env` or config updates; include sample command/output paths when relevant (e.g., `gold/pipeline_gold.jsonl`).
|
| 32 |
+
- Link issues when applicable and note any provider/model assumptions or external services needed for validation.
|
| 33 |
+
|
| 34 |
+
## Configuration & Environment Tips
|
| 35 |
+
- Runtime configuration comes from `.env` (ES node, embedding endpoint, provider selections, stage models); avoid committing secrets.
|
| 36 |
+
- When changing model or provider choices, update `configs/pipeline.json` if you want a sharable default, and document overrides in your PR description.
|
src/core/llm_stage.mjs
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// src/core/llm_stage.mjs
|
| 2 |
+
import fs from 'fs/promises';
|
| 3 |
+
import path from 'path';
|
| 4 |
+
|
| 5 |
+
import { PROJECT_ROOT } from '../pipeline/util.mjs';
|
| 6 |
+
import { loadProviderFor } from '../providers/provider.mjs';
|
| 7 |
+
|
| 8 |
+
/**
|
| 9 |
+
* Load a prompt template from disk.
|
| 10 |
+
* Paths are relative to the project root by default (e.g. "prompts/question_prompt.txt").
|
| 11 |
+
*/
|
| 12 |
+
async function loadTemplate(templatePath) {
|
| 13 |
+
const abs = path.isAbsolute(templatePath)
|
| 14 |
+
? templatePath
|
| 15 |
+
: path.join(PROJECT_ROOT, templatePath);
|
| 16 |
+
|
| 17 |
+
const txt = await fs.readFile(abs, 'utf8');
|
| 18 |
+
return txt;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
/**
|
| 22 |
+
* Simple {{VAR}} replacement.
|
| 23 |
+
* Keeps your current prompt style but makes it generic.
|
| 24 |
+
*/
|
| 25 |
+
function renderTemplate(template, vars = {}) {
|
| 26 |
+
return template.replace(/{{\s*([A-Z0-9_]+)\s*}}/gi, (_, key) => {
|
| 27 |
+
const v = vars[key];
|
| 28 |
+
return v == null ? '' : String(v);
|
| 29 |
+
});
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
/**
|
| 33 |
+
* Generic LLM stage runner.
|
| 34 |
+
*
|
| 35 |
+
* @param {object} opts
|
| 36 |
+
* @param {string} opts.stage - logical stage name: "question" | "generator" | "verifier" | "reward" | "expert_x"
|
| 37 |
+
* @param {string} opts.template - path to prompt template (e.g. "prompts/question_prompt.txt")
|
| 38 |
+
* @param {object} [opts.vars] - interpolation vars for the template (QUESTION, CONTEXT, ANSWER, etc.)
|
| 39 |
+
* @param {object} [opts.provider] - optional provider instance; if omitted, uses loadProviderFor(stage)
|
| 40 |
+
* @param {boolean} [opts.verbose]
|
| 41 |
+
* @param {Console} [opts.logger]
|
| 42 |
+
*
|
| 43 |
+
* @returns {Promise<{ prompt: string, raw: string }>}
|
| 44 |
+
*/
|
| 45 |
+
export async function runLLMStage({
|
| 46 |
+
stage,
|
| 47 |
+
template,
|
| 48 |
+
vars = {},
|
| 49 |
+
provider,
|
| 50 |
+
verbose = false,
|
| 51 |
+
logger = console,
|
| 52 |
+
} = {}) {
|
| 53 |
+
if (!stage) throw new Error('runLLMStage: "stage" is required');
|
| 54 |
+
if (!template) throw new Error('runLLMStage: "template" is required');
|
| 55 |
+
|
| 56 |
+
const log = logger?.log?.bind(logger) || console.log;
|
| 57 |
+
|
| 58 |
+
const prov = provider || loadProviderFor(stage);
|
| 59 |
+
const tpl = await loadTemplate(template);
|
| 60 |
+
const prompt = renderTemplate(tpl, vars);
|
| 61 |
+
|
| 62 |
+
if (verbose) {
|
| 63 |
+
log(`[llm_stage] stage=${stage}`);
|
| 64 |
+
log('[llm_stage] prompt preview:\n' + prompt.slice(0, 400));
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
const raw = await prov.generate(prompt);
|
| 68 |
+
|
| 69 |
+
return { prompt, raw };
|
| 70 |
+
}
|
src/pipeline/batch.mjs
CHANGED
|
@@ -8,12 +8,12 @@ import {
|
|
| 8 |
DEFAULT_OUT_PATH,
|
| 9 |
loadSeedQuestions,
|
| 10 |
seedToQuestion,
|
| 11 |
-
// seedToContextText, // <-- no longer needed for QG mode
|
| 12 |
} from './seeds.mjs';
|
| 13 |
import { runPipelineStep } from './step.mjs';
|
| 14 |
import { loadProviderFor } from '../providers/provider.mjs';
|
| 15 |
import { runQuestionGenerator } from '../question/question_core.mjs';
|
| 16 |
import { fetchChunksFromIndex } from '../retrieval/retrieval.mjs';
|
|
|
|
| 17 |
|
| 18 |
/**
|
| 19 |
* Append a single accepted record to a JSONL file.
|
|
@@ -30,7 +30,7 @@ export async function appendGoldRecord(outPath, record) {
|
|
| 30 |
*
|
| 31 |
* Modes:
|
| 32 |
* - question-first (default): seeds are CHUNKS; we generate questions from each chunk
|
| 33 |
-
* - static:
|
| 34 |
*
|
| 35 |
* Options:
|
| 36 |
* - seedsPath: JSONL of seeds (defaults to test_samples/seed_questions.jsonl)
|
|
@@ -56,9 +56,10 @@ export async function runPipelineBatch({
|
|
| 56 |
seedsPath = DEFAULT_SEEDS_PATH,
|
| 57 |
outPath = DEFAULT_OUT_PATH,
|
| 58 |
limit,
|
|
|
|
| 59 |
verbose = false,
|
| 60 |
logger = console,
|
| 61 |
-
seedMode = process.env.PIPELINE_SEED_MODE || '
|
| 62 |
} = {}) {
|
| 63 |
const log = logger?.log?.bind(logger) || console.log;
|
| 64 |
const errLog = logger?.error?.bind(logger) || console.error;
|
|
@@ -137,32 +138,76 @@ export async function runPipelineBatch({
|
|
| 137 |
}
|
| 138 |
|
| 139 |
// ----------------------------------------
|
| 140 |
-
// MODE 2: question-first (ES chunks → QG → pipeline)
|
| 141 |
// ----------------------------------------
|
| 142 |
if (seedMode === 'question-first') {
|
| 143 |
const questionProvider = loadProviderFor('question');
|
| 144 |
|
| 145 |
-
// support both env names
|
| 146 |
const maxQuestionsPerChunk = Number(
|
| 147 |
process.env.QUESTION_MAX_PER_CHUNK ||
|
| 148 |
process.env.QUESTION_MAX ||
|
| 149 |
'5',
|
| 150 |
);
|
| 151 |
|
| 152 |
-
const
|
| 153 |
-
typeof limit === 'number' ? limit :
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
if (verbose) {
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
}
|
| 160 |
|
| 161 |
-
const chunks = await fetchChunksFromIndex(chunkLimit);
|
| 162 |
const totalChunks = chunks.length;
|
| 163 |
let processedSeeds = 0;
|
| 164 |
|
| 165 |
for (let idx = 0; idx < chunks.length; idx++) {
|
|
|
|
|
|
|
| 166 |
const chunk = chunks[idx];
|
| 167 |
const label = `[chunk ${idx + 1}/${chunks.length}]`;
|
| 168 |
const contextText = chunk.content;
|
|
@@ -177,8 +222,10 @@ export async function runPipelineBatch({
|
|
| 177 |
processedSeeds += 1;
|
| 178 |
|
| 179 |
if (verbose) {
|
| 180 |
-
log(`\n🧩 ${label} generating questions from
|
| 181 |
-
|
|
|
|
|
|
|
| 182 |
log(
|
| 183 |
' [question] chunk preview:\n ' +
|
| 184 |
preview(contextText, 300).replace(/\n/g, '\n '),
|
|
@@ -223,6 +270,7 @@ export async function runPipelineBatch({
|
|
| 223 |
|
| 224 |
// 2) run full pipeline for each generated question
|
| 225 |
for (const q of questions) {
|
|
|
|
| 226 |
if (!q || !q.trim()) continue;
|
| 227 |
|
| 228 |
const qLabel = `[q ${processed + 1}]`;
|
|
@@ -233,8 +281,7 @@ export async function runPipelineBatch({
|
|
| 233 |
try {
|
| 234 |
const result = await runPipelineStep({
|
| 235 |
question: q,
|
| 236 |
-
//
|
| 237 |
-
initialContext: [chunk],
|
| 238 |
verbose,
|
| 239 |
logger,
|
| 240 |
});
|
|
@@ -251,8 +298,8 @@ export async function runPipelineBatch({
|
|
| 251 |
const record = {
|
| 252 |
question: q,
|
| 253 |
sourceChunkId: chunk.id,
|
| 254 |
-
sourceChunk: contextText,
|
| 255 |
-
sourceDoc: chunk.source,
|
| 256 |
context: result.context,
|
| 257 |
sample: result.gen,
|
| 258 |
verifier: result.ver,
|
|
@@ -279,8 +326,8 @@ export async function runPipelineBatch({
|
|
| 279 |
return {
|
| 280 |
mode: seedMode,
|
| 281 |
total: totalChunks,
|
| 282 |
-
processed,
|
| 283 |
-
processedSeeds,
|
| 284 |
processedQuestions: processed,
|
| 285 |
accepted,
|
| 286 |
outPath,
|
|
|
|
| 8 |
DEFAULT_OUT_PATH,
|
| 9 |
loadSeedQuestions,
|
| 10 |
seedToQuestion,
|
|
|
|
| 11 |
} from './seeds.mjs';
|
| 12 |
import { runPipelineStep } from './step.mjs';
|
| 13 |
import { loadProviderFor } from '../providers/provider.mjs';
|
| 14 |
import { runQuestionGenerator } from '../question/question_core.mjs';
|
| 15 |
import { fetchChunksFromIndex } from '../retrieval/retrieval.mjs';
|
| 16 |
+
import { loadRagChunks } from '../retrieval/jsonl_chunks.mjs';
|
| 17 |
|
| 18 |
/**
|
| 19 |
* Append a single accepted record to a JSONL file.
|
|
|
|
| 30 |
*
|
| 31 |
* Modes:
|
| 32 |
* - question-first (default): seeds are CHUNKS; we generate questions from each chunk
|
| 33 |
+
* - static: seeds are questions (legacy / low-priority mode)
|
| 34 |
*
|
| 35 |
* Options:
|
| 36 |
* - seedsPath: JSONL of seeds (defaults to test_samples/seed_questions.jsonl)
|
|
|
|
| 56 |
seedsPath = DEFAULT_SEEDS_PATH,
|
| 57 |
outPath = DEFAULT_OUT_PATH,
|
| 58 |
limit,
|
| 59 |
+
chunkLimit,
|
| 60 |
verbose = false,
|
| 61 |
logger = console,
|
| 62 |
+
seedMode = process.env.PIPELINE_SEED_MODE || 'question-first',
|
| 63 |
} = {}) {
|
| 64 |
const log = logger?.log?.bind(logger) || console.log;
|
| 65 |
const errLog = logger?.error?.bind(logger) || console.error;
|
|
|
|
| 138 |
}
|
| 139 |
|
| 140 |
// ----------------------------------------
|
| 141 |
+
// MODE 2: question-first (JSONL or ES chunks → QG → pipeline)
|
| 142 |
// ----------------------------------------
|
| 143 |
if (seedMode === 'question-first') {
|
| 144 |
const questionProvider = loadProviderFor('question');
|
| 145 |
|
|
|
|
| 146 |
const maxQuestionsPerChunk = Number(
|
| 147 |
process.env.QUESTION_MAX_PER_CHUNK ||
|
| 148 |
process.env.QUESTION_MAX ||
|
| 149 |
'5',
|
| 150 |
);
|
| 151 |
|
| 152 |
+
const questionCap =
|
| 153 |
+
typeof limit === 'number' ? limit : Number.POSITIVE_INFINITY;
|
| 154 |
+
|
| 155 |
+
const effectiveChunkLimit = (() => {
|
| 156 |
+
if (typeof chunkLimit === 'number') return chunkLimit;
|
| 157 |
+
const envLimit =
|
| 158 |
+
process.env.PIPELINE_CHUNK_LIMIT ||
|
| 159 |
+
process.env.PIPELINE_CHUNKS ||
|
| 160 |
+
process.env.PIPELINE_CHUNK_SAMPLE;
|
| 161 |
+
const parsed = envLimit != null ? Number(envLimit) : NaN;
|
| 162 |
+
return Number.isFinite(parsed) ? parsed : undefined;
|
| 163 |
+
})();
|
| 164 |
+
|
| 165 |
+
// Decide where chunks come from:
|
| 166 |
+
// PIPELINE_CHUNK_SOURCE = 'jsonl' | 'es'
|
| 167 |
+
const chunkSource =
|
| 168 |
+
process.env.PIPELINE_CHUNK_SOURCE || 'es';
|
| 169 |
|
| 170 |
if (verbose) {
|
| 171 |
+
if (chunkSource === 'jsonl') {
|
| 172 |
+
const p =
|
| 173 |
+
process.env.RAG_CHUNKS_PATH ||
|
| 174 |
+
'data/rag_chunks.jsonl';
|
| 175 |
+
log(
|
| 176 |
+
`[pipeline] loading chunks from JSONL (${p}), limit=${effectiveChunkLimit ?? 'all'}`,
|
| 177 |
+
);
|
| 178 |
+
} else {
|
| 179 |
+
log(
|
| 180 |
+
`[pipeline] fetching chunks from ES (limit=${effectiveChunkLimit ?? 'all'})`,
|
| 181 |
+
);
|
| 182 |
+
}
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
let chunks = [];
|
| 186 |
+
try {
|
| 187 |
+
if (chunkSource === 'jsonl') {
|
| 188 |
+
chunks = await loadRagChunks(effectiveChunkLimit);
|
| 189 |
+
} else {
|
| 190 |
+
chunks = await fetchChunksFromIndex(effectiveChunkLimit);
|
| 191 |
+
}
|
| 192 |
+
} catch (e) {
|
| 193 |
+
const msg = e?.message || String(e);
|
| 194 |
+
errLog('[pipeline] ERROR loading chunks:', msg);
|
| 195 |
+
return {
|
| 196 |
+
mode: seedMode,
|
| 197 |
+
total: 0,
|
| 198 |
+
processed: 0,
|
| 199 |
+
accepted: 0,
|
| 200 |
+
outPath,
|
| 201 |
+
statusCounts: { chunk_load_error: 1 },
|
| 202 |
+
};
|
| 203 |
}
|
| 204 |
|
|
|
|
| 205 |
const totalChunks = chunks.length;
|
| 206 |
let processedSeeds = 0;
|
| 207 |
|
| 208 |
for (let idx = 0; idx < chunks.length; idx++) {
|
| 209 |
+
if (processed >= questionCap) break;
|
| 210 |
+
|
| 211 |
const chunk = chunks[idx];
|
| 212 |
const label = `[chunk ${idx + 1}/${chunks.length}]`;
|
| 213 |
const contextText = chunk.content;
|
|
|
|
| 222 |
processedSeeds += 1;
|
| 223 |
|
| 224 |
if (verbose) {
|
| 225 |
+
log(`\n🧩 ${label} generating questions from chunk…`);
|
| 226 |
+
if (chunk.id) {
|
| 227 |
+
log(` [question] chunk id: ${chunk.id}`);
|
| 228 |
+
}
|
| 229 |
log(
|
| 230 |
' [question] chunk preview:\n ' +
|
| 231 |
preview(contextText, 300).replace(/\n/g, '\n '),
|
|
|
|
| 270 |
|
| 271 |
// 2) run full pipeline for each generated question
|
| 272 |
for (const q of questions) {
|
| 273 |
+
if (processed >= questionCap) break;
|
| 274 |
if (!q || !q.trim()) continue;
|
| 275 |
|
| 276 |
const qLabel = `[q ${processed + 1}]`;
|
|
|
|
| 281 |
try {
|
| 282 |
const result = await runPipelineStep({
|
| 283 |
question: q,
|
| 284 |
+
initialContext: [chunk], // IMPORTANT: reuse SAME chunk, no second retrieval
|
|
|
|
| 285 |
verbose,
|
| 286 |
logger,
|
| 287 |
});
|
|
|
|
| 298 |
const record = {
|
| 299 |
question: q,
|
| 300 |
sourceChunkId: chunk.id,
|
| 301 |
+
sourceChunk: contextText,
|
| 302 |
+
sourceDoc: chunk.source,
|
| 303 |
context: result.context,
|
| 304 |
sample: result.gen,
|
| 305 |
verifier: result.ver,
|
|
|
|
| 326 |
return {
|
| 327 |
mode: seedMode,
|
| 328 |
total: totalChunks,
|
| 329 |
+
processed, // number of questions processed
|
| 330 |
+
processedSeeds,
|
| 331 |
processedQuestions: processed,
|
| 332 |
accepted,
|
| 333 |
outPath,
|
src/pipeline/pipeline_cli.js
CHANGED
|
@@ -26,6 +26,7 @@ function parseArgs(argv) {
|
|
| 26 |
let outPath;
|
| 27 |
let verbose = false;
|
| 28 |
let seedMode; // optional CLI override
|
|
|
|
| 29 |
|
| 30 |
for (let i = 0; i < args.length; i++) {
|
| 31 |
const a = args[i];
|
|
@@ -39,6 +40,10 @@ function parseArgs(argv) {
|
|
| 39 |
} else if (a === '--out') {
|
| 40 |
outPath = args[i + 1];
|
| 41 |
i++;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
} else if (a === '--mode') {
|
| 43 |
seedMode = args[i + 1]; // "question-first" | "static"
|
| 44 |
i++;
|
|
@@ -61,6 +66,7 @@ function parseArgs(argv) {
|
|
| 61 |
outPath: outPath || DEFAULT_OUT,
|
| 62 |
verbose,
|
| 63 |
seedMode,
|
|
|
|
| 64 |
};
|
| 65 |
}
|
| 66 |
|
|
@@ -71,6 +77,7 @@ async function main() {
|
|
| 71 |
outPath,
|
| 72 |
verbose,
|
| 73 |
seedMode: cliSeedMode,
|
|
|
|
| 74 |
} = parseArgs(process.argv);
|
| 75 |
|
| 76 |
const generatorProvider = process.env.GENERATOR_PROVIDER || 'ollama';
|
|
@@ -105,7 +112,15 @@ async function main() {
|
|
| 105 |
console.log(
|
| 106 |
` reward: ${rewardProvider} (${rewardModel})`,
|
| 107 |
);
|
| 108 |
-
console.log(`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
console.log(` Verbose: ${verbose ? 'yes' : 'no'}`);
|
| 110 |
console.log('');
|
| 111 |
|
|
@@ -114,6 +129,7 @@ async function main() {
|
|
| 114 |
seedsPath,
|
| 115 |
outPath,
|
| 116 |
limit,
|
|
|
|
| 117 |
verbose,
|
| 118 |
logger: console,
|
| 119 |
seedMode: mode,
|
|
|
|
| 26 |
let outPath;
|
| 27 |
let verbose = false;
|
| 28 |
let seedMode; // optional CLI override
|
| 29 |
+
let chunkLimit;
|
| 30 |
|
| 31 |
for (let i = 0; i < args.length; i++) {
|
| 32 |
const a = args[i];
|
|
|
|
| 40 |
} else if (a === '--out') {
|
| 41 |
outPath = args[i + 1];
|
| 42 |
i++;
|
| 43 |
+
} else if (a === '--chunk-limit') {
|
| 44 |
+
const v = Number(args[i + 1]);
|
| 45 |
+
if (!Number.isNaN(v)) chunkLimit = v;
|
| 46 |
+
i++;
|
| 47 |
} else if (a === '--mode') {
|
| 48 |
seedMode = args[i + 1]; // "question-first" | "static"
|
| 49 |
i++;
|
|
|
|
| 66 |
outPath: outPath || DEFAULT_OUT,
|
| 67 |
verbose,
|
| 68 |
seedMode,
|
| 69 |
+
chunkLimit,
|
| 70 |
};
|
| 71 |
}
|
| 72 |
|
|
|
|
| 77 |
outPath,
|
| 78 |
verbose,
|
| 79 |
seedMode: cliSeedMode,
|
| 80 |
+
chunkLimit,
|
| 81 |
} = parseArgs(process.argv);
|
| 82 |
|
| 83 |
const generatorProvider = process.env.GENERATOR_PROVIDER || 'ollama';
|
|
|
|
| 112 |
console.log(
|
| 113 |
` reward: ${rewardProvider} (${rewardModel})`,
|
| 114 |
);
|
| 115 |
+
console.log(` Question limit: ${limit ?? 'all'}`);
|
| 116 |
+
if (mode === 'question-first') {
|
| 117 |
+
const chunkLimitEffective =
|
| 118 |
+
chunkLimit ??
|
| 119 |
+
(process.env.PIPELINE_CHUNK_LIMIT ||
|
| 120 |
+
process.env.PIPELINE_CHUNKS ||
|
| 121 |
+
process.env.PIPELINE_CHUNK_SAMPLE);
|
| 122 |
+
console.log(` Chunk limit: ${chunkLimitEffective ?? 'all'}`);
|
| 123 |
+
}
|
| 124 |
console.log(` Verbose: ${verbose ? 'yes' : 'no'}`);
|
| 125 |
console.log('');
|
| 126 |
|
|
|
|
| 129 |
seedsPath,
|
| 130 |
outPath,
|
| 131 |
limit,
|
| 132 |
+
chunkLimit,
|
| 133 |
verbose,
|
| 134 |
logger: console,
|
| 135 |
seedMode: mode,
|
src/pipeline/pipeline_spec.mjs
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// src/pipeline/pipeline_spec.mjs
|
| 2 |
+
|
| 3 |
+
/**
|
| 4 |
+
* High-level pipeline description.
|
| 5 |
+
*
|
| 6 |
+
* This doesn’t execute anything; it just documents how
|
| 7 |
+
* stages should be wired together.
|
| 8 |
+
*
|
| 9 |
+
* You can later have a generic “orchestrator” that reads this
|
| 10 |
+
* and calls runLLMStage + parse functions dynamically.
|
| 11 |
+
*/
|
| 12 |
+
export const DISTILLATION_PIPELINE = {
|
| 13 |
+
id: 'confederation_distillation_v1',
|
| 14 |
+
stages: [
|
| 15 |
+
{
|
| 16 |
+
id: 'question',
|
| 17 |
+
kind: 'question_generator',
|
| 18 |
+
stageName: 'question', // maps to provider type
|
| 19 |
+
template: 'prompts/question_prompt.txt',
|
| 20 |
+
inputs: ['chunk'], // variables: { CONTEXT: chunk.content }
|
| 21 |
+
outputKey: 'questions',
|
| 22 |
+
parser: 'parseQuestions', // from question_core.mjs
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
id: 'generator',
|
| 26 |
+
kind: 'answer_generator',
|
| 27 |
+
stageName: 'generator',
|
| 28 |
+
template: 'prompts/generator_prompt.txt',
|
| 29 |
+
inputs: ['question', 'chunk'],
|
| 30 |
+
outputKey: 'gen',
|
| 31 |
+
parser: 'parseGeneratorOutput',
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
id: 'verifier',
|
| 35 |
+
kind: 'verifier',
|
| 36 |
+
stageName: 'verifier',
|
| 37 |
+
template: 'prompts/verifier_prompt.txt',
|
| 38 |
+
inputs: ['question', 'chunk', 'gen'],
|
| 39 |
+
outputKey: 'ver',
|
| 40 |
+
parser: 'parseVerifierOutput',
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
id: 'reward',
|
| 44 |
+
kind: 'reward_model',
|
| 45 |
+
stageName: 'reward',
|
| 46 |
+
template: 'prompts/reward_prompt.txt',
|
| 47 |
+
inputs: ['question', 'chunk', 'gen', 'ver'],
|
| 48 |
+
outputKey: 'rew',
|
| 49 |
+
parser: 'parseRewardOutput',
|
| 50 |
+
},
|
| 51 |
+
],
|
| 52 |
+
};
|
src/pipeline/step.mjs
CHANGED
|
@@ -1,6 +1,11 @@
|
|
| 1 |
// src/pipeline/step.mjs
|
| 2 |
import { loadProviderFor } from '../providers/provider.mjs';
|
| 3 |
-
import {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import { runGenerator } from '../generator/generator_core.mjs';
|
| 5 |
import { runVerifier } from '../verifier/verifier_core.mjs';
|
| 6 |
import { runReward } from '../reward/reward_core.mjs';
|
|
@@ -82,7 +87,21 @@ export async function runPipelineStep({
|
|
| 82 |
// Go to ES exactly once
|
| 83 |
try {
|
| 84 |
if (verbose) log(` [retrieval] mode=${retrievalMode} k=${k}`);
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
if (verbose) {
|
| 87 |
log(` [retrieval] got ${hits.length} chunks from ES`);
|
| 88 |
}
|
|
|
|
| 1 |
// src/pipeline/step.mjs
|
| 2 |
import { loadProviderFor } from '../providers/provider.mjs';
|
| 3 |
+
import {
|
| 4 |
+
hybridSearch,
|
| 5 |
+
bm25Search,
|
| 6 |
+
vectorSearch,
|
| 7 |
+
hydeHybrid,
|
| 8 |
+
} from '../retrieval/retrieval.mjs';
|
| 9 |
import { runGenerator } from '../generator/generator_core.mjs';
|
| 10 |
import { runVerifier } from '../verifier/verifier_core.mjs';
|
| 11 |
import { runReward } from '../reward/reward_core.mjs';
|
|
|
|
| 87 |
// Go to ES exactly once
|
| 88 |
try {
|
| 89 |
if (verbose) log(` [retrieval] mode=${retrievalMode} k=${k}`);
|
| 90 |
+
|
| 91 |
+
const hits = await (async () => {
|
| 92 |
+
switch (retrievalMode) {
|
| 93 |
+
case 'bm25':
|
| 94 |
+
return bm25Search(question, k);
|
| 95 |
+
case 'vector':
|
| 96 |
+
return vectorSearch(question, k);
|
| 97 |
+
case 'hyde':
|
| 98 |
+
return hydeHybrid(question, k, genProv);
|
| 99 |
+
case 'hybrid':
|
| 100 |
+
default:
|
| 101 |
+
return hybridSearch(question, k);
|
| 102 |
+
}
|
| 103 |
+
})();
|
| 104 |
+
|
| 105 |
if (verbose) {
|
| 106 |
log(` [retrieval] got ${hits.length} chunks from ES`);
|
| 107 |
}
|
src/question/question_core.mjs
CHANGED
|
@@ -1,126 +1,230 @@
|
|
| 1 |
// src/question/question_core.mjs
|
| 2 |
-
import
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
'
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
}
|
| 24 |
|
| 25 |
/**
|
| 26 |
-
* Extract questions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
*
|
| 28 |
-
*
|
| 29 |
-
*
|
| 30 |
-
*
|
| 31 |
*/
|
| 32 |
-
function
|
| 33 |
-
|
| 34 |
-
let questions = [];
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
}
|
| 39 |
|
| 40 |
-
//
|
| 41 |
-
try
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
else if (Array.isArray(json)) {
|
| 53 |
-
questions = json
|
| 54 |
-
.map((q) => String(q).trim())
|
| 55 |
-
.filter((q) => q.length > 0);
|
| 56 |
}
|
| 57 |
-
} catch (e) {
|
| 58 |
-
parsed = { error: 'invalid_json', message: e?.message };
|
| 59 |
}
|
| 60 |
|
| 61 |
-
//
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
// keep lines that look like questions
|
| 69 |
-
.filter((l) => l.length > 0 && /[??!]$/.test(l));
|
| 70 |
-
|
| 71 |
-
questions = lines;
|
| 72 |
}
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
}
|
| 80 |
|
| 81 |
/**
|
| 82 |
-
*
|
| 83 |
-
*
|
| 84 |
-
* @param {string} contextText - chunk from ES
|
| 85 |
-
* @param {object} provider - { generate(prompt) → string }
|
| 86 |
-
* @param {object} opts
|
| 87 |
-
* - maxQuestions?: number (defaults QUESTION_MAX or 5)
|
| 88 |
*
|
| 89 |
-
*
|
| 90 |
-
* raw: string,
|
| 91 |
-
* prompt: string,
|
| 92 |
-
* questions: string[],
|
| 93 |
-
* maxQuestions: number,
|
| 94 |
-
* parsed: any
|
| 95 |
-
* }>}
|
| 96 |
*/
|
| 97 |
export async function runQuestionGenerator(
|
| 98 |
contextText,
|
| 99 |
provider,
|
| 100 |
-
|
| 101 |
) {
|
| 102 |
-
|
| 103 |
-
|
|
|
|
| 104 |
|
| 105 |
-
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
const raw = await provider.generate(prompt);
|
|
|
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
return {
|
| 116 |
-
raw,
|
| 117 |
-
prompt,
|
| 118 |
-
questions,
|
| 119 |
-
maxQuestions,
|
| 120 |
-
parsed,
|
| 121 |
-
};
|
| 122 |
}
|
| 123 |
-
|
| 124 |
-
export default {
|
| 125 |
-
runQuestionGenerator,
|
| 126 |
-
};
|
|
|
|
| 1 |
// src/question/question_core.mjs
|
| 2 |
+
import { preview } from '../pipeline/util.mjs';
|
| 3 |
+
|
| 4 |
+
/**
|
| 5 |
+
* Safely parse JSON. Returns:
|
| 6 |
+
* - a parsed value on success
|
| 7 |
+
* - null on failure (and optionally an error object if needed)
|
| 8 |
+
*/
|
| 9 |
+
function tryParseJson(raw) {
|
| 10 |
+
if (!raw || typeof raw !== 'string') return null;
|
| 11 |
+
const trimmed = raw.trim();
|
| 12 |
+
|
| 13 |
+
// Quick sanity: must start with { or [
|
| 14 |
+
if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) {
|
| 15 |
+
return null;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
try {
|
| 19 |
+
return JSON.parse(trimmed);
|
| 20 |
+
} catch {
|
| 21 |
+
return null;
|
| 22 |
+
}
|
| 23 |
}
|
| 24 |
|
| 25 |
/**
|
| 26 |
+
* Extract questions from a plain-text response.
|
| 27 |
+
*
|
| 28 |
+
* This is designed to handle real LLM outputs like:
|
| 29 |
+
*
|
| 30 |
+
* What is the primary purpose of practicing presence according to the text?
|
| 31 |
+
* How does Q'uo characterize the physical vehicle's limitations?
|
| 32 |
+
* What is the role of pain and struggle in spiritual growth?
|
| 33 |
+
*
|
| 34 |
+
* as well as numbered/bulleted lists:
|
| 35 |
*
|
| 36 |
+
* 1. What is ... ?
|
| 37 |
+
* - How does ... ?
|
| 38 |
+
* * Why is ... ?
|
| 39 |
*/
|
| 40 |
+
function extractQuestionsFromText(rawText) {
|
| 41 |
+
if (!rawText || typeof rawText !== 'string') return [];
|
|
|
|
| 42 |
|
| 43 |
+
// Strip trivial XML/HTML-ish tags like <analysis>, <reasoning>, etc.
|
| 44 |
+
const stripped = rawText.replace(/<\/?[a-zA-Z0-9_:-]+>/g, ' ');
|
| 45 |
+
|
| 46 |
+
const lines = stripped
|
| 47 |
+
.split(/\r?\n/)
|
| 48 |
+
.map((l) => l.trim())
|
| 49 |
+
.filter(Boolean);
|
| 50 |
+
|
| 51 |
+
const questions = [];
|
| 52 |
+
|
| 53 |
+
for (const line of lines) {
|
| 54 |
+
// Must contain a question mark somewhere
|
| 55 |
+
if (!line.includes('?')) continue;
|
| 56 |
+
|
| 57 |
+
// Common prefixes: "1. ", "1) ", "- ", "* "
|
| 58 |
+
const cleaned = line.replace(/^(?:\d+\s*[.)]\s*|[-*]\s*)/, '').trim();
|
| 59 |
+
|
| 60 |
+
// Take up to the first '?' as the end of the question
|
| 61 |
+
const qPart = cleaned.split('?')[0].trim();
|
| 62 |
+
if (!qPart) continue;
|
| 63 |
+
|
| 64 |
+
const q = (qPart + '?').trim();
|
| 65 |
+
|
| 66 |
+
// Filter out tiny or degenerate things
|
| 67 |
+
if (q.length < 10) continue;
|
| 68 |
+
if (!/[a-zA-Z]/.test(q)) continue;
|
| 69 |
+
|
| 70 |
+
questions.push(q);
|
| 71 |
}
|
| 72 |
|
| 73 |
+
// If we didn't find anything line-based, optional fallback:
|
| 74 |
+
// try to split the whole text by '?' and recover sentence-like chunks.
|
| 75 |
+
if (questions.length === 0) {
|
| 76 |
+
const segments = stripped.split('?');
|
| 77 |
+
for (let i = 0; i < segments.length - 1; i++) {
|
| 78 |
+
const seg = segments[i].trim();
|
| 79 |
+
if (!seg) continue;
|
| 80 |
+
// Consider only reasonable-length segments
|
| 81 |
+
if (seg.length < 10) continue;
|
| 82 |
+
const candidate = seg + '?';
|
| 83 |
+
if (!/[a-zA-Z]/.test(candidate)) continue;
|
| 84 |
+
questions.push(candidate);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
}
|
|
|
|
|
|
|
| 86 |
}
|
| 87 |
|
| 88 |
+
// Deduplicate while preserving order
|
| 89 |
+
const seen = new Set();
|
| 90 |
+
const deduped = [];
|
| 91 |
+
for (const q of questions) {
|
| 92 |
+
if (seen.has(q)) continue;
|
| 93 |
+
seen.add(q);
|
| 94 |
+
deduped.push(q);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
}
|
| 96 |
|
| 97 |
+
return deduped;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
/**
 * Core helper: take a raw model string and return
 * {
 *   questions: string[],
 *   raw: string,
 *   parsed: any | { error: 'invalid_json', rawSnippet?: string }
 * }
 *
 * Parsing strategy:
 *   1. Attempt JSON first: either { questions: [...] } or a bare array root.
 *   2. If JSON fails (or has an unknown shape), fall back to text extraction.
 */
export function parseQuestionResponse(raw, { maxQuestions } = {}) {
  const out = {
    questions: [],
    raw: raw ?? '',
    parsed: null,
  };

  if (!raw || typeof raw !== 'string') {
    out.parsed = { error: 'empty_response' };
    return out;
  }

  // Apply the optional cap; a falsy maxQuestions means "no limit"
  // (matches the original truthiness behaviour).
  const cap = (list) => (maxQuestions ? list.slice(0, maxQuestions) : list);
  // Keep only non-empty strings that end in a question mark.
  const onlyQuestions = (list) => list.filter((q) => q && q.endsWith('?'));

  const parsed = tryParseJson(raw);
  if (parsed == null) {
    // Not valid JSON at all — fall through to the text fallback below.
    out.parsed = {
      error: 'invalid_json',
      rawSnippet: preview(raw, 200),
    };
  } else {
    out.parsed = parsed;

    // Shape 1: { questions: [...] }
    if (typeof parsed === 'object' && Array.isArray(parsed.questions)) {
      const cleaned = parsed.questions.map((q) =>
        typeof q === 'string' ? q.trim() : '',
      );
      out.questions = cap(onlyQuestions(cleaned));
      return out;
    }

    // Shape 2: array root of strings or { question } / { question_text } objects
    if (Array.isArray(parsed)) {
      const cleaned = parsed.map((item) => {
        if (typeof item === 'string') return item.trim();
        if (item && typeof item === 'object') {
          if (typeof item.question === 'string') {
            return item.question.trim();
          }
          if (typeof item.question_text === 'string') {
            return item.question_text.trim();
          }
        }
        return '';
      });
      out.questions = cap(onlyQuestions(cleaned));
      return out;
    }

    // Valid JSON, but not a shape we know how to read.
    out.parsed = {
      error: 'unrecognized_json_shape',
      rawSnippet: preview(raw, 200),
    };
  }

  // Fallback: pull question-like lines straight out of the plain text.
  out.questions = cap(extractQuestionsFromText(raw));
  return out;
}
|
| 185 |
|
| 186 |
/**
 * High-level helper used by the pipeline:
 *
 *   const { questions, raw, parsed } = await runQuestionGenerator(contextText, provider, { maxQuestions })
 *
 * Validates the provider, short-circuits on empty context, builds the
 * built-in prompt, and delegates parsing to parseQuestionResponse.
 */
export async function runQuestionGenerator(
  contextText,
  provider,
  { maxQuestions = 5 } = {},
) {
  // Fail fast on an unusable provider.
  if (typeof provider?.generate !== 'function') {
    throw new Error('Question provider must implement .generate(prompt)');
  }

  // Nothing to ask about — return the standard empty-context shape.
  if (!contextText || !contextText.trim()) {
    return { questions: [], raw: '', parsed: { error: 'empty_context' } };
  }

  // Minimal built-in prompt; if you have a richer prompt file, you can
  // load it and inject {{CONTEXT}} before calling provider.generate.
  const instructions = [
    'You are a question generation assistant.',
    '',
    'You will be given a chunk of spiritual teaching text as CONTEXT.',
    'Generate diverse, high-quality questions that:',
    '- are answerable from the context only,',
    '- require some thinking, not just copying a sentence,',
    '- are phrased as clear, direct questions.',
    '',
    'Return either:',
    '- JSON: { "questions": ["Q1?", "Q2?", ...] }',
    '  or an array of question-like objects/strings; OR',
    '- Plain text with one question per line.',
    '',
  ].join('\n');

  const prompt = `${instructions}\n---\nCONTEXT:\n${contextText}\n---`;

  const raw = await provider.generate(prompt);
  return parseQuestionResponse(raw, { maxQuestions });
}
|
|
|
|
|
|
|
|
|
|
|
|
src/retrieval/jsonl_chunks.mjs
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// src/retrieval/jsonl_chunks.mjs
|
| 2 |
+
import fs from 'fs/promises';
|
| 3 |
+
import path from 'path';
|
| 4 |
+
import crypto from 'crypto';
|
| 5 |
+
import { PROJECT_ROOT } from '../pipeline/util.mjs';
|
| 6 |
+
|
| 7 |
+
// Default chunk file location: <PROJECT_ROOT>/data/rag_chunks.jsonl.
// Callers can override per call (filePath arg) or via the RAG_CHUNKS_PATH
// env var — see loadRagChunks below.
const DEFAULT_RAG_PATH = path.join(
  PROJECT_ROOT,
  'data',
  'rag_chunks.jsonl',
);
|
| 12 |
+
|
| 13 |
+
// Simple in-memory cache of parsed chunks, keyed by resolved absolute path.
// (A single shared cache variable would return the FIRST file ever loaded
// for every later call, even when a different path was requested — e.g. a
// RAG_CHUNKS_PATH override after the default file had been read.)
const chunkCacheByPath = new Map();

/**
 * Parse rag_chunks.jsonl into an array of
 * { id, content, source } records.
 *
 * We are deliberately tolerant about field names so this works
 * with different builders:
 *   - content: obj.content || obj.text || obj.chunk || obj.body || ''
 *   - id:      obj.id || obj.session_key || obj.title || `jsonl-${idx}`
 *   - source:  whole original object
 *
 * Malformed JSONL lines are skipped rather than failing the whole load.
 * Results are cached per absolute path for the lifetime of the process.
 *
 * @param {string} [filePath=DEFAULT_RAG_PATH] absolute or PROJECT_ROOT-relative path
 * @returns {Promise<Array<{id: string, content: string, source: object}>>}
 */
async function loadAllChunksFromJsonl(filePath = DEFAULT_RAG_PATH) {
  const absPath = path.isAbsolute(filePath)
    ? filePath
    : path.join(PROJECT_ROOT, filePath);

  // Cache hit: same path already parsed in this process.
  if (chunkCacheByPath.has(absPath)) return chunkCacheByPath.get(absPath);

  const raw = await fs.readFile(absPath, 'utf8');
  const lines = raw
    .split('\n')
    .map((l) => l.trim())
    .filter(Boolean);

  const chunks = lines
    .map((line, idx) => {
      let obj;
      try {
        obj = JSON.parse(line);
      } catch {
        // Skip bad lines instead of exploding.
        return null;
      }

      const content =
        obj.content ||
        obj.text ||
        obj.chunk ||
        obj.body ||
        '';

      const id =
        obj.id ||
        obj.session_key ||
        obj.title ||
        `jsonl-${idx}`;

      return { id, content, source: obj };
    })
    .filter(Boolean);

  chunkCacheByPath.set(absPath, chunks);
  return chunks;
}
|
| 71 |
+
|
| 72 |
+
/**
 * Randomly sample `k` distinct elements from `arr` using
 * crypto.randomInt for the randomness (rejection sampling).
 *
 * A nullish `k`, or `k >= arr.length`, returns a shallow copy of
 * the whole array.
 */
function sampleWithoutReplacement(arr, k) {
  const total = arr.length;
  if (k == null || k >= total) return arr.slice();

  const pickedIdx = new Set();
  const picked = [];

  // Draw indices until we have k distinct ones (or exhaust the array).
  while (picked.length < k && pickedIdx.size < total) {
    const i = crypto.randomInt(0, total);
    if (!pickedIdx.has(i)) {
      pickedIdx.add(i);
      picked.push(arr[i]);
    }
  }

  return picked;
}
|
| 92 |
+
|
| 93 |
+
/**
 * Public API: load RAG chunks for pipeline seeding.
 *
 * Path resolution order: explicit `filePath` argument, then the
 * RAG_CHUNKS_PATH env var, then the repo default (data/rag_chunks.jsonl).
 *
 * @param {number|undefined} limit    Max chunks to return (all when omitted)
 * @param {string|undefined} filePath Override path
 * @returns {Promise<Array<{id, content, source}>>}
 */
export async function loadRagChunks(limit, filePath) {
  const resolvedPath =
    filePath || process.env.RAG_CHUNKS_PATH || DEFAULT_RAG_PATH;
  const chunks = await loadAllChunksFromJsonl(resolvedPath);
  if (!chunks?.length) return [];
  return sampleWithoutReplacement(chunks, limit ?? chunks.length);
}
|
src/retrieval/retrieval.mjs
CHANGED
|
@@ -143,8 +143,14 @@ Do NOT include JSON or formatting.
|
|
| 143 |
// ----------------------------------------
|
| 144 |
// Chunk sampling from ES (for QG pipeline)
|
| 145 |
// ----------------------------------------
|
| 146 |
-
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
const res = await client.search({
|
| 150 |
index: ES_INDEX,
|
|
@@ -176,4 +182,3 @@ export default {
|
|
| 176 |
hydeHybrid,
|
| 177 |
fetchChunksFromIndex,
|
| 178 |
};
|
| 179 |
-
|
|
|
|
| 143 |
// ----------------------------------------
|
| 144 |
// Chunk sampling from ES (for QG pipeline)
|
| 145 |
// ----------------------------------------
|
| 146 |
+
// Default number of chunks fetched when no explicit limit is passed to
// fetchChunksFromIndex. Overridable via PIPELINE_CHUNK_LIMIT (checked
// first) or RETRIEVAL_CHUNK_LIMIT.
// NOTE(review): a non-numeric env value yields NaN here — confirm callers
// guard against that (fetchChunksFromIndex only checks its own argument).
const DEFAULT_CHUNK_LIMIT = Number(
  process.env.PIPELINE_CHUNK_LIMIT ||
    process.env.RETRIEVAL_CHUNK_LIMIT ||
    '100',
);
|
| 151 |
+
|
| 152 |
+
export async function fetchChunksFromIndex(limit) {
|
| 153 |
+
const size = Number.isFinite(limit) ? limit : DEFAULT_CHUNK_LIMIT;
|
| 154 |
|
| 155 |
const res = await client.search({
|
| 156 |
index: ES_INDEX,
|
|
|
|
| 182 |
hydeHybrid,
|
| 183 |
fetchChunksFromIndex,
|
| 184 |
};
|
|
|
tests/llm_stage.test.mjs
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// tests/llm_stage.test.mjs
|
| 2 |
+
import { describe, it, expect, vi } from 'vitest';
|
| 3 |
+
import { runLLMStage } from '../src/core/llm_stage.mjs';
|
| 4 |
+
|
| 5 |
+
// Verifies the core LLM wrapper: template rendering, provider invocation,
// and the { raw, prompt } return contract. Uses a stub provider so no
// model/network is involved; the template file itself IS read from disk.
describe('runLLMStage (core LLM wrapper)', () => {
  it('fills the template, calls provider.generate, and returns raw + prompt', async () => {
    // Echo provider: lets us recover exactly what prompt was sent.
    const fakeProvider = {
      generate: vi.fn(async (prompt) => `ECHO::${prompt.slice(0, 40)}`),
    };

    const question = 'What is love?';
    const context = 'Love is the field in which all beings move.';

    const { raw, prompt } = await runLLMStage({
      stage: 'generator',
      // Use an existing prompt template so we’re exercising real I/O
      template: 'prompts/generator_prompt.txt',
      vars: {
        QUESTION: question,
        CONTEXT: context,
      },
      provider: fakeProvider, // avoid hitting Ollama in tests
      verbose: false,
      logger: { log: () => {} },
    });

    // Provider was called exactly once
    expect(fakeProvider.generate).toHaveBeenCalledTimes(1);

    const calledPrompt = fakeProvider.generate.mock.calls[0][0];

    // The rendered prompt should contain our substituted vars
    expect(calledPrompt).toContain(question);
    expect(calledPrompt).toContain(context);

    // Returned prompt should match what we sent to the provider
    expect(prompt).toBe(calledPrompt);

    // raw should be whatever the provider returned
    expect(raw).toBe(`ECHO::${calledPrompt.slice(0, 40)}`);
  });

  it('throws if provider has no generate() method', async () => {
    // An object without .generate must be rejected up front.
    await expect(
      runLLMStage({
        stage: 'generator',
        template: 'prompts/generator_prompt.txt',
        vars: {
          QUESTION: 'Test?',
          CONTEXT: 'Some context',
        },
        provider: {}, // missing generate()
        verbose: false,
        logger: { log: () => {} },
      }),
    ).rejects.toThrow(/generate/i);
  });
});
|
tests/ollama_provider_reasoning.test.mjs
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// tests/ollama_provider_reasoning.test.mjs
|
| 2 |
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
| 3 |
+
|
| 4 |
+
// Saved globals restored after each test so env/fetch mutations don't leak.
let originalFetch;
let originalEnv;

beforeEach(() => {
  originalFetch = globalThis.fetch;
  // shallow clone is fine for test mutations
  originalEnv = { ...process.env };
});

afterEach(() => {
  globalThis.fetch = originalFetch;
  // NOTE(review): this replaces process.env with a plain clone; Node allows
  // it, but confirm nothing relies on the original env object's identity.
  process.env = originalEnv;
});

describe('OllamaProvider reasoning flag', () => {
  it('adds options.reasoning=true when OLLAMA_REASONING is enabled', async () => {
    // Arrange env *before* importing the module (ENABLE_REASONING is computed at import time)
    process.env.OLLAMA_REASONING = 'true';
    process.env.OLLAMA_URL = 'http://ollama.local';
    process.env.GENERATOR_MODEL = 'test-model';

    // Stubbed fetch captures the request the provider builds.
    const fetchMock = vi.fn(async () => ({
      ok: true,
      json: async () => ({ response: 'ok' }),
    }));
    globalThis.fetch = fetchMock;

    // Dynamic import so env is read fresh for this test
    // NOTE(review): without vi.resetModules() a previously cached copy of
    // this module could be returned — confirm import-time env is re-read.
    const { OllamaProvider } = await import(
      '../src/providers/ollama_provider.mjs'
    );

    const provider = new OllamaProvider({ model: 'test-model' });

    // Act
    await provider.generate('Hello, world');

    // Assert
    expect(fetchMock).toHaveBeenCalledTimes(1);
    const [url, options] = fetchMock.mock.calls[0];

    expect(url).toMatch(/\/api\/generate$/);

    const body = JSON.parse(options.body);

    // Request body must carry model, prompt, and the reasoning option.
    expect(body).toHaveProperty('model', 'test-model');
    expect(body).toHaveProperty('prompt');
    expect(body).toHaveProperty('options');
    expect(body.options).toHaveProperty('reasoning', true);
  });
});
|
tests/pipeline_behaviour.test.mjs
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
| 2 |
+
import os from 'os';
|
| 3 |
+
import path from 'path';
|
| 4 |
+
|
| 5 |
+
// Verifies that runPipelineStep dispatches to exactly one retrieval backend
// per retrievalMode value. All stage modules (provider, generator, verifier,
// reward) are mocked so no model or Elasticsearch calls occur.
describe('runPipelineStep retrieval modes', () => {
  beforeEach(() => {
    // Fresh module graph so vi.doMock takes effect on each import.
    vi.resetModules();
  });

  afterEach(() => {
    vi.restoreAllMocks();
    vi.unmock('../src/retrieval/retrieval.mjs');
    vi.unmock('../src/providers/provider.mjs');
    vi.unmock('../src/generator/generator_core.mjs');
    vi.unmock('../src/verifier/verifier_core.mjs');
    vi.unmock('../src/reward/reward_core.mjs');
  });

  // Installs all mocks, then imports step.mjs so it binds to the mocks.
  // Returns the spies plus the freshly-imported runPipelineStep.
  async function setupRetrievalMocks() {
    const hybridSearch = vi.fn(async () => [{ content: 'hybrid' }]);
    const bm25Search = vi.fn(async () => [{ content: 'bm25' }]);
    const vectorSearch = vi.fn(async () => [{ content: 'vector' }]);
    const hydeHybrid = vi.fn(async () => [{ content: 'hyde' }]);

    vi.doMock('../src/retrieval/retrieval.mjs', () => ({
      hybridSearch,
      bm25Search,
      vectorSearch,
      hydeHybrid,
    }));

    vi.doMock('../src/providers/provider.mjs', () => ({
      loadProviderFor: () => ({ generate: async () => 'ok' }),
    }));

    vi.doMock('../src/generator/generator_core.mjs', () => ({
      runGenerator: vi.fn(async () => ({ answer: 'a', raw: 'a' })),
    }));
    vi.doMock('../src/verifier/verifier_core.mjs', () => ({
      runVerifier: vi.fn(async () => ({ ok: true, raw: 'yes' })),
    }));
    vi.doMock('../src/reward/reward_core.mjs', () => ({
      runReward: vi.fn(async () => ({ ok: true, score: 1, raw: '1' })),
    }));

    // Import AFTER mocks are registered.
    const mod = await import('../src/pipeline/step.mjs');
    return { hybridSearch, bm25Search, vectorSearch, hydeHybrid, runPipelineStep: mod.runPipelineStep };
  }

  it('uses bm25 when retrievalMode=bm25', async () => {
    const mocks = await setupRetrievalMocks();

    await mocks.runPipelineStep({ question: 'q', retrievalMode: 'bm25' });

    // Only the bm25 backend runs; every other backend stays untouched.
    expect(mocks.bm25Search).toHaveBeenCalledTimes(1);
    expect(mocks.vectorSearch).not.toHaveBeenCalled();
    expect(mocks.hybridSearch).not.toHaveBeenCalled();
    expect(mocks.hydeHybrid).not.toHaveBeenCalled();
  });

  it('uses hyde when retrievalMode=hyde', async () => {
    const mocks = await setupRetrievalMocks();

    await mocks.runPipelineStep({ question: 'q', retrievalMode: 'hyde' });

    // Only the HyDE backend runs.
    expect(mocks.hydeHybrid).toHaveBeenCalledTimes(1);
    expect(mocks.bm25Search).not.toHaveBeenCalled();
    expect(mocks.vectorSearch).not.toHaveBeenCalled();
    expect(mocks.hybridSearch).not.toHaveBeenCalled();
  });
});
|
| 72 |
+
|
| 73 |
+
// Verifies the batch runner honours the question cap (`limit`) in
// question-first mode: even when chunks yield more questions, only `limit`
// pipeline steps may run. All model/retrieval work is mocked.
describe('runPipelineBatch question cap', () => {
  beforeEach(() => {
    vi.resetModules();
    // Force the jsonl chunk source so the mocked loadRagChunks is used.
    process.env.PIPELINE_CHUNK_SOURCE = 'jsonl';
  });

  afterEach(() => {
    delete process.env.PIPELINE_CHUNK_SOURCE;
    vi.restoreAllMocks();
    vi.unmock('../src/providers/provider.mjs');
    vi.unmock('../src/retrieval/jsonl_chunks.mjs');
    vi.unmock('../src/pipeline/step.mjs');
    // NOTE(review): '../src/question/question_core.mjs' is mocked below but
    // never unmocked here — confirm it cannot leak into other suites.
  });

  it('stops once the question limit is reached in question-first mode', async () => {
    const questionsPerChunk = 'Q1?\nQ2?\nQ3?';

    // Mock question provider + retrieval
    vi.doMock('../src/providers/provider.mjs', () => ({
      loadProviderFor: (stage) =>
        stage === 'question'
          ? { generate: async () => questionsPerChunk }
          : { generate: async () => '' },
    }));

    // Force question generator to return a fixed list to avoid parser variability
    const runQuestionGenerator = vi.fn(async () => ({
      questions: ['Q1?', 'Q2?', 'Q3?'],
      raw: questionsPerChunk,
      parsed: null,
    }));
    vi.doMock('../src/question/question_core.mjs', () => ({ runQuestionGenerator }));

    // Three synthetic chunks; each would produce three questions.
    vi.doMock('../src/retrieval/jsonl_chunks.mjs', () => ({
      loadRagChunks: vi.fn(async (limit) =>
        Array.from({ length: limit ?? 3 }, (_, i) => ({
          id: `c-${i}`,
          content: `chunk ${i}`,
        })),
      ),
    }));

    // Stub runPipelineStep to avoid model calls; count invocations
    const runPipelineStep = vi.fn(async () => ({
      status: 'accepted',
      context: [],
      gen: { answer: 'a' },
      ver: { ok: true },
      rew: { ok: true },
    }));
    vi.doMock('../src/pipeline/step.mjs', () => ({ runPipelineStep }));

    // Write gold output to a throwaway temp file.
    const outPath = path.join(os.tmpdir(), `test-gold-${Date.now()}.jsonl`);
    const { runPipelineBatch } = await import('../src/pipeline/batch.mjs');
    const result = await runPipelineBatch({
      limit: 2, // question cap
      chunkLimit: 3,
      seedMode: 'question-first',
      outPath,
      verbose: false,
      logger: { log() {}, error() {} },
    });

    // Exactly two questions processed despite nine being available.
    expect(result.processed).toBe(2);
    expect(runPipelineStep).toHaveBeenCalledTimes(2);
    expect(result.accepted).toBe(2);
  });
});
|
| 141 |
+
|
| 142 |
+
// Verifies fetchChunksFromIndex falls back to the 100-chunk default size
// when called without a limit. The ES client and node-fetch are mocked so
// the real module's search options can be captured without any network.
describe('fetchChunksFromIndex default size', () => {
  beforeEach(() => {
    vi.resetModules();
    vi.unmock('../src/retrieval/retrieval.mjs');
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('uses a larger default than 10 when no limit is provided', async () => {
    // Will hold the options object passed to client.search().
    let capturedOpts = null;

    vi.unmock('../src/retrieval/retrieval.mjs');

    vi.doMock('@elastic/elasticsearch', () => ({
      Client: vi.fn().mockImplementation(() => ({
        search: vi.fn(async (opts) => {
          capturedOpts = opts;
          return { hits: { hits: [] } };
        }),
      })),
    }));

    // avoid accidental network fetch calls in this test
    vi.doMock('node-fetch', () => ({
      default: vi.fn(async () => ({ ok: true, json: async () => ({ embedding: [] }) })),
    }));

    // Re-export the real module so the fetch function exists, but with mocked deps above.
    vi.doMock('../src/retrieval/retrieval.mjs', async (importOriginal) => {
      const actual = await importOriginal();
      return { ...actual };
    });

    const { fetchChunksFromIndex } = await import('../src/retrieval/retrieval.mjs');
    await fetchChunksFromIndex();

    // Matches DEFAULT_CHUNK_LIMIT's '100' fallback in retrieval.mjs.
    expect(capturedOpts?.size).toBe(100);
  });
});
|