htaf commited on
Commit
68e4117
·
1 Parent(s): 2baa954

tightened up question limits

Browse files
AGENTS.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Repository Guidelines
2
+
3
+ ## Project Structure & Module Organization
4
+ - Core pipeline lives in `src/`, with stage logic in `src/generator`, `src/verifier`, `src/reward`, `src/question`, retrieval helpers in `src/retrieval`, and the CLI entrypoint at `src/pipeline/pipeline_cli.js`.
5
+ - Prompts are in `prompts/`; tweak these before changing stage behaviour.
6
+ - Tests sit in `tests/` (Vitest), with sample seeds in `test_samples/`; pipeline outputs write to `gold/`.
7
+ - Config baselines (models, limits) are in `configs/pipeline.json`; run scripts live at the repo root (`run.sh`, `try_prompt.sh`).
8
+
9
+ ## Build, Test, and Development Commands
10
+ - `npm install` – install dependencies.
11
+ - `npm run pipeline -- --limit 20 --verbose` – run the default pipeline using static seeds.
12
+ - `PIPELINE_SEED_MODE=question-first npm run pipeline -- --limit 20 --verbose` – enable question-first seeding.
13
+ - `npm test` – run all unit tests (mocked by default).
14
+ - `REAL_ES=1 npm test` – exercise retrieval against a live Elasticsearch + embedding endpoint.
15
+
16
+ ## Coding Style & Naming Conventions
17
+ - ECMAScript modules (`type: "module"`); prefer `.mjs` for shared code.
18
+ - Two-space indentation, single quotes unless template strings add clarity, and keep functions small and pure where possible (CLI glue stays in `pipeline_cli.js`).
19
+ - Use descriptive lower_snake_case or camelCase names for variables; exported helpers use camelCase.
20
+ - Keep prompts and stage logic separate; place reusable utilities in `src/pipeline/util.mjs`.
21
+
22
+ ## Testing Guidelines
23
+ - Vitest is the test runner; add new tests under `tests/` with `.test.mjs` suffix.
24
+ - Mirror stage names in test files (e.g., `generator_core.test.mjs`), and include both happy-path and malformed-input cases.
25
+ - For retrieval, default mocks cover most cases; only opt into `REAL_ES=1` when you have a running distill-rag stack.
26
+ - Aim to keep tests deterministic—mock providers and network calls unless explicitly validating integrations.
27
+
28
+ ## Commit & Pull Request Guidelines
29
+ - Follow the existing history: short, present-tense summaries (e.g., `add generator test`, `maintain chunk ordering`); include scoped prefixes only when they improve clarity.
30
+ - Keep commits focused (one concern each) and ensure `npm test` passes before pushing.
31
+ - PRs should state the goal, main changes, test evidence, and any required `.env` or config updates; include sample command/output paths when relevant (e.g., `gold/pipeline_gold.jsonl`).
32
+ - Link issues when applicable and note any provider/model assumptions or external services needed for validation.
33
+
34
+ ## Configuration & Environment Tips
35
+ - Runtime configuration comes from `.env` (ES node, embedding endpoint, provider selections, stage models); avoid committing secrets.
36
+ - When changing model or provider choices, update `configs/pipeline.json` if you want a sharable default, and document overrides in your PR description.
src/core/llm_stage.mjs ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // src/core/llm_stage.mjs
2
+ import fs from 'fs/promises';
3
+ import path from 'path';
4
+
5
+ import { PROJECT_ROOT } from '../pipeline/util.mjs';
6
+ import { loadProviderFor } from '../providers/provider.mjs';
7
+
8
/**
 * Read a prompt template file from disk.
 * Relative paths are resolved against the project root
 * (e.g. "prompts/question_prompt.txt"); absolute paths are used as-is.
 */
async function loadTemplate(templatePath) {
  const resolved = path.isAbsolute(templatePath)
    ? templatePath
    : path.join(PROJECT_ROOT, templatePath);
  return fs.readFile(resolved, 'utf8');
}
20
+
21
/**
 * Interpolate {{VAR}} placeholders into a template string.
 * Missing, null, or undefined variables render as the empty string;
 * everything else is stringified. Placeholder names may use letters,
 * digits, and underscores (case-insensitive match).
 */
function renderTemplate(template, vars = {}) {
  const substitute = (_match, name) => {
    const value = vars[name];
    if (value == null) return '';
    return String(value);
  };
  return template.replace(/{{\s*([A-Z0-9_]+)\s*}}/gi, substitute);
}
31
+
32
/**
 * Generic LLM stage runner.
 *
 * Loads a prompt template, interpolates variables into it, sends the
 * rendered prompt to the stage's provider, and returns both the prompt
 * and the raw model output.
 *
 * @param {object} opts
 * @param {string} opts.stage - logical stage name: "question" | "generator" | "verifier" | "reward" | "expert_x"
 * @param {string} opts.template - path to prompt template (e.g. "prompts/question_prompt.txt")
 * @param {object} [opts.vars] - interpolation vars for the template (QUESTION, CONTEXT, ANSWER, etc.)
 * @param {object} [opts.provider] - optional provider instance; if omitted, uses loadProviderFor(stage)
 * @param {boolean} [opts.verbose] - when true, log the stage name and a prompt preview
 * @param {Console} [opts.logger] - log sink; falls back to console
 *
 * @returns {Promise<{ prompt: string, raw: string }>}
 */
export async function runLLMStage({
  stage,
  template,
  vars = {},
  provider,
  verbose = false,
  logger = console,
} = {}) {
  if (!stage) throw new Error('runLLMStage: "stage" is required');
  if (!template) throw new Error('runLLMStage: "template" is required');

  const log = logger?.log?.bind(logger) || console.log;

  // An explicitly-passed provider wins; otherwise resolve one by stage name.
  const activeProvider = provider || loadProviderFor(stage);

  const templateText = await loadTemplate(template);
  const prompt = renderTemplate(templateText, vars);

  if (verbose) {
    log(`[llm_stage] stage=${stage}`);
    log('[llm_stage] prompt preview:\n' + prompt.slice(0, 400));
  }

  const raw = await activeProvider.generate(prompt);

  return { prompt, raw };
}
src/pipeline/batch.mjs CHANGED
@@ -8,12 +8,12 @@ import {
8
  DEFAULT_OUT_PATH,
9
  loadSeedQuestions,
10
  seedToQuestion,
11
- // seedToContextText, // <-- no longer needed for QG mode
12
  } from './seeds.mjs';
13
  import { runPipelineStep } from './step.mjs';
14
  import { loadProviderFor } from '../providers/provider.mjs';
15
  import { runQuestionGenerator } from '../question/question_core.mjs';
16
  import { fetchChunksFromIndex } from '../retrieval/retrieval.mjs';
 
17
 
18
  /**
19
  * Append a single accepted record to a JSONL file.
@@ -30,7 +30,7 @@ export async function appendGoldRecord(outPath, record) {
30
  *
31
  * Modes:
32
  * - question-first (default): seeds are CHUNKS; we generate questions from each chunk
33
- * - static: seeds are questions (legacy / low-priority mode)
34
  *
35
  * Options:
36
  * - seedsPath: JSONL of seeds (defaults to test_samples/seed_questions.jsonl)
@@ -56,9 +56,10 @@ export async function runPipelineBatch({
56
  seedsPath = DEFAULT_SEEDS_PATH,
57
  outPath = DEFAULT_OUT_PATH,
58
  limit,
 
59
  verbose = false,
60
  logger = console,
61
- seedMode = process.env.PIPELINE_SEED_MODE || 'static',
62
  } = {}) {
63
  const log = logger?.log?.bind(logger) || console.log;
64
  const errLog = logger?.error?.bind(logger) || console.error;
@@ -137,32 +138,76 @@ export async function runPipelineBatch({
137
  }
138
 
139
  // ----------------------------------------
140
- // MODE 2: question-first (ES chunks → QG → pipeline)
141
  // ----------------------------------------
142
  if (seedMode === 'question-first') {
143
  const questionProvider = loadProviderFor('question');
144
 
145
- // support both env names
146
  const maxQuestionsPerChunk = Number(
147
  process.env.QUESTION_MAX_PER_CHUNK ||
148
  process.env.QUESTION_MAX ||
149
  '5',
150
  );
151
 
152
- const chunkLimit =
153
- typeof limit === 'number' ? limit : undefined;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  if (verbose) {
156
- log(
157
- `[pipeline] fetching chunks from ES (limit=${chunkLimit ?? 'default'})`,
158
- );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  }
160
 
161
- const chunks = await fetchChunksFromIndex(chunkLimit);
162
  const totalChunks = chunks.length;
163
  let processedSeeds = 0;
164
 
165
  for (let idx = 0; idx < chunks.length; idx++) {
 
 
166
  const chunk = chunks[idx];
167
  const label = `[chunk ${idx + 1}/${chunks.length}]`;
168
  const contextText = chunk.content;
@@ -177,8 +222,10 @@ export async function runPipelineBatch({
177
  processedSeeds += 1;
178
 
179
  if (verbose) {
180
- log(`\n🧩 ${label} generating questions from ES chunk…`);
181
- log(` [question] es _id: ${chunk.id}`);
 
 
182
  log(
183
  ' [question] chunk preview:\n ' +
184
  preview(contextText, 300).replace(/\n/g, '\n '),
@@ -223,6 +270,7 @@ export async function runPipelineBatch({
223
 
224
  // 2) run full pipeline for each generated question
225
  for (const q of questions) {
 
226
  if (!q || !q.trim()) continue;
227
 
228
  const qLabel = `[q ${processed + 1}]`;
@@ -233,8 +281,7 @@ export async function runPipelineBatch({
233
  try {
234
  const result = await runPipelineStep({
235
  question: q,
236
- // 🔑 KEY FIX: reuse this ES chunk as the *only* context
237
- initialContext: [chunk],
238
  verbose,
239
  logger,
240
  });
@@ -251,8 +298,8 @@ export async function runPipelineBatch({
251
  const record = {
252
  question: q,
253
  sourceChunkId: chunk.id,
254
- sourceChunk: contextText, // raw ES chunk text
255
- sourceDoc: chunk.source, // full ES _source
256
  context: result.context,
257
  sample: result.gen,
258
  verifier: result.ver,
@@ -279,8 +326,8 @@ export async function runPipelineBatch({
279
  return {
280
  mode: seedMode,
281
  total: totalChunks,
282
- processed, // number of questions processed
283
- processedSeeds, // how many chunks we actually used
284
  processedQuestions: processed,
285
  accepted,
286
  outPath,
 
8
  DEFAULT_OUT_PATH,
9
  loadSeedQuestions,
10
  seedToQuestion,
 
11
  } from './seeds.mjs';
12
  import { runPipelineStep } from './step.mjs';
13
  import { loadProviderFor } from '../providers/provider.mjs';
14
  import { runQuestionGenerator } from '../question/question_core.mjs';
15
  import { fetchChunksFromIndex } from '../retrieval/retrieval.mjs';
16
+ import { loadRagChunks } from '../retrieval/jsonl_chunks.mjs';
17
 
18
  /**
19
  * Append a single accepted record to a JSONL file.
 
30
  *
31
  * Modes:
32
  * - question-first (default): seeds are CHUNKS; we generate questions from each chunk
33
+ * - static: seeds are questions (legacy / low-priority mode)
34
  *
35
  * Options:
36
  * - seedsPath: JSONL of seeds (defaults to test_samples/seed_questions.jsonl)
 
56
  seedsPath = DEFAULT_SEEDS_PATH,
57
  outPath = DEFAULT_OUT_PATH,
58
  limit,
59
+ chunkLimit,
60
  verbose = false,
61
  logger = console,
62
+ seedMode = process.env.PIPELINE_SEED_MODE || 'question-first',
63
  } = {}) {
64
  const log = logger?.log?.bind(logger) || console.log;
65
  const errLog = logger?.error?.bind(logger) || console.error;
 
138
  }
139
 
140
  // ----------------------------------------
141
+ // MODE 2: question-first (JSONL or ES chunks → QG → pipeline)
142
  // ----------------------------------------
143
  if (seedMode === 'question-first') {
144
  const questionProvider = loadProviderFor('question');
145
 
 
146
  const maxQuestionsPerChunk = Number(
147
  process.env.QUESTION_MAX_PER_CHUNK ||
148
  process.env.QUESTION_MAX ||
149
  '5',
150
  );
151
 
152
+ const questionCap =
153
+ typeof limit === 'number' ? limit : Number.POSITIVE_INFINITY;
154
+
155
+ const effectiveChunkLimit = (() => {
156
+ if (typeof chunkLimit === 'number') return chunkLimit;
157
+ const envLimit =
158
+ process.env.PIPELINE_CHUNK_LIMIT ||
159
+ process.env.PIPELINE_CHUNKS ||
160
+ process.env.PIPELINE_CHUNK_SAMPLE;
161
+ const parsed = envLimit != null ? Number(envLimit) : NaN;
162
+ return Number.isFinite(parsed) ? parsed : undefined;
163
+ })();
164
+
165
+ // Decide where chunks come from:
166
+ // PIPELINE_CHUNK_SOURCE = 'jsonl' | 'es'
167
+ const chunkSource =
168
+ process.env.PIPELINE_CHUNK_SOURCE || 'es';
169
 
170
  if (verbose) {
171
+ if (chunkSource === 'jsonl') {
172
+ const p =
173
+ process.env.RAG_CHUNKS_PATH ||
174
+ 'data/rag_chunks.jsonl';
175
+ log(
176
+ `[pipeline] loading chunks from JSONL (${p}), limit=${effectiveChunkLimit ?? 'all'}`,
177
+ );
178
+ } else {
179
+ log(
180
+ `[pipeline] fetching chunks from ES (limit=${effectiveChunkLimit ?? 'all'})`,
181
+ );
182
+ }
183
+ }
184
+
185
+ let chunks = [];
186
+ try {
187
+ if (chunkSource === 'jsonl') {
188
+ chunks = await loadRagChunks(effectiveChunkLimit);
189
+ } else {
190
+ chunks = await fetchChunksFromIndex(effectiveChunkLimit);
191
+ }
192
+ } catch (e) {
193
+ const msg = e?.message || String(e);
194
+ errLog('[pipeline] ERROR loading chunks:', msg);
195
+ return {
196
+ mode: seedMode,
197
+ total: 0,
198
+ processed: 0,
199
+ accepted: 0,
200
+ outPath,
201
+ statusCounts: { chunk_load_error: 1 },
202
+ };
203
  }
204
 
 
205
  const totalChunks = chunks.length;
206
  let processedSeeds = 0;
207
 
208
  for (let idx = 0; idx < chunks.length; idx++) {
209
+ if (processed >= questionCap) break;
210
+
211
  const chunk = chunks[idx];
212
  const label = `[chunk ${idx + 1}/${chunks.length}]`;
213
  const contextText = chunk.content;
 
222
  processedSeeds += 1;
223
 
224
  if (verbose) {
225
+ log(`\n🧩 ${label} generating questions from chunk…`);
226
+ if (chunk.id) {
227
+ log(` [question] chunk id: ${chunk.id}`);
228
+ }
229
  log(
230
  ' [question] chunk preview:\n ' +
231
  preview(contextText, 300).replace(/\n/g, '\n '),
 
270
 
271
  // 2) run full pipeline for each generated question
272
  for (const q of questions) {
273
+ if (processed >= questionCap) break;
274
  if (!q || !q.trim()) continue;
275
 
276
  const qLabel = `[q ${processed + 1}]`;
 
281
  try {
282
  const result = await runPipelineStep({
283
  question: q,
284
+ initialContext: [chunk], // IMPORTANT: reuse SAME chunk, no second retrieval
 
285
  verbose,
286
  logger,
287
  });
 
298
  const record = {
299
  question: q,
300
  sourceChunkId: chunk.id,
301
+ sourceChunk: contextText,
302
+ sourceDoc: chunk.source,
303
  context: result.context,
304
  sample: result.gen,
305
  verifier: result.ver,
 
326
  return {
327
  mode: seedMode,
328
  total: totalChunks,
329
+ processed, // number of questions processed
330
+ processedSeeds,
331
  processedQuestions: processed,
332
  accepted,
333
  outPath,
src/pipeline/pipeline_cli.js CHANGED
@@ -26,6 +26,7 @@ function parseArgs(argv) {
26
  let outPath;
27
  let verbose = false;
28
  let seedMode; // optional CLI override
 
29
 
30
  for (let i = 0; i < args.length; i++) {
31
  const a = args[i];
@@ -39,6 +40,10 @@ function parseArgs(argv) {
39
  } else if (a === '--out') {
40
  outPath = args[i + 1];
41
  i++;
 
 
 
 
42
  } else if (a === '--mode') {
43
  seedMode = args[i + 1]; // "question-first" | "static"
44
  i++;
@@ -61,6 +66,7 @@ function parseArgs(argv) {
61
  outPath: outPath || DEFAULT_OUT,
62
  verbose,
63
  seedMode,
 
64
  };
65
  }
66
 
@@ -71,6 +77,7 @@ async function main() {
71
  outPath,
72
  verbose,
73
  seedMode: cliSeedMode,
 
74
  } = parseArgs(process.argv);
75
 
76
  const generatorProvider = process.env.GENERATOR_PROVIDER || 'ollama';
@@ -105,7 +112,15 @@ async function main() {
105
  console.log(
106
  ` reward: ${rewardProvider} (${rewardModel})`,
107
  );
108
- console.log(` Limit: ${limit ?? 'all'}`);
 
 
 
 
 
 
 
 
109
  console.log(` Verbose: ${verbose ? 'yes' : 'no'}`);
110
  console.log('');
111
 
@@ -114,6 +129,7 @@ async function main() {
114
  seedsPath,
115
  outPath,
116
  limit,
 
117
  verbose,
118
  logger: console,
119
  seedMode: mode,
 
26
  let outPath;
27
  let verbose = false;
28
  let seedMode; // optional CLI override
29
+ let chunkLimit;
30
 
31
  for (let i = 0; i < args.length; i++) {
32
  const a = args[i];
 
40
  } else if (a === '--out') {
41
  outPath = args[i + 1];
42
  i++;
43
+ } else if (a === '--chunk-limit') {
44
+ const v = Number(args[i + 1]);
45
+ if (!Number.isNaN(v)) chunkLimit = v;
46
+ i++;
47
  } else if (a === '--mode') {
48
  seedMode = args[i + 1]; // "question-first" | "static"
49
  i++;
 
66
  outPath: outPath || DEFAULT_OUT,
67
  verbose,
68
  seedMode,
69
+ chunkLimit,
70
  };
71
  }
72
 
 
77
  outPath,
78
  verbose,
79
  seedMode: cliSeedMode,
80
+ chunkLimit,
81
  } = parseArgs(process.argv);
82
 
83
  const generatorProvider = process.env.GENERATOR_PROVIDER || 'ollama';
 
112
  console.log(
113
  ` reward: ${rewardProvider} (${rewardModel})`,
114
  );
115
+ console.log(` Question limit: ${limit ?? 'all'}`);
116
+ if (mode === 'question-first') {
117
+ const chunkLimitEffective =
118
+ chunkLimit ??
119
+ (process.env.PIPELINE_CHUNK_LIMIT ||
120
+ process.env.PIPELINE_CHUNKS ||
121
+ process.env.PIPELINE_CHUNK_SAMPLE);
122
+ console.log(` Chunk limit: ${chunkLimitEffective ?? 'all'}`);
123
+ }
124
  console.log(` Verbose: ${verbose ? 'yes' : 'no'}`);
125
  console.log('');
126
 
 
129
  seedsPath,
130
  outPath,
131
  limit,
132
+ chunkLimit,
133
  verbose,
134
  logger: console,
135
  seedMode: mode,
src/pipeline/pipeline_spec.mjs ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// src/pipeline/pipeline_spec.mjs

/**
 * High-level pipeline description.
 *
 * This doesn’t execute anything; it just documents how
 * stages should be wired together.
 *
 * You can later have a generic “orchestrator” that reads this
 * and calls runLLMStage + parse functions dynamically.
 *
 * Each stage entry carries:
 *   id        - unique stage identifier within this pipeline
 *   kind      - descriptive role of the stage
 *   stageName - provider lookup key (see loadProviderFor)
 *   template  - prompt template path relative to the project root
 *   inputs    - upstream values interpolated into the template
 *   outputKey - key the stage's parsed result is stored under
 *   parser    - name of the function that parses the raw LLM output
 */
export const DISTILLATION_PIPELINE = {
  id: 'confederation_distillation_v1',
  stages: [
    // Stage 1: generate candidate questions from a retrieved chunk.
    {
      id: 'question',
      kind: 'question_generator',
      stageName: 'question', // maps to provider type
      template: 'prompts/question_prompt.txt',
      inputs: ['chunk'], // variables: { CONTEXT: chunk.content }
      outputKey: 'questions',
      parser: 'parseQuestions', // from question_core.mjs
    },
    // Stage 2: answer each question using the same chunk as context.
    {
      id: 'generator',
      kind: 'answer_generator',
      stageName: 'generator',
      template: 'prompts/generator_prompt.txt',
      inputs: ['question', 'chunk'],
      outputKey: 'gen',
      parser: 'parseGeneratorOutput',
    },
    // Stage 3: verify the generated answer against the same context.
    {
      id: 'verifier',
      kind: 'verifier',
      stageName: 'verifier',
      template: 'prompts/verifier_prompt.txt',
      inputs: ['question', 'chunk', 'gen'],
      outputKey: 'ver',
      parser: 'parseVerifierOutput',
    },
    // Stage 4: score the (question, answer, verification) triple.
    {
      id: 'reward',
      kind: 'reward_model',
      stageName: 'reward',
      template: 'prompts/reward_prompt.txt',
      inputs: ['question', 'chunk', 'gen', 'ver'],
      outputKey: 'rew',
      parser: 'parseRewardOutput',
    },
  ],
};
src/pipeline/step.mjs CHANGED
@@ -1,6 +1,11 @@
1
  // src/pipeline/step.mjs
2
  import { loadProviderFor } from '../providers/provider.mjs';
3
- import { hybridSearch } from '../retrieval/retrieval.mjs';
 
 
 
 
 
4
  import { runGenerator } from '../generator/generator_core.mjs';
5
  import { runVerifier } from '../verifier/verifier_core.mjs';
6
  import { runReward } from '../reward/reward_core.mjs';
@@ -82,7 +87,21 @@ export async function runPipelineStep({
82
  // Go to ES exactly once
83
  try {
84
  if (verbose) log(` [retrieval] mode=${retrievalMode} k=${k}`);
85
- const hits = await hybridSearch(question, k);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  if (verbose) {
87
  log(` [retrieval] got ${hits.length} chunks from ES`);
88
  }
 
1
  // src/pipeline/step.mjs
2
  import { loadProviderFor } from '../providers/provider.mjs';
3
+ import {
4
+ hybridSearch,
5
+ bm25Search,
6
+ vectorSearch,
7
+ hydeHybrid,
8
+ } from '../retrieval/retrieval.mjs';
9
  import { runGenerator } from '../generator/generator_core.mjs';
10
  import { runVerifier } from '../verifier/verifier_core.mjs';
11
  import { runReward } from '../reward/reward_core.mjs';
 
87
  // Go to ES exactly once
88
  try {
89
  if (verbose) log(` [retrieval] mode=${retrievalMode} k=${k}`);
90
+
91
+ const hits = await (async () => {
92
+ switch (retrievalMode) {
93
+ case 'bm25':
94
+ return bm25Search(question, k);
95
+ case 'vector':
96
+ return vectorSearch(question, k);
97
+ case 'hyde':
98
+ return hydeHybrid(question, k, genProv);
99
+ case 'hybrid':
100
+ default:
101
+ return hybridSearch(question, k);
102
+ }
103
+ })();
104
+
105
  if (verbose) {
106
  log(` [retrieval] got ${hits.length} chunks from ES`);
107
  }
src/question/question_core.mjs CHANGED
@@ -1,126 +1,230 @@
1
  // src/question/question_core.mjs
2
- import fs from 'fs/promises';
3
- import path from 'path';
4
- import { fileURLToPath } from 'url';
5
-
6
- const __filename = fileURLToPath(import.meta.url);
7
- const __dirname = path.dirname(__filename);
8
-
9
- const TEMPLATE_PATH = path.resolve(
10
- __dirname,
11
- '..',
12
- '..',
13
- 'prompts',
14
- 'question_prompt.txt',
15
- );
16
-
17
- let cachedTemplate = null;
18
-
19
- async function loadQuestionTemplate() {
20
- if (cachedTemplate) return cachedTemplate;
21
- cachedTemplate = await fs.readFile(TEMPLATE_PATH, 'utf8');
22
- return cachedTemplate;
23
  }
24
 
25
  /**
26
- * Extract questions using JSON-first, then plain-text fallback.
 
 
 
 
 
 
 
 
27
  *
28
- * @param {string} raw
29
- * @param {number} maxQuestions
30
- * @returns {{ questions: string[], parsed: any }}
31
  */
32
- function parseQuestions(raw, maxQuestions) {
33
- let parsed = null;
34
- let questions = [];
35
 
36
- if (!raw || typeof raw !== 'string') {
37
- return { questions, parsed };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
39
 
40
- // ----- 1) Try JSON -----
41
- try {
42
- const json = JSON.parse(raw);
43
- parsed = json;
44
-
45
- // Case A: { questions: [...] }
46
- if (json && Array.isArray(json.questions)) {
47
- questions = json.questions
48
- .map((q) => String(q).trim())
49
- .filter((q) => q.length > 0);
50
- }
51
- // Case B: root is an array: [ "Q1?", "Q2?" ]
52
- else if (Array.isArray(json)) {
53
- questions = json
54
- .map((q) => String(q).trim())
55
- .filter((q) => q.length > 0);
56
  }
57
- } catch (e) {
58
- parsed = { error: 'invalid_json', message: e?.message };
59
  }
60
 
61
- // ----- 2) Plain-text fallback if we still have no questions -----
62
- if (!questions.length) {
63
- const lines = raw
64
- .split('\n')
65
- .map((l) => l.trim())
66
- // strip bullets / numbering: "1. ", "- ", "* ", "• "
67
- .map((l) => l.replace(/^[-•*()\d.\s]+/, ''))
68
- // keep lines that look like questions
69
- .filter((l) => l.length > 0 && /[??!]$/.test(l));
70
-
71
- questions = lines;
72
  }
73
 
74
- if (questions.length > maxQuestions) {
75
- questions = questions.slice(0, maxQuestions);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  }
77
 
78
- return { questions, parsed };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  }
80
 
81
  /**
82
- * Build prompt and generate questions from a context chunk.
83
- *
84
- * @param {string} contextText - chunk from ES
85
- * @param {object} provider - { generate(prompt) → string }
86
- * @param {object} opts
87
- * - maxQuestions?: number (defaults QUESTION_MAX or 5)
88
  *
89
- * @returns {Promise<{
90
- * raw: string,
91
- * prompt: string,
92
- * questions: string[],
93
- * maxQuestions: number,
94
- * parsed: any
95
- * }>}
96
  */
97
  export async function runQuestionGenerator(
98
  contextText,
99
  provider,
100
- opts = {},
101
  ) {
102
- const maxQuestions =
103
- opts.maxQuestions ?? Number(process.env.QUESTION_MAX || '5');
 
104
 
105
- const template = await loadQuestionTemplate();
 
 
106
 
107
- const prompt = template
108
- .replace(/{{CONTEXT}}/g, contextText)
109
- .replace(/{{MAX_QUESTIONS}}/g, String(maxQuestions));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  const raw = await provider.generate(prompt);
 
112
 
113
- const { questions, parsed } = parseQuestions(raw, maxQuestions);
114
-
115
- return {
116
- raw,
117
- prompt,
118
- questions,
119
- maxQuestions,
120
- parsed,
121
- };
122
  }
123
-
124
- export default {
125
- runQuestionGenerator,
126
- };
 
1
  // src/question/question_core.mjs
2
+ import { preview } from '../pipeline/util.mjs';
3
+
4
+ /**
5
+ * Safely parse JSON. Returns:
6
+ * - a parsed value on success
7
+ * - null on failure (and optionally an error object if needed)
8
+ */
9
+ function tryParseJson(raw) {
10
+ if (!raw || typeof raw !== 'string') return null;
11
+ const trimmed = raw.trim();
12
+
13
+ // Quick sanity: must start with { or [
14
+ if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) {
15
+ return null;
16
+ }
17
+
18
+ try {
19
+ return JSON.parse(trimmed);
20
+ } catch {
21
+ return null;
22
+ }
23
  }
24
 
25
  /**
26
+ * Extract questions from a plain-text response.
27
+ *
28
+ * This is designed to handle real LLM outputs like:
29
+ *
30
+ * What is the primary purpose of practicing presence according to the text?
31
+ * How does Q'uo characterize the physical vehicle's limitations?
32
+ * What is the role of pain and struggle in spiritual growth?
33
+ *
34
+ * as well as numbered/bulleted lists:
35
  *
36
+ * 1. What is ... ?
37
+ * - How does ... ?
38
+ * * Why is ... ?
39
  */
40
+ function extractQuestionsFromText(rawText) {
41
+ if (!rawText || typeof rawText !== 'string') return [];
 
42
 
43
+ // Strip trivial XML/HTML-ish tags like <analysis>, <reasoning>, etc.
44
+ const stripped = rawText.replace(/<\/?[a-zA-Z0-9_:-]+>/g, ' ');
45
+
46
+ const lines = stripped
47
+ .split(/\r?\n/)
48
+ .map((l) => l.trim())
49
+ .filter(Boolean);
50
+
51
+ const questions = [];
52
+
53
+ for (const line of lines) {
54
+ // Must contain a question mark somewhere
55
+ if (!line.includes('?')) continue;
56
+
57
+ // Common prefixes: "1. ", "1) ", "- ", "* "
58
+ const cleaned = line.replace(/^(?:\d+\s*[.)]\s*|[-*]\s*)/, '').trim();
59
+
60
+ // Take up to the first '?' as the end of the question
61
+ const qPart = cleaned.split('?')[0].trim();
62
+ if (!qPart) continue;
63
+
64
+ const q = (qPart + '?').trim();
65
+
66
+ // Filter out tiny or degenerate things
67
+ if (q.length < 10) continue;
68
+ if (!/[a-zA-Z]/.test(q)) continue;
69
+
70
+ questions.push(q);
71
  }
72
 
73
+ // If we didn't find anything line-based, optional fallback:
74
+ // try to split the whole text by '?' and recover sentence-like chunks.
75
+ if (questions.length === 0) {
76
+ const segments = stripped.split('?');
77
+ for (let i = 0; i < segments.length - 1; i++) {
78
+ const seg = segments[i].trim();
79
+ if (!seg) continue;
80
+ // Consider only reasonable-length segments
81
+ if (seg.length < 10) continue;
82
+ const candidate = seg + '?';
83
+ if (!/[a-zA-Z]/.test(candidate)) continue;
84
+ questions.push(candidate);
 
 
 
 
85
  }
 
 
86
  }
87
 
88
+ // Deduplicate while preserving order
89
+ const seen = new Set();
90
+ const deduped = [];
91
+ for (const q of questions) {
92
+ if (seen.has(q)) continue;
93
+ seen.add(q);
94
+ deduped.push(q);
 
 
 
 
95
  }
96
 
97
+ return deduped;
98
+ }
99
+
100
/**
 * Core helper: take raw model string and return:
 *   {
 *     questions: string[],
 *     raw: string,
 *     parsed: any | { error: 'invalid_json' | 'unrecognized_json_shape' | 'empty_response', rawSnippet?: string }
 *   }
 *
 * - Tries JSON first: { questions: [...] } or an array root (strings or
 *   { question } / { question_text } objects).
 * - If JSON fails or has an unrecognized shape, falls back to text-based
 *   extraction via extractQuestionsFromText.
 */
export function parseQuestionResponse(raw, { maxQuestions } = {}) {
  const result = {
    questions: [],
    raw: raw ?? '',
    parsed: null,
  };

  if (!raw || typeof raw !== 'string') {
    result.parsed = { error: 'empty_response' };
    return result;
  }

  // Apply the optional cap; a falsy maxQuestions means "no cap".
  const cap = (qs) => (maxQuestions ? qs.slice(0, maxQuestions) : qs);

  const parsed = tryParseJson(raw);

  if (parsed == null) {
    // Not valid JSON at all — recover questions from the plain text.
    result.parsed = {
      error: 'invalid_json',
      rawSnippet: preview(raw, 200),
    };
    result.questions = cap(extractQuestionsFromText(raw));
    return result;
  }

  result.parsed = parsed;

  // Case 1: { questions: [...] }
  if (
    parsed &&
    typeof parsed === 'object' &&
    Array.isArray(parsed.questions)
  ) {
    const qs = parsed.questions
      .map((q) => (typeof q === 'string' ? q.trim() : ''))
      .filter((q) => q && q.endsWith('?'));
    result.questions = cap(qs);
    return result;
  }

  // Case 2: array root — strings or { question } / { question_text } objects
  if (Array.isArray(parsed)) {
    const toQuestion = (item) => {
      if (typeof item === 'string') return item.trim();
      if (item && typeof item === 'object') {
        if (typeof item.question === 'string') {
          return item.question.trim();
        }
        if (typeof item.question_text === 'string') {
          return item.question_text.trim();
        }
      }
      return '';
    };
    const qs = parsed
      .map(toQuestion)
      .filter((q) => q && q.endsWith('?'));
    result.questions = cap(qs);
    return result;
  }

  // Parsed JSON but not in a recognized shape — fall back to text.
  result.parsed = {
    error: 'unrecognized_json_shape',
    rawSnippet: preview(raw, 200),
  };
  result.questions = cap(extractQuestionsFromText(raw));
  return result;
}
185
 
186
/**
 * High-level helper used by the pipeline:
 *
 *   const { questions, raw, parsed } =
 *     await runQuestionGenerator(contextText, provider, { maxQuestions });
 *
 * Builds a minimal built-in prompt around the context chunk, calls the
 * provider, and parses the response (JSON first, plain-text fallback).
 */
export async function runQuestionGenerator(
  contextText,
  provider,
  { maxQuestions = 5 } = {},
) {
  if (!provider || typeof provider.generate !== 'function') {
    throw new Error('Question provider must implement .generate(prompt)');
  }

  if (!contextText || !contextText.trim()) {
    return { questions: [], raw: '', parsed: { error: 'empty_context' } };
  }

  // Minimal built-in prompt; a richer prompt file could be loaded and
  // {{CONTEXT}}-injected before calling provider.generate instead.
  const promptLines = [
    'You are a question generation assistant.',
    '',
    'You will be given a chunk of spiritual teaching text as CONTEXT.',
    'Generate diverse, high-quality questions that:',
    '- are answerable from the context only,',
    '- require some thinking, not just copying a sentence,',
    '- are phrased as clear, direct questions.',
    '',
    'Return either:',
    '- JSON: { "questions": ["Q1?", "Q2?", ...] }',
    ' or an array of question-like objects/strings; OR',
    '- Plain text with one question per line.',
    '',
    '---',
    'CONTEXT:',
    contextText,
    '---',
  ];
  const prompt = promptLines.join('\n');

  const raw = await provider.generate(prompt);

  return parseQuestionResponse(raw, { maxQuestions });
}
 
 
 
 
src/retrieval/jsonl_chunks.mjs ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // src/retrieval/jsonl_chunks.mjs
2
+ import fs from 'fs/promises';
3
+ import path from 'path';
4
+ import crypto from 'crypto';
5
+ import { PROJECT_ROOT } from '../pipeline/util.mjs';
6
+
7
+ const DEFAULT_RAG_PATH = path.join(
8
+ PROJECT_ROOT,
9
+ 'data',
10
+ 'rag_chunks.jsonl',
11
+ );
12
+
13
// Per-path cache of parsed chunks. Keyed by the resolved absolute path so
// loading two different JSONL files never collides. (Previously a single
// shared variable ignored the filePath argument, so a second call with a
// different path silently returned chunks from the first file.)
const chunkCache = new Map();

/**
 * Parse rag_chunks.jsonl into an array of
 * { id, content, source } records.
 *
 * We are deliberately tolerant about field names so this works
 * with different builders:
 *   - content: obj.content || obj.text || obj.chunk || obj.body || ''
 *   - id:      obj.id || obj.session_key || obj.title || `jsonl-${idx}`
 *   - source:  the whole original object
 *
 * Lines that are not valid JSON are skipped instead of aborting the load.
 *
 * @param {string} [filePath] Absolute path, or path relative to PROJECT_ROOT.
 * @returns {Promise<Array<{id: string, content: string, source: object}>>}
 */
async function loadAllChunksFromJsonl(filePath = DEFAULT_RAG_PATH) {
  const absPath = path.isAbsolute(filePath)
    ? filePath
    : path.join(PROJECT_ROOT, filePath);

  // Fix: cache per resolved path, not one global cache for all paths.
  if (chunkCache.has(absPath)) return chunkCache.get(absPath);

  const raw = await fs.readFile(absPath, 'utf8');
  const lines = raw
    .split('\n')
    .map((l) => l.trim())
    .filter(Boolean);

  const chunks = lines.map((line, idx) => {
    let obj;
    try {
      obj = JSON.parse(line);
    } catch {
      // Skip bad lines instead of exploding
      return null;
    }

    const content =
      obj.content ||
      obj.text ||
      obj.chunk ||
      obj.body ||
      '';

    const id =
      obj.id ||
      obj.session_key ||
      obj.title ||
      `jsonl-${idx}`;

    return {
      id,
      content,
      source: obj,
    };
  });

  const parsed = chunks.filter(Boolean);
  chunkCache.set(absPath, parsed);
  return parsed;
}
71
+
72
/**
 * Sample up to `k` distinct elements from `arr`, without replacement,
 * using crypto.randomInt (a CSPRNG) as the randomness source.
 * `k == null` or `k >= arr.length` ⇒ returns a shallow copy of the
 * whole array in its original order.
 */
function sampleWithoutReplacement(arr, k) {
  const total = arr.length;
  if (k == null || k >= total) return arr.slice();

  const pickedIdx = new Set();
  const picked = [];

  // Rejection sampling: draw indices until k distinct ones are found
  // (or every index has been seen, which bounds the loop).
  while (picked.length < k && pickedIdx.size < total) {
    const candidate = crypto.randomInt(0, total);
    if (!pickedIdx.has(candidate)) {
      pickedIdx.add(candidate);
      picked.push(arr[candidate]);
    }
  }

  return picked;
}
92
+
93
/**
 * Public API: load RAG chunks for pipeline seeding.
 *
 * Resolution order for the source file: explicit `filePath` argument,
 * then the RAG_CHUNKS_PATH env var, then data/rag_chunks.jsonl.
 *
 * @param {number|undefined} limit Max chunks to return
 * @param {string|undefined} filePath Override path (defaults to env or data/rag_chunks.jsonl)
 * @returns {Promise<Array<{id, content, source}>>}
 */
export async function loadRagChunks(limit, filePath) {
  const resolvedPath =
    filePath || process.env.RAG_CHUNKS_PATH || DEFAULT_RAG_PATH;

  const allChunks = await loadAllChunksFromJsonl(resolvedPath);
  if (!allChunks || allChunks.length === 0) return [];

  const sampleSize = limit ?? allChunks.length;
  return sampleWithoutReplacement(allChunks, sampleSize);
}
src/retrieval/retrieval.mjs CHANGED
@@ -143,8 +143,14 @@ Do NOT include JSON or formatting.
143
  // ----------------------------------------
144
  // Chunk sampling from ES (for QG pipeline)
145
  // ----------------------------------------
146
- export async function fetchChunksFromIndex(limit = 10) {
147
- const size = Number.isFinite(limit) ? limit : 10;
 
 
 
 
 
 
148
 
149
  const res = await client.search({
150
  index: ES_INDEX,
@@ -176,4 +182,3 @@ export default {
176
  hydeHybrid,
177
  fetchChunksFromIndex,
178
  };
179
-
 
143
  // ----------------------------------------
144
  // Chunk sampling from ES (for QG pipeline)
145
  // ----------------------------------------
146
+ const DEFAULT_CHUNK_LIMIT = Number(
147
+ process.env.PIPELINE_CHUNK_LIMIT ||
148
+ process.env.RETRIEVAL_CHUNK_LIMIT ||
149
+ '100',
150
+ );
151
+
152
+ export async function fetchChunksFromIndex(limit) {
153
+ const size = Number.isFinite(limit) ? limit : DEFAULT_CHUNK_LIMIT;
154
 
155
  const res = await client.search({
156
  index: ES_INDEX,
 
182
  hydeHybrid,
183
  fetchChunksFromIndex,
184
  };
 
tests/llm_stage.test.mjs ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // tests/llm_stage.test.mjs
2
+ import { describe, it, expect, vi } from 'vitest';
3
+ import { runLLMStage } from '../src/core/llm_stage.mjs';
4
+
5
describe('runLLMStage (core LLM wrapper)', () => {
  it('fills the template, calls provider.generate, and returns raw + prompt', async () => {
    // Fake provider echoes a prefix of the prompt so we can later verify
    // that `raw` is exactly what the provider produced.
    const fakeProvider = {
      generate: vi.fn(async (prompt) => `ECHO::${prompt.slice(0, 40)}`),
    };

    const question = 'What is love?';
    const context = 'Love is the field in which all beings move.';

    const { raw, prompt } = await runLLMStage({
      stage: 'generator',
      // Use an existing prompt template so we’re exercising real I/O
      template: 'prompts/generator_prompt.txt',
      vars: {
        QUESTION: question,
        CONTEXT: context,
      },
      provider: fakeProvider, // avoid hitting Ollama in tests
      verbose: false,
      logger: { log: () => {} },
    });

    // Provider was called exactly once
    expect(fakeProvider.generate).toHaveBeenCalledTimes(1);

    const calledPrompt = fakeProvider.generate.mock.calls[0][0];

    // The rendered prompt should contain our substituted vars
    expect(calledPrompt).toContain(question);
    expect(calledPrompt).toContain(context);

    // Returned prompt should match what we sent to the provider
    expect(prompt).toBe(calledPrompt);

    // raw should be whatever the provider returned
    expect(raw).toBe(`ECHO::${calledPrompt.slice(0, 40)}`);
  });

  it('throws if provider has no generate() method', async () => {
    // Matches loosely on "generate" so the exact error wording can evolve.
    await expect(
      runLLMStage({
        stage: 'generator',
        template: 'prompts/generator_prompt.txt',
        vars: {
          QUESTION: 'Test?',
          CONTEXT: 'Some context',
        },
        provider: {}, // missing generate()
        verbose: false,
        logger: { log: () => {} },
      }),
    ).rejects.toThrow(/generate/i);
  });
});
tests/ollama_provider_reasoning.test.mjs ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // tests/ollama_provider_reasoning.test.mjs
2
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
3
+
4
// Saved globals so each test can mutate fetch/env freely and restore after.
let originalFetch;
let originalEnv;

beforeEach(() => {
  originalFetch = globalThis.fetch;
  // shallow clone is fine for test mutations
  originalEnv = { ...process.env };
});

afterEach(() => {
  globalThis.fetch = originalFetch;
  process.env = originalEnv;
});

describe('OllamaProvider reasoning flag', () => {
  it('adds options.reasoning=true when OLLAMA_REASONING is enabled', async () => {
    // Arrange env *before* importing the module (ENABLE_REASONING is computed at import time)
    process.env.OLLAMA_REASONING = 'true';
    process.env.OLLAMA_URL = 'http://ollama.local';
    process.env.GENERATOR_MODEL = 'test-model';

    // Stub fetch so no real HTTP call is made; record what the provider sends.
    const fetchMock = vi.fn(async () => ({
      ok: true,
      json: async () => ({ response: 'ok' }),
    }));
    globalThis.fetch = fetchMock;

    // Dynamic import so env is read fresh for this test
    const { OllamaProvider } = await import(
      '../src/providers/ollama_provider.mjs'
    );

    const provider = new OllamaProvider({ model: 'test-model' });

    // Act
    await provider.generate('Hello, world');

    // Assert
    expect(fetchMock).toHaveBeenCalledTimes(1);
    const [url, options] = fetchMock.mock.calls[0];

    expect(url).toMatch(/\/api\/generate$/);

    const body = JSON.parse(options.body);

    // The reasoning flag must land inside Ollama's `options` payload.
    expect(body).toHaveProperty('model', 'test-model');
    expect(body).toHaveProperty('prompt');
    expect(body).toHaveProperty('options');
    expect(body.options).toHaveProperty('reasoning', true);
  });
});
tests/pipeline_behaviour.test.mjs ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
2
+ import os from 'os';
3
+ import path from 'path';
4
+
5
describe('runPipelineStep retrieval modes', () => {
  beforeEach(() => {
    vi.resetModules();
  });

  afterEach(() => {
    vi.restoreAllMocks();
    vi.unmock('../src/retrieval/retrieval.mjs');
    vi.unmock('../src/providers/provider.mjs');
    vi.unmock('../src/generator/generator_core.mjs');
    vi.unmock('../src/verifier/verifier_core.mjs');
    vi.unmock('../src/reward/reward_core.mjs');
  });

  // Mocks every retrieval strategy plus the downstream stages so the step
  // can run with no network/model access; returns the spies and a freshly
  // imported runPipelineStep bound to those mocks.
  async function setupRetrievalMocks() {
    const hybridSearch = vi.fn(async () => [{ content: 'hybrid' }]);
    const bm25Search = vi.fn(async () => [{ content: 'bm25' }]);
    const vectorSearch = vi.fn(async () => [{ content: 'vector' }]);
    const hydeHybrid = vi.fn(async () => [{ content: 'hyde' }]);

    vi.doMock('../src/retrieval/retrieval.mjs', () => ({
      hybridSearch,
      bm25Search,
      vectorSearch,
      hydeHybrid,
    }));

    vi.doMock('../src/providers/provider.mjs', () => ({
      loadProviderFor: () => ({ generate: async () => 'ok' }),
    }));

    vi.doMock('../src/generator/generator_core.mjs', () => ({
      runGenerator: vi.fn(async () => ({ answer: 'a', raw: 'a' })),
    }));
    vi.doMock('../src/verifier/verifier_core.mjs', () => ({
      runVerifier: vi.fn(async () => ({ ok: true, raw: 'yes' })),
    }));
    vi.doMock('../src/reward/reward_core.mjs', () => ({
      runReward: vi.fn(async () => ({ ok: true, score: 1, raw: '1' })),
    }));

    const mod = await import('../src/pipeline/step.mjs');
    return { hybridSearch, bm25Search, vectorSearch, hydeHybrid, runPipelineStep: mod.runPipelineStep };
  }

  it('uses bm25 when retrievalMode=bm25', async () => {
    const mocks = await setupRetrievalMocks();

    await mocks.runPipelineStep({ question: 'q', retrievalMode: 'bm25' });

    // Only the selected strategy fires; all others stay untouched.
    expect(mocks.bm25Search).toHaveBeenCalledTimes(1);
    expect(mocks.vectorSearch).not.toHaveBeenCalled();
    expect(mocks.hybridSearch).not.toHaveBeenCalled();
    expect(mocks.hydeHybrid).not.toHaveBeenCalled();
  });

  it('uses hyde when retrievalMode=hyde', async () => {
    const mocks = await setupRetrievalMocks();

    await mocks.runPipelineStep({ question: 'q', retrievalMode: 'hyde' });

    expect(mocks.hydeHybrid).toHaveBeenCalledTimes(1);
    expect(mocks.bm25Search).not.toHaveBeenCalled();
    expect(mocks.vectorSearch).not.toHaveBeenCalled();
    expect(mocks.hybridSearch).not.toHaveBeenCalled();
  });
});
72
+
73
describe('runPipelineBatch question cap', () => {
  beforeEach(() => {
    vi.resetModules();
    // Force the JSONL chunk source so the mocked loadRagChunks is used.
    process.env.PIPELINE_CHUNK_SOURCE = 'jsonl';
  });

  afterEach(() => {
    delete process.env.PIPELINE_CHUNK_SOURCE;
    vi.restoreAllMocks();
    vi.unmock('../src/providers/provider.mjs');
    vi.unmock('../src/retrieval/jsonl_chunks.mjs');
    vi.unmock('../src/pipeline/step.mjs');
  });

  it('stops once the question limit is reached in question-first mode', async () => {
    const questionsPerChunk = 'Q1?\nQ2?\nQ3?';

    // Mock question provider + retrieval
    vi.doMock('../src/providers/provider.mjs', () => ({
      loadProviderFor: (stage) =>
        stage === 'question'
          ? { generate: async () => questionsPerChunk }
          : { generate: async () => '' },
    }));

    // Force question generator to return a fixed list to avoid parser variability
    const runQuestionGenerator = vi.fn(async () => ({
      questions: ['Q1?', 'Q2?', 'Q3?'],
      raw: questionsPerChunk,
      parsed: null,
    }));
    vi.doMock('../src/question/question_core.mjs', () => ({ runQuestionGenerator }));

    vi.doMock('../src/retrieval/jsonl_chunks.mjs', () => ({
      loadRagChunks: vi.fn(async (limit) =>
        Array.from({ length: limit ?? 3 }, (_, i) => ({
          id: `c-${i}`,
          content: `chunk ${i}`,
        })),
      ),
    }));

    // Stub runPipelineStep to avoid model calls; count invocations
    const runPipelineStep = vi.fn(async () => ({
      status: 'accepted',
      context: [],
      gen: { answer: 'a' },
      ver: { ok: true },
      rew: { ok: true },
    }));
    vi.doMock('../src/pipeline/step.mjs', () => ({ runPipelineStep }));

    // Write gold output to a throwaway temp file, not the repo's gold/.
    const outPath = path.join(os.tmpdir(), `test-gold-${Date.now()}.jsonl`);
    const { runPipelineBatch } = await import('../src/pipeline/batch.mjs');
    const result = await runPipelineBatch({
      limit: 2, // question cap
      chunkLimit: 3,
      seedMode: 'question-first',
      outPath,
      verbose: false,
      logger: { log() {}, error() {} },
    });

    // 3 chunks × 3 questions are available, but the cap of 2 must win.
    expect(result.processed).toBe(2);
    expect(runPipelineStep).toHaveBeenCalledTimes(2);
    expect(result.accepted).toBe(2);
  });
});
141
+
142
describe('fetchChunksFromIndex default size', () => {
  beforeEach(() => {
    vi.resetModules();
    vi.unmock('../src/retrieval/retrieval.mjs');
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('uses a larger default than 10 when no limit is provided', async () => {
    let capturedOpts = null;

    vi.unmock('../src/retrieval/retrieval.mjs');

    // Capture the search options handed to the Elasticsearch client.
    vi.doMock('@elastic/elasticsearch', () => ({
      Client: vi.fn().mockImplementation(() => ({
        search: vi.fn(async (opts) => {
          capturedOpts = opts;
          return { hits: { hits: [] } };
        }),
      })),
    }));

    // avoid accidental network fetch calls in this test
    vi.doMock('node-fetch', () => ({
      default: vi.fn(async () => ({ ok: true, json: async () => ({ embedding: [] }) })),
    }));

    // Re-export the real module so the fetch function exists, but with mocked deps above.
    vi.doMock('../src/retrieval/retrieval.mjs', async (importOriginal) => {
      const actual = await importOriginal();
      return { ...actual };
    });

    const { fetchChunksFromIndex } = await import('../src/retrieval/retrieval.mjs');
    await fetchChunksFromIndex();

    // Pins the new env-driven default (falls back to 100 when neither
    // PIPELINE_CHUNK_LIMIT nor RETRIEVAL_CHUNK_LIMIT is set).
    expect(capturedOpts?.size).toBe(100);
  });
});