htaf commited on
Commit
496163b
·
1 Parent(s): 9af7af9

all tests pass

Browse files
package.json CHANGED
@@ -3,7 +3,8 @@
3
  "version": "1.0.0",
4
  "type": "module",
5
  "scripts": {
6
- "test": "vitest --run"
 
7
  },
8
  "devDependencies": {
9
  "vitest": "^1.6.0"
 
3
  "version": "1.0.0",
4
  "type": "module",
5
  "scripts": {
6
+ "test": "vitest --run",
7
+ "pipeline": "node ./src/pipeline/pipeline_cli.js"
8
  },
9
  "devDependencies": {
10
  "vitest": "^1.6.0"
src/generator/generator_core.mjs CHANGED
@@ -1,4 +1,4 @@
1
- // generat../generator/generator_core.mjs
2
  import fs from 'fs/promises';
3
  import path from 'path';
4
 
 
1
+ // src/generator/generator_core.mjs
2
  import fs from 'fs/promises';
3
  import path from 'path';
4
 
src/pipeline/pipeline.mjs ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // src/pipeline/pipeline.mjs
2
+ import fs from 'fs/promises';
3
+ import path from 'path';
4
+ import { fileURLToPath } from 'url';
5
+
6
+ import { loadProvider } from '../providers/provider.mjs';
7
+ import { hybridSearch } from '../retrieval/retrieval.mjs';
8
+ import { runGenerator } from '../generator/generator_core.mjs';
9
+ import { runVerifier } from '../verifier/verifier_core.mjs';
10
+ import { runReward } from '../reward/reward_core.mjs';
11
+
12
+ const __filename = fileURLToPath(import.meta.url);
13
+ const __dirname = path.dirname(__filename);
14
+
15
+ const PROJECT_ROOT = path.join(__dirname, '..', '..');
16
+
17
+ const DEFAULT_SEEDS_PATH = path.join(
18
+ PROJECT_ROOT,
19
+ 'test_samples',
20
+ 'seed_questions.jsonl',
21
+ );
22
+
23
+ const DEFAULT_OUT_PATH = path.join(
24
+ PROJECT_ROOT,
25
+ 'gold',
26
+ 'pipeline_gold.jsonl',
27
+ );
28
+
29
+ /**
30
+ * Load JSONL seed questions.
31
+ * Each line may be:
32
+ * - { "question": "..." }
33
+ * - { "prompt": "..." }
34
+ * - { "text": "..." }
35
+ * - or just a raw string
36
+ */
37
+ export async function loadSeedQuestions(seedsPath = DEFAULT_SEEDS_PATH) {
38
+ const txt = await fs.readFile(seedsPath, 'utf8');
39
+ return txt
40
+ .split('\n')
41
+ .map((l) => l.trim())
42
+ .filter(Boolean)
43
+ .map((line) => JSON.parse(line));
44
+ }
45
+
46
+ /**
47
+ * Extract a question string from a seed record.
48
+ */
49
+ export function seedToQuestion(seed) {
50
+ if (typeof seed === 'string') return seed;
51
+ return seed.question || seed.prompt || seed.text || '';
52
+ }
53
+
54
+ /**
55
+ * Run a single pipeline step for one question.
56
+ *
57
+ * Orchestrates:
58
+ * retrieval → generator → verifier → reward
59
+ *
60
+ * Returns a structured result:
61
+ * {
62
+ * status: 'accepted' | 'generator_failed' | 'verifier_rejected' | 'reward_rejected',
63
+ * question,
64
+ * context,
65
+ * gen,
66
+ * ver,
67
+ * rew,
68
+ * }
69
+ */
70
+ export async function runPipelineStep({
71
+ question,
72
+ provider,
73
+ retrievalMode = process.env.RETRIEVAL_MODE || 'hybrid',
74
+ k = Number(process.env.RETRIEVAL_K || '6'),
75
+ }) {
76
+ if (!question || !question.trim()) {
77
+ return { status: 'invalid_question', question };
78
+ }
79
+
80
+ const prov = provider || loadProvider();
81
+
82
+ // --- Retrieval ---
83
+ let context = [];
84
+ if (retrievalMode === 'hybrid' || !retrievalMode) {
85
+ context = await hybridSearch(question, k);
86
+ } else {
87
+ // for now, other modes can be added later
88
+ context = await hybridSearch(question, k);
89
+ }
90
+
91
+ // --- Generator ---
92
+ const gen = await runGenerator(question, context, prov);
93
+ if (!gen || gen.ok === false) {
94
+ return { status: 'generator_failed', question, context, gen };
95
+ }
96
+
97
+ // --- Verifier ---
98
+ const ver = await runVerifier(gen, prov);
99
+ if (!ver || ver.ok === false) {
100
+ return { status: 'verifier_rejected', question, context, gen, ver };
101
+ }
102
+
103
+ // --- Reward ---
104
+ const rew = await runReward(gen, prov);
105
+ if (!rew || rew.ok === false) {
106
+ return { status: 'reward_rejected', question, context, gen, ver, rew };
107
+ }
108
+
109
+ return {
110
+ status: 'accepted',
111
+ question,
112
+ context,
113
+ gen,
114
+ ver,
115
+ rew,
116
+ };
117
+ }
118
+
119
+ /**
120
+ * Append a single accepted record to a JSONL file.
121
+ */
122
+ export async function appendGoldRecord(outPath, record) {
123
+ const line = JSON.stringify(record) + '\n';
124
+ await fs.mkdir(path.dirname(outPath), { recursive: true });
125
+ await fs.appendFile(outPath, line, 'utf8');
126
+ }
127
+
128
+ /**
129
+ * Run the pipeline over a batch of seed questions and write accepted
130
+ * samples to a JSONL file.
131
+ *
132
+ * Options:
133
+ * - seedsPath: JSONL of seeds (defaults to test_samples/seed_questions.jsonl)
134
+ * - outPath: output JSONL (defaults to gold/pipeline_gold.jsonl)
135
+ * - limit: max number of seeds to process
136
+ *
137
+ * Returns:
138
+ * { total, processed, accepted, outPath }
139
+ */
140
+ export async function runPipelineBatch({
141
+ seedsPath = DEFAULT_SEEDS_PATH,
142
+ outPath = DEFAULT_OUT_PATH,
143
+ limit,
144
+ } = {}) {
145
+ const provider = loadProvider();
146
+ const seeds = await loadSeedQuestions(seedsPath);
147
+ const max = typeof limit === 'number' ? limit : seeds.length;
148
+
149
+ let processed = 0;
150
+ let accepted = 0;
151
+
152
+ for (const seed of seeds.slice(0, max)) {
153
+ const question = seedToQuestion(seed);
154
+ const result = await runPipelineStep({ question, provider });
155
+
156
+ processed += 1;
157
+
158
+ if (result.status === 'accepted') {
159
+ const record = {
160
+ question,
161
+ context: result.context,
162
+ sample: result.gen, // treat generator result as opaque sample
163
+ verifier: result.ver,
164
+ reward: result.rew,
165
+ };
166
+
167
+ await appendGoldRecord(outPath, record);
168
+ accepted += 1;
169
+ }
170
+ }
171
+
172
+ return {
173
+ total: seeds.length,
174
+ processed,
175
+ accepted,
176
+ outPath,
177
+ };
178
+ }
src/pipeline/pipeline_cli.js ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+ // src/pipeline/pipeline_cli.js
3
+
4
+ import { fileURLToPath } from 'url';
5
+ import path from 'path';
6
+ import fs from 'fs/promises';
7
+
8
+ import { loadProvider } from '../providers/provider.mjs';
9
+ import {
10
+ loadSeedQuestions,
11
+ seedToQuestion,
12
+ runPipelineStep,
13
+ appendGoldRecord,
14
+ runPipelineBatch
15
+ } from './pipeline.mjs';
16
+
17
+ //
18
+ // ---- CLI Helpers ----
19
+ //
20
+ const __filename = fileURLToPath(import.meta.url);
21
+ const __dirname = path.dirname(__filename);
22
+
23
+ function parseArgs() {
24
+ const args = process.argv.slice(2);
25
+ const out = {};
26
+
27
+ for (let i = 0; i < args.length; i++) {
28
+ const a = args[i];
29
+
30
+ if (a === '--seeds' || a === '-s') {
31
+ out.seeds = args[++i];
32
+ } else if (a === '--out' || a === '-o') {
33
+ out.out = args[++i];
34
+ } else if (a === '--limit' || a === '-l') {
35
+ out.limit = Number(args[++i]);
36
+ } else if (a === '--provider' || a === '-p') {
37
+ out.provider = args[++i];
38
+ } else if (a === '--help' || a === '-h') {
39
+ out.help = true;
40
+ }
41
+ }
42
+
43
+ return out;
44
+ }
45
+
46
+ function showHelp() {
47
+ console.log(`
48
+ distill-pipeline — Full Distillation Cycle Runner
49
+
50
+ Usage:
51
+ node pipeline_cli.js [options]
52
+
53
+ Options:
54
+ --seeds <file> Path to seed JSONL file (default: test_samples/seed_questions.jsonl)
55
+ --out <file> Path to output JSONL (default: gold/pipeline_gold.jsonl)
56
+ --limit <n> Max number of seeds to process
57
+ --provider <name> Provider override: ollama | openai | http
58
+ -h, --help Show this help
59
+
60
+ Examples:
61
+ node pipeline_cli.js --limit 25
62
+ node pipeline_cli.js -s custom_seeds.jsonl -o gold/round1.jsonl
63
+ `);
64
+ }
65
+
66
+ //
67
+ // ---- MAIN ----
68
+ //
69
+ async function main() {
70
+ const args = parseArgs();
71
+
72
+ if (args.help) {
73
+ showHelp();
74
+ process.exit(0);
75
+ }
76
+
77
+ // ---- Resolve paths ----
78
+ const seedsPath = args.seeds
79
+ ? path.resolve(args.seeds)
80
+ : path.resolve(__dirname, '../../test_samples/seed_questions.jsonl');
81
+
82
+ const outPath = args.out
83
+ ? path.resolve(args.out)
84
+ : path.resolve(__dirname, '../../gold/pipeline_gold.jsonl');
85
+
86
+ const limit = args.limit ?? undefined;
87
+
88
+ // ---- Provider override ----
89
+ if (args.provider) {
90
+ process.env.PROVIDER_TYPE = args.provider;
91
+ }
92
+
93
+ // ---- Announce run ----
94
+ console.log(`\n🚀 Starting Distillation Pipeline`);
95
+ console.log(` Seeds: ${seedsPath}`);
96
+ console.log(` Output: ${outPath}`);
97
+ console.log(` Provider: ${process.env.PROVIDER_TYPE || 'ollama (default)'}`);
98
+ console.log(` Limit: ${limit ?? 'none'}\n`);
99
+
100
+ // ---- Run batch ----
101
+ const result = await runPipelineBatch({
102
+ seedsPath,
103
+ outPath,
104
+ limit,
105
+ });
106
+
107
+ console.log(`\n🎉 Pipeline completed`);
108
+ console.log(` Total Seeds: ${result.total}`);
109
+ console.log(` Processed: ${result.processed}`);
110
+ console.log(` Accepted: ${result.accepted}`);
111
+ console.log(` Written to: ${result.outPath}\n`);
112
+ }
113
+
114
+ main().catch((err) => {
115
+ console.error(`❌ Pipeline error:`, err);
116
+ process.exit(1);
117
+ });
src/providers/http_provider.mjs CHANGED
@@ -1,33 +1,26 @@
 
1
  import { BaseProvider } from './base.mjs';
2
 
3
  export class HttpProvider extends BaseProvider {
4
  constructor() {
5
  super();
6
- this.url = process.env.HTTP_PROVIDER_URL || "http://localhost:8000/generate";
7
- this.model = process.env.GENERATOR_MODEL || "qwen2.5-7b-instruct";
8
  }
9
 
10
- async generate(prompt, opts = {}) {
11
- const body = {
12
- model: this.model,
13
- prompt,
14
- ...opts
15
- };
16
-
17
  const resp = await fetch(this.url, {
18
- method: "POST",
19
- headers: { "Content-Type": "application/json" },
20
- body: JSON.stringify(body)
21
  });
22
 
 
 
 
 
23
  const data = await resp.json();
24
 
25
- return (
26
- data.output ||
27
- data.response ||
28
- data.text ||
29
- data.result ||
30
- ""
31
- );
32
  }
33
  }
 
1
+ // src/providers/http_provider.mjs
2
  import { BaseProvider } from './base.mjs';
3
 
4
  export class HttpProvider extends BaseProvider {
5
  constructor() {
6
  super();
7
+ this.url = process.env.HTTP_PROVIDER_URL;
 
8
  }
9
 
10
+ async generate(prompt) {
 
 
 
 
 
 
11
  const resp = await fetch(this.url, {
12
+ method: 'POST',
13
+ headers: { 'Content-Type': 'application/json' },
14
+ body: JSON.stringify({ prompt })
15
  });
16
 
17
+ if (!resp.ok) {
18
+ throw new Error(`HttpProvider error: ${resp.status}`);
19
+ }
20
+
21
  const data = await resp.json();
22
 
23
+ // Expect: { output: "<json string>" }
24
+ return data.output || data.response || '';
 
 
 
 
 
25
  }
26
  }
src/providers/ollama_provider.mjs CHANGED
@@ -1,31 +1,31 @@
 
1
  import { BaseProvider } from './base.mjs';
2
 
3
  export class OllamaProvider extends BaseProvider {
4
  constructor() {
5
  super();
6
- this.url = process.env.OLLAMA_URL || "http://localhost:11434/api/generate";
7
- this.model = process.env.GENERATOR_MODEL || "qwen2.5-7b-instruct";
8
  }
9
 
10
- async generate(prompt, opts = {}) {
11
- const body = {
12
- model: this.model,
13
- prompt,
14
- stream: false,
15
- ...opts
16
- };
17
-
18
  const resp = await fetch(this.url, {
19
- method: "POST",
20
- headers: { "Content-Type": "application/json" },
21
- body: JSON.stringify(body)
 
 
 
 
22
  });
23
 
24
  if (!resp.ok) {
25
- throw new Error(`Ollama error ${resp.status}`);
26
  }
27
 
28
  const data = await resp.json();
29
- return data.response || data.output || data;
 
 
30
  }
31
  }
 
1
+ // src/providers/ollama_provider.mjs
2
  import { BaseProvider } from './base.mjs';
3
 
4
  export class OllamaProvider extends BaseProvider {
5
  constructor() {
6
  super();
7
+ this.url = process.env.OLLAMA_URL || 'http://localhost:11434/api/generate';
8
+ this.model = process.env.GENERATOR_MODEL || 'qwen3-vl:8b-thinking';
9
  }
10
 
11
+ async generate(prompt) {
 
 
 
 
 
 
 
12
  const resp = await fetch(this.url, {
13
+ method: 'POST',
14
+ headers: { 'Content-Type': 'application/json' },
15
+ body: JSON.stringify({
16
+ model: this.model,
17
+ prompt,
18
+ stream: false
19
+ })
20
  });
21
 
22
  if (!resp.ok) {
23
+ throw new Error(`OllamaProvider error: ${resp.status}`);
24
  }
25
 
26
  const data = await resp.json();
27
+
28
+ // Must return *raw string*, because generator/verifier/reward expect text they parse with JSON.parse()
29
+ return data.response;
30
  }
31
  }
src/providers/openai_provider.mjs CHANGED
@@ -1,30 +1,31 @@
 
1
  import { BaseProvider } from './base.mjs';
2
 
3
  export class OpenAIProvider extends BaseProvider {
4
  constructor() {
5
  super();
6
- this.key = process.env.OPENAI_API_KEY;
7
- this.model = process.env.GENERATOR_MODEL || "gpt-4o-mini";
8
- this.url = "https://api.openai.com/v1/chat/completions";
9
  }
10
 
11
- async generate(prompt, opts = {}) {
12
- const body = {
13
- model: this.model,
14
- messages: [{ role: "user", content: prompt }],
15
- temperature: opts.temperature ?? 0.7
16
- };
17
-
18
- const resp = await fetch(this.url, {
19
- method: "POST",
20
  headers: {
21
- "Authorization": `Bearer ${this.key}`,
22
- "Content-Type": "application/json"
23
  },
24
- body: JSON.stringify(body)
 
 
 
25
  });
26
 
 
 
 
 
27
  const data = await resp.json();
28
- return data.choices?.[0]?.message?.content ?? "";
29
  }
30
  }
 
1
+ // src/providers/openai_provider.mjs
2
  import { BaseProvider } from './base.mjs';
3
 
4
  export class OpenAIProvider extends BaseProvider {
5
  constructor() {
6
  super();
7
+ this.apiKey = process.env.OPENAI_API_KEY;
8
+ this.model = process.env.GENERATOR_MODEL || 'gpt-4o-mini';
 
9
  }
10
 
11
+ async generate(prompt) {
12
+ const resp = await fetch('https://api.openai.com/v1/chat/completions', {
13
+ method: 'POST',
 
 
 
 
 
 
14
  headers: {
15
+ 'Authorization': `Bearer ${this.apiKey}`,
16
+ 'Content-Type': 'application/json'
17
  },
18
+ body: JSON.stringify({
19
+ model: this.model,
20
+ messages: [{ role: 'user', content: prompt }]
21
+ })
22
  });
23
 
24
+ if (!resp.ok) {
25
+ throw new Error(`OpenAIProvider error: ${resp.status}`);
26
+ }
27
+
28
  const data = await resp.json();
29
+ return data.choices[0].message.content;
30
  }
31
  }
src/providers/provider.mjs CHANGED
@@ -3,6 +3,14 @@ import { OllamaProvider } from './ollama_provider.mjs';
3
  import { OpenAIProvider } from './openai_provider.mjs';
4
  import { HttpProvider } from './http_provider.mjs';
5
 
 
 
 
 
 
 
 
 
6
  export function loadProvider() {
7
  const type = (process.env.PROVIDER_TYPE || "ollama").toLowerCase();
8
 
 
3
  import { OpenAIProvider } from './openai_provider.mjs';
4
  import { HttpProvider } from './http_provider.mjs';
5
 
6
+ // Add this at bottom of provider.mjs temporarily:
7
+ export async function debugLoad() {
8
+ const p = loadProvider();
9
+ console.log('Loaded provider:', p);
10
+ console.log('typeof generate:', typeof p.generate);
11
+ }
12
+
13
+
14
  export function loadProvider() {
15
  const type = (process.env.PROVIDER_TYPE || "ollama").toLowerCase();
16
 
src/reward/reward_core.mjs CHANGED
@@ -1,4 +1,4 @@
1
- // rewa../reward/reward_core.mjs
2
  import fs from 'fs/promises';
3
  import path from 'path';
4
 
 
1
+ // src/reward/reward_core.mjs
2
  import fs from 'fs/promises';
3
  import path from 'path';
4
 
src/verifier/verifier_core.mjs CHANGED
@@ -1,4 +1,4 @@
1
- // verifi../verifier/verifier_core.mjs
2
  import fs from 'fs/promises';
3
  import path from 'path';
4
 
 
1
+ // src/verifier/verifier_core.mjs
2
  import fs from 'fs/promises';
3
  import path from 'path';
4
 
tests/tests.pipeline.mock.test.mjs ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // tests/pipeline.mock.test.mjs
2
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
3
+ import fs from 'fs/promises';
4
+ import path from 'path';
5
+ import os from 'os';
6
+ import { fileURLToPath } from 'url';
7
+
8
+ // --- Mocks ---
9
+ // retrieval
10
+ vi.mock('../src/retrieval/retrieval.mjs', () => {
11
+ return {
12
+ hybridSearch: vi.fn(async (query, k) => {
13
+ return [
14
+ { id: 'c1', content: `ctx for ${query}`, score: 1.0 },
15
+ { id: 'c2', content: `more ctx for ${query}`, score: 0.9 },
16
+ ].slice(0, k);
17
+ }),
18
+ };
19
+ });
20
+
21
+ // provider
22
+ const mockProvider = {
23
+ generate: vi.fn(),
24
+ };
25
+
26
+ vi.mock('../src/providers/provider.mjs', () => {
27
+ return {
28
+ loadProvider: vi.fn(() => mockProvider),
29
+ };
30
+ });
31
+
32
+ // generator
33
+ const mockGenResult = { ok: true, sample: { answer: 'A' } };
34
+ vi.mock('../src/generator/generator_core.mjs', () => {
35
+ return {
36
+ runGenerator: vi.fn(async () => mockGenResult),
37
+ };
38
+ });
39
+
40
+ // verifier
41
+ const mockVerResult = { ok: true };
42
+ vi.mock('../src/verifier/verifier_core.mjs', () => {
43
+ return {
44
+ runVerifier: vi.fn(async () => mockVerResult),
45
+ };
46
+ });
47
+
48
+ // reward
49
+ const mockRewResult = { ok: true, score: 0.9 };
50
+ vi.mock('../src/reward/reward_core.mjs', () => {
51
+ return {
52
+ runReward: vi.fn(async () => mockRewResult),
53
+ };
54
+ });
55
+
56
+ // --- Import after mocks ---
57
+ import {
58
+ runPipelineStep,
59
+ runPipelineBatch,
60
+ loadSeedQuestions,
61
+ seedToQuestion,
62
+ appendGoldRecord,
63
+ } from '../src/pipeline/pipeline.mjs';
64
+
65
+ describe('pipeline.mjs', () => {
66
+ beforeEach(() => {
67
+ vi.clearAllMocks();
68
+ });
69
+
70
+ it('seedToQuestion extracts question from various shapes', () => {
71
+ expect(seedToQuestion('raw')).toBe('raw');
72
+ expect(seedToQuestion({ question: 'q1' })).toBe('q1');
73
+ expect(seedToQuestion({ prompt: 'q2' })).toBe('q2');
74
+ expect(seedToQuestion({ text: 'q3' })).toBe('q3');
75
+ });
76
+
77
+ it('runPipelineStep returns accepted with mocked deps', async () => {
78
+ const res = await runPipelineStep({
79
+ question: 'What is service to others?',
80
+ });
81
+
82
+ expect(res.status).toBe('accepted');
83
+ expect(res.question).toBe('What is service to others?');
84
+ expect(Array.isArray(res.context)).toBe(true);
85
+ expect(res.gen).toEqual(mockGenResult);
86
+ expect(res.ver).toEqual(mockVerResult);
87
+ expect(res.rew).toEqual(mockRewResult);
88
+ });
89
+
90
+ it('runPipelineBatch processes seeds and writes JSONL', async () => {
91
+ const tmpDir = await fs.mkdtemp(
92
+ path.join(os.tmpdir(), 'distill-pipeline-test-'),
93
+ );
94
+
95
+ const seedsPath = path.join(tmpDir, 'seeds.jsonl');
96
+ const outPath = path.join(tmpDir, 'out.jsonl');
97
+
98
+ const seedsJsonl = [
99
+ JSON.stringify({ question: 'q1' }),
100
+ JSON.stringify({ prompt: 'q2' }),
101
+ JSON.stringify('q3'),
102
+ ].join('\n');
103
+
104
+ await fs.writeFile(seedsPath, seedsJsonl + '\n', 'utf8');
105
+
106
+ const summary = await runPipelineBatch({
107
+ seedsPath,
108
+ outPath,
109
+ limit: 3,
110
+ });
111
+
112
+ expect(summary.processed).toBe(3);
113
+ expect(summary.accepted).toBe(3);
114
+ expect(summary.outPath).toBe(outPath);
115
+
116
+ const outText = await fs.readFile(outPath, 'utf8');
117
+ const lines = outText.trim().split('\n');
118
+ expect(lines.length).toBe(3);
119
+
120
+ const first = JSON.parse(lines[0]);
121
+ expect(first).toHaveProperty('question');
122
+ expect(first).toHaveProperty('context');
123
+ expect(first).toHaveProperty('sample');
124
+ expect(first).toHaveProperty('verifier');
125
+ expect(first).toHaveProperty('reward');
126
+ });
127
+ });