all tests pass
Browse files- package.json +2 -1
- src/generator/generator_core.mjs +1 -1
- src/pipeline/pipeline.mjs +178 -0
- src/pipeline/pipeline_cli.js +117 -0
- src/providers/http_provider.mjs +12 -19
- src/providers/ollama_provider.mjs +15 -15
- src/providers/openai_provider.mjs +17 -16
- src/providers/provider.mjs +8 -0
- src/reward/reward_core.mjs +1 -1
- src/verifier/verifier_core.mjs +1 -1
- tests/tests.pipeline.mock.test.mjs +127 -0
package.json
CHANGED
|
@@ -3,7 +3,8 @@
|
|
| 3 |
"version": "1.0.0",
|
| 4 |
"type": "module",
|
| 5 |
"scripts": {
|
| 6 |
-
"test": "vitest --run"
|
|
|
|
| 7 |
},
|
| 8 |
"devDependencies": {
|
| 9 |
"vitest": "^1.6.0"
|
|
|
|
| 3 |
"version": "1.0.0",
|
| 4 |
"type": "module",
|
| 5 |
"scripts": {
|
| 6 |
+
"test": "vitest --run",
|
| 7 |
+
"pipeline": "node ./src/pipeline/pipeline_cli.js"
|
| 8 |
},
|
| 9 |
"devDependencies": {
|
| 10 |
"vitest": "^1.6.0"
|
src/generator/generator_core.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
//
|
| 2 |
import fs from 'fs/promises';
|
| 3 |
import path from 'path';
|
| 4 |
|
|
|
|
| 1 |
+
// src/generator/generator_core.mjs
|
| 2 |
import fs from 'fs/promises';
|
| 3 |
import path from 'path';
|
| 4 |
|
src/pipeline/pipeline.mjs
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// src/pipeline/pipeline.mjs
|
| 2 |
+
import fs from 'fs/promises';
|
| 3 |
+
import path from 'path';
|
| 4 |
+
import { fileURLToPath } from 'url';
|
| 5 |
+
|
| 6 |
+
import { loadProvider } from '../providers/provider.mjs';
|
| 7 |
+
import { hybridSearch } from '../retrieval/retrieval.mjs';
|
| 8 |
+
import { runGenerator } from '../generator/generator_core.mjs';
|
| 9 |
+
import { runVerifier } from '../verifier/verifier_core.mjs';
|
| 10 |
+
import { runReward } from '../reward/reward_core.mjs';
|
| 11 |
+
|
| 12 |
+
const __filename = fileURLToPath(import.meta.url);
|
| 13 |
+
const __dirname = path.dirname(__filename);
|
| 14 |
+
|
| 15 |
+
const PROJECT_ROOT = path.join(__dirname, '..', '..');
|
| 16 |
+
|
| 17 |
+
const DEFAULT_SEEDS_PATH = path.join(
|
| 18 |
+
PROJECT_ROOT,
|
| 19 |
+
'test_samples',
|
| 20 |
+
'seed_questions.jsonl',
|
| 21 |
+
);
|
| 22 |
+
|
| 23 |
+
const DEFAULT_OUT_PATH = path.join(
|
| 24 |
+
PROJECT_ROOT,
|
| 25 |
+
'gold',
|
| 26 |
+
'pipeline_gold.jsonl',
|
| 27 |
+
);
|
| 28 |
+
|
| 29 |
+
/**
|
| 30 |
+
* Load JSONL seed questions.
|
| 31 |
+
* Each line may be:
|
| 32 |
+
* - { "question": "..." }
|
| 33 |
+
* - { "prompt": "..." }
|
| 34 |
+
* - { "text": "..." }
|
| 35 |
+
* - or just a raw string
|
| 36 |
+
*/
|
| 37 |
+
export async function loadSeedQuestions(seedsPath = DEFAULT_SEEDS_PATH) {
|
| 38 |
+
const txt = await fs.readFile(seedsPath, 'utf8');
|
| 39 |
+
return txt
|
| 40 |
+
.split('\n')
|
| 41 |
+
.map((l) => l.trim())
|
| 42 |
+
.filter(Boolean)
|
| 43 |
+
.map((line) => JSON.parse(line));
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
/**
|
| 47 |
+
* Extract a question string from a seed record.
|
| 48 |
+
*/
|
| 49 |
+
export function seedToQuestion(seed) {
|
| 50 |
+
if (typeof seed === 'string') return seed;
|
| 51 |
+
return seed.question || seed.prompt || seed.text || '';
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
/**
|
| 55 |
+
* Run a single pipeline step for one question.
|
| 56 |
+
*
|
| 57 |
+
* Orchestrates:
|
| 58 |
+
* retrieval → generator → verifier → reward
|
| 59 |
+
*
|
| 60 |
+
* Returns a structured result:
|
| 61 |
+
* {
|
| 62 |
+
* status: 'accepted' | 'generator_failed' | 'verifier_rejected' | 'reward_rejected',
|
| 63 |
+
* question,
|
| 64 |
+
* context,
|
| 65 |
+
* gen,
|
| 66 |
+
* ver,
|
| 67 |
+
* rew,
|
| 68 |
+
* }
|
| 69 |
+
*/
|
| 70 |
+
export async function runPipelineStep({
|
| 71 |
+
question,
|
| 72 |
+
provider,
|
| 73 |
+
retrievalMode = process.env.RETRIEVAL_MODE || 'hybrid',
|
| 74 |
+
k = Number(process.env.RETRIEVAL_K || '6'),
|
| 75 |
+
}) {
|
| 76 |
+
if (!question || !question.trim()) {
|
| 77 |
+
return { status: 'invalid_question', question };
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
const prov = provider || loadProvider();
|
| 81 |
+
|
| 82 |
+
// --- Retrieval ---
|
| 83 |
+
let context = [];
|
| 84 |
+
if (retrievalMode === 'hybrid' || !retrievalMode) {
|
| 85 |
+
context = await hybridSearch(question, k);
|
| 86 |
+
} else {
|
| 87 |
+
// for now, other modes can be added later
|
| 88 |
+
context = await hybridSearch(question, k);
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
// --- Generator ---
|
| 92 |
+
const gen = await runGenerator(question, context, prov);
|
| 93 |
+
if (!gen || gen.ok === false) {
|
| 94 |
+
return { status: 'generator_failed', question, context, gen };
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
// --- Verifier ---
|
| 98 |
+
const ver = await runVerifier(gen, prov);
|
| 99 |
+
if (!ver || ver.ok === false) {
|
| 100 |
+
return { status: 'verifier_rejected', question, context, gen, ver };
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
// --- Reward ---
|
| 104 |
+
const rew = await runReward(gen, prov);
|
| 105 |
+
if (!rew || rew.ok === false) {
|
| 106 |
+
return { status: 'reward_rejected', question, context, gen, ver, rew };
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
return {
|
| 110 |
+
status: 'accepted',
|
| 111 |
+
question,
|
| 112 |
+
context,
|
| 113 |
+
gen,
|
| 114 |
+
ver,
|
| 115 |
+
rew,
|
| 116 |
+
};
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
/**
|
| 120 |
+
* Append a single accepted record to a JSONL file.
|
| 121 |
+
*/
|
| 122 |
+
export async function appendGoldRecord(outPath, record) {
|
| 123 |
+
const line = JSON.stringify(record) + '\n';
|
| 124 |
+
await fs.mkdir(path.dirname(outPath), { recursive: true });
|
| 125 |
+
await fs.appendFile(outPath, line, 'utf8');
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
/**
|
| 129 |
+
* Run the pipeline over a batch of seed questions and write accepted
|
| 130 |
+
* samples to a JSONL file.
|
| 131 |
+
*
|
| 132 |
+
* Options:
|
| 133 |
+
* - seedsPath: JSONL of seeds (defaults to test_samples/seed_questions.jsonl)
|
| 134 |
+
* - outPath: output JSONL (defaults to gold/pipeline_gold.jsonl)
|
| 135 |
+
* - limit: max number of seeds to process
|
| 136 |
+
*
|
| 137 |
+
* Returns:
|
| 138 |
+
* { total, processed, accepted, outPath }
|
| 139 |
+
*/
|
| 140 |
+
export async function runPipelineBatch({
|
| 141 |
+
seedsPath = DEFAULT_SEEDS_PATH,
|
| 142 |
+
outPath = DEFAULT_OUT_PATH,
|
| 143 |
+
limit,
|
| 144 |
+
} = {}) {
|
| 145 |
+
const provider = loadProvider();
|
| 146 |
+
const seeds = await loadSeedQuestions(seedsPath);
|
| 147 |
+
const max = typeof limit === 'number' ? limit : seeds.length;
|
| 148 |
+
|
| 149 |
+
let processed = 0;
|
| 150 |
+
let accepted = 0;
|
| 151 |
+
|
| 152 |
+
for (const seed of seeds.slice(0, max)) {
|
| 153 |
+
const question = seedToQuestion(seed);
|
| 154 |
+
const result = await runPipelineStep({ question, provider });
|
| 155 |
+
|
| 156 |
+
processed += 1;
|
| 157 |
+
|
| 158 |
+
if (result.status === 'accepted') {
|
| 159 |
+
const record = {
|
| 160 |
+
question,
|
| 161 |
+
context: result.context,
|
| 162 |
+
sample: result.gen, // treat generator result as opaque sample
|
| 163 |
+
verifier: result.ver,
|
| 164 |
+
reward: result.rew,
|
| 165 |
+
};
|
| 166 |
+
|
| 167 |
+
await appendGoldRecord(outPath, record);
|
| 168 |
+
accepted += 1;
|
| 169 |
+
}
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
return {
|
| 173 |
+
total: seeds.length,
|
| 174 |
+
processed,
|
| 175 |
+
accepted,
|
| 176 |
+
outPath,
|
| 177 |
+
};
|
| 178 |
+
}
|
src/pipeline/pipeline_cli.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env node
|
| 2 |
+
// src/pipeline/pipeline_cli.js
|
| 3 |
+
|
| 4 |
+
import { fileURLToPath } from 'url';
|
| 5 |
+
import path from 'path';
|
| 6 |
+
import fs from 'fs/promises';
|
| 7 |
+
|
| 8 |
+
import { loadProvider } from '../providers/provider.mjs';
|
| 9 |
+
import {
|
| 10 |
+
loadSeedQuestions,
|
| 11 |
+
seedToQuestion,
|
| 12 |
+
runPipelineStep,
|
| 13 |
+
appendGoldRecord,
|
| 14 |
+
runPipelineBatch
|
| 15 |
+
} from './pipeline.mjs';
|
| 16 |
+
|
| 17 |
+
//
|
| 18 |
+
// ---- CLI Helpers ----
|
| 19 |
+
//
|
| 20 |
+
const __filename = fileURLToPath(import.meta.url);
|
| 21 |
+
const __dirname = path.dirname(__filename);
|
| 22 |
+
|
| 23 |
+
function parseArgs() {
|
| 24 |
+
const args = process.argv.slice(2);
|
| 25 |
+
const out = {};
|
| 26 |
+
|
| 27 |
+
for (let i = 0; i < args.length; i++) {
|
| 28 |
+
const a = args[i];
|
| 29 |
+
|
| 30 |
+
if (a === '--seeds' || a === '-s') {
|
| 31 |
+
out.seeds = args[++i];
|
| 32 |
+
} else if (a === '--out' || a === '-o') {
|
| 33 |
+
out.out = args[++i];
|
| 34 |
+
} else if (a === '--limit' || a === '-l') {
|
| 35 |
+
out.limit = Number(args[++i]);
|
| 36 |
+
} else if (a === '--provider' || a === '-p') {
|
| 37 |
+
out.provider = args[++i];
|
| 38 |
+
} else if (a === '--help' || a === '-h') {
|
| 39 |
+
out.help = true;
|
| 40 |
+
}
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
return out;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
function showHelp() {
|
| 47 |
+
console.log(`
|
| 48 |
+
distill-pipeline — Full Distillation Cycle Runner
|
| 49 |
+
|
| 50 |
+
Usage:
|
| 51 |
+
node pipeline_cli.js [options]
|
| 52 |
+
|
| 53 |
+
Options:
|
| 54 |
+
--seeds <file> Path to seed JSONL file (default: test_samples/seed_questions.jsonl)
|
| 55 |
+
--out <file> Path to output JSONL (default: gold/pipeline_gold.jsonl)
|
| 56 |
+
--limit <n> Max number of seeds to process
|
| 57 |
+
--provider <name> Provider override: ollama | openai | http
|
| 58 |
+
-h, --help Show this help
|
| 59 |
+
|
| 60 |
+
Examples:
|
| 61 |
+
node pipeline_cli.js --limit 25
|
| 62 |
+
node pipeline_cli.js -s custom_seeds.jsonl -o gold/round1.jsonl
|
| 63 |
+
`);
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
//
|
| 67 |
+
// ---- MAIN ----
|
| 68 |
+
//
|
| 69 |
+
async function main() {
|
| 70 |
+
const args = parseArgs();
|
| 71 |
+
|
| 72 |
+
if (args.help) {
|
| 73 |
+
showHelp();
|
| 74 |
+
process.exit(0);
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
// ---- Resolve paths ----
|
| 78 |
+
const seedsPath = args.seeds
|
| 79 |
+
? path.resolve(args.seeds)
|
| 80 |
+
: path.resolve(__dirname, '../../test_samples/seed_questions.jsonl');
|
| 81 |
+
|
| 82 |
+
const outPath = args.out
|
| 83 |
+
? path.resolve(args.out)
|
| 84 |
+
: path.resolve(__dirname, '../../gold/pipeline_gold.jsonl');
|
| 85 |
+
|
| 86 |
+
const limit = args.limit ?? undefined;
|
| 87 |
+
|
| 88 |
+
// ---- Provider override ----
|
| 89 |
+
if (args.provider) {
|
| 90 |
+
process.env.PROVIDER_TYPE = args.provider;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
// ---- Announce run ----
|
| 94 |
+
console.log(`\n🚀 Starting Distillation Pipeline`);
|
| 95 |
+
console.log(` Seeds: ${seedsPath}`);
|
| 96 |
+
console.log(` Output: ${outPath}`);
|
| 97 |
+
console.log(` Provider: ${process.env.PROVIDER_TYPE || 'ollama (default)'}`);
|
| 98 |
+
console.log(` Limit: ${limit ?? 'none'}\n`);
|
| 99 |
+
|
| 100 |
+
// ---- Run batch ----
|
| 101 |
+
const result = await runPipelineBatch({
|
| 102 |
+
seedsPath,
|
| 103 |
+
outPath,
|
| 104 |
+
limit,
|
| 105 |
+
});
|
| 106 |
+
|
| 107 |
+
console.log(`\n🎉 Pipeline completed`);
|
| 108 |
+
console.log(` Total Seeds: ${result.total}`);
|
| 109 |
+
console.log(` Processed: ${result.processed}`);
|
| 110 |
+
console.log(` Accepted: ${result.accepted}`);
|
| 111 |
+
console.log(` Written to: ${result.outPath}\n`);
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
main().catch((err) => {
|
| 115 |
+
console.error(`❌ Pipeline error:`, err);
|
| 116 |
+
process.exit(1);
|
| 117 |
+
});
|
src/providers/http_provider.mjs
CHANGED
|
@@ -1,33 +1,26 @@
|
|
|
|
|
| 1 |
import { BaseProvider } from './base.mjs';
|
| 2 |
|
| 3 |
export class HttpProvider extends BaseProvider {
|
| 4 |
constructor() {
|
| 5 |
super();
|
| 6 |
-
this.url = process.env.HTTP_PROVIDER_URL
|
| 7 |
-
this.model = process.env.GENERATOR_MODEL || "qwen2.5-7b-instruct";
|
| 8 |
}
|
| 9 |
|
| 10 |
-
async generate(prompt
|
| 11 |
-
const body = {
|
| 12 |
-
model: this.model,
|
| 13 |
-
prompt,
|
| 14 |
-
...opts
|
| 15 |
-
};
|
| 16 |
-
|
| 17 |
const resp = await fetch(this.url, {
|
| 18 |
-
method:
|
| 19 |
-
headers: {
|
| 20 |
-
body: JSON.stringify(
|
| 21 |
});
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
const data = await resp.json();
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
data.response ||
|
| 28 |
-
data.text ||
|
| 29 |
-
data.result ||
|
| 30 |
-
""
|
| 31 |
-
);
|
| 32 |
}
|
| 33 |
}
|
|
|
|
| 1 |
+
// src/providers/http_provider.mjs
|
| 2 |
import { BaseProvider } from './base.mjs';
|
| 3 |
|
| 4 |
export class HttpProvider extends BaseProvider {
|
| 5 |
constructor() {
|
| 6 |
super();
|
| 7 |
+
this.url = process.env.HTTP_PROVIDER_URL;
|
|
|
|
| 8 |
}
|
| 9 |
|
| 10 |
+
async generate(prompt) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
const resp = await fetch(this.url, {
|
| 12 |
+
method: 'POST',
|
| 13 |
+
headers: { 'Content-Type': 'application/json' },
|
| 14 |
+
body: JSON.stringify({ prompt })
|
| 15 |
});
|
| 16 |
|
| 17 |
+
if (!resp.ok) {
|
| 18 |
+
throw new Error(`HttpProvider error: ${resp.status}`);
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
const data = await resp.json();
|
| 22 |
|
| 23 |
+
// Expect: { output: "<json string>" }
|
| 24 |
+
return data.output || data.response || '';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
}
|
| 26 |
}
|
src/providers/ollama_provider.mjs
CHANGED
|
@@ -1,31 +1,31 @@
|
|
|
|
|
| 1 |
import { BaseProvider } from './base.mjs';
|
| 2 |
|
| 3 |
export class OllamaProvider extends BaseProvider {
|
| 4 |
constructor() {
|
| 5 |
super();
|
| 6 |
-
this.url = process.env.OLLAMA_URL ||
|
| 7 |
-
this.model = process.env.GENERATOR_MODEL ||
|
| 8 |
}
|
| 9 |
|
| 10 |
-
async generate(prompt
|
| 11 |
-
const body = {
|
| 12 |
-
model: this.model,
|
| 13 |
-
prompt,
|
| 14 |
-
stream: false,
|
| 15 |
-
...opts
|
| 16 |
-
};
|
| 17 |
-
|
| 18 |
const resp = await fetch(this.url, {
|
| 19 |
-
method:
|
| 20 |
-
headers: {
|
| 21 |
-
body: JSON.stringify(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
});
|
| 23 |
|
| 24 |
if (!resp.ok) {
|
| 25 |
-
throw new Error(`
|
| 26 |
}
|
| 27 |
|
| 28 |
const data = await resp.json();
|
| 29 |
-
|
|
|
|
|
|
|
| 30 |
}
|
| 31 |
}
|
|
|
|
| 1 |
+
// src/providers/ollama_provider.mjs
|
| 2 |
import { BaseProvider } from './base.mjs';
|
| 3 |
|
| 4 |
export class OllamaProvider extends BaseProvider {
|
| 5 |
constructor() {
|
| 6 |
super();
|
| 7 |
+
this.url = process.env.OLLAMA_URL || 'http://localhost:11434/api/generate';
|
| 8 |
+
this.model = process.env.GENERATOR_MODEL || 'qwen3-vl:8b-thinking';
|
| 9 |
}
|
| 10 |
|
| 11 |
+
async generate(prompt) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
const resp = await fetch(this.url, {
|
| 13 |
+
method: 'POST',
|
| 14 |
+
headers: { 'Content-Type': 'application/json' },
|
| 15 |
+
body: JSON.stringify({
|
| 16 |
+
model: this.model,
|
| 17 |
+
prompt,
|
| 18 |
+
stream: false
|
| 19 |
+
})
|
| 20 |
});
|
| 21 |
|
| 22 |
if (!resp.ok) {
|
| 23 |
+
throw new Error(`OllamaProvider error: ${resp.status}`);
|
| 24 |
}
|
| 25 |
|
| 26 |
const data = await resp.json();
|
| 27 |
+
|
| 28 |
+
// Must return *raw string*, because generator/verifier/reward expect text they parse with JSON.parse()
|
| 29 |
+
return data.response;
|
| 30 |
}
|
| 31 |
}
|
src/providers/openai_provider.mjs
CHANGED
|
@@ -1,30 +1,31 @@
|
|
|
|
|
| 1 |
import { BaseProvider } from './base.mjs';
|
| 2 |
|
| 3 |
export class OpenAIProvider extends BaseProvider {
|
| 4 |
constructor() {
|
| 5 |
super();
|
| 6 |
-
this.
|
| 7 |
-
this.model = process.env.GENERATOR_MODEL ||
|
| 8 |
-
this.url = "https://api.openai.com/v1/chat/completions";
|
| 9 |
}
|
| 10 |
|
| 11 |
-
async generate(prompt
|
| 12 |
-
const
|
| 13 |
-
|
| 14 |
-
messages: [{ role: "user", content: prompt }],
|
| 15 |
-
temperature: opts.temperature ?? 0.7
|
| 16 |
-
};
|
| 17 |
-
|
| 18 |
-
const resp = await fetch(this.url, {
|
| 19 |
-
method: "POST",
|
| 20 |
headers: {
|
| 21 |
-
|
| 22 |
-
|
| 23 |
},
|
| 24 |
-
body: JSON.stringify(
|
|
|
|
|
|
|
|
|
|
| 25 |
});
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
const data = await resp.json();
|
| 28 |
-
return data.choices
|
| 29 |
}
|
| 30 |
}
|
|
|
|
| 1 |
+
// src/providers/openai_provider.mjs
|
| 2 |
import { BaseProvider } from './base.mjs';
|
| 3 |
|
| 4 |
export class OpenAIProvider extends BaseProvider {
|
| 5 |
constructor() {
|
| 6 |
super();
|
| 7 |
+
this.apiKey = process.env.OPENAI_API_KEY;
|
| 8 |
+
this.model = process.env.GENERATOR_MODEL || 'gpt-4o-mini';
|
|
|
|
| 9 |
}
|
| 10 |
|
| 11 |
+
async generate(prompt) {
|
| 12 |
+
const resp = await fetch('https://api.openai.com/v1/chat/completions', {
|
| 13 |
+
method: 'POST',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
headers: {
|
| 15 |
+
'Authorization': `Bearer ${this.apiKey}`,
|
| 16 |
+
'Content-Type': 'application/json'
|
| 17 |
},
|
| 18 |
+
body: JSON.stringify({
|
| 19 |
+
model: this.model,
|
| 20 |
+
messages: [{ role: 'user', content: prompt }]
|
| 21 |
+
})
|
| 22 |
});
|
| 23 |
|
| 24 |
+
if (!resp.ok) {
|
| 25 |
+
throw new Error(`OpenAIProvider error: ${resp.status}`);
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
const data = await resp.json();
|
| 29 |
+
return data.choices[0].message.content;
|
| 30 |
}
|
| 31 |
}
|
src/providers/provider.mjs
CHANGED
|
@@ -3,6 +3,14 @@ import { OllamaProvider } from './ollama_provider.mjs';
|
|
| 3 |
import { OpenAIProvider } from './openai_provider.mjs';
|
| 4 |
import { HttpProvider } from './http_provider.mjs';
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
export function loadProvider() {
|
| 7 |
const type = (process.env.PROVIDER_TYPE || "ollama").toLowerCase();
|
| 8 |
|
|
|
|
| 3 |
import { OpenAIProvider } from './openai_provider.mjs';
|
| 4 |
import { HttpProvider } from './http_provider.mjs';
|
| 5 |
|
| 6 |
+
// Add this at bottom of provider.mjs temporarily:
|
| 7 |
+
export async function debugLoad() {
|
| 8 |
+
const p = loadProvider();
|
| 9 |
+
console.log('Loaded provider:', p);
|
| 10 |
+
console.log('typeof generate:', typeof p.generate);
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
|
| 14 |
export function loadProvider() {
|
| 15 |
const type = (process.env.PROVIDER_TYPE || "ollama").toLowerCase();
|
| 16 |
|
src/reward/reward_core.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
//
|
| 2 |
import fs from 'fs/promises';
|
| 3 |
import path from 'path';
|
| 4 |
|
|
|
|
| 1 |
+
// src/reward/reward_core.mjs
|
| 2 |
import fs from 'fs/promises';
|
| 3 |
import path from 'path';
|
| 4 |
|
src/verifier/verifier_core.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
//
|
| 2 |
import fs from 'fs/promises';
|
| 3 |
import path from 'path';
|
| 4 |
|
|
|
|
| 1 |
+
// src/verifier/verifier_core.mjs
|
| 2 |
import fs from 'fs/promises';
|
| 3 |
import path from 'path';
|
| 4 |
|
tests/tests.pipeline.mock.test.mjs
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// tests/pipeline.mock.test.mjs
|
| 2 |
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
| 3 |
+
import fs from 'fs/promises';
|
| 4 |
+
import path from 'path';
|
| 5 |
+
import os from 'os';
|
| 6 |
+
import { fileURLToPath } from 'url';
|
| 7 |
+
|
| 8 |
+
// --- Mocks ---
|
| 9 |
+
// retrieval
|
| 10 |
+
vi.mock('../src/retrieval/retrieval.mjs', () => {
|
| 11 |
+
return {
|
| 12 |
+
hybridSearch: vi.fn(async (query, k) => {
|
| 13 |
+
return [
|
| 14 |
+
{ id: 'c1', content: `ctx for ${query}`, score: 1.0 },
|
| 15 |
+
{ id: 'c2', content: `more ctx for ${query}`, score: 0.9 },
|
| 16 |
+
].slice(0, k);
|
| 17 |
+
}),
|
| 18 |
+
};
|
| 19 |
+
});
|
| 20 |
+
|
| 21 |
+
// provider
|
| 22 |
+
const mockProvider = {
|
| 23 |
+
generate: vi.fn(),
|
| 24 |
+
};
|
| 25 |
+
|
| 26 |
+
vi.mock('../src/providers/provider.mjs', () => {
|
| 27 |
+
return {
|
| 28 |
+
loadProvider: vi.fn(() => mockProvider),
|
| 29 |
+
};
|
| 30 |
+
});
|
| 31 |
+
|
| 32 |
+
// generator
|
| 33 |
+
const mockGenResult = { ok: true, sample: { answer: 'A' } };
|
| 34 |
+
vi.mock('../src/generator/generator_core.mjs', () => {
|
| 35 |
+
return {
|
| 36 |
+
runGenerator: vi.fn(async () => mockGenResult),
|
| 37 |
+
};
|
| 38 |
+
});
|
| 39 |
+
|
| 40 |
+
// verifier
|
| 41 |
+
const mockVerResult = { ok: true };
|
| 42 |
+
vi.mock('../src/verifier/verifier_core.mjs', () => {
|
| 43 |
+
return {
|
| 44 |
+
runVerifier: vi.fn(async () => mockVerResult),
|
| 45 |
+
};
|
| 46 |
+
});
|
| 47 |
+
|
| 48 |
+
// reward
|
| 49 |
+
const mockRewResult = { ok: true, score: 0.9 };
|
| 50 |
+
vi.mock('../src/reward/reward_core.mjs', () => {
|
| 51 |
+
return {
|
| 52 |
+
runReward: vi.fn(async () => mockRewResult),
|
| 53 |
+
};
|
| 54 |
+
});
|
| 55 |
+
|
| 56 |
+
// --- Import after mocks ---
|
| 57 |
+
import {
|
| 58 |
+
runPipelineStep,
|
| 59 |
+
runPipelineBatch,
|
| 60 |
+
loadSeedQuestions,
|
| 61 |
+
seedToQuestion,
|
| 62 |
+
appendGoldRecord,
|
| 63 |
+
} from '../src/pipeline/pipeline.mjs';
|
| 64 |
+
|
| 65 |
+
describe('pipeline.mjs', () => {
|
| 66 |
+
beforeEach(() => {
|
| 67 |
+
vi.clearAllMocks();
|
| 68 |
+
});
|
| 69 |
+
|
| 70 |
+
it('seedToQuestion extracts question from various shapes', () => {
|
| 71 |
+
expect(seedToQuestion('raw')).toBe('raw');
|
| 72 |
+
expect(seedToQuestion({ question: 'q1' })).toBe('q1');
|
| 73 |
+
expect(seedToQuestion({ prompt: 'q2' })).toBe('q2');
|
| 74 |
+
expect(seedToQuestion({ text: 'q3' })).toBe('q3');
|
| 75 |
+
});
|
| 76 |
+
|
| 77 |
+
it('runPipelineStep returns accepted with mocked deps', async () => {
|
| 78 |
+
const res = await runPipelineStep({
|
| 79 |
+
question: 'What is service to others?',
|
| 80 |
+
});
|
| 81 |
+
|
| 82 |
+
expect(res.status).toBe('accepted');
|
| 83 |
+
expect(res.question).toBe('What is service to others?');
|
| 84 |
+
expect(Array.isArray(res.context)).toBe(true);
|
| 85 |
+
expect(res.gen).toEqual(mockGenResult);
|
| 86 |
+
expect(res.ver).toEqual(mockVerResult);
|
| 87 |
+
expect(res.rew).toEqual(mockRewResult);
|
| 88 |
+
});
|
| 89 |
+
|
| 90 |
+
it('runPipelineBatch processes seeds and writes JSONL', async () => {
|
| 91 |
+
const tmpDir = await fs.mkdtemp(
|
| 92 |
+
path.join(os.tmpdir(), 'distill-pipeline-test-'),
|
| 93 |
+
);
|
| 94 |
+
|
| 95 |
+
const seedsPath = path.join(tmpDir, 'seeds.jsonl');
|
| 96 |
+
const outPath = path.join(tmpDir, 'out.jsonl');
|
| 97 |
+
|
| 98 |
+
const seedsJsonl = [
|
| 99 |
+
JSON.stringify({ question: 'q1' }),
|
| 100 |
+
JSON.stringify({ prompt: 'q2' }),
|
| 101 |
+
JSON.stringify('q3'),
|
| 102 |
+
].join('\n');
|
| 103 |
+
|
| 104 |
+
await fs.writeFile(seedsPath, seedsJsonl + '\n', 'utf8');
|
| 105 |
+
|
| 106 |
+
const summary = await runPipelineBatch({
|
| 107 |
+
seedsPath,
|
| 108 |
+
outPath,
|
| 109 |
+
limit: 3,
|
| 110 |
+
});
|
| 111 |
+
|
| 112 |
+
expect(summary.processed).toBe(3);
|
| 113 |
+
expect(summary.accepted).toBe(3);
|
| 114 |
+
expect(summary.outPath).toBe(outPath);
|
| 115 |
+
|
| 116 |
+
const outText = await fs.readFile(outPath, 'utf8');
|
| 117 |
+
const lines = outText.trim().split('\n');
|
| 118 |
+
expect(lines.length).toBe(3);
|
| 119 |
+
|
| 120 |
+
const first = JSON.parse(lines[0]);
|
| 121 |
+
expect(first).toHaveProperty('question');
|
| 122 |
+
expect(first).toHaveProperty('context');
|
| 123 |
+
expect(first).toHaveProperty('sample');
|
| 124 |
+
expect(first).toHaveProperty('verifier');
|
| 125 |
+
expect(first).toHaveProperty('reward');
|
| 126 |
+
});
|
| 127 |
+
});
|