Add run.sh pipeline wrapper, refactor generator_core, remove run_qg_verbose.sh, and expand generator tests
Browse files- run.sh +106 -0
- run_qg_verbose.sh +0 -21
- src/generator/generator_core.mjs +21 -56
- tests/generator_core.test.mjs +59 -14
run.sh
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env bash
set -euo pipefail

# run.sh — friendly wrapper for running the distillation pipeline

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$ROOT"

# -----------------------------
# Default config (can be overridden in env or CLI)
# -----------------------------
ES_NODE="${ES_NODE:-http://localhost:9200}"
ES_INDEX="${ES_INDEX:-quo_distill_index}"

PIPELINE_SEED_MODE="${PIPELINE_SEED_MODE:-question-first}"

RETRIEVAL_MODE="${RETRIEVAL_MODE:-hybrid}"
RETRIEVAL_K="${RETRIEVAL_K:-6}"

QUESTION_MAX="${QUESTION_MAX:-3}"
LIMIT="${LIMIT:-5}"

GENERATOR_MODEL="${GENERATOR_MODEL:-qwen3-vl:8b-thinking}"
VERIFIER_MODEL="${VERIFIER_MODEL:-tensortemplar/patronus-lynx:8b-instruct-q4_K_M}"
REWARD_MODEL="${REWARD_MODEL:-tensortemplar/patronus-lynx:8b-instruct-q4_K_M}"

VERBOSE=0
THINKING=0

usage() {
  echo "Usage: ./run.sh [--limit N] [--questions N] [--verbose] [--thinking]"
}

# -----------------------------
# Argument parsing
# -----------------------------
while [[ $# -gt 0 ]]; do
  case "$1" in
    -l|--limit)
      # Fail with a clear message instead of a cryptic `set -u`
      # unbound-variable error when the option value is missing.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1"
        usage
        exit 1
      fi
      LIMIT="$2"
      shift 2
      ;;
    -q|--questions)
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1"
        usage
        exit 1
      fi
      QUESTION_MAX="$2"
      shift 2
      ;;
    -v|--verbose)
      VERBOSE=1
      shift
      ;;
    -t|--thinking)
      THINKING=1
      shift
      ;;
    *)
      echo "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done

# If thinking flag is present, use thinking model + thinking prompt header
if [[ "$THINKING" -eq 1 ]]; then
  echo "🧠 Using THINKING MODE"
  GENERATOR_MODEL="qwen3-vl:8b-thinking"
fi

# -----------------------------
# Export for pipeline
# -----------------------------
export ES_NODE
export ES_INDEX
export PIPELINE_SEED_MODE

export RETRIEVAL_MODE
export RETRIEVAL_K

export QUESTION_MAX
export LIMIT

export GENERATOR_PROVIDER="ollama"
export GENERATOR_MODEL

export VERIFIER_PROVIDER="ollama"
export VERIFIER_MODEL

export REWARD_PROVIDER="ollama"
export REWARD_MODEL

echo "========================================"
echo "🚀 Running Distillation Pipeline"
echo " ES_NODE: $ES_NODE"
echo " ES_INDEX: $ES_INDEX"
echo " Retrieval: $RETRIEVAL_MODE (k=$RETRIEVAL_K)"
echo " Mode: $PIPELINE_SEED_MODE"
echo " Limit: $LIMIT"
echo " Question max: $QUESTION_MAX"
echo " Generator model: $GENERATOR_MODEL"
echo " Verifier model: $VERIFIER_MODEL"
echo " Reward model: $REWARD_MODEL"
echo " Verbose: $VERBOSE"
echo "========================================"
echo

if [[ "$VERBOSE" -eq 1 ]]; then
  npm run pipeline -- --limit "$LIMIT" --verbose
else
  npm run pipeline -- --limit "$LIMIT"
fi
run_qg_verbose.sh
DELETED
|
@@ -1,21 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env bash
|
| 2 |
-
|
| 3 |
-
# Always fail on error
|
| 4 |
-
set -e
|
| 5 |
-
|
| 6 |
-
echo "==============================================="
|
| 7 |
-
echo " Running Question-First Distillation Pipeline"
|
| 8 |
-
echo "==============================================="
|
| 9 |
-
echo
|
| 10 |
-
|
| 11 |
-
# You can adjust limit here if needed
|
| 12 |
-
LIMIT="${1:-5}"
|
| 13 |
-
|
| 14 |
-
export PIPELINE_SEED_MODE=question-first
|
| 15 |
-
|
| 16 |
-
echo "→ Mode: question-first"
|
| 17 |
-
echo "→ Verbose: yes"
|
| 18 |
-
echo "→ Limit: $LIMIT"
|
| 19 |
-
echo
|
| 20 |
-
|
| 21 |
-
npm run pipeline -- --limit "$LIMIT" --verbose
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/generator/generator_core.mjs
CHANGED
|
@@ -2,80 +2,45 @@
|
|
| 2 |
import fs from 'fs/promises';
|
| 3 |
import path from 'path';
|
| 4 |
|
| 5 |
-
//
|
| 6 |
-
async function
|
| 7 |
const filePath = path.resolve(
|
| 8 |
path.dirname(new URL(import.meta.url).pathname),
|
| 9 |
-
'..',
|
| 10 |
-
'..',
|
| 11 |
-
'prompts',
|
| 12 |
-
'generator_prompt.txt'
|
| 13 |
);
|
| 14 |
return await fs.readFile(filePath, 'utf8');
|
| 15 |
}
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
*/
|
| 20 |
-
async function makeGeneratorPrompt(question, contextChunks) {
|
| 21 |
-
const tmpl = await loadPromptTemplate();
|
| 22 |
-
|
| 23 |
-
let prompt = tmpl.replace(/{{QUESTION}}/g, question);
|
| 24 |
-
|
| 25 |
-
if (tmpl.includes('{{CONTEXT}}')) {
|
| 26 |
-
const contextText =
|
| 27 |
-
contextChunks?.map((c) => c.content || c.text || '').join('\n\n---\n\n') ??
|
| 28 |
-
'';
|
| 29 |
-
|
| 30 |
-
prompt = prompt.replace(/{{CONTEXT}}/g, contextText);
|
| 31 |
-
}
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
* @param {string} question - the natural language question
|
| 40 |
-
* @param {Array} contextChunks - retrieved context objects from RAG
|
| 41 |
-
* @param {object} provider - { generate(prompt) → string }
|
| 42 |
-
* @returns {object} generator result:
|
| 43 |
-
* {
|
| 44 |
-
* raw: string, // full model output (think + answer)
|
| 45 |
-
* thought: string|null, // content inside <think>...</think> if present
|
| 46 |
-
* answer: string, // final answer after </think> (or whole raw if no tags)
|
| 47 |
-
* query: string, // original question
|
| 48 |
-
* context: Array // original context chunks
|
| 49 |
-
* }
|
| 50 |
-
*/
|
| 51 |
-
export async function runGenerator(question, contextChunks, provider) {
|
| 52 |
-
const prompt = await makeGeneratorPrompt(question, contextChunks);
|
| 53 |
|
| 54 |
const raw = await provider.generate(prompt);
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
// Extract <think> ... </think> if the thinking model emits it
|
| 60 |
-
const startTag = '<think>';
|
| 61 |
-
const endTag = '</think>';
|
| 62 |
-
const startIdx = raw.indexOf(startTag);
|
| 63 |
-
const endIdx = raw.indexOf(endTag);
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
| 68 |
}
|
| 69 |
|
| 70 |
return {
|
| 71 |
raw,
|
| 72 |
thought,
|
| 73 |
answer,
|
| 74 |
-
|
| 75 |
-
context: contextChunks
|
| 76 |
};
|
| 77 |
}
|
| 78 |
|
| 79 |
-
export default {
|
| 80 |
-
runGenerator,
|
| 81 |
-
};
|
|
|
|
import fs from 'fs/promises';
import path from 'path';

/**
 * Load the generator prompt template from prompts/generator_prompt.txt,
 * resolved relative to this module's directory.
 * @returns {Promise<string>} raw template text
 */
async function loadTemplate() {
  // NOTE(review): URL.pathname is not Windows-safe (leading slash, %20
  // escapes); url.fileURLToPath would be more robust — confirm target
  // platforms before changing.
  const filePath = path.resolve(
    path.dirname(new URL(import.meta.url).pathname),
    '..', '..', 'prompts', 'generator_prompt.txt'
  );
  return await fs.readFile(filePath, 'utf8');
}

/**
 * Run the generator model over a question plus retrieved RAG context.
 *
 * @param {string} question - the natural-language question
 * @param {Array<{content?: string, text?: string}>} contextChunks - retrieved
 *        context objects; each chunk may carry `content` or `text`
 * @param {{generate(prompt: string): Promise<string>}} provider - model adapter
 * @returns {Promise<{
 *   raw: string,            // full model output (think + answer)
 *   thought: string|null,   // content inside <think>…</think> if present
 *   answer: string,         // text after </think>, or whole raw if no tags
 *   question: string,       // original question
 *   context: Array          // original context chunks
 * }>}
 */
export async function runGenerator(question, contextChunks, provider) {
  const template = await loadTemplate();

  // Tolerate a null/undefined context list (the previous implementation was
  // null-safe via optional chaining; restore that behavior).
  const ctxText = (contextChunks ?? [])
    .map((c) => c.content || c.text || '')
    .join('\n\n---\n\n');

  // replaceAll: String.replace with a string pattern substitutes only the
  // FIRST occurrence — templates that mention {{QUESTION}} or {{CONTEXT}}
  // more than once would be left half-rendered. This restores the global
  // /{{…}}/g semantics of the earlier implementation.
  const prompt = template
    .replaceAll('{{QUESTION}}', question)
    .replaceAll('{{CONTEXT}}', ctxText);

  const raw = await provider.generate(prompt);

  // Extract visible chain-of-thought if the thinking model emitted it.
  const thinkMatch = raw.match(/<think>([\s\S]*?)<\/think>/i);
  const thought = thinkMatch ? thinkMatch[1].trim() : null;

  // Final answer = text after </think>; without tags, the whole output.
  let answer = raw;
  if (thinkMatch) {
    answer = raw.slice(thinkMatch.index + thinkMatch[0].length).trim();
  }

  return {
    raw,
    thought,
    answer,
    question,
    context: contextChunks
  };
}

export default { runGenerator };
tests/generator_core.test.mjs
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
import { describe, it, expect, vi } from 'vitest';
|
| 2 |
import { runGenerator } from '../src/generator/generator_core.mjs';
|
| 3 |
|
| 4 |
-
describe('generator_core.mjs', () => {
|
| 5 |
-
it('
|
| 6 |
const fakeContext = [
|
| 7 |
{ content: 'First context chunk' },
|
| 8 |
{ content: 'Second context chunk' },
|
|
@@ -10,31 +10,76 @@ describe('generator_core.mjs', () => {
|
|
| 10 |
|
| 11 |
const provider = {
|
| 12 |
generate: vi.fn(async (prompt) => {
|
| 13 |
-
//
|
| 14 |
expect(prompt).toContain('What is love?');
|
| 15 |
-
//
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
}),
|
| 19 |
};
|
| 20 |
|
| 21 |
const result = await runGenerator('What is love?', fakeContext, provider);
|
| 22 |
|
| 23 |
expect(provider.generate).toHaveBeenCalledOnce();
|
| 24 |
-
expect(result.
|
| 25 |
-
expect(result.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
});
|
| 27 |
|
| 28 |
-
it('handles
|
| 29 |
-
const fakeContext = [{ content: '
|
| 30 |
|
| 31 |
const provider = {
|
| 32 |
-
generate: vi.fn(async () =>
|
|
|
|
|
|
|
|
|
|
| 33 |
};
|
| 34 |
|
| 35 |
-
const result = await runGenerator(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
expect(result.raw).toBe('
|
| 38 |
-
|
|
|
|
|
|
|
| 39 |
});
|
| 40 |
});
|
|
|
|
import { describe, it, expect, vi } from 'vitest';
import { runGenerator } from '../src/generator/generator_core.mjs';

// Wrap a generate() implementation in a provider stub backed by a vitest mock.
const stubProvider = (impl) => ({ generate: vi.fn(impl) });

describe('generator_core.mjs (thinking generator)', () => {
  it('includes question and context in the prompt', async () => {
    const chunks = [
      { content: 'First context chunk' },
      { content: 'Second context chunk' },
    ];

    const provider = stubProvider(async (prompt) => {
      // The rendered prompt must carry the question…
      expect(prompt).toContain('What is love?');
      // …and every context chunk (i.e. {{CONTEXT}} is wired up).
      expect(prompt).toContain('First context chunk');
      expect(prompt).toContain('Second context chunk');

      // Reply with simple thinking-style output.
      return `<think>
I consider the meaning of love using only the context.
</think>
Love is the recognition of shared being.
`;
    });

    const result = await runGenerator('What is love?', chunks, provider);

    expect(provider.generate).toHaveBeenCalledOnce();
    expect(result.question).toBe('What is love?');
    expect(result.context).toHaveLength(2);
    expect(result.raw).toContain('<think>');
    expect(result.answer).toBe('Love is the recognition of shared being.');
    expect(result.thought).toContain('consider the meaning of love');
  });

  it('extracts thought and answer correctly when <think> block is present', async () => {
    const chunks = [{ content: 'ctx' }];

    const provider = stubProvider(async () => `<think>
Step 1: Read the context carefully.
Step 2: Identify the relevant statements.
Step 3: Synthesize an answer.
</think>
The final answer derived from the context.`);

    const result = await runGenerator('Test question?', chunks, provider);

    expect(result.raw).toContain('<think>');
    expect(result.thought).toContain('Step 1:');
    expect(result.thought).toContain('Step 3:');
    expect(result.answer).toBe('The final answer derived from the context.');
  });

  it('handles output without <think> block gracefully', async () => {
    const chunks = [{ content: 'ctx' }];

    // No <think> tags at all.
    const provider = stubProvider(
      async () => 'Just a direct answer with no visible reasoning.',
    );

    const result = await runGenerator('Another question?', chunks, provider);

    expect(result.raw).toBe('Just a direct answer with no visible reasoning.');
    // Without think tags: thought is null and answer equals the full output.
    expect(result.thought).toBeNull();
    expect(result.answer).toBe('Just a direct answer with no visible reasoning.');
  });
});