htaf
/

distill-pipeline

retrieval-augmented-generation

self-improvement

question-generation

reward-modeling

Model card Files Files and versions

distill-pipeline / src /generator /generator_core.mjs

htaf's picture

added new instruct pipeline for faster generation

2739b3a 3 months ago

history blame contribute delete

6.63 kB

	// src/generator/generator_core.mjs
	import fs from 'fs/promises';
	import path from 'path';

	// Load generator template
	/**
	 * Reads the generator prompt template from disk.
	 *
	 * The template is `prompts/generator_prompt.txt`, located two directories
	 * above the directory that contains this module.
	 *
	 * @returns {Promise<string>} UTF-8 contents of the template file.
	 * @throws Rejects with the underlying fs error if the file cannot be read.
	 */
	async function loadTemplate() {
	  // Resolve relative to this module as a file: URL and hand the URL to fs
	  // directly. Going through `URL#pathname` keeps percent-encoding (a space
	  // stays "%20") and produces "/C:/..." on Windows, so the read would fail
	  // for checkouts living under such paths.
	  const templateUrl = new URL('../../prompts/generator_prompt.txt', import.meta.url);
	  return fs.readFile(templateUrl, 'utf8');
	}

	// Tagged reasoning section: <|thought|> ... <|end_of_thought|>
	const THOUGHT_BLOCK_RE = /<\|thought\|>([\s\S]*?)<\|end_of_thought\|>/i;
	// <understanding> section running up to the next answer tag or end of text.
	const UNDERSTANDING_RE = /<understanding>[\s\S]*?(?=<\|answer\||<answer>|$)/i;
	// Tagged answer section: <|answer|> ... <|end_of_answer|>
	const ANSWER_BLOCK_RE = /<\|answer\|>([\s\S]*?)<\|end_of_answer\|>/i;
	// Visible chain-of-thought: <think> ... </think>
	const THINK_TAG_RE = /<think>([\s\S]*?)<\/think>/i;
	// Comma that is not inside a single- or double-quoted segment.
	const QUOTE_AWARE_COMMA_RE = /,(?=(?:[^'"]|'[^']*'|"[^"]*")*$)/;
	// Leading/trailing quotes and whitespace around a list item.
	const EDGE_QUOTES_RE = /^["'\s]+|["'\s]+$/g;
	// A "key: value" line for one of the recognised answer fields.
	const FIELD_LINE_RE = /^(answer|confidence|evidence|limitations?):([\s\S]*)$/i;

	/**
	 * Best-effort JSON parse: tries the whole text, then the substring from the
	 * first "{" to the last "}". Parse failures are expected here (the text is
	 * free-form model output), so they yield null rather than throwing.
	 *
	 * @param {unknown} txt - Candidate JSON text.
	 * @returns {*} The parsed value, or null when nothing parses.
	 */
	function safeParseJson(txt) {
	  if (!txt || typeof txt !== 'string') return null;
	  try {
	    return JSON.parse(txt);
	  } catch {
	    const start = txt.indexOf('{');
	    const end = txt.lastIndexOf('}');
	    if (start === -1 || end <= start) return null;
	    try {
	      return JSON.parse(txt.slice(start, end + 1));
	    } catch {
	      return null;
	    }
	  }
	}

	/**
	 * Extracts a reasoning section from tagged model output.
	 *
	 * @param {unknown} txt - Raw model text.
	 * @returns {string|null} Trimmed <|thought|> body, else the trimmed
	 *   <understanding> section (tag included), else null.
	 */
	function extractThoughtBlock(txt) {
	  if (!txt || typeof txt !== 'string') return null;
	  const thoughtMatch = txt.match(THOUGHT_BLOCK_RE);
	  if (thoughtMatch) return thoughtMatch[1].trim();

	  const understandingMatch = txt.match(UNDERSTANDING_RE);
	  if (understandingMatch) return understandingMatch[0].trim();

	  return null;
	}

	/**
	 * Turns the value of an "evidence:" line into a list of items.
	 * A bracketed list is split on commas outside quotes; anything else is split
	 * on every comma. Surrounding quotes/whitespace are stripped and empty items
	 * dropped.
	 *
	 * @param {string} text - Text following "evidence:".
	 * @returns {string[]} Evidence items.
	 */
	function parseEvidenceList(text) {
	  const bracketed = text.match(/\[(.*)\]/);
	  const parts = bracketed
	    ? bracketed[1].split(QUOTE_AWARE_COMMA_RE)
	    : text.split(',');
	  return parts.map((item) => item.replace(EDGE_QUOTES_RE, '')).filter(Boolean);
	}

	/**
	 * Parses "answer:/confidence:/evidence:/limitations:" fields from model text.
	 *
	 * Two passes: a whole-text pass picks the first matching line for each field
	 * (so fields outside an answer block are still found), then a line-by-line
	 * pass over the <|answer|> block (or the whole text when untagged) overrides
	 * those values.
	 *
	 * @param {unknown} txt - Raw model text.
	 * @returns {{answer?: string, confidence?: string|null, evidence?: string[],
	 *   limitations?: string}|null} Parsed fields, or null for non-string/empty input.
	 */
	function parseAnswerBlock(txt) {
	  if (!txt || typeof txt !== 'string') return null;
	  const result = {};

	  // Pass 1: line-based fallbacks over the whole text, even without tags.
	  const answerLine = txt.match(/^answer:\s*(.+)$/im);
	  if (answerLine) result.answer = answerLine[1].trim();
	  const confLine = txt.match(/^confidence:\s*(.+)$/im);
	  if (confLine) result.confidence = confLine[1].trim();
	  const evidenceLine = txt.match(/^evidence:\s*(.+)$/im);
	  if (evidenceLine) result.evidence = parseEvidenceList(evidenceLine[1].trim());
	  const limLine = txt.match(/^limitations?:\s*(.+)$/im);
	  if (limLine) result.limitations = limLine[1].trim();

	  // Pass 2: fields inside the tagged block take precedence.
	  const blockMatch = txt.match(ANSWER_BLOCK_RE);
	  const body = blockMatch ? blockMatch[1] : txt;
	  const lines = body.split('\n').map((l) => l.trim()).filter(Boolean);
	  for (const line of lines) {
	    const field = line.match(FIELD_LINE_RE);
	    if (!field) continue;
	    // Everything after the first colon, so values containing ":" stay intact.
	    const value = field[2].trim();
	    switch (field[1].toLowerCase()) {
	      case 'answer':
	        result.answer = value;
	        break;
	      case 'confidence':
	        result.confidence = value || null;
	        break;
	      case 'evidence':
	        result.evidence = parseEvidenceList(value);
	        break;
	      default: // "limitation" / "limitations"
	        result.limitations = value;
	    }
	  }
	  return result;
	}

	/**
	 * Builds the generator prompt, calls the provider, and normalises its output.
	 *
	 * Parsing order for the provider text: tagged/line-based answer fields, then
	 * JSON, then plain text with optional <|thought|>/<understanding>/<think>
	 * sections. A structured `thinking` value from the provider always wins as
	 * the thought; if no thought is found at all, the raw text is used.
	 *
	 * @param {*} question - Inserted at the template's first `{{QUESTION}}`.
	 * @param {Array<{content?: string, text?: string}>|null|undefined} contextChunks
	 *   Chunks whose `content` (or `text`) are joined into `{{CONTEXT}}`;
	 *   null/undefined is treated as no context.
	 * @param {{generate: Function}} provider - Called as
	 *   `generate(prompt, { includeJson: true })`; may resolve to a string or to
	 *   an object with `response`, `thinking` and `fullResponse`.
	 * @returns {Promise<{raw: *, thinking: *, thought: *, answer: *,
	 *   confidence: *, evidence: *, limitations: *, question: *, context: *,
	 *   rawJson: *}>} Normalised result; `context` is `contextChunks` as passed
	 *   and `rawJson` is `fullResponse` without its `context` key (or null).
	 */
	export async function runGenerator(question, contextChunks, provider) {
	  const template = await loadTemplate();

	  const ctxText = (contextChunks ?? [])
	    .map((c) => c?.content || c?.text || '')
	    .join('\n\n---\n\n');

	  // Function replacers insert the text literally; a plain string replacement
	  // would interpret "$&", "$'", "$1"... inside the question or context.
	  const prompt = template
	    .replace('{{QUESTION}}', () => question)
	    .replace('{{CONTEXT}}', () => ctxText);

	  const response = await provider.generate(prompt, { includeJson: true });

	  // Normalize provider output: string or { response, thinking, fullResponse }
	  const isObject = typeof response === 'object';
	  const raw = typeof response === 'string' ? response : response?.response ?? '';
	  const thinkingObj = isObject && response?.thinking ? response.thinking : null;
	  const rawJson =
	    isObject && response?.fullResponse
	      ? (({ context, ...rest }) => rest)(response.fullResponse)
	      : null;

	  // Prefer the structured thinking object if the provider supplied one.
	  let thought = thinkingObj;
	  let answer = raw?.trim?.() ?? raw;
	  let confidence = null;
	  let evidence = null;
	  let limitations = null;

	  const blockParsed = parseAnswerBlock(raw);
	  if (blockParsed?.answer) {
	    answer = blockParsed.answer;
	    confidence = blockParsed.confidence ?? confidence;
	    evidence = blockParsed.evidence ?? evidence;
	    limitations = blockParsed.limitations ?? limitations;
	    if (!thought) thought = extractThoughtBlock(raw) || null;
	  } else {
	    // Fallback: parse JSON if it's actually JSON.
	    const parsed = safeParseJson(raw);
	    if (parsed && typeof parsed === 'object') {
	      const reasoning = parsed.reasoning || parsed.REASONING;
	      if (Array.isArray(reasoning) && !thought) {
	        thought = reasoning.join(' ');
	      }

	      const ans = parsed.answer || parsed.ANSWER || parsed.final || parsed.output;
	      if (typeof ans === 'string') {
	        answer = ans.trim();
	      } else if (Array.isArray(ans)) {
	        answer = ans.join(' ').trim();
	      }

	      if (parsed.confidence != null) {
	        const num = Number(parsed.confidence);
	        if (Number.isFinite(num)) confidence = num;
	        else if (typeof parsed.confidence === 'string') confidence = parsed.confidence;
	      }

	      if (parsed.evidence) evidence = parsed.evidence;
	      if (parsed.limitations) limitations = parsed.limitations;
	    } else {
	      // Plain text: look for tagged thought sections, without overriding a
	      // structured thinking object.
	      if (!thought) thought = extractThoughtBlock(raw) || null;
	      const thinkMatch = typeof raw === 'string' ? raw.match(THINK_TAG_RE) : null;
	      thought = thought || (thinkMatch ? thinkMatch[1].trim() : null);
	      if (thinkMatch) {
	        // The answer is whatever follows the closing </think> tag.
	        answer = raw.slice(thinkMatch.index + thinkMatch[0].length).trim();
	      }
	    }
	  }

	  if (!thought && raw) {
	    thought = raw;
	  }

	  return {
	    raw,
	    thinking: thinkingObj,
	    thought,
	    answer,
	    confidence,
	    evidence,
	    limitations,
	    question,
	    context: contextChunks,
	    rawJson,
	  };
	}

	// Default export: an object exposing runGenerator, in addition to the named export.
	export default { runGenerator };