// distill-pipeline / src/verifier/verifier_core.mjs
// Last commit b14f0ba (htaf): "verifier works better now"
// src/verifier/verifier_core.mjs
import fs from "fs/promises";
import path from "path";
/**
 * Read the verifier prompt template as UTF-8 text.
 *
 * The template lives at `prompts/verifier_prompt.txt`, two directories above
 * the directory that contains this module.
 *
 * The location is resolved as a URL relative to `import.meta.url` and handed
 * to `fs.readFile` as a URL object. Going through `URL#pathname` instead would
 * yield a percent-encoded string (e.g. `%20` for spaces) and, on Windows, a
 * `/C:/...` shaped path, both of which point at the wrong file.
 *
 * @returns {Promise<string>} The raw template text.
 * @throws Rejects with the underlying `fs` error if the file cannot be read.
 */
async function loadTemplate() {
  const templateUrl = new URL(
    "../../prompts/verifier_prompt.txt",
    import.meta.url
  );
  return fs.readFile(templateUrl, "utf8");
}
// Numeric scores at or above this value count as a pass.
const PASS_THRESHOLD = 0.5;

/** Return `JSON.parse(txt)`, or `null` when `txt` is not valid JSON. */
function safeParse(txt) {
  try {
    return JSON.parse(txt);
  } catch {
    // Malformed model output is expected here; the caller handles `null`.
    return null;
  }
}

/**
 * Parse model output that is supposed to be JSON but may be wrapped in extra
 * text or contain unquoted PASS/FAIL tokens.
 *
 * @param {unknown} text - Raw provider output.
 * @returns {*} The parsed value, or `null` when nothing parseable was found.
 */
function parseJsonLoose(text) {
  if (!text || typeof text !== 'string') return null;

  // Try the text as-is first.
  const direct = text.trim();
  const parsedDirect = safeParse(direct);
  if (parsedDirect) return parsedDirect;

  // Heuristic: keep only the substring between the first `{` and last `}`.
  const start = direct.indexOf('{');
  const end = direct.lastIndexOf('}');
  let candidate =
    start !== -1 && end > start ? direct.slice(start, end + 1) : direct;

  // Turn a stray `PROMPT = ...` prefix into a JSON key.
  candidate = candidate.replace(/^\s*PROMPT\s*=/i, '"PROMPT":');

  // Quote bare PASS/FAIL tokens after a (quoted or unquoted) SCORE key.
  candidate = candidate
    .replace(/"SCORE"\s*:\s*PASS/gi, '"SCORE":"PASS"')
    .replace(/"SCORE"\s*:\s*FAIL/gi, '"SCORE":"FAIL"')
    .replace(/\bSCORE\s*:\s*PASS\b/gi, '"SCORE":"PASS"')
    .replace(/\bSCORE\s*:\s*FAIL\b/gi, '"SCORE":"FAIL"');

  // Wrap in braces if they are still missing.
  const trimmed = candidate.trim();
  if (!trimmed.startsWith('{')) candidate = `{${candidate}`;
  if (!trimmed.endsWith('}')) candidate = `${candidate}}`;

  return safeParse(candidate);
}

/**
 * Turn a parsed SCORE value into a verdict.
 *
 * @param {unknown} value - The `SCORE` field of the parsed output.
 * @returns {{score: string|number, ok: boolean}|null} `null` when the value is
 *   neither PASS/FAIL nor a finite number.
 */
function interpretScore(value) {
  if (typeof value === 'number') {
    return { score: value, ok: value >= PASS_THRESHOLD };
  }
  if (typeof value !== 'string') return null;

  const token = value.trim().toLowerCase();
  if (token === 'pass') return { score: 'PASS', ok: true };
  if (token === 'fail') return { score: 'FAIL', ok: false };
  // `Number('')` is 0, so an empty score must be rejected explicitly.
  if (token === '') return null;

  const num = Number(value);
  return Number.isFinite(num) ? { score: num, ok: num >= PASS_THRESHOLD } : null;
}

/**
 * Last-resort verdict extraction from free text.
 *
 * A PASS/FAIL token written directly after a SCORE key wins. Otherwise the
 * text must mention exactly one of "pass" / "fail"; if it mentions both (or
 * neither) the result is ambiguous and `null` is returned rather than guessing.
 *
 * @param {string} raw - Raw provider output.
 * @returns {{score: string, ok: boolean}|null}
 */
function scanForVerdict(raw) {
  const explicit = /\bSCORE\b["'\s]*[:=]\s*["']?(PASS|FAIL)\b/i.exec(raw);
  if (explicit) {
    const score = explicit[1].toUpperCase();
    return { score, ok: score === 'PASS' };
  }

  const mentionsPass = /pass/i.test(raw);
  const mentionsFail = /fail/i.test(raw);
  if (mentionsPass === mentionsFail) return null;
  return mentionsPass ? { score: 'PASS', ok: true } : { score: 'FAIL', ok: false };
}

/**
 * Ask the provider to verify a generated answer and interpret its reply.
 *
 * The prompt is built from the template returned by `loadTemplate()` by
 * filling the `{question}`, `{answer}` and `{context}` placeholders. The reply
 * is expected to look like `{"REASONING": ..., "SCORE": ...}` where SCORE is
 * `PASS`/`FAIL` or a number (pass when >= 0.5).
 *
 * Verdict precedence:
 *   1. A SCORE field from successfully parsed JSON is authoritative.
 *   2. Only when that is unavailable is the raw text scanned for a verdict.
 *
 * @param {object} input
 * @param {string} input.question - Question that was answered.
 * @param {Array<string|{content?: string, text?: string}>} input.context -
 *   Context chunks; each contributes its `content`, else `text`, else nothing.
 * @param {{answer?: string}} input.gen - Generation result holding the answer.
 * @param {{generate: (prompt: string) => Promise<*>}} provider - Model client.
 * @returns {Promise<{raw: *, ok: boolean, score: string|number|null,
 *   reasoning: *, error: string|null}>} `error` is `'invalid_json'` or
 *   `'missing_score'` when no verdict could be recovered, otherwise `null`.
 */
export async function runVerifier({ question, context, gen }, provider) {
  const tmpl = await loadTemplate();

  const chunks = Array.isArray(context) ? context : [];
  const ctxText = chunks
    .map((c) => (typeof c === 'string' ? c : c?.content || c?.text || ''))
    .join("\n\n---\n\n");

  const fields = {
    question: String(question ?? ''),
    answer: String(gen?.answer || ''),
    context: ctxText,
  };
  // Single pass with a replacer function: `$&`, `$1`, ... in user text stay
  // literal, and placeholder-looking text inside a value is not re-expanded.
  const prompt = tmpl.replace(
    /\{(question|answer|context)\}/g,
    (_match, key) => fields[key]
  );

  const raw = await provider.generate(prompt);

  let verdict = null;
  let reasoning = null;
  let error = null;

  const parsed = parseJsonLoose(raw);
  if (!parsed) {
    error = 'invalid_json';
  } else {
    reasoning = parsed?.REASONING ?? null;
    if (Object.prototype.hasOwnProperty.call(parsed, 'SCORE')) {
      verdict = interpretScore(parsed.SCORE);
    } else {
      error = 'missing_score';
    }
  }

  // Fall back to scanning the text only when structured parsing gave no
  // verdict, so a parsed FAIL / low score can never be flipped to a pass.
  if (!verdict && typeof raw === 'string') {
    verdict = scanForVerdict(raw);
    if (verdict) error = null;
  }

  return {
    raw,
    ok: verdict ? verdict.ok : false,
    score: verdict ? verdict.score : null,
    reasoning,
    error,
  };
}
// Default export is an object wrapping the named export, so a default import
// exposes the same function as `.runVerifier`.
export default { runVerifier };