htaf committed on
Commit
b14f0ba
·
1 Parent(s): fad3187

verifier works better now

Browse files
src/pipeline/step.mjs CHANGED
@@ -158,11 +158,32 @@ export async function runPipelineStep({
158
 
159
  if (verbose) {
160
  if (gen?.thought) {
 
 
 
 
161
  log(' [generator] thought:');
162
- log(' ' + preview(gen.thought, 300).replace(/\n/g, '\n '));
163
  }
164
  log(' [generator] answer:');
165
  log(' ' + preview(gen?.answer ?? '', 400).replace(/\n/g, '\n '));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  }
167
  } catch (e) {
168
  const msg = e?.message || String(e);
 
158
 
159
  if (verbose) {
160
  if (gen?.thought) {
161
+ const thoughtPreview =
162
+ typeof gen.thought === 'string'
163
+ ? gen.thought
164
+ : JSON.stringify(gen.thought, null, 2);
165
  log(' [generator] thought:');
166
+ log(' ' + preview(thoughtPreview, 500).replace(/\n/g, '\n '));
167
  }
168
  log(' [generator] answer:');
169
  log(' ' + preview(gen?.answer ?? '', 400).replace(/\n/g, '\n '));
170
+ if (gen?.confidence) {
171
+ log(' [generator] confidence: ' + gen.confidence);
172
+ }
173
+ if (gen?.evidence) {
174
+ log(
175
+ ' [generator] evidence: ' +
176
+ preview(
177
+ Array.isArray(gen.evidence)
178
+ ? gen.evidence.join(' | ')
179
+ : String(gen.evidence),
180
+ 400,
181
+ ).replace(/\n/g, '\n '),
182
+ );
183
+ }
184
+ if (gen?.limitations) {
185
+ log(' [generator] limitations: ' + preview(gen.limitations, 200));
186
+ }
187
  }
188
  } catch (e) {
189
  const msg = e?.message || String(e);
src/verifier/verifier_core.mjs CHANGED
@@ -121,6 +121,19 @@ export async function runVerifier({ question, context, gen }, provider) {
121
  }
122
  }
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  return { raw, ok, score, reasoning, error };
125
  }
126
 
 
121
  }
122
  }
123
 
124
+ // Fallback: raw PASS/FAIL tokens even if parsing failed
125
+ if (!ok && typeof raw === 'string') {
126
+ if (/pass/i.test(raw) && !/fail/i.test(raw)) {
127
+ score = score ?? 'PASS';
128
+ ok = true;
129
+ error = null;
130
+ } else if (/fail/i.test(raw) && !/pass/i.test(raw)) {
131
+ score = score ?? 'FAIL';
132
+ ok = false;
133
+ error = null;
134
+ }
135
+ }
136
+
137
  return { raw, ok, score, reasoning, error };
138
  }
139
 
tests/verifier_core.test.mjs CHANGED
@@ -160,4 +160,24 @@ describe('verifier_core.mjs', () => {
160
  expect(res.score).toBe('PASS');
161
  expect(Array.isArray(res.reasoning) || res.reasoning == null).toBe(true);
162
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  });
 
160
  expect(res.score).toBe('PASS');
161
  expect(Array.isArray(res.reasoning) || res.reasoning == null).toBe(true);
162
  });
163
+
164
+ it('treats raw PASS token as ok', async () => {
165
+ const sample = {
166
+ question: 'What is love?',
167
+ context: [{ content: 'ctx' }],
168
+ gen: { answer: 'Reasonable answer', raw: 'Reasonable answer' },
169
+ };
170
+
171
+ const provider = {
172
+ generate: vi.fn(async () => 'PROMPT = PASS'),
173
+ };
174
+
175
+ const res = await runVerifier(
176
+ { question: sample.question, context: sample.context, gen: sample.gen },
177
+ provider,
178
+ );
179
+
180
+ expect(res.ok).toBe(true);
181
+ expect(res.score).toBe('PASS');
182
+ });
183
  });