import { test } from 'node:test';
import { strict as assert } from 'node:assert';

import { decodeBio, BIO_O, BIO_B, BIO_I } from '../src/bio.js';

/** Number of logit columns per token row (one each for O, B and I). */
const NUM_CLASSES = 3;

/**
 * Build flat (T, 3) logits from a list of class indices.
 *
 * Every entry starts at 0; the chosen class of each row is set to 5 so the
 * row's argmax is unambiguous.
 *
 * @param labels - One class index per token; each must be an integer in
 *   `[0, NUM_CLASSES)`.
 * @returns A row-major `Float32Array` of length `labels.length * 3`.
 * @throws RangeError if a label is not a valid class index (it would
 *   otherwise write into a neighbouring row or be silently dropped).
 */
function logitsFromLabels(labels: readonly number[]): Float32Array {
  const out = new Float32Array(labels.length * NUM_CLASSES);
  labels.forEach((label, t) => {
    if (!Number.isInteger(label) || label < 0 || label >= NUM_CLASSES) {
      throw new RangeError(`label at position ${t} is not a valid class index: ${label}`);
    }
    out[t * NUM_CLASSES + label] = 5;
  });
  return out;
}

test('decodeBio: B I I O → one span', () => {
  const logits = logitsFromLabels([BIO_B, BIO_I, BIO_I, BIO_O]);
  // Spans are [start, end] token positions with an inclusive end.
  assert.deepEqual(decodeBio(logits), [[0, 2]]);
});

test('decodeBio: orphan I is dropped', () => {
  // I at the start without a preceding B should be silently skipped
  // (matches Python valid_only=True).
  const logits = logitsFromLabels([BIO_I, BIO_I, BIO_O, BIO_B, BIO_I]);
  assert.deepEqual(decodeBio(logits), [[3, 4]]);
});

test('decodeBio: attention mask zeroes positions to O', () => {
  const logits = logitsFromLabels([BIO_B, BIO_I, BIO_I, BIO_B]);
  const attn = BigInt64Array.from([1n, 1n, 0n, 0n]);
  // Positions 2 and 3 are masked, so they contribute O. That leaves
  // B I → span [0, 1]; the trailing B at position 3 must not open a span.
  assert.deepEqual(decodeBio(logits, attn), [[0, 1]]);
});

test('decodeBio: threshold suppresses low-confidence spans', () => {
  // Probabilities quoted below are the softmax of each row's logits
  // (assumes decodeBio compares softmax probabilities against the
  // threshold — confirm against src/bio).
  const logits = new Float32Array(3 * NUM_CLASSES);

  // Row 0: logits (-1, 0, 0.4) → O ≈ 0.13, B ≈ 0.35, I ≈ 0.52.
  // I is the argmax, but an I without a preceding B is invalid, and B is
  // well below the 0.7 threshold used here, so no span may start at 0.
  logits[0] = -1;
  logits[1] = 0.0;
  logits[2] = 0.4;

  // Row 1: logits (2, -1, -1) → O.
  logits[3] = 2;
  logits[4] = -1;
  logits[5] = -1;

  // Row 2: logits (0, 2, 0) → B ≈ 0.79, above the 0.7 threshold.
  logits[6] = 0;
  logits[7] = 2;
  logits[8] = 0;

  const spans = decodeBio(logits, undefined, 0.7);

  // The high-confidence B at position 2 should be the only B; its
  // span runs to end-of-sequence (just position 2).
  assert.deepEqual(spans, [[2, 2]]);
});

test('decodeBio: empty input', () => {
  assert.deepEqual(decodeBio(new Float32Array(0)), []);
});