File size: 13,605 Bytes

6fb4bfe

/**
 * InfonCorefModel — the main public class.
 *
 * Wires together: tokenizer → backbone+BIO ONNX → BIO decode →
 * mention scorer ONNX → cluster grouping. Everything else in this
 * package is a helper for one of those stages.
 */

import { decodeBio } from './bio.js';
import { fetchHubFile, fetchHubJson, hubUrl } from './hub.js';
import { createSession, makeTensor, type OrtSession, type OrtTensor } from './ort.js';
import { buildPairs, groupClusters, pickAntecedents } from './pairs.js';
import { loadTokenizer, type Tokenizer } from './tokenizer.js';
import type {
  CorefResult,
  LoadedModel,
  Mention,
  ModelOptions,
  Token,
} from './types.js';

/** Files that must exist in an HF model repo for this client to load. */
interface RepoMeta {
  hidden_size: number;
  n_bio_classes: number;
  max_length: number;
  /** ``"<s>"`` for XLM-R, ``"[CLS]"`` for BERT. */
  cls_token: string;
  /** Filenames keyed by precision. */
  files: {
    backbone_fp32?: string;
    backbone_fp16?: string;
    scorer_fp32?: string;
    scorer_fp16?: string;
    tokenizer: string;
  };
}

/**
 * Loads + runs the multilingual coreference pointer model.
 *
 * @example Browser
 * ```ts
 * import { InfonCorefModel } from '@cp500/infon-coref';
 *
 * const model = await InfonCorefModel.fromHub('cp500/infon-coref-pointer', {
 *   precision: 'fp16',
 *   device: 'auto',  // tries WebGPU, falls back to WASM
 * });
 *
 * const result = await model.resolve(
 *   "Toyota announced a partnership with Panasonic. " +
 *   "The Japanese automaker said the deal is worth $250M."
 * );
 *
 * for (const cluster of result.clusters) {
 *   const mentions = cluster.map(i => result.mentions[i].text);
 *   console.log(mentions.join(' = '));
 *   // Toyota = The Japanese automaker
 * }
 * ```
 *
 * @example Node
 * ```ts
 * import { InfonCorefModel } from '@cp500/infon-coref';
 *
 * const model = await InfonCorefModel.fromLocal('./models/coref-pointer/');
 * const result = await model.resolve(text);
 * ```
 */
export class InfonCorefModel {
  /** Built via {@link fromHub} / {@link fromLocal}; do not construct
   * directly. */
  private constructor(private readonly loaded: LoadedModel) {}

  /** Load the model artefacts from a Hugging Face repo.
   *
   * Caches downloads in the browser Cache API (named
   * ``infon-coref-v1``); subsequent loads in the same origin are
   * instant. */
  static async fromHub(
    repo: string,
    opts: ModelOptions & { revision?: string } = {},
  ): Promise<InfonCorefModel> {
    const meta = await fetchHubJson<RepoMeta>({
      repo,
      path: 'meta.json',
      revision: opts.revision,
    });
    const precision = opts.precision ?? 'fp16';
    const backboneFile =
      precision === 'fp16'
        ? meta.files.backbone_fp16 ?? meta.files.backbone_fp32
        : meta.files.backbone_fp32 ?? meta.files.backbone_fp16;
    const scorerFile =
      precision === 'fp16'
        ? meta.files.scorer_fp16 ?? meta.files.scorer_fp32
        : meta.files.scorer_fp32 ?? meta.files.scorer_fp16;
    if (!backboneFile || !scorerFile) {
      throw new Error(
        `repo ${repo} is missing backbone/scorer ONNX files; check meta.json`,
      );
    }

    // The browser will follow .onnx → .onnx.data automatically when
    // ORT is given a URL — but ORT-web's URL loader doesn't always
    // negotiate external data correctly cross-origin. Fetching to
    // ArrayBuffer once and passing buffers in is more reliable.
    const [backboneBuf, scorerBuf, tokBuf] = await Promise.all([
      fetchHubFile({ repo, path: backboneFile, revision: opts.revision }),
      fetchHubFile({ repo, path: scorerFile, revision: opts.revision }),
      fetchHubFile({
        repo,
        path: meta.files.tokenizer,
        revision: opts.revision,
      }),
    ]);
    if (opts.debug) {
      const mb = (b: ArrayBuffer) => (b.byteLength / 1e6).toFixed(1);
      console.debug(
        `[infon-coref] loaded backbone ${mb(backboneBuf)} MB, ` +
          `scorer ${mb(scorerBuf)} MB, tokenizer ${mb(tokBuf)} MB`,
      );
    }

    return InfonCorefModel.#fromBuffers(
      backboneBuf,
      scorerBuf,
      tokBuf,
      meta,
      opts,
      `hub:${repo}`,
    );
  }

  /** Load the model from a local directory.
   *
   * Browser: ``baseUrl`` is a URL prefix (``/models/coref/``). Node:
   * a filesystem path (``./models/coref/``). The directory must
   * contain ``meta.json``, the ONNX files referenced therein, and
   * ``tokenizer.json``. */
  static async fromLocal(
    baseUrl: string,
    opts: ModelOptions = {},
  ): Promise<InfonCorefModel> {
    const isPath =
      typeof window === 'undefined' && !baseUrl.startsWith('http');
    const join = (name: string) =>
      isPath
        ? `${baseUrl.replace(/\/$/, '')}/${name}`
        : new URL(name, baseUrl.endsWith('/') ? baseUrl : baseUrl + '/').href;

    const meta = await loadJson<RepoMeta>(join('meta.json'), isPath);
    const precision = opts.precision ?? 'fp16';
    const backboneFile =
      precision === 'fp16'
        ? meta.files.backbone_fp16 ?? meta.files.backbone_fp32
        : meta.files.backbone_fp32 ?? meta.files.backbone_fp16;
    const scorerFile =
      precision === 'fp16'
        ? meta.files.scorer_fp16 ?? meta.files.scorer_fp32
        : meta.files.scorer_fp32 ?? meta.files.scorer_fp16;
    if (!backboneFile || !scorerFile) {
      throw new Error(
        `local model missing backbone/scorer ONNX files in meta.json`,
      );
    }

    const [backboneBuf, scorerBuf, tokBuf] = await Promise.all([
      loadBytes(join(backboneFile), isPath),
      loadBytes(join(scorerFile), isPath),
      loadBytes(join(meta.files.tokenizer), isPath),
    ]);
    return InfonCorefModel.#fromBuffers(
      backboneBuf,
      scorerBuf,
      tokBuf,
      meta,
      opts,
      `local:${baseUrl}`,
    );
  }

  static async #fromBuffers(
    backboneBuf: ArrayBuffer,
    scorerBuf: ArrayBuffer,
    tokBuf: ArrayBuffer,
    meta: RepoMeta,
    opts: ModelOptions,
    sourceTag: string,
  ): Promise<InfonCorefModel> {
    const device = opts.device ?? 'auto';
    const [backbone, scorer, tokenizer] = await Promise.all([
      createSession(new Uint8Array(backboneBuf), device),
      createSession(new Uint8Array(scorerBuf), device),
      loadTokenizer(new Uint8Array(tokBuf)),
    ]);
    return new InfonCorefModel({
      backbone,
      scorer,
      tokenizer,
      meta: {
        hiddenSize: meta.hidden_size,
        nBioClasses: meta.n_bio_classes,
        maxLength: opts.maxLength ?? meta.max_length,
        precision: opts.precision ?? 'fp16',
        device:
          device === 'auto'
            ? typeof window !== 'undefined'
              ? 'auto-browser'
              : 'auto-node'
            : device,
      },
    });
  }

  /** Run end-to-end coreference resolution on a single document. */
  async resolve(text: string, opts: ModelOptions = {}): Promise<CorefResult> {
    const debug = opts.debug ?? false;
    const t0 = nowMs();

    // 1. Tokenize.
    const enc = this.loaded.tokenizer.tokenize(text, {
      maxLength: opts.maxLength ?? this.loaded.meta.maxLength,
    });
    const T = enc.inputIds.length;
    const t1 = nowMs();

    // 2. Backbone forward → (last_hidden_state, bio_logits).
    const idsT = await makeTensor('int64', enc.inputIds, [1, T]);
    const maskT = await makeTensor('int64', enc.attentionMask, [1, T]);
    const bbOut = await this.loaded.backbone.run({
      input_ids: idsT,
      attention_mask: maskT,
    });
    const hiddenTensor = bbOut.last_hidden_state ?? bbOut.hidden_states;
    const bioTensor = bbOut.bio_logits;
    if (!hiddenTensor || !bioTensor) {
      throw new Error(
        `backbone outputs missing; got: [${Object.keys(bbOut).join(', ')}]`,
      );
    }
    const t2 = nowMs();

    // 3. Decode BIO into wordpiece spans.
    const bioLogits = floatArray(bioTensor);
    const spans = decodeBio(
      bioLogits,
      enc.attentionMask,
      opts.bioThreshold,
    );
    const t3 = nowMs();

    if (spans.length === 0) {
      // No mentions detected — short-circuit so we don't run the
      // scorer with empty inputs (some ORT EPs choke on M=0).
      return this.#emptyResult(text, enc.tokens, [t0, t1, t2, t3]);
    }

    // 4. Build pair tensors + run scorer.
    const M = spans.length;
    const starts = BigInt64Array.from(spans.map(([s]) => BigInt(s)));
    const ends = BigInt64Array.from(spans.map(([, e]) => BigInt(e)));
    const [pairI, pairJ] = buildPairs(M);

    // Hidden is (1, T, H); the scorer wants (T, H).
    const hiddenFlat = floatArray(hiddenTensor);
    const H = this.loaded.meta.hiddenSize;
    const hiddenT = await makeTensor('float32', hiddenFlat, [T, H]);
    const startsT = await makeTensor('int64', starts, [M]);
    const endsT = await makeTensor('int64', ends, [M]);
    const piT = await makeTensor('int64', pairI, [pairI.length]);
    const pjT = await makeTensor('int64', pairJ, [pairJ.length]);
    const scOut = await this.loaded.scorer.run({
      hidden: hiddenT,
      span_starts: startsT,
      span_ends: endsT,
      pair_i: piT,
      pair_j: pjT,
    });
    const scoresTensor = scOut.pair_scores;
    if (!scoresTensor) {
      throw new Error(
        `scorer output missing 'pair_scores'; got [${Object.keys(scOut).join(', ')}]`,
      );
    }
    const scores = floatArray(scoresTensor);
    const t4 = nowMs();

    // 5. Per-mention argmax + cluster grouping.
    const decisions = pickAntecedents(M, pairI, pairJ, scores);
    const grouping = groupClusters(decisions);

    // 6. Project wordpiece spans to char offsets via the tokenizer's
    //    offset map.
    const mentions: Mention[] = spans.map(([wstart, wend], i) => {
      const sTok = enc.tokens[wstart];
      const eTok = enc.tokens[wend];
      const charStart = sTok?.start ?? 0;
      const charEnd = eTok?.end ?? charStart;
      return {
        start: wstart,
        end: wend,
        charStart,
        charEnd,
        text: text.slice(charStart, charEnd),
        cluster: grouping.cluster[i],
        antecedent: decisions[i].antecedent - 1, // 0-based; -1 = DUMMY
      };
    });

    if (debug) {
      console.debug('[infon-coref] timings (ms)', {
        tokenize: t1 - t0,
        backbone: t2 - t1,
        bioDecode: t3 - t2,
        scorer: t4 - t3,
        total: t4 - t0,
      });
    }

    return {
      text,
      tokens: enc.tokens,
      mentions,
      clusters: grouping.clusters,
      timing: {
        tokenize: t1 - t0,
        backbone: t2 - t1,
        bioDecode: t3 - t2,
        scorer: t4 - t3,
        total: t4 - t0,
      },
    };
  }

  /** Architecture metadata loaded from the repo's ``meta.json``. */
  get meta() {
    return this.loaded.meta;
  }

  #emptyResult(
    text: string,
    tokens: Token[],
    [t0, t1, t2, t3]: number[],
  ): CorefResult {
    return {
      text,
      tokens,
      mentions: [],
      clusters: [],
      timing: {
        tokenize: t1 - t0,
        backbone: t2 - t1,
        bioDecode: t3 - t2,
        scorer: 0,
        total: t3 - t0,
      },
    };
  }
}

// ── Internal helpers ────────────────────────────────────────────────

function nowMs(): number {
  return typeof performance !== 'undefined'
    ? performance.now()
    : Date.now();
}

function floatArray(t: OrtTensor): Float32Array {
  // FP32 tensors are Float32Array; FP16 ORT tensors are Uint16Array
  // bit-packed half-floats. We only run the scorer in FP32 currently
  // (it's tiny and FP16 buys nothing), but the backbone may return
  // FP16 hidden states — promote to Float32 so the scorer feed is
  // shape-correct. For FP16 → FP32 we use a quick bit-twiddle (no
  // dependency on a runtime fp16 polyfill).
  if (t.data instanceof Float32Array) return t.data;
  if (t.data instanceof Uint16Array) {
    const out = new Float32Array(t.data.length);
    for (let i = 0; i < t.data.length; i++) {
      out[i] = halfToFloat(t.data[i]);
    }
    return out;
  }
  throw new Error(
    `expected Float32Array or Uint16Array tensor, got ${(t.data as { constructor: { name: string } }).constructor.name}`,
  );
}

/** IEEE 754 half → single. Fast enough for the per-token volume we
 * see (a few thousand floats per doc); for very long inputs prefer
 * an FP16 backbone variant that ORT itself converts at the boundary. */
function halfToFloat(h: number): number {
  const sign = (h & 0x8000) >> 15;
  const exp = (h & 0x7c00) >> 10;
  const frac = h & 0x03ff;
  if (exp === 0) {
    return (sign ? -1 : 1) * Math.pow(2, -14) * (frac / 1024);
  } else if (exp === 0x1f) {
    return frac ? NaN : (sign ? -1 : 1) * Infinity;
  }
  return (sign ? -1 : 1) * Math.pow(2, exp - 15) * (1 + frac / 1024);
}

async function loadJson<T>(path: string, isFsPath: boolean): Promise<T> {
  if (isFsPath) {
    const fs = await import('node:fs/promises');
    return JSON.parse(await fs.readFile(path, 'utf-8')) as T;
  }
  const r = await fetch(path);
  if (!r.ok) throw new Error(`fetch ${path}: ${r.status}`);
  return (await r.json()) as T;
}

async function loadBytes(
  path: string,
  isFsPath: boolean,
): Promise<ArrayBuffer> {
  if (isFsPath) {
    const fs = await import('node:fs/promises');
    const buf = await fs.readFile(path);
    return buf.buffer.slice(
      buf.byteOffset,
      buf.byteOffset + buf.byteLength,
    );
  }
  const r = await fetch(path);
  if (!r.ok) throw new Error(`fetch ${path}: ${r.status}`);
  return await r.arrayBuffer();
}