File size: 879 Bytes
e3aec01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import { AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.3.0';

/**
 * Thin wrapper around the Transformers.js AutoTokenizer: load a pretrained
 * tokenizer once, then use it to turn text into token IDs.
 * Coded by Jason Mayes 2026.
 */
export class Tokenizer {
  /** Underlying tokenizer instance; remains undefined until load() resolves. */
  tokenizer = undefined;

  /**
   * Fetches the pretrained tokenizer for the given ID and stores it on this
   * instance, replacing any previously loaded one.
   * @param {string} tokenizerId The ID of the pretrained tokenizer.
   * @return {Promise<void>} Settles once the tokenizer has been stored.
   */
  async load(tokenizerId) {
    const loaded = await AutoTokenizer.from_pretrained(tokenizerId);
    this.tokenizer = loaded;
  }

  /**
   * Runs the loaded tokenizer over the supplied text.
   * @param {string} text The text to encode.
   * @return {Promise<Array<number>>} Array of token IDs.
   * @throws {Error} If no tokenizer has been loaded yet.
   */
  async encode(text) {
    const { tokenizer } = this;
    if (tokenizer) {
      return await tokenizer.encode(text);
    }
    throw new Error('Tokenizer not loaded. Call load() first.');
  }
}