Spaces:
Running
Running
File size: 879 Bytes
e3aec01 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | import { AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.3.0';
/**
 * Thin wrapper that owns a single Transformers.js tokenizer instance.
 *
 * The instance starts out empty; `load()` must complete before `encode()`
 * can be used. The loaded tokenizer is kept on the public `tokenizer`
 * property.
 *
 * Coded by Jason Mayes 2026.
 */
export class Tokenizer {
  constructor() {
    // Stays undefined until load() resolves; encode() checks this.
    this.tokenizer = undefined;
  }

  /**
   * Fetches a pretrained tokenizer via `AutoTokenizer.from_pretrained` and
   * stores it on this instance.
   *
   * @param {string} tokenizerId The ID of the pretrained tokenizer.
   * @return {Promise<void>} Resolves once the tokenizer has been stored.
   */
  async load(tokenizerId) {
    const pretrained = await AutoTokenizer.from_pretrained(tokenizerId);
    this.tokenizer = pretrained;
  }

  /**
   * Runs the loaded tokenizer's `encode` on the given text.
   *
   * @param {string} text The text to encode.
   * @return {Promise<Array<number>>} Array of token IDs, as produced by the
   *     underlying tokenizer's `encode`.
   * @throws {Error} If no tokenizer has been loaded yet.
   */
  async encode(text) {
    const { tokenizer } = this;
    if (tokenizer) {
      // Awaited so the result is unwrapped whether encode is sync or async.
      const tokenIds = await tokenizer.encode(text);
      return tokenIds;
    }
    throw new Error('Tokenizer not loaded. Call load() first.');
  }
}
|