import * as tokenizer from "./mistral-tokenizer-js";
import { MistralAIChatMessage } from "../api-schemas";

// Reject absurdly large payloads before tokenizing; tokenization cost grows
// with input length and very large prompts can stall the event loop.
const MAX_TOKENIZABLE_LENGTH = 800_000;

/**
 * Initializes the underlying Mistral tokenizer.
 * Must be called once before {@link getTokenCount} is used.
 *
 * @returns `true` once initialization has been triggered.
 */
export function init(): boolean {
  tokenizer.initializemistralTokenizer();
  return true;
}

/**
 * Counts tokens for either a raw string or a Mistral chat message array.
 *
 * Chat messages are flattened into a single string before tokenizing:
 * - `system` messages are included as-is,
 * - `assistant` messages are included as-is,
 * - `user` messages are wrapped in `[INST] ... [/INST]` instruction tags.
 *
 * NOTE(review): messages whose role is none of the three above are silently
 * dropped from the count — confirm `MistralAIChatMessage` cannot carry other
 * roles (e.g. tool results), or counts will understate the prompt size.
 *
 * @param prompt - A plain prompt string, or an ordered chat message array.
 * @returns The tokenizer name and the token count of the flattened prompt.
 * @throws {Error} If the flattened prompt exceeds the tokenizable size limit
 *   or the tokenizer fails to encode it.
 */
export function getTokenCount(prompt: MistralAIChatMessage[] | string) {
  if (typeof prompt === "string") {
    return getTextTokenCount(prompt);
  }

  const chunks: string[] = [];
  for (const message of prompt) {
    switch (message.role) {
      case "system":
        chunks.push(message.content);
        break;
      case "assistant":
        // Was `message.content + ""` — a no-op concatenation, removed.
        chunks.push(message.content);
        break;
      case "user":
        chunks.push("[INST] " + message.content + " [/INST]");
        break;
    }
  }
  return getTextTokenCount(chunks.join(" "));
}

/**
 * Tokenizes a single string and returns its token count.
 *
 * The input is NFKC-normalized before encoding so visually-equivalent
 * Unicode sequences tokenize consistently.
 *
 * @param prompt - The text to tokenize.
 * @returns The tokenizer name and the token count.
 * @throws {Error} If the input exceeds the size limit or encoding fails.
 */
function getTextTokenCount(prompt: string) {
  if (prompt.length > MAX_TOKENIZABLE_LENGTH) {
    throw new Error("Content is too large to tokenize.");
  }

  // Guard explicitly instead of using a non-null assertion (`!`) so a
  // tokenizer failure surfaces as a descriptive error, not a TypeError.
  const encoded = tokenizer.encode(prompt.normalize("NFKC"));
  if (!encoded) {
    throw new Error("Mistral tokenizer failed to encode content.");
  }

  return {
    tokenizer: "mistral-tokenizer-js",
    token_count: encoded.length,
  };
}