File size: 1,009 Bytes
5c5b371 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import * as tokenizer from "./mistral-tokenizer-js";
import { MistralAIChatMessage } from "../api-schemas";
/**
 * Initializes the bundled Mistral tokenizer by calling
 * `initializemistralTokenizer()` on the imported tokenizer module.
 *
 * The initializer's result is not inspected or awaited here.
 *
 * @returns Always `true` once the initializer call has returned.
 */
export function init(): boolean {
  tokenizer.initializemistralTokenizer();

  return true;
}
/**
 * Counts the tokens in a Mistral prompt.
 *
 * A plain string is counted as-is. A list of chat messages is first flattened
 * into one string: system content is used verbatim, assistant content gets a
 * trailing `</s>`, user content is wrapped as `[INST] ... [/INST]`, and the
 * resulting pieces are joined with single spaces. Messages with any other
 * role contribute nothing to the flattened text.
 *
 * @param prompt Raw prompt text, or the chat messages to flatten and count.
 * @returns Whatever `getTextTokenCount` returns for the resulting text.
 */
export function getTokenCount(prompt: MistralAIChatMessage[] | string) {
  // Raw text needs no chat formatting.
  if (typeof prompt === "string") return getTextTokenCount(prompt);

  const segments = [];
  for (const msg of prompt) {
    if (msg.role === "system") {
      segments.push(msg.content);
    } else if (msg.role === "assistant") {
      segments.push(`${msg.content}</s>`);
    } else if (msg.role === "user") {
      segments.push(`[INST] ${msg.content} [/INST]`);
    }
    // Any other role is intentionally left out of the flattened text.
  }

  const flattened = segments.join(" ");
  return getTextTokenCount(flattened);
}
/**
 * Counts the tokens in a piece of text using the Mistral tokenizer.
 *
 * The length limit is checked against the raw input; the text is then
 * NFKC-normalized before being passed to `tokenizer.encode`.
 *
 * @param prompt Text to tokenize.
 * @returns The tokenizer identifier and the number of tokens produced.
 * @throws Error if `prompt.length` exceeds 800,000, or if the tokenizer
 *   produces no result.
 */
function getTextTokenCount(prompt: string) {
  // Refuse oversized input up front instead of handing it to the tokenizer.
  const maxPromptLength = 800000;
  if (prompt.length > maxPromptLength) {
    throw new Error("Content is too large to tokenize.");
  }

  const tokens = tokenizer.encode(prompt.normalize("NFKC"));
  // NOTE(review): the previous code used a non-null assertion here, which
  // suggests encode() is typed as possibly returning nothing (presumably when
  // the tokenizer has not been initialized; confirm against the tokenizer
  // module). Fail with a descriptive error rather than an opaque TypeError.
  if (tokens == null) {
    throw new Error(
      "Mistral tokenizer returned no tokens; has init() been called?"
    );
  }

  return {
    tokenizer: "mistral-tokenizer-js",
    token_count: tokens.length,
  };
}
|