File size: 1,009 Bytes
5c5b371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import * as tokenizer from "./mistral-tokenizer-js";
import { MistralAIChatMessage } from "../api-schemas";

/**
 * Prepares the Mistral tokenizer module for use.
 *
 * Delegates to `initializemistralTokenizer()` on the imported tokenizer
 * module and then reports success.
 *
 * @returns Always `true` once the underlying initializer has returned.
 */
export function init() {
  const ready = true;
  tokenizer.initializemistralTokenizer();
  return ready;
}

/**
 * Counts tokens for either a raw string or a list of chat messages.
 *
 * A string is counted as-is. A message list is first flattened into one
 * string, with each message rendered according to its role:
 *   - "system":    the content unchanged
 *   - "assistant": the content followed by "</s>"
 *   - "user":      the content wrapped as "[INST] ... [/INST]"
 * Messages with any other role contribute nothing. The rendered pieces are
 * joined with single spaces and the result is handed to `getTextTokenCount`.
 *
 * @param prompt - Raw text, or the chat messages to flatten and count.
 * @returns Whatever `getTextTokenCount` returns for the resulting text.
 */
export function getTokenCount(prompt: MistralAIChatMessage[] | string) {
  if (typeof prompt !== "string") {
    const segments = [];
    for (const message of prompt) {
      const role = message.role;
      if (role === "system") {
        segments.push(message.content);
      } else if (role === "assistant") {
        segments.push(message.content + "</s>");
      } else if (role === "user") {
        segments.push("[INST] " + message.content + " [/INST]");
      }
      // Any other role is intentionally skipped, as there is no rendering for it.
    }
    const flattened = segments.join(" ");
    return getTextTokenCount(flattened);
  }

  return getTextTokenCount(prompt);
}

/**
 * Counts the tokens in a single string using mistral-tokenizer-js.
 *
 * The text is NFKC-normalized before being encoded. Input is rejected when it
 * exceeds the size limit either before or after normalization, so the
 * tokenizer never receives more than the limit.
 *
 * @param prompt - The text to tokenize.
 * @returns The tokenizer name and the number of tokens produced.
 * @throws Error if the text is longer than 800000 UTF-16 code units, raw or
 *   normalized, or if the tokenizer returns no result.
 */
function getTextTokenCount(prompt: string) {
  // Upper bound on input size, measured with `String.length`.
  const maxLength = 800000;
  const tooLargeMessage = "Content is too large to tokenize.";

  // Cheap early rejection so clearly oversized input is never normalized.
  if (prompt.length > maxLength) {
    throw new Error(tooLargeMessage);
  }

  // NFKC compatibility decomposition can expand text (a single code point may
  // become many), so the limit is enforced again on what is actually encoded.
  const normalized = prompt.normalize("NFKC");
  if (normalized.length > maxLength) {
    throw new Error(tooLargeMessage);
  }

  // Guard explicitly instead of using a non-null assertion, so a missing
  // result surfaces as a descriptive error rather than an opaque TypeError.
  const tokens = tokenizer.encode(normalized);
  if (tokens == null) {
    throw new Error("mistral-tokenizer-js failed to encode the prompt.");
  }

  return {
    tokenizer: "mistral-tokenizer-js",
    token_count: tokens.length,
  };
}