File size: 3,969 Bytes
5c5b371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import { getTokenizer } from "@anthropic-ai/tokenizer";
import { Tiktoken } from "tiktoken/lite";
import { AnthropicChatMessage } from "../api-schemas";
import { libSharp } from "../file-storage";
import { logger } from "../../logger";

const log = logger.child({ module: "tokenizer", service: "anthropic" });

// Shared tokenizer instance, assigned by `init()`. Every exported function
// below assumes `init()` has been called first — calling them earlier will
// fail on the unassigned encoder.
let encoder: Tiktoken;
// Pre-computed token cost of the "\n\nHuman: " / "\n\nAssistant: " message
// prefixes (filled in by `init()`), so per-message counting stays cheap.
let userRoleCount = 0;
let assistantRoleCount = 0;

export function init() {
  // they export a `countTokens` function too but it instantiates a new
  // tokenizer every single time and it is not fast...
  encoder = getTokenizer();
  userRoleCount = encoder.encode("\n\nHuman: ", "all").length;
  assistantRoleCount = encoder.encode("\n\nAssistant: ", "all").length;
  return true;
}

/**
 * Counts tokens for either a raw string prompt or a structured chat prompt
 * (system text plus Anthropic chat messages).
 * @returns The tokenizer identifier and the computed token count.
 * @throws If a string prompt exceeds the 800k-character safety limit.
 */
export async function getTokenCount(
  prompt: string | { system: string; messages: AnthropicChatMessage[] }
) {
  // Structured chat prompts are delegated to the per-message counter.
  if (typeof prompt !== "string") {
    return getTokenCountForMessages(prompt);
  }

  // Refuse pathologically large inputs before handing them to the encoder.
  if (prompt.length > 800000) {
    throw new Error("Content is too large to tokenize.");
  }

  const normalized = prompt.normalize("NFKC");
  const tokenCount = encoder.encode(normalized, "all").length;
  return { tokenizer: "@anthropic-ai/tokenizer", token_count: tokenCount };
}

/**
 * Counts tokens for a structured chat prompt: the system text, each message's
 * fixed role-prefix overhead, and each content part (text or image).
 * @throws If any text part exceeds 800k characters, the running total exceeds
 *   200k tokens, or a content part has an unrecognized type.
 */
async function getTokenCountForMessages({
  system,
  messages,
}: {
  system: string;
  messages: AnthropicChatMessage[];
}) {
  let numTokens = 0;

  // The system prompt is tokenized exactly like a plain string prompt.
  numTokens += (await getTokenCount(system)).token_count;

  for (const message of messages) {
    const { content, role } = message;
    // Fixed overhead for the "\n\nHuman: " / "\n\nAssistant: " prefix.
    numTokens += role === "user" ? userRoleCount : assistantRoleCount;

    // Normalize plain-string content into the array-of-parts shape.
    const parts = Array.isArray(content)
      ? content
      : [{ type: "text" as const, text: content }];

    for (const part of parts) {
      switch (part.type) {
        case "text": {
          // Braces scope this declaration to the case (no-case-declarations).
          const { text } = part;
          if (text.length > 800000 || numTokens > 200000) {
            throw new Error("Text content is too large to tokenize.");
          }
          numTokens += encoder.encode(text.normalize("NFKC"), "all").length;
          break;
        }
        case "image":
          numTokens += await getImageTokenCount(part.source.data);
          break;
        case "tool_use":
        case "tool_result":
          // Tool blocks currently contribute zero tokens. This undercounts,
          // but keeps tool-using prompts from erroring out.
          break;
        default:
          throw new Error(`Unsupported Anthropic content type.`);
      }
    }
  }

  // A trailing "\n\nAssistant: " prefix is added when the conversation does
  // not already end with an assistant turn. Guarding the lookup also handles
  // an empty `messages` array, which previously threw a TypeError here.
  const lastMessage = messages[messages.length - 1];
  if (!lastMessage || lastMessage.role !== "assistant") {
    numTokens += assistantRoleCount;
  }

  return { tokenizer: "@anthropic-ai/tokenizer", token_count: numTokens };
}

/**
 * Estimates the Claude Vision token cost of a base64-encoded image.
 * @param b64 - Base64 image data (no data-URI prefix).
 * @returns Estimated token count, rounded up.
 * @throws If the image cannot be parsed or has no width/height metadata.
 */
async function getImageTokenCount(b64: string) {
  // https://docs.anthropic.com/claude/docs/vision
  // If your image's long edge is more than 1568 pixels, or your image is more
  // than ~1600 tokens, it will first be scaled down, preserving aspect ratio,
  // until it is within size limits. Assuming your image does not need to be
  // resized, you can estimate the number of tokens used via this simple
  // algorithm:
  // tokens = (width px * height px)/750

  const buffer = Buffer.from(b64, "base64");
  const image = libSharp(buffer);
  const metadata = await image.metadata();

  if (!metadata || !metadata.width || !metadata.height) {
    throw new Error("Prompt includes an image that could not be parsed");
  }

  const MAX_TOKENS = 1600;
  const MAX_LENGTH_PX = 1568;
  const PIXELS_PER_TOKEN = 750;
  const { width, height } = metadata;
  let tokens = (width * height) / PIXELS_PER_TOKEN;

  // Simulate the API's downscaling to estimate the post-resize token cost.
  if (tokens > MAX_TOKENS || width > MAX_LENGTH_PX || height > MAX_LENGTH_PX) {
    const longestEdge = Math.max(width, height);

    let factor;
    if (tokens > MAX_TOKENS) {
      // Scale so total pixel count lands at the token budget.
      const targetPixels = PIXELS_PER_TOKEN * MAX_TOKENS;
      factor = Math.sqrt(targetPixels / (width * height));
    } else {
      // Scale so the longest edge fits within the pixel limit.
      factor = MAX_LENGTH_PX / longestEdge;
    }

    const scaledWidth = width * factor;
    const scaledHeight = height * factor;

    // Was a magic `750` here; use the named constant so the two formulas
    // cannot silently drift apart.
    tokens = (scaledWidth * scaledHeight) / PIXELS_PER_TOKEN;
  }

  log.debug({ width, height, tokens }, "Calculated Claude Vision token cost");
  return Math.ceil(tokens);
}