import { getTokenizer } from "@anthropic-ai/tokenizer";
import { Tiktoken } from "tiktoken/lite";
import { AnthropicChatMessage } from "../api-schemas";
import { libSharp } from "../file-storage";
import { logger } from "../../logger";

const log = logger.child({ module: "tokenizer", service: "anthropic" });
let encoder: Tiktoken;
let userRoleCount = 0;
let assistantRoleCount = 0;

export function init() {
// The library also exports a `countTokens` helper, but it instantiates a new
// tokenizer on every call, which is slow, so we cache a single instance here.
encoder = getTokenizer();
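// Pre-compute the token cost of the role prefixes prepended to each turn so
// message counting doesn't have to re-encode them every time.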
userRoleCount = encoder.encode("\n\nHuman: ", "all").length;
assistantRoleCount = encoder.encode("\n\nAssistant: ", "all").length;
return true;
}
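
// Counts tokens for a plain string prompt or an Anthropic chat prompt.
// Assumes `init()` has already been called so the shared encoder exists.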
export async function getTokenCount(
prompt: string | { system: string; messages: AnthropicChatMessage[] }
) {
if (typeof prompt !== "string") {
return getTokenCountForMessages(prompt);
}
if (prompt.length > 800000) {
throw new Error("Content is too large to tokenize.");
}
return {
tokenizer: "@anthropic-ai/tokenizer",
token_count: encoder.encode(prompt.normalize("NFKC"), "all").length,
};
}
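
// Sums the fixed role-prefix token counts plus the tokens in each text and
// image part. `tool_use`/`tool_result` blocks are currently counted as zero.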
async function getTokenCountForMessages({
system,
messages,
}: {
system: string;
messages: AnthropicChatMessage[];
}) {
let numTokens = 0;
numTokens += (await getTokenCount(system)).token_count;
for (const message of messages) {
const { content, role } = message;
numTokens += role === "user" ? userRoleCount : assistantRoleCount;
const parts = Array.isArray(content)
? content
: [{ type: "text" as const, text: content }];
for (const part of parts) {
switch (part.type) {
case "text":
const { text } = part;
if (text.length > 800000 || numTokens > 200000) {
throw new Error("Text content is too large to tokenize.");
}
numTokens += encoder.encode(text.normalize("NFKC"), "all").length;
break;
case "image":
numTokens += await getImageTokenCount(part.source.data);
break;
case "tool_use":
case "tool_result":
break;
default:
throw new Error("Unsupported Anthropic content type.");
}
}
}
if (messages[messages.length - 1].role !== "assistant") {
numTokens += assistantRoleCount;
}
return { tokenizer: "@anthropic-ai/tokenizer", token_count: numTokens };
}

async function getImageTokenCount(b64: string) {
// https://docs.anthropic.com/claude/docs/vision
// If your image's long edge is more than 1568 pixels, or your image is more
// than ~1600 tokens, it will first be scaled down, preserving aspect ratio,
// until it is within size limits. Assuming your image does not need to be
// resized, you can estimate the number of tokens used via this simple
// algorithm:
// tokens = (width px * height px)/750
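// For example, a 1000x1000 px image is within the limits and works out to
// 1,000,000 / 750 ≈ 1,334 tokens, while a 2000x2000 px image exceeds both
// limits and is estimated at its scaled-down size instead.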
const buffer = Buffer.from(b64, "base64");
const image = libSharp(buffer);
const metadata = await image.metadata();
if (!metadata || !metadata.width || !metadata.height) {
throw new Error("Prompt includes an image that could not be parsed");
}
const MAX_TOKENS = 1600;
const MAX_LENGTH_PX = 1568;
const PIXELS_PER_TOKEN = 750;
const { width, height } = metadata;
let tokens = (width * height) / PIXELS_PER_TOKEN;
// If the image exceeds Anthropic's limits, estimate the dimensions it would
// be scaled down to and base the token count on those instead.
if (tokens > MAX_TOKENS || width > MAX_LENGTH_PX || height > MAX_LENGTH_PX) {
const longestEdge = Math.max(width, height);
let factor;
if (tokens > MAX_TOKENS) {
const targetPixels = PIXELS_PER_TOKEN * MAX_TOKENS;
factor = Math.sqrt(targetPixels / (width * height));
} else {
factor = MAX_LENGTH_PX / longestEdge;
}
const scaledWidth = width * factor;
const scaledHeight = height * factor;
tokens = (scaledWidth * scaledHeight) / PIXELS_PER_TOKEN;
}
log.debug({ width, height, tokens }, "Calculated Claude Vision token cost");
return Math.ceil(tokens);
}
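
// Minimal usage sketch (illustrative only; assumes `init()` has run at startup):
//
//   init();
//   const { token_count } = await getTokenCount({
//     system: "You are a helpful assistant.",
//     messages: [{ role: "user", content: "Hello, Claude." }],
//   });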