File size: 7,358 Bytes
5c5b371 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
import { Tiktoken } from "tiktoken/lite";
import cl100k_base from "tiktoken/encoders/cl100k_base.json";
import { logger } from "../../logger";
import { libSharp } from "../file-storage";
import { GoogleAIChatMessage, OpenAIChatMessage } from "../api-schemas";
// Child logger created with the bindings module "tokenizer" and service "openai".
const log = logger.child({ module: "tokenizer", service: "openai" });
// Token count that getTokenCount starts from when the model id contains "vision".
const GPT4_VISION_SYSTEM_PROMPT_SIZE = 170;
// Shared tiktoken encoder used by every counting function in this module;
// it is unassigned until init() runs.
let encoder: Tiktoken;
/**
 * Builds the module-wide tiktoken encoder from the bundled cl100k_base
 * ranks, special tokens and pattern string.
 *
 * @returns Always `true`.
 */
export function init() {
  const { bpe_ranks, special_tokens, pat_str } = cl100k_base;
  encoder = new Tiktoken(bpe_ranks, special_tokens, pat_str);
  return true;
}
// Tested against:
// https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
/**
 * Counts the prompt tokens of an OpenAI request.
 *
 * A plain string prompt is delegated to `getTextTokenCount`. A chat prompt is
 * counted message by message: a fixed per-message overhead, the encoded length
 * of every truthy field except `function_call`, the cost of any inline images,
 * and finally the three tokens that prime the assistant reply.
 *
 * @param prompt Raw text, or the list of chat messages.
 * @param model Model id. An id containing "turbo-0301" selects the legacy
 *   per-message/per-name overheads; an id containing "vision" adds the fixed
 *   vision system prompt size.
 * @returns The tokenizer label and the token count.
 * @throws Error when a single field is longer than 800000 characters or the
 *   running total is above 200000 tokens, plus anything thrown by
 *   `getGpt4VisionTokenCost` for an image it rejects.
 */
export async function getTokenCount(
  prompt: string | OpenAIChatMessage[],
  model: string
) {
  if (typeof prompt === "string") {
    return getTextTokenCount(prompt);
  }

  // The snapshot is published as "gpt-3.5-turbo-0301", so a prefix test for
  // "turbo-0301" can never match it; look for the suffix anywhere in the id.
  const oldFormatting = model.includes("turbo-0301");
  const vision = model.includes("vision");

  const tokensPerMessage = oldFormatting ? 4 : 3;
  const tokensPerName = oldFormatting ? -1 : 1; // older formatting replaces role with name if name is present

  let numTokens = vision ? GPT4_VISION_SYSTEM_PROMPT_SIZE : 0;

  for (const message of prompt) {
    numTokens += tokensPerMessage;

    for (const key of Object.keys(message)) {
      const value = message[key as keyof OpenAIChatMessage];
      if (!value) continue;
      if (key === "function_call") continue;

      let textContent = "";
      if (Array.isArray(value)) {
        // Multi-part content: concatenate the text parts and price each image.
        for (const item of value) {
          if (item.type === "text") {
            textContent += item.text;
          } else if (["image", "image_url"].includes(item.type)) {
            const { url, detail } = item.image_url;
            const cost = await getGpt4VisionTokenCost(url, detail);
            numTokens += cost ?? 0;
          }
        }
      } else {
        textContent = value as string;
      }

      // Refuse oversized input before handing it to the encoder.
      if (textContent.length > 800000 || numTokens > 200000) {
        throw new Error("Content is too large to tokenize.");
      }

      numTokens += encoder.encode(textContent).length;

      if (key === "name") {
        numTokens += tokensPerName;
      }
    }
  }

  numTokens += 3; // every reply is primed with <|start|>assistant<|message|>
  return { tokenizer: "tiktoken", token_count: numTokens };
}
/**
 * Estimates the token cost of one inline image in a GPT-4 Vision prompt.
 *
 * Low detail costs a flat 85 tokens. High detail first fits the image inside
 * 2048x2048, then scales it so the shorter side is 768, and charges 170
 * tokens per 512x512 tile plus 85. With `detail` "auto", images larger than
 * 512*512 pixels are treated as high detail and all others as low detail.
 *
 * @param url Image as a base64 `data:image/...` URL.
 * @param detail Requested detail level; defaults to "auto".
 * @returns The estimated token cost of the image.
 * @throws Error when the URL is not a data URL, when it carries no base64
 *   payload, or when no width/height can be read from the decoded image.
 */
async function getGpt4VisionTokenCost(
  url: string,
  detail: "auto" | "low" | "high" = "auto"
) {
  // For now we do not allow remote images as the proxy would have to download
  // them, which is a potential DoS vector.
  if (!url.startsWith("data:image/")) {
    throw new Error(
      "Remote images are not supported. Add the image to your prompt as a base64 data URL."
    );
  }

  // A data URL is "<header>,<payload>". Without a payload there is nothing to
  // decode, so fail with a clear message instead of letting Buffer.from throw
  // a TypeError on `undefined`.
  const base64Data = url.split(",")[1];
  if (!base64Data) {
    throw new Error("Prompt includes an image that could not be parsed");
  }

  const buffer = Buffer.from(base64Data, "base64");
  const image = libSharp(buffer);
  const metadata = await image.metadata();

  if (!metadata || !metadata.width || !metadata.height) {
    throw new Error("Prompt includes an image that could not be parsed");
  }

  const { width, height } = metadata;

  let selectedDetail: "low" | "high";
  if (detail === "auto") {
    const threshold = 512 * 512;
    const imageSize = width * height;
    selectedDetail = imageSize > threshold ? "high" : "low";
  } else {
    selectedDetail = detail;
  }

  // https://platform.openai.com/docs/guides/vision/calculating-costs
  if (selectedDetail === "low") {
    log.info(
      { width, height, tokens: 85 },
      "Using fixed GPT-4-Vision token cost for low detail image"
    );
    return 85;
  }

  let newWidth = width;
  let newHeight = height;

  // Step 1: shrink to fit within 2048x2048, keeping the aspect ratio.
  if (width > 2048 || height > 2048) {
    const aspectRatio = width / height;
    if (width > height) {
      newWidth = 2048;
      newHeight = Math.round(2048 / aspectRatio);
    } else {
      newHeight = 2048;
      newWidth = Math.round(2048 * aspectRatio);
    }
  }

  // Step 2: scale so that the shorter side becomes exactly 768.
  if (newWidth < newHeight) {
    newHeight = Math.round((newHeight / newWidth) * 768);
    newWidth = 768;
  } else {
    newWidth = Math.round((newWidth / newHeight) * 768);
    newHeight = 768;
  }

  // Step 3: count the 512x512 tiles needed to cover the scaled image.
  const tiles = Math.ceil(newWidth / 512) * Math.ceil(newHeight / 512);
  const tokens = 170 * tiles + 85;

  log.info(
    { width, height, newWidth, newHeight, tiles, tokens },
    "Calculated GPT-4-Vision token cost for high detail image"
  );
  return tokens;
}
/**
 * Counts the tokens of a plain-text prompt with the shared encoder.
 *
 * Prompts longer than 500000 characters are not encoded; they are reported
 * as a flat 100000 tokens under the "length fallback" tokenizer label.
 *
 * @param prompt Text to count.
 * @returns The tokenizer label and the token count.
 */
function getTextTokenCount(prompt: string) {
  const tooLongToEncode = prompt.length > 500000;
  return tooLongToEncode
    ? { tokenizer: "length fallback", token_count: 100000 }
    : { tokenizer: "tiktoken", token_count: encoder.encode(prompt).length };
}
// Model        Resolution              Price
// DALL·E 3     1024×1024               $0.040 / image
//              1024×1792, 1792×1024    $0.080 / image
// DALL·E 3 HD  1024×1024               $0.080 / image
//              1024×1792, 1792×1024    $0.120 / image
// DALL·E 2     1024×1024               $0.020 / image
//              512×512                 $0.018 / image
//              256×256                 $0.016 / image
/** Rate used to express a fixed USD image price as a token count. */
export const DALLE_TOKENS_PER_DOLLAR = 100000;

/**
 * Converts the fixed price of an OpenAI image generation request into tokens.
 *
 * Image generation is billed per image rather than per token, while the rest
 * of the application accounts in tokens. The per-image USD price is picked
 * from the model and its quality/resolution, converted at
 * `DALLE_TOKENS_PER_DOLLAR`, multiplied by the number of images and rounded
 * up.
 *
 * @param params.model Image model being called.
 * @param params.quality Requested quality; only consulted for "dall-e-3",
 *   where anything other than "standard" is charged at the higher rate.
 * @param params.resolution Requested output size.
 * @param params.n Number of images; `null` counts as one.
 * @returns The tokenizer label and the token-equivalent cost.
 * @throws Error "Invalid resolution" when the size is not priced for the
 *   model, or "Invalid image generation model" for an unknown model.
 */
export function getOpenAIImageCost(params: {
  model: "dall-e-2" | "dall-e-3" | "gpt-image-1";
  quality: "standard" | "hd" | "high" | "medium" | "low" | "auto";
  resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024" | "1536x1024" | "1024x1536" | "auto";
  n: number | null;
}) {
  let pricePerImage: number;

  if (params.model === "dall-e-2") {
    if (params.resolution === "512x512") {
      pricePerImage = 0.018;
    } else if (params.resolution === "256x256") {
      pricePerImage = 0.016;
    } else if (params.resolution === "1024x1024") {
      pricePerImage = 0.02;
    } else {
      throw new Error("Invalid resolution");
    }
  } else if (params.model === "dall-e-3") {
    const isStandard = params.quality === "standard";
    if (params.resolution === "1024x1024") {
      pricePerImage = isStandard ? 0.04 : 0.08;
    } else if (
      params.resolution === "1024x1792" ||
      params.resolution === "1792x1024"
    ) {
      pricePerImage = isStandard ? 0.08 : 0.12;
    } else {
      throw new Error("Invalid resolution");
    }
  } else if (params.model === "gpt-image-1") {
    // gpt-image-1 pricing is approximately $0.04 per image
    // This is a simplified pricing model, adjust as needed based on official pricing
    pricePerImage = 0.04;
  } else {
    throw new Error("Invalid image generation model");
  }

  const imageCount = params.n ?? 1;
  const rawTokens = imageCount * (pricePerImage * DALLE_TOKENS_PER_DOLLAR);

  return {
    tokenizer: "openai-image cost",
    token_count: Math.ceil(rawTokens),
  };
}
/**
 * Estimates the token count of a Google AI prompt using the tiktoken encoder.
 *
 * This is an approximation: the text is encoded with the encoder this module
 * uses for OpenAI rather than a Google tokenizer. A plain string prompt is
 * delegated to `getTextTokenCount`. For chat prompts each message costs a
 * fixed 3 tokens plus the encoded length of all of its text parts, and 3
 * more tokens are added once at the end. Non-text parts are not counted.
 *
 * @param prompt Raw text, or the list of chat messages.
 * @returns The tokenizer label and the estimated token count.
 */
export function estimateGoogleAITokenCount(
  prompt: string | GoogleAIChatMessage[]
) {
  if (typeof prompt === "string") {
    return getTextTokenCount(prompt);
  }

  const tokensPerMessage = 3;

  let numTokens = 0;
  for (const message of prompt) {
    numTokens += tokensPerMessage;
    // A message may carry several text parts; count all of them rather than
    // only the first, otherwise multi-part messages are under-estimated.
    for (const part of message.parts) {
      if ("text" in part) {
        numTokens += encoder.encode((part as { text: string }).text).length;
      }
    }
  }

  numTokens += 3;

  return {
    tokenizer: "tiktoken (google-ai estimate)",
    token_count: numTokens,
  };
}
|