| import type { Anthropic } from '@anthropic-ai/sdk' |
| import type { BetaMessageParam as MessageParam } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs' |
| |
| |
| import type { CountTokensCommandInput } from '@aws-sdk/client-bedrock-runtime' |
| import { getAPIProvider } from 'src/utils/model/providers.js' |
| import { VERTEX_COUNT_TOKENS_ALLOWED_BETAS } from '../constants/betas.js' |
| import type { Attachment } from '../utils/attachments.js' |
| import { getModelBetas } from '../utils/betas.js' |
| import { getVertexRegionForModel, isEnvTruthy } from '../utils/envUtils.js' |
| import { logError } from '../utils/log.js' |
| import { normalizeAttachmentForAPI } from '../utils/messages.js' |
| import { |
| createBedrockRuntimeClient, |
| getInferenceProfileBackingModel, |
| isFoundationModel, |
| } from '../utils/model/bedrock.js' |
| import { |
| getDefaultSonnetModel, |
| getMainLoopModel, |
| getSmallFastModel, |
| normalizeModelStringForAPI, |
| } from '../utils/model/model.js' |
| import { jsonStringify } from '../utils/slowOperations.js' |
| import { isToolReferenceBlock } from '../utils/toolSearch.js' |
| import { getAPIMetadata, getExtraBodyParams } from './api/claude.js' |
| import { getAnthropicClient } from './api/client.js' |
| import { withTokenCountVCR } from './vcr.js' |
|
|
| |
| |
| const TOKEN_COUNT_THINKING_BUDGET = 1024 |
| const TOKEN_COUNT_MAX_TOKENS = 2048 |
|
|
| |
| |
| |
| function hasThinkingBlocks( |
| messages: Anthropic.Beta.Messages.BetaMessageParam[], |
| ): boolean { |
| for (const message of messages) { |
| if (message.role === 'assistant' && Array.isArray(message.content)) { |
| for (const block of message.content) { |
| if ( |
| typeof block === 'object' && |
| block !== null && |
| 'type' in block && |
| (block.type === 'thinking' || block.type === 'redacted_thinking') |
| ) { |
| return true |
| } |
| } |
| } |
| } |
| return false |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| function stripToolSearchFieldsFromMessages( |
| messages: Anthropic.Beta.Messages.BetaMessageParam[], |
| ): Anthropic.Beta.Messages.BetaMessageParam[] { |
| return messages.map(message => { |
| if (!Array.isArray(message.content)) { |
| return message |
| } |
|
|
| const normalizedContent = message.content.map(block => { |
| |
| if (block.type === 'tool_use') { |
| |
| const toolUse = |
| block as Anthropic.Beta.Messages.BetaToolUseBlockParam & { |
| caller?: unknown |
| } |
| return { |
| type: 'tool_use' as const, |
| id: toolUse.id, |
| name: toolUse.name, |
| input: toolUse.input, |
| } |
| } |
|
|
| |
| if (block.type === 'tool_result') { |
| const toolResult = |
| block as Anthropic.Beta.Messages.BetaToolResultBlockParam |
| if (Array.isArray(toolResult.content)) { |
| const filteredContent = (toolResult.content as unknown[]).filter( |
| c => !isToolReferenceBlock(c), |
| ) as typeof toolResult.content |
|
|
| if (filteredContent.length === 0) { |
| return { |
| ...toolResult, |
| content: [{ type: 'text' as const, text: '[tool references]' }], |
| } |
| } |
| if (filteredContent.length !== toolResult.content.length) { |
| return { |
| ...toolResult, |
| content: filteredContent, |
| } |
| } |
| } |
| } |
|
|
| return block |
| }) |
|
|
| return { |
| ...message, |
| content: normalizedContent, |
| } |
| }) |
| } |
|
|
| export async function countTokensWithAPI( |
| content: string, |
| ): Promise<number | null> { |
| |
| if (!content) { |
| return 0 |
| } |
|
|
| const message: Anthropic.Beta.Messages.BetaMessageParam = { |
| role: 'user', |
| content: content, |
| } |
|
|
| return countMessagesTokensWithAPI([message], []) |
| } |
|
|
| export async function countMessagesTokensWithAPI( |
| messages: Anthropic.Beta.Messages.BetaMessageParam[], |
| tools: Anthropic.Beta.Messages.BetaToolUnion[], |
| ): Promise<number | null> { |
| return withTokenCountVCR(messages, tools, async () => { |
| try { |
| const model = getMainLoopModel() |
| const betas = getModelBetas(model) |
| const containsThinking = hasThinkingBlocks(messages) |
|
|
| if (getAPIProvider() === 'bedrock') { |
| |
| return countTokensWithBedrock({ |
| model: normalizeModelStringForAPI(model), |
| messages, |
| tools, |
| betas, |
| containsThinking, |
| }) |
| } |
|
|
| const anthropic = await getAnthropicClient({ |
| maxRetries: 1, |
| model, |
| source: 'count_tokens', |
| }) |
|
|
| const filteredBetas = |
| getAPIProvider() === 'vertex' |
| ? betas.filter(b => VERTEX_COUNT_TOKENS_ALLOWED_BETAS.has(b)) |
| : betas |
|
|
| const response = await anthropic.beta.messages.countTokens({ |
| model: normalizeModelStringForAPI(model), |
| messages: |
| |
| |
| messages.length > 0 ? messages : [{ role: 'user', content: 'foo' }], |
| tools, |
| ...(filteredBetas.length > 0 && { betas: filteredBetas }), |
| |
| ...(containsThinking && { |
| thinking: { |
| type: 'enabled', |
| budget_tokens: TOKEN_COUNT_THINKING_BUDGET, |
| }, |
| }), |
| }) |
|
|
| if (typeof response.input_tokens !== 'number') { |
| |
| |
| return null |
| } |
|
|
| return response.input_tokens |
| } catch (error) { |
| logError(error) |
| return null |
| } |
| }) |
| } |
|
|
| export function roughTokenCountEstimation( |
| content: string, |
| bytesPerToken: number = 4, |
| ): number { |
| return Math.round(content.length / bytesPerToken) |
| } |
|
|
| |
| |
| |
| |
| |
| export function bytesPerTokenForFileType(fileExtension: string): number { |
| switch (fileExtension) { |
| case 'json': |
| case 'jsonl': |
| case 'jsonc': |
| return 2 |
| default: |
| return 4 |
| } |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| export function roughTokenCountEstimationForFileType( |
| content: string, |
| fileExtension: string, |
| ): number { |
| return roughTokenCountEstimation( |
| content, |
| bytesPerTokenForFileType(fileExtension), |
| ) |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| export async function countTokensViaHaikuFallback( |
| messages: Anthropic.Beta.Messages.BetaMessageParam[], |
| tools: Anthropic.Beta.Messages.BetaToolUnion[], |
| ): Promise<number | null> { |
| |
| const containsThinking = hasThinkingBlocks(messages) |
|
|
| |
| const isVertexGlobalEndpoint = |
| isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX) && |
| getVertexRegionForModel(getSmallFastModel()) === 'global' |
| |
| const isBedrockWithThinking = |
| isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK) && containsThinking |
| |
| const isVertexWithThinking = |
| isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX) && containsThinking |
| |
| |
| |
| |
| |
| |
| const model = |
| isVertexGlobalEndpoint || isBedrockWithThinking || isVertexWithThinking |
| ? getDefaultSonnetModel() |
| : getSmallFastModel() |
| const anthropic = await getAnthropicClient({ |
| maxRetries: 1, |
| model, |
| source: 'count_tokens', |
| }) |
|
|
| |
| |
| const normalizedMessages = stripToolSearchFieldsFromMessages(messages) |
|
|
| const messagesToSend: MessageParam[] = |
| normalizedMessages.length > 0 |
| ? (normalizedMessages as MessageParam[]) |
| : [{ role: 'user', content: 'count' }] |
|
|
| const betas = getModelBetas(model) |
| |
| |
| const filteredBetas = |
| getAPIProvider() === 'vertex' |
| ? betas.filter(b => VERTEX_COUNT_TOKENS_ALLOWED_BETAS.has(b)) |
| : betas |
|
|
| |
| const response = await anthropic.beta.messages.create({ |
| model: normalizeModelStringForAPI(model), |
| max_tokens: containsThinking ? TOKEN_COUNT_MAX_TOKENS : 1, |
| messages: messagesToSend, |
| tools: tools.length > 0 ? tools : undefined, |
| ...(filteredBetas.length > 0 && { betas: filteredBetas }), |
| metadata: getAPIMetadata(), |
| ...getExtraBodyParams(), |
| |
| ...(containsThinking && { |
| thinking: { |
| type: 'enabled', |
| budget_tokens: TOKEN_COUNT_THINKING_BUDGET, |
| }, |
| }), |
| }) |
|
|
| const usage = response.usage |
| const inputTokens = usage.input_tokens |
| const cacheCreationTokens = usage.cache_creation_input_tokens || 0 |
| const cacheReadTokens = usage.cache_read_input_tokens || 0 |
|
|
| return inputTokens + cacheCreationTokens + cacheReadTokens |
| } |
|
|
| export function roughTokenCountEstimationForMessages( |
| messages: readonly { |
| type: string |
| message?: { content?: unknown } |
| attachment?: Attachment |
| }[], |
| ): number { |
| let totalTokens = 0 |
| for (const message of messages) { |
| totalTokens += roughTokenCountEstimationForMessage(message) |
| } |
| return totalTokens |
| } |
|
|
| export function roughTokenCountEstimationForMessage(message: { |
| type: string |
| message?: { content?: unknown } |
| attachment?: Attachment |
| }): number { |
| if ( |
| (message.type === 'assistant' || message.type === 'user') && |
| message.message?.content |
| ) { |
| return roughTokenCountEstimationForContent( |
| message.message?.content as |
| | string |
| | Array<Anthropic.ContentBlock> |
| | Array<Anthropic.ContentBlockParam> |
| | undefined, |
| ) |
| } |
|
|
| if (message.type === 'attachment' && message.attachment) { |
| const userMessages = normalizeAttachmentForAPI(message.attachment) |
| let total = 0 |
| for (const userMsg of userMessages) { |
| total += roughTokenCountEstimationForContent(userMsg.message.content) |
| } |
| return total |
| } |
|
|
| return 0 |
| } |
|
|
| function roughTokenCountEstimationForContent( |
| content: |
| | string |
| | Array<Anthropic.ContentBlock> |
| | Array<Anthropic.ContentBlockParam> |
| | undefined, |
| ): number { |
| if (!content) { |
| return 0 |
| } |
| if (typeof content === 'string') { |
| return roughTokenCountEstimation(content) |
| } |
| let totalTokens = 0 |
| for (const block of content) { |
| totalTokens += roughTokenCountEstimationForBlock(block) |
| } |
| return totalTokens |
| } |
|
|
| function roughTokenCountEstimationForBlock( |
| block: string | Anthropic.ContentBlock | Anthropic.ContentBlockParam, |
| ): number { |
| if (typeof block === 'string') { |
| return roughTokenCountEstimation(block) |
| } |
| if (block.type === 'text') { |
| return roughTokenCountEstimation(block.text) |
| } |
| if (block.type === 'image' || block.type === 'document') { |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| return 2000 |
| } |
| if (block.type === 'tool_result') { |
| return roughTokenCountEstimationForContent(block.content) |
| } |
| if (block.type === 'tool_use') { |
| |
| |
| |
| return roughTokenCountEstimation( |
| block.name + jsonStringify(block.input ?? {}), |
| ) |
| } |
| if (block.type === 'thinking') { |
| return roughTokenCountEstimation(block.thinking) |
| } |
| if (block.type === 'redacted_thinking') { |
| return roughTokenCountEstimation(block.data) |
| } |
| |
| |
| |
| |
| return roughTokenCountEstimation(jsonStringify(block)) |
| } |
|
|
| async function countTokensWithBedrock({ |
| model, |
| messages, |
| tools, |
| betas, |
| containsThinking, |
| }: { |
| model: string |
| messages: Anthropic.Beta.Messages.BetaMessageParam[] |
| tools: Anthropic.Beta.Messages.BetaToolUnion[] |
| betas: string[] |
| containsThinking: boolean |
| }): Promise<number | null> { |
| try { |
| const client = await createBedrockRuntimeClient() |
| |
| const modelId = isFoundationModel(model) |
| ? model |
| : await getInferenceProfileBackingModel(model) |
| if (!modelId) { |
| return null |
| } |
|
|
| const requestBody = { |
| anthropic_version: 'bedrock-2023-05-31', |
| |
| |
| messages: |
| messages.length > 0 ? messages : [{ role: 'user', content: 'foo' }], |
| max_tokens: containsThinking ? TOKEN_COUNT_MAX_TOKENS : 1, |
| ...(tools.length > 0 && { tools }), |
| ...(betas.length > 0 && { anthropic_beta: betas }), |
| ...(containsThinking && { |
| thinking: { |
| type: 'enabled', |
| budget_tokens: TOKEN_COUNT_THINKING_BUDGET, |
| }, |
| }), |
| } |
|
|
| const { CountTokensCommand } = await import( |
| '@aws-sdk/client-bedrock-runtime' |
| ) |
| const input: CountTokensCommandInput = { |
| modelId, |
| input: { |
| invokeModel: { |
| body: new TextEncoder().encode(jsonStringify(requestBody)), |
| }, |
| }, |
| } |
| const response = await client.send(new CountTokensCommand(input)) |
| const tokenCount = response.inputTokens ?? null |
| return tokenCount |
| } catch (error) { |
| logError(error) |
| return null |
| } |
| } |
|
|