| | const Anthropic = require('@anthropic-ai/sdk'); |
| | const { logger } = require('@librechat/data-schemas'); |
| | const { HttpsProxyAgent } = require('https-proxy-agent'); |
| | const { |
| | Constants, |
| | ErrorTypes, |
| | EModelEndpoint, |
| | parseTextParts, |
| | anthropicSettings, |
| | getResponseSender, |
| | validateVisionModel, |
| | } = require('librechat-data-provider'); |
| | const { sleep, SplitStreamHandler: _Handler, addCacheControl } = require('@librechat/agents'); |
| | const { |
| | Tokenizer, |
| | createFetch, |
| | matchModelName, |
| | getClaudeHeaders, |
| | getModelMaxTokens, |
| | configureReasoning, |
| | checkPromptCacheSupport, |
| | getModelMaxOutputTokens, |
| | createStreamEventHandlers, |
| | } = require('@librechat/api'); |
| | const { |
| | truncateText, |
| | formatMessage, |
| | titleFunctionPrompt, |
| | parseParamFromPrompt, |
| | createContextHandlers, |
| | } = require('./prompts'); |
| | const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens'); |
| | const { encodeAndFormat } = require('~/server/services/Files/images/encode'); |
| | const BaseClient = require('./BaseClient'); |
| |
|
// Turn delimiters for the legacy Text Completions prompt format
// (used as author labels when `useMessages` is false).
const HUMAN_PROMPT = '\n\nHuman:';
const AI_PROMPT = '\n\nAssistant:';
| |
|
/**
 * Stream handler specialized for Anthropic stream events: extracts text
 * deltas from both the Messages API (`chunk.delta.text`) and the legacy
 * Completions API (`chunk.completion`), and reasoning ("thinking") deltas
 * from `chunk.delta.thinking`.
 */
class SplitStreamHandler extends _Handler {
  /**
   * @param {Object} chunk - An Anthropic stream event.
   * @returns {string} The text delta for this chunk, or '' when absent.
   */
  getDeltaContent(chunk) {
    const messagesDelta = chunk?.delta?.text;
    if (messagesDelta != null) {
      return messagesDelta || '';
    }
    return chunk?.completion || '';
  }

  /**
   * @param {Object} chunk - An Anthropic stream event.
   * @returns {string} The reasoning delta for this chunk, or '' when absent.
   */
  getReasoningDelta(chunk) {
    const thinking = chunk?.delta?.thinking;
    return thinking ? thinking : '';
  }
}
| |
|
| | |
/**
 * Linear backoff helper: resolves after `attempts * baseDelay` milliseconds.
 * @param {number} attempts - The current (1-based) retry attempt; used as the delay multiplier.
 * @param {number} [baseDelay=1000] - Base delay in milliseconds.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
function delayBeforeRetry(attempts, baseDelay = 1000) {
  const delayMs = baseDelay * attempts;
  return new Promise((resolve) => {
    setTimeout(resolve, delayMs);
  });
}
| |
|
// Stream event types that carry token-usage payloads; `sendCompletion`
// stores the matching events on the client (as `this.message_start` /
// `this.message_delta`) for `getStreamUsage`.
const tokenEventTypes = new Set(['message_start', 'message_delta']);
// Settings for legacy (pre-Claude 3.5/3.7/4) models — used to cap
// `maxOutputTokens` in `setOptions` and `sendCompletion`.
const { legacy } = anthropicSettings;
| |
|
| | class AnthropicClient extends BaseClient { |
  /**
   * @param {string} apiKey - Anthropic API key; falls back to ANTHROPIC_API_KEY env var.
   * @param {Object} [options={}] - Client options; forwarded to `setOptions`.
   */
  constructor(apiKey, options = {}) {
    super(apiKey, options);
    this.apiKey = apiKey || process.env.ANTHROPIC_API_KEY;
    this.userLabel = HUMAN_PROMPT;
    this.assistantLabel = AI_PROMPT;
    // 'discard' drops out-of-window messages (see getMessagesWithinTokenLimit usage).
    this.contextStrategy = options.contextStrategy
      ? options.contextStrategy.toLowerCase()
      : 'discard';
    this.setOptions(options);
    // The bare expressions below are no-op "declarations" documenting the
    // properties assigned later (presumably for JSDoc tooling); they do not
    // create the properties.
    // System prompt text, set in buildMessages/sendCompletion.
    this.systemMessage;
    // Captured `message_start` stream event (token usage source).
    this.message_start;
    // Captured `message_delta` stream event (token usage source).
    this.message_delta;
    // Whether the model matches the Claude 3+/4+ patterns (set in setOptions).
    this.isClaudeLatest;
    // Whether to use the Messages API rather than legacy completions.
    this.useMessages;
    // Whether prompt caching is enabled and supported by the model.
    this.supportsCacheControl;
    // Usage-object key names for input/output token counts.
    this.inputTokensKey = 'input_tokens';
    this.outputTokensKey = 'output_tokens';
    // SplitStreamHandler instance, created per request in sendCompletion.
    this.streamHandler;
  }
| |
|
  /**
   * Merges and applies client options: resolves the model, its capability
   * flags (latest-generation, legacy output limits, prompt-cache support),
   * token limits, and the response sender. Throws when the configured token
   * budget cannot fit in the model's context window.
   * @param {Object} options - New options; `modelOptions` are merged unless
   *   `replaceOptions` is set on the existing options.
   * @returns {this}
   * @throws {Error} JSON-stringified `{ type: ErrorTypes.INPUT_LENGTH, info }`
   *   when token limits are inconsistent.
   */
  setOptions(options) {
    if (this.options && !this.options.replaceOptions) {
      // Merge incoming modelOptions over the existing ones, then shallow-merge
      // the remaining options.
      this.options.modelOptions = {
        ...this.options.modelOptions,
        ...options.modelOptions,
      };
      delete options.modelOptions;
      this.options = {
        ...this.options,
        ...options,
      };
    } else {
      this.options = options;
    }

    this.modelOptions = Object.assign(
      {
        model: anthropicSettings.model.default,
      },
      this.modelOptions,
      this.options.modelOptions,
    );

    // Normalize the model name before pattern-matching capability flags.
    const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
    this.isClaudeLatest =
      /claude-[3-9]/.test(modelMatch) || /claude-(?:sonnet|opus|haiku)-[4-9]/.test(modelMatch);
    // Legacy output limits apply to anything older than 3.5-sonnet / 3.7 / 4+.
    const isLegacyOutput = !(
      /claude-3[-.]5-sonnet/.test(modelMatch) ||
      /claude-3[-.]7/.test(modelMatch) ||
      /claude-(?:sonnet|opus|haiku)-[4-9]/.test(modelMatch) ||
      /claude-[4-9]/.test(modelMatch)
    );
    this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);

    // Clamp maxOutputTokens for legacy models to their smaller default cap.
    if (
      isLegacyOutput &&
      this.modelOptions.maxOutputTokens &&
      this.modelOptions.maxOutputTokens > legacy.maxOutputTokens.default
    ) {
      this.modelOptions.maxOutputTokens = legacy.maxOutputTokens.default;
    }

    // Attachments force the Messages API even for older models.
    this.useMessages = this.isClaudeLatest || !!this.options.attachments;

    this.defaultVisionModel = this.options.visionModel ?? 'claude-3-sonnet-20240229';
    // attachments may be a Promise; vision check runs when it resolves.
    this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));

    this.maxContextTokens =
      this.options.maxContextTokens ??
      getModelMaxTokens(this.modelOptions.model, EModelEndpoint.anthropic) ??
      100000;
    this.maxResponseTokens =
      this.modelOptions.maxOutputTokens ??
      getModelMaxOutputTokens(
        this.modelOptions.model,
        this.options.endpointType ?? this.options.endpoint,
        this.options.endpointTokenConfig,
      ) ??
      anthropicSettings.maxOutputTokens.reset(this.modelOptions.model);
    this.maxPromptTokens =
      this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;

    // Validate that prompt + response budgets fit the context window.
    const reservedTokens = this.maxPromptTokens + this.maxResponseTokens;
    if (reservedTokens > this.maxContextTokens) {
      const info = `Total Possible Tokens + Max Output Tokens must be less than or equal to Max Context Tokens: ${this.maxPromptTokens} (total possible output) + ${this.maxResponseTokens} (max output) = ${reservedTokens}/${this.maxContextTokens} (max context)`;
      const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
      logger.warn(info);
      throw new Error(errorMessage);
    } else if (this.maxResponseTokens === this.maxContextTokens) {
      const info = `Max Output Tokens must be less than Max Context Tokens: ${this.maxResponseTokens} (max output) = ${this.maxContextTokens} (max context)`;
      const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
      logger.warn(info);
      throw new Error(errorMessage);
    }

    this.sender =
      this.options.sender ??
      getResponseSender({
        model: this.modelOptions.model,
        endpoint: EModelEndpoint.anthropic,
        modelLabel: this.options.modelLabel,
      });

    // Tokens used by the legacy prompt format.
    this.startToken = '||>';
    this.endToken = '';

    return this;
  }
| |
|
| | |
| | |
| | |
| | |
| | |
| | getClient(requestOptions) { |
| | |
| | const options = { |
| | fetch: createFetch({ |
| | directEndpoint: this.options.directEndpoint, |
| | reverseProxyUrl: this.options.reverseProxyUrl, |
| | }), |
| | apiKey: this.apiKey, |
| | fetchOptions: {}, |
| | }; |
| |
|
| | if (this.options.proxy) { |
| | options.fetchOptions.agent = new HttpsProxyAgent(this.options.proxy); |
| | } |
| |
|
| | if (this.options.reverseProxyUrl) { |
| | options.baseURL = this.options.reverseProxyUrl; |
| | } |
| |
|
| | const headers = getClaudeHeaders(requestOptions?.model, this.supportsCacheControl); |
| | if (headers) { |
| | options.defaultHeaders = headers; |
| | } |
| |
|
| | return new Anthropic(options); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | getStreamUsage() { |
| | const inputUsage = this.message_start?.message?.usage ?? {}; |
| | const outputUsage = this.message_delta?.usage ?? {}; |
| | return Object.assign({}, inputUsage, outputUsage); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) { |
| | const originalEstimate = tokenCountMap[currentMessageId] || 0; |
| |
|
| | if (!usage || typeof usage.input_tokens !== 'number') { |
| | return originalEstimate; |
| | } |
| |
|
| | tokenCountMap[currentMessageId] = 0; |
| | const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => { |
| | const numCount = Number(count); |
| | return sum + (isNaN(numCount) ? 0 : numCount); |
| | }, 0); |
| | const totalInputTokens = |
| | (usage.input_tokens ?? 0) + |
| | (usage.cache_creation_input_tokens ?? 0) + |
| | (usage.cache_read_input_tokens ?? 0); |
| |
|
| | const currentMessageTokens = totalInputTokens - totalTokensFromMap; |
| | return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | getTokenCountForResponse(responseMessage) { |
| | return this.getTokenCountForMessage({ |
| | role: 'assistant', |
| | content: responseMessage.text, |
| | }); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | checkVisionRequest(attachments) { |
| | const availableModels = this.options.modelsConfig?.[EModelEndpoint.anthropic]; |
| | this.isVisionModel = validateVisionModel({ model: this.modelOptions.model, availableModels }); |
| |
|
| | const visionModelAvailable = availableModels?.includes(this.defaultVisionModel); |
| | if ( |
| | attachments && |
| | attachments.some((file) => file?.type && file?.type?.includes('image')) && |
| | visionModelAvailable && |
| | !this.isVisionModel |
| | ) { |
| | this.modelOptions.model = this.defaultVisionModel; |
| | this.isVisionModel = true; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | calculateImageTokenCost({ width, height }) { |
| | return Math.ceil((width * height) / 750); |
| | } |
| |
|
| | async addImageURLs(message, attachments) { |
| | const { files, image_urls } = await encodeAndFormat(this.options.req, attachments, { |
| | endpoint: EModelEndpoint.anthropic, |
| | }); |
| | message.image_urls = image_urls.length ? image_urls : undefined; |
| | return files; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | async recordTokenUsage({ promptTokens, completionTokens, usage, model, context = 'message' }) { |
| | if (usage != null && usage?.input_tokens != null) { |
| | const input = usage.input_tokens ?? 0; |
| | const write = usage.cache_creation_input_tokens ?? 0; |
| | const read = usage.cache_read_input_tokens ?? 0; |
| |
|
| | await spendStructuredTokens( |
| | { |
| | context, |
| | user: this.user, |
| | conversationId: this.conversationId, |
| | model: model ?? this.modelOptions.model, |
| | endpointTokenConfig: this.options.endpointTokenConfig, |
| | }, |
| | { |
| | promptTokens: { input, write, read }, |
| | completionTokens, |
| | }, |
| | ); |
| |
|
| | return; |
| | } |
| |
|
| | await spendTokens( |
| | { |
| | context, |
| | user: this.user, |
| | conversationId: this.conversationId, |
| | model: model ?? this.modelOptions.model, |
| | endpointTokenConfig: this.options.endpointTokenConfig, |
| | }, |
| | { promptTokens, completionTokens }, |
| | ); |
| | } |
| |
|
  /**
   * Builds the request payload from conversation history: resolves
   * attachments, estimates per-message token counts, trims to the context
   * window, groups consecutive same-author messages, and then produces
   * either a Messages-API payload (Claude 3+/4+) or a legacy prompt string.
   * @param {Array<Object>} messages - All conversation messages.
   * @param {string} parentMessageId - Tail of the thread to build from.
   * @returns {Promise<Object>} `{ prompt, context, promptTokens, tokenCountMap }`,
   *   where `prompt` is a messages array (Messages API) or a string (legacy).
   * @throws {Error} When images are attached to a non-vision model, or the
   *   prompt cannot fit within `maxPromptTokens`.
   */
  async buildMessages(messages, parentMessageId) {
    const orderedMessages = this.constructor.getMessagesForConversation({
      messages,
      parentMessageId,
    });

    logger.debug('[AnthropicClient] orderedMessages', { orderedMessages, parentMessageId });

    // Resolve pending attachments and pin them to the latest message.
    if (this.options.attachments) {
      const attachments = await this.options.attachments;
      const images = attachments.filter((file) => file.type.includes('image'));

      if (images.length && !this.isVisionModel) {
        throw new Error('Images are only supported with the Claude 3 family of models');
      }

      const latestMessage = orderedMessages[orderedMessages.length - 1];

      if (this.message_file_map) {
        this.message_file_map[latestMessage.messageId] = attachments;
      } else {
        this.message_file_map = {
          [latestMessage.messageId]: attachments,
        };
      }

      const files = await this.addImageURLs(latestMessage, attachments);

      // Replace the Promise with the resolved/encoded file records.
      this.options.attachments = files;
    }

    // Context handlers augment the prompt with embedded-file context.
    if (this.message_file_map) {
      this.contextHandlers = createContextHandlers(
        this.options.req,
        orderedMessages[orderedMessages.length - 1].text,
      );
    }

    // Format each message for the target API and estimate token counts,
    // adding image token costs for non-embedded attachments.
    const formattedMessages = orderedMessages.map((message, i) => {
      const formattedMessage = this.useMessages
        ? formatMessage({
            message,
            endpoint: EModelEndpoint.anthropic,
          })
        : {
            author: message.isCreatedByUser ? this.userLabel : this.assistantLabel,
            content: message?.content ?? message.text,
          };

      const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount;
      // Vision models re-count messages carrying images/files.
      if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
        orderedMessages[i].tokenCount = this.getTokenCountForMessage(formattedMessage);
      }

      if (this.message_file_map && this.message_file_map[message.messageId]) {
        const attachments = this.message_file_map[message.messageId];
        for (const file of attachments) {
          if (file.embedded) {
            this.contextHandlers?.processFile(file);
            continue;
          }
          if (file.metadata?.fileIdentifier) {
            continue;
          }

          orderedMessages[i].tokenCount += this.calculateImageTokenCost({
            width: file.width,
            height: file.height,
          });
        }
      }

      formattedMessage.tokenCount = orderedMessages[i].tokenCount;
      return formattedMessage;
    });

    if (this.contextHandlers) {
      this.augmentedPrompt = await this.contextHandlers.createContext();
      this.options.promptPrefix = this.augmentedPrompt + (this.options.promptPrefix ?? '');
    }

    // Trim history to what fits within the token budget.
    let { context: messagesInWindow, remainingContextTokens } =
      await this.getMessagesWithinTokenLimit({ messages: formattedMessages });

    // Map messageId -> estimated token count for the in-window messages.
    // NOTE(review): `index` is relative to the sliced tail but indexes into
    // the full `orderedMessages` — when messages were discarded these counts
    // come from the wrong entries; confirm whether `message.tokenCount` /
    // an offset index was intended.
    const tokenCountMap = orderedMessages
      .slice(orderedMessages.length - messagesInWindow.length)
      .reduce((map, message, index) => {
        const { messageId } = message;
        if (!messageId) {
          return map;
        }

        map[messageId] = orderedMessages[index].tokenCount;
        return map;
      }, {});

    logger.debug('[AnthropicClient]', {
      messagesInWindow: messagesInWindow.length,
      remainingContextTokens,
    });

    // Merge consecutive messages from the same author into one entry with
    // an array of content parts (Anthropic requires alternating roles).
    let lastAuthor = '';
    let groupedMessages = [];

    for (let i = 0; i < messagesInWindow.length; i++) {
      const message = messagesInWindow[i];
      const author = message.role ?? message.author;
      if (lastAuthor !== author) {
        const newMessage = {
          content: [message.content],
        };

        if (message.role) {
          newMessage.role = message.role;
        } else {
          newMessage.author = message.author;
        }

        groupedMessages.push(newMessage);
        lastAuthor = author;
      } else {
        groupedMessages[groupedMessages.length - 1].content.push(message.content);
      }
    }

    // Unwrap single-part content; trim a trailing assistant string (the API
    // rejects trailing whitespace in a final assistant turn).
    groupedMessages = groupedMessages.map((msg, i) => {
      const isLast = i === groupedMessages.length - 1;
      if (msg.content.length === 1) {
        const content = msg.content[0];
        return {
          ...msg,
          content:
            isLast && this.useMessages && msg.role === 'assistant' && typeof content === 'string'
              ? content?.trim()
              : content,
        };
      }

      if (!this.useMessages && msg.tokenCount) {
        delete msg.tokenCount;
      }

      return msg;
    });

    // Optional identity lines prepended to the system/prefix text.
    let identityPrefix = '';
    if (this.options.userLabel) {
      identityPrefix = `\nHuman's name: ${this.options.userLabel}`;
    }

    if (this.options.modelLabel) {
      identityPrefix = `${identityPrefix}\nYou are ${this.options.modelLabel}`;
    }

    let promptPrefix = (this.options.promptPrefix ?? '').trim();
    if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) {
      promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim();
    }
    if (promptPrefix) {
      if (!promptPrefix.endsWith(`${this.endToken}`)) {
        promptPrefix = `${promptPrefix.trim()}${this.endToken}\n\n`;
      }
      promptPrefix = `\nContext:\n${promptPrefix}`;
    }

    if (identityPrefix) {
      promptPrefix = `${identityPrefix}${promptPrefix}`;
    }

    // Legacy path bookkeeping: when the last turn is the assistant's, the
    // message is being edited/continued and no suffix is appended.
    let isEdited = lastAuthor === this.assistantLabel;
    const promptSuffix = isEdited ? '' : `${promptPrefix}${this.assistantLabel}\n`;
    let currentTokenCount =
      isEdited || this.useMessages
        ? this.getTokenCount(promptPrefix)
        : this.getTokenCount(promptSuffix);

    let promptBody = '';
    const maxTokenCount = this.maxPromptTokens;

    const context = [];

    // Tracks the most recently prepended assistant message so it can be
    // removed if the following (older) user message doesn't fit.
    const nextMessage = {
      remove: false,
      tokenCount: 0,
      messageString: '',
    };

    // Legacy prompt builder: prepends messages newest-first until the
    // budget is exhausted; recursive with a setImmediate yield per step.
    const buildPromptBody = async () => {
      if (currentTokenCount < maxTokenCount && groupedMessages.length > 0) {
        const message = groupedMessages.pop();
        const isCreatedByUser = message.author === this.userLabel;
        // On the edited (continuation) turn, the prefix is injected before
        // the assistant label instead of via promptSuffix.
        const messagePrefix =
          isCreatedByUser || !isEdited ? message.author : `${promptPrefix}${message.author}`;
        const messageString = `${messagePrefix}\n${message.content}${this.endToken}\n`;
        let newPromptBody = `${messageString}${promptBody}`;

        context.unshift(message);

        const tokenCountForMessage = this.getTokenCount(messageString);
        const newTokenCount = currentTokenCount + tokenCountForMessage;

        if (!isCreatedByUser) {
          nextMessage.messageString = messageString;
          nextMessage.tokenCount = tokenCountForMessage;
        }

        if (newTokenCount > maxTokenCount) {
          if (!promptBody) {
            // Not even the newest message fits.
            throw new Error(
              `Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`,
            );
          }

          // Flag the dangling assistant message for removal so the prompt
          // doesn't start with an unanswered assistant turn.
          if (isCreatedByUser) {
            nextMessage.remove = true;
          }

          return false;
        }
        promptBody = newPromptBody;
        currentTokenCount = newTokenCount;

        // The prefix is only injected once, on the newest assistant turn.
        if (isEdited) {
          isEdited = false;
        }

        // Yield to the event loop between iterations.
        await new Promise((resolve) => setImmediate(resolve));
        return buildPromptBody();
      }
      return true;
    };

    // Messages-API builder: prepends messages newest-first within budget;
    // the prefix becomes the system message instead of prompt text.
    const messagesPayload = [];
    const buildMessagesPayload = async () => {
      let canContinue = true;

      if (promptPrefix) {
        this.systemMessage = promptPrefix;
      }

      while (currentTokenCount < maxTokenCount && groupedMessages.length > 0 && canContinue) {
        const message = groupedMessages.pop();

        let tokenCountForMessage = message.tokenCount ?? this.getTokenCountForMessage(message);

        const newTokenCount = currentTokenCount + tokenCountForMessage;
        const exceededMaxCount = newTokenCount > maxTokenCount;

        if (exceededMaxCount && messagesPayload.length === 0) {
          throw new Error(
            `Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`,
          );
        } else if (exceededMaxCount) {
          canContinue = false;
          break;
        }

        // tokenCount is internal bookkeeping; strip it from the API payload.
        delete message.tokenCount;
        messagesPayload.unshift(message);
        currentTokenCount = newTokenCount;

        if (isEdited && message.role === 'assistant') {
          isEdited = false;
        }

        // Yield to the event loop between iterations.
        await new Promise((resolve) => setImmediate(resolve));
      }
    };

    const processTokens = () => {
      // Buffer of 2 tokens on top of the estimate.
      currentTokenCount += 2;

      // Shrink maxOutputTokens so prompt + output stays within context.
      this.modelOptions.maxOutputTokens = Math.min(
        this.maxContextTokens - currentTokenCount,
        this.maxResponseTokens,
      );
    };

    // Claude 3+/4+ use the Messages API payload; older models get the
    // legacy prompt string.
    if (
      /claude-[3-9]/.test(this.modelOptions.model) ||
      /claude-(?:sonnet|opus|haiku)-[4-9]/.test(this.modelOptions.model)
    ) {
      await buildMessagesPayload();
      processTokens();
      return {
        prompt: messagesPayload,
        context: messagesInWindow,
        promptTokens: currentTokenCount,
        tokenCountMap,
      };
    } else {
      await buildPromptBody();
      processTokens();
    }

    // Drop the dangling assistant message flagged during legacy building.
    if (nextMessage.remove) {
      promptBody = promptBody.replace(nextMessage.messageString, '');
      currentTokenCount -= nextMessage.tokenCount;
      context.shift();
    }

    let prompt = `${promptBody}${promptSuffix}`;

    return { prompt, context, promptTokens: currentTokenCount, tokenCountMap };
  }
| |
|
  /** No-op: this client creates completions directly in `sendCompletion`. */
  getCompletion() {
    logger.debug("AnthropicClient doesn't use getCompletion (all handled in sendCompletion)");
  }
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | async createResponse(client, options, useMessages) { |
| | return (useMessages ?? this.useMessages) |
| | ? await client.messages.create(options) |
| | : await client.completions.create(options); |
| | } |
| |
|
| | getMessageMapMethod() { |
| | |
| | |
| | |
| | return (msg) => { |
| | if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) { |
| | msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim(); |
| | } else if (msg.content != null) { |
| | msg.text = parseTextParts(msg.content, true); |
| | delete msg.content; |
| | } |
| |
|
| | return msg; |
| | }; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | getStreamText(intermediateReply) { |
| | if (!this.streamHandler) { |
| | return intermediateReply?.join('') ?? ''; |
| | } |
| |
|
| | const reasoningText = this.streamHandler.reasoningTokens.join(''); |
| |
|
| | const reasoningBlock = reasoningText.length > 0 ? `:::thinking\n${reasoningText}\n:::\n` : ''; |
| |
|
| | return `${reasoningBlock}${this.streamHandler.tokens.join('')}`; |
| | } |
| |
|
| | async sendCompletion(payload, { onProgress, abortController }) { |
| | if (!abortController) { |
| | abortController = new AbortController(); |
| | } |
| |
|
| | const { signal } = abortController; |
| |
|
| | const modelOptions = { ...this.modelOptions }; |
| | if (typeof onProgress === 'function') { |
| | modelOptions.stream = true; |
| | } |
| |
|
| | logger.debug('modelOptions', { modelOptions }); |
| | const metadata = { |
| | user_id: this.user, |
| | }; |
| |
|
| | const { |
| | stream, |
| | model, |
| | temperature, |
| | maxOutputTokens, |
| | stop: stop_sequences, |
| | topP: top_p, |
| | topK: top_k, |
| | } = this.modelOptions; |
| |
|
| | let requestOptions = { |
| | model, |
| | stream: stream || true, |
| | stop_sequences, |
| | temperature, |
| | metadata, |
| | }; |
| |
|
| | if (this.useMessages) { |
| | requestOptions.messages = payload; |
| | requestOptions.max_tokens = |
| | maxOutputTokens || anthropicSettings.maxOutputTokens.reset(requestOptions.model); |
| | } else { |
| | requestOptions.prompt = payload; |
| | requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default; |
| | } |
| |
|
| | requestOptions = configureReasoning(requestOptions, { |
| | thinking: this.options.thinking, |
| | thinkingBudget: this.options.thinkingBudget, |
| | }); |
| |
|
| | if (!/claude-3[-.]7/.test(model)) { |
| | requestOptions.top_p = top_p; |
| | requestOptions.top_k = top_k; |
| | } else if (requestOptions.thinking == null) { |
| | requestOptions.topP = top_p; |
| | requestOptions.topK = top_k; |
| | } |
| |
|
| | if (this.systemMessage && this.supportsCacheControl === true) { |
| | requestOptions.system = [ |
| | { |
| | type: 'text', |
| | text: this.systemMessage, |
| | cache_control: { type: 'ephemeral' }, |
| | }, |
| | ]; |
| | } else if (this.systemMessage) { |
| | requestOptions.system = this.systemMessage; |
| | } |
| |
|
| | if (this.supportsCacheControl === true && this.useMessages) { |
| | requestOptions.messages = addCacheControl(requestOptions.messages); |
| | } |
| |
|
| | logger.debug('[AnthropicClient]', { ...requestOptions }); |
| | const handlers = createStreamEventHandlers(this.options.res); |
| | this.streamHandler = new SplitStreamHandler({ |
| | accumulate: true, |
| | runId: this.responseMessageId, |
| | handlers, |
| | }); |
| |
|
| | let intermediateReply = this.streamHandler.tokens; |
| |
|
| | const maxRetries = 3; |
| | const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE; |
| | async function processResponse() { |
| | let attempts = 0; |
| |
|
| | while (attempts < maxRetries) { |
| | let response; |
| | try { |
| | const client = this.getClient(requestOptions); |
| | response = await this.createResponse(client, requestOptions); |
| |
|
| | signal.addEventListener('abort', () => { |
| | logger.debug('[AnthropicClient] message aborted!'); |
| | if (response.controller?.abort) { |
| | response.controller.abort(); |
| | } |
| | }); |
| |
|
| | for await (const completion of response) { |
| | const type = completion?.type ?? ''; |
| | if (tokenEventTypes.has(type)) { |
| | logger.debug(`[AnthropicClient] ${type}`, completion); |
| | this[type] = completion; |
| | } |
| | this.streamHandler.handle(completion); |
| | await sleep(streamRate); |
| | } |
| |
|
| | break; |
| | } catch (error) { |
| | attempts += 1; |
| | logger.warn( |
| | `User: ${this.user} | Anthropic Request ${attempts} failed: ${error.message}`, |
| | ); |
| |
|
| | if (attempts < maxRetries) { |
| | await delayBeforeRetry(attempts, 350); |
| | } else if (this.streamHandler && this.streamHandler.reasoningTokens.length) { |
| | return this.getStreamText(); |
| | } else if (intermediateReply.length > 0) { |
| | return this.getStreamText(intermediateReply); |
| | } else { |
| | throw new Error(`Operation failed after ${maxRetries} attempts: ${error.message}`); |
| | } |
| | } finally { |
| | signal.removeEventListener('abort', () => { |
| | logger.debug('[AnthropicClient] message aborted!'); |
| | if (response.controller?.abort) { |
| | response.controller.abort(); |
| | } |
| | }); |
| | } |
| | } |
| | } |
| |
|
| | await processResponse.bind(this)(); |
| | return this.getStreamText(intermediateReply); |
| | } |
| |
|
| | getSaveOptions() { |
| | return { |
| | maxContextTokens: this.options.maxContextTokens, |
| | artifacts: this.options.artifacts, |
| | promptPrefix: this.options.promptPrefix, |
| | modelLabel: this.options.modelLabel, |
| | promptCache: this.options.promptCache, |
| | thinking: this.options.thinking, |
| | thinkingBudget: this.options.thinkingBudget, |
| | resendFiles: this.options.resendFiles, |
| | iconURL: this.options.iconURL, |
| | greeting: this.options.greeting, |
| | spec: this.options.spec, |
| | ...this.modelOptions, |
| | }; |
| | } |
| |
|
  /** No-op: this client needs no extra options for `buildMessages`. */
  getBuildMessagesOptions() {
    logger.debug("AnthropicClient doesn't use getBuildMessagesOptions");
  }
| |
|
  /**
   * Tokenizer encoding used for token estimates throughout this client.
   * NOTE(review): cl100k_base is an OpenAI encoding — presumably an
   * approximation, since Anthropic's own tokenizer isn't used here.
   */
  getEncoding() {
    return 'cl100k_base';
  }
| |
|
| | |
| | |
| | |
| | |
| | |
| | getTokenCount(text) { |
| | const encoding = this.getEncoding(); |
| | return Tokenizer.getTokenCount(text, encoding); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
  /**
   * Generates a short conversation title from the first exchange using a
   * (cheaper) title model via the Messages API; records the token spend
   * under the 'title' context. Never throws — returns 'New Chat' on failure.
   * @param {Object} params
   * @param {string} params.text - The user's initial message.
   * @param {string} [params.responseText=''] - The assistant's response.
   * @returns {Promise<string>} The generated title, or 'New Chat'.
   */
  async titleConvo({ text, responseText = '' }) {
    let title = 'New Chat';
    // Clear usage events from the main completion so they don't bleed into
    // title-generation accounting.
    this.message_delta = undefined;
    this.message_start = undefined;
    const convo = `<initial_message>
${truncateText(text)}
</initial_message>
<response>
${JSON.stringify(truncateText(responseText))}
</response>`;

    const { ANTHROPIC_TITLE_MODEL } = process.env ?? {};
    const model = this.options.titleModel ?? ANTHROPIC_TITLE_MODEL ?? 'claude-3-haiku-20240307';
    const system = titleFunctionPrompt;

    const titleChatCompletion = async () => {
      const content = `<conversation_context>
${convo}
</conversation_context>

Please generate a title for this conversation.`;

      const titleMessage = { role: 'user', content };
      const requestOptions = {
        model,
        temperature: 0.3,
        max_tokens: 1024,
        system,
        stop_sequences: ['\n\nHuman:', '\n\nAssistant', '</function_calls>'],
        messages: [titleMessage],
      };

      try {
        // Force the Messages API (third argument) regardless of this.useMessages.
        const response = await this.createResponse(
          this.getClient(requestOptions),
          requestOptions,
          true,
        );
        let promptTokens = response?.usage?.input_tokens;
        let completionTokens = response?.usage?.output_tokens;
        // Fall back to local estimates when the API omitted usage.
        if (!promptTokens) {
          promptTokens = this.getTokenCountForMessage(titleMessage);
          promptTokens += this.getTokenCountForMessage({ role: 'system', content: system });
        }
        if (!completionTokens) {
          completionTokens = this.getTokenCountForMessage(response.content[0]);
        }
        await this.recordTokenUsage({
          model,
          promptTokens,
          completionTokens,
          context: 'title',
        });
        const text = response.content[0].text;
        title = parseParamFromPrompt(text, 'title');
      } catch (e) {
        logger.error('[AnthropicClient] There was an issue generating the title', e);
      }
    };

    await titleChatCompletion();
    logger.debug('[AnthropicClient] Convo Title: ' + title);
    return title;
  }
| | } |
| |
|
// CommonJS export: the AnthropicClient class is this module's sole export.
module.exports = AnthropicClient;
| |
|