| import { feature } from 'bun:bundle' |
| import { markPostCompaction } from 'src/bootstrap/state.js' |
| import { getSdkBetas } from '../../bootstrap/state.js' |
| import type { QuerySource } from '../../constants/querySource.js' |
| import type { ToolUseContext } from '../../Tool.js' |
| import type { Message } from '../../types/message.js' |
| import { getGlobalConfig } from '../../utils/config.js' |
| import { getContextWindowForModel } from '../../utils/context.js' |
| import { logForDebugging } from '../../utils/debug.js' |
| import { isEnvTruthy } from '../../utils/envUtils.js' |
| import { hasExactErrorMessage } from '../../utils/errors.js' |
| import type { CacheSafeParams } from '../../utils/forkedAgent.js' |
| import { logError } from '../../utils/log.js' |
| import { tokenCountWithEstimation } from '../../utils/tokens.js' |
| import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js' |
| import { getMaxOutputTokensForModel } from '../api/claude.js' |
| import { notifyCompaction } from '../api/promptCacheBreakDetection.js' |
| import { setLastSummarizedMessageId } from '../SessionMemory/sessionMemoryUtils.js' |
| import { |
| type CompactionResult, |
| compactConversation, |
| ERROR_MESSAGE_USER_ABORT, |
| type RecompactionInfo, |
| } from './compact.js' |
| import { runPostCompactCleanup } from './postCompactCleanup.js' |
| import { trySessionMemoryCompaction } from './sessionMemoryCompact.js' |
|
|
| |
| |
/**
 * Upper bound on the number of output tokens set aside for the compaction
 * summary when sizing the usable context window.
 */
const MAX_OUTPUT_TOKENS_FOR_SUMMARY = 20_000

/**
 * Returns the context window available to the conversation for `model`.
 *
 * The model's context window is optionally capped by the
 * `CLAUDE_CODE_AUTO_COMPACT_WINDOW` environment variable (a positive base-10
 * integer; any other value is ignored), and the tokens reserved for the
 * summary output are then subtracted.
 *
 * @param model - Model identifier forwarded to the context-window and
 *   max-output-token lookups.
 * @returns The (possibly capped) context window minus the summary reserve.
 *   The result is not clamped, so a cap smaller than the reserve yields a
 *   negative number.
 */
export function getEffectiveContextWindowSize(model: string): number {
  // Reserve whichever is smaller: the model's own output limit or the
  // fixed summary budget.
  const summaryReserve = Math.min(
    getMaxOutputTokensForModel(model),
    MAX_OUTPUT_TOKENS_FOR_SUMMARY,
  )
  const modelWindow = getContextWindowForModel(model, getSdkBetas())

  // An unset, empty, or non-numeric value parses to NaN, which fails the
  // `> 0` test and leaves the model window untouched. The cap can only
  // shrink the window, never enlarge it.
  const cap = Number.parseInt(
    process.env.CLAUDE_CODE_AUTO_COMPACT_WINDOW ?? '',
    10,
  )
  const usableWindow = cap > 0 ? Math.min(modelWindow, cap) : modelWindow

  return usableWindow - summaryReserve
}
|
|
/**
 * Bookkeeping that callers thread through successive `autoCompactIfNeeded`
 * calls.
 */
export type AutoCompactTrackingState = {
  /**
   * When `true`, the next compaction is reported as a recompaction within
   * the same chain (`isRecompactionInChain`).
   */
  compacted: boolean
  /**
   * Reported to the compactor as `turnsSincePreviousCompact`.
   * NOTE(review): who increments/resets this is not visible in this file.
   */
  turnCounter: number
  /** Reported to the compactor as `previousCompactTurnId`. */
  turnId: string
  /**
   * Number of auto-compaction attempts that failed in a row. Once it
   * reaches the module's failure limit, `autoCompactIfNeeded` stops
   * attempting compaction. Treated as 0 when absent.
   */
  consecutiveFailures?: number
}
|
|
/**
 * Headroom subtracted from the effective context window to obtain the
 * auto-compact threshold (see `getAutoCompactThreshold`).
 */
export const AUTOCOMPACT_BUFFER_TOKENS = 13_000

/**
 * `calculateTokenWarningState` raises the warning flag once usage is within
 * this many tokens of the active threshold.
 */
export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000

/**
 * `calculateTokenWarningState` raises the error flag once usage is within
 * this many tokens of the active threshold.
 */
export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000

/**
 * Headroom subtracted from the effective context window to obtain the
 * default blocking limit in `calculateTokenWarningState`.
 */
export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000

/**
 * Circuit-breaker limit: `autoCompactIfNeeded` stops attempting compaction
 * once the tracked consecutive-failure count reaches this value.
 */
const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES = 3
|
|
/**
 * Computes the token count at which auto-compaction should kick in for
 * `model`.
 *
 * The default is the effective context window minus
 * `AUTOCOMPACT_BUFFER_TOKENS`. Setting `CLAUDE_AUTOCOMPACT_PCT_OVERRIDE` to a
 * number in (0, 100] can lower the threshold to that percentage of the
 * effective window; it can never raise it above the default.
 *
 * @param model - Model identifier used to size the effective context window.
 * @returns The auto-compact threshold in tokens.
 */
export function getAutoCompactThreshold(model: string): number {
  const usableWindow = getEffectiveContextWindowSize(model)
  const bufferedThreshold = usableWindow - AUTOCOMPACT_BUFFER_TOKENS

  // Unset, empty, or non-numeric input parses to NaN and fails both range
  // comparisons, so it falls through to the default.
  const pct = Number.parseFloat(
    process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE ?? '',
  )
  const pctIsUsable = pct > 0 && pct <= 100
  if (!pctIsUsable) {
    return bufferedThreshold
  }

  const pctThreshold = Math.floor(usableWindow * (pct / 100))
  return Math.min(pctThreshold, bufferedThreshold)
}
|
|
/**
 * Classifies `tokenUsage` for `model` against the warning, error,
 * auto-compact, and blocking limits.
 *
 * The "active threshold" is the auto-compact threshold when auto-compaction
 * is enabled, otherwise the effective context window. The warning and error
 * flags and `percentLeft` are measured against that threshold.
 *
 * The blocking limit defaults to the effective window minus
 * `MANUAL_COMPACT_BUFFER_TOKENS` and can be replaced with a positive integer
 * via `CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE`.
 *
 * @param tokenUsage - Current token count of the conversation.
 * @param model - Model identifier used to size the windows and thresholds.
 * @returns `percentLeft` as an integer in [0, 100], plus one boolean per
 *   limit.
 */
export function calculateTokenWarningState(
  tokenUsage: number,
  model: string,
): {
  percentLeft: number
  isAboveWarningThreshold: boolean
  isAboveErrorThreshold: boolean
  isAboveAutoCompactThreshold: boolean
  isAtBlockingLimit: boolean
} {
  // Resolve each input once; they are reused by several checks below.
  const autoCompactThreshold = getAutoCompactThreshold(model)
  const autoCompactEnabled = isAutoCompactEnabled()
  const effectiveWindow = getEffectiveContextWindowSize(model)
  const threshold = autoCompactEnabled ? autoCompactThreshold : effectiveWindow

  // A very small configured window can push the threshold to zero or below.
  // Dividing by it would yield NaN (0/0) or a large positive "percent left"
  // (negative / negative), so report 0% in that case. The result is clamped
  // to [0, 100] so out-of-range usage never produces a nonsensical value.
  const rawPercentLeft =
    threshold > 0
      ? Math.round(((threshold - tokenUsage) / threshold) * 100)
      : 0
  const percentLeft = Math.min(100, Math.max(0, rawPercentLeft))

  const isAboveWarningThreshold =
    tokenUsage >= threshold - WARNING_THRESHOLD_BUFFER_TOKENS
  const isAboveErrorThreshold =
    tokenUsage >= threshold - ERROR_THRESHOLD_BUFFER_TOKENS

  // Never reported when auto-compaction is disabled.
  const isAboveAutoCompactThreshold =
    autoCompactEnabled && tokenUsage >= autoCompactThreshold

  // Unset or non-numeric input parses to NaN and fails `> 0`, so the default
  // limit is used.
  const overrideLimit = Number.parseInt(
    process.env.CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE ?? '',
    10,
  )
  const blockingLimit =
    overrideLimit > 0
      ? overrideLimit
      : effectiveWindow - MANUAL_COMPACT_BUFFER_TOKENS
  const isAtBlockingLimit = tokenUsage >= blockingLimit

  return {
    percentLeft,
    isAboveWarningThreshold,
    isAboveErrorThreshold,
    isAboveAutoCompactThreshold,
    isAtBlockingLimit,
  }
}
|
|
/**
 * Reports whether automatic compaction is turned on.
 *
 * Either of the `DISABLE_COMPACT` or `DISABLE_AUTO_COMPACT` environment
 * variables being truthy turns it off; otherwise the `autoCompactEnabled`
 * field of the global config decides.
 *
 * @returns `true` when auto-compaction may run.
 */
export function isAutoCompactEnabled(): boolean {
  // Checked in this order, short-circuiting, so the global config is only
  // read when neither environment switch is set.
  const disabledByEnv =
    isEnvTruthy(process.env.DISABLE_COMPACT) ||
    isEnvTruthy(process.env.DISABLE_AUTO_COMPACT)

  return disabledByEnv ? false : getGlobalConfig().autoCompactEnabled
}
|
|
/**
 * Decides whether the conversation has grown enough to warrant automatic
 * compaction.
 *
 * Returns `false` without measuring anything when the query source is
 * excluded, auto-compaction is disabled, or a feature-gated alternative is
 * active. Otherwise it estimates the token count, logs the numbers, and
 * reports whether the count is at or above the auto-compact threshold.
 *
 * @param messages - Conversation messages to estimate tokens for.
 * @param model - Model identifier used to size the thresholds.
 * @param querySource - Origin of the current query, if known.
 * @param snipTokensFreed - Tokens subtracted from the estimate before it is
 *   compared (defaults to 0).
 * @returns Whether auto-compaction should run now.
 */
export async function shouldAutoCompact(
  messages: Message[],
  model: string,
  querySource?: QuerySource,
  snipTokensFreed = 0,
): Promise<boolean> {
  // NOTE(review): presumably these sources are themselves compaction-related
  // queries that must not trigger a nested compaction; confirm.
  if (querySource === 'compact' || querySource === 'session_memory') {
    return false
  }

  // Each `feature(...)` call is kept as a bare `if` condition so a build
  // that disables the flag can drop the whole branch.
  if (feature('CONTEXT_COLLAPSE')) {
    if (querySource === 'marble_origami') {
      return false
    }
  }

  if (!isAutoCompactEnabled()) {
    return false
  }

  if (feature('REACTIVE_COMPACT')) {
    if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_raccoon', false)) {
      return false
    }
  }

  if (feature('CONTEXT_COLLAPSE')) {
    // Loaded lazily so the module is only pulled in when the flag is on.
    const { isContextCollapseEnabled } =
      require('../contextCollapse/index.js') as typeof import('../contextCollapse/index.js')

    if (isContextCollapseEnabled()) {
      return false
    }
  }

  const estimatedTokens = tokenCountWithEstimation(messages) - snipTokensFreed
  const compactThreshold = getAutoCompactThreshold(model)
  const usableWindow = getEffectiveContextWindowSize(model)

  // The snip figure is only appended when something was actually freed.
  const snipSuffix = snipTokensFreed > 0 ? ` snipFreed=${snipTokensFreed}` : ''
  logForDebugging(
    `autocompact: tokens=${estimatedTokens} threshold=${compactThreshold} effectiveWindow=${usableWindow}${snipSuffix}`,
  )

  return calculateTokenWarningState(estimatedTokens, model)
    .isAboveAutoCompactThreshold
}
|
|
/**
 * Runs automatic compaction when the conversation is over the auto-compact
 * threshold.
 *
 * Order of operations:
 * 1. Bail out if `DISABLE_COMPACT` is truthy or the consecutive-failure
 *    circuit breaker has tripped.
 * 2. Ask `shouldAutoCompact` whether compaction is warranted.
 * 3. Try session-memory compaction first; fall back to
 *    `compactConversation` when it yields nothing.
 *
 * @param messages - Conversation messages to compact.
 * @param toolUseContext - Supplies the main-loop model and agent id.
 * @param cacheSafeParams - Forwarded unchanged to `compactConversation`.
 * @param querySource - Origin of the current query, if known.
 * @param tracking - State carried over from earlier calls (recompaction
 *   chain info and the consecutive-failure count).
 * @param snipTokensFreed - Forwarded to `shouldAutoCompact`.
 * @returns `wasCompacted` plus, on success, the compaction result and
 *   `consecutiveFailures: 0`; on failure, the incremented failure count.
 *   Early exits return only `{ wasCompacted: false }`. Never rejects because
 *   of a `compactConversation` failure.
 */
export async function autoCompactIfNeeded(
  messages: Message[],
  toolUseContext: ToolUseContext,
  cacheSafeParams: CacheSafeParams,
  querySource?: QuerySource,
  tracking?: AutoCompactTrackingState,
  snipTokensFreed?: number,
): Promise<{
  wasCompacted: boolean
  compactionResult?: CompactionResult
  consecutiveFailures?: number
}> {
  if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
    return { wasCompacted: false }
  }

  // Circuit breaker: after too many failures in a row, stop retrying so
  // every subsequent call does not repeat a doomed attempt.
  const priorFailures = tracking?.consecutiveFailures ?? 0
  if (priorFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES) {
    return { wasCompacted: false }
  }

  const model = toolUseContext.options.mainLoopModel
  const shouldCompact = await shouldAutoCompact(
    messages,
    model,
    querySource,
    snipTokensFreed,
  )
  if (!shouldCompact) {
    return { wasCompacted: false }
  }

  const recompactionInfo: RecompactionInfo = {
    isRecompactionInChain: tracking?.compacted === true,
    // -1 signals that no previous compaction has been tracked.
    turnsSincePreviousCompact: tracking?.turnCounter ?? -1,
    previousCompactTurnId: tracking?.turnId,
    autoCompactThreshold: getAutoCompactThreshold(model),
    querySource,
  }

  // Preferred path: session-memory compaction. A falsy result means it did
  // not apply and the full compaction below is used instead.
  const sessionMemoryResult = await trySessionMemoryCompaction(
    messages,
    toolUseContext.agentId,
    recompactionInfo.autoCompactThreshold,
  )
  if (sessionMemoryResult) {
    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup(querySource)
    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
      notifyCompaction(querySource ?? 'compact', toolUseContext.agentId)
    }
    markPostCompaction()
    return {
      wasCompacted: true,
      compactionResult: sessionMemoryResult,
      // A successful compaction ends any failure streak, matching the
      // full-compaction success path below.
      consecutiveFailures: 0,
    }
  }

  try {
    // NOTE(review): the positional `true, undefined, true` arguments are
    // defined by compactConversation's signature, which is not visible in
    // this file.
    const compactionResult = await compactConversation(
      messages,
      toolUseContext,
      cacheSafeParams,
      true,
      undefined,
      true,
      recompactionInfo,
    )

    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup(querySource)

    return {
      wasCompacted: true,
      compactionResult,
      consecutiveFailures: 0,
    }
  } catch (error) {
    // User-initiated aborts are expected and not worth logging as errors.
    // NOTE(review): an abort still counts toward the circuit breaker below;
    // confirm that is intended.
    if (!hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT)) {
      logError(error)
    }

    const nextFailures = priorFailures + 1
    if (nextFailures >= MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES) {
      logForDebugging(
        `autocompact: circuit breaker tripped after ${nextFailures} consecutive failures — skipping future attempts this session`,
        { level: 'warn' },
      )
    }
    return { wasCompacted: false, consecutiveFailures: nextFailures }
  }
}
|
|