File size: 7,628 Bytes
2b64d42 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | import { describe, it, beforeEach } from 'node:test';
import assert from 'node:assert/strict';
import { extractCachePolicy } from '../src/handlers/messages.js';
import { handleMessages } from '../src/handlers/messages.js';
import { checkin as poolCheckin, checkout as poolCheckout, poolClear } from '../src/conversation-pool.js';
// Anthropic prompt-caching markers (cache_control: { type: 'ephemeral',
// ttl?: '5m' | '1h' }) appear on tools[], system[] blocks, and
// messages[].content[] blocks. Cascade upstream doesn't speak this
// dialect — the proxy parses, summarises, and strips them so they
// don't leak into Cascade requests, then attributes the resulting
// cache_creation tokens to ephemeral_5m or ephemeral_1h based on the
// presence of any 1h marker.
describe('extractCachePolicy — strip + summarise cache_control markers', () => {
  // One marker is placed in each of the three supported locations (tools,
  // system, message content). A marker with no ttl or with ttl '5m' counts
  // as a breakpoint but must not set has1h.
  it('counts 5m markers across tools, system, messages and strips them', () => {
    const body = {
      tools: [
        { name: 't1', cache_control: { type: 'ephemeral' } },
        { name: 't2' },
      ],
      system: [
        { type: 'text', text: 'sys1' },
        { type: 'text', text: 'sys2', cache_control: { type: 'ephemeral', ttl: '5m' } },
      ],
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'hello' },
            { type: 'text', text: 'tagged', cache_control: { type: 'ephemeral' } },
          ],
        },
      ],
    };

    const policy = extractCachePolicy(body);

    assert.equal(policy.breakpointCount, 3);
    assert.equal(policy.has1h, false);
    // Stripping happens in place, on the object the caller passed in.
    assert.equal(body.tools[0].cache_control, undefined);
    assert.equal(body.system[1].cache_control, undefined);
    assert.equal(body.messages[0].content[1].cache_control, undefined);
  });

  // A single 1h marker among 5m markers is enough to flip has1h.
  it('flags has1h when any marker requests 1h ttl', () => {
    const body = {
      system: [
        { type: 'text', text: 'a', cache_control: { type: 'ephemeral', ttl: '5m' } },
        { type: 'text', text: 'b', cache_control: { type: 'ephemeral', ttl: '1h' } },
      ],
    };

    const p = extractCachePolicy(body);

    assert.equal(p.breakpointCount, 2);
    assert.equal(p.has1h, true);
  });

  it('returns zero policy and no mutation when no markers present', () => {
    const body = {
      tools: [{ name: 't' }],
      system: [{ type: 'text', text: 'x' }],
      messages: [{ role: 'user', content: 'hi' }],
    };
    // Snapshot before the call so the "no mutation" half of the title is
    // actually verified rather than only the zero policy.
    const snapshot = structuredClone(body);

    const p = extractCachePolicy(body);

    assert.equal(p.breakpointCount, 0);
    assert.equal(p.has1h, false);
    assert.deepEqual(body, snapshot);
  });

  // A cache_control placed directly on the request body is counted like
  // any other marker and removed from the body.
  it('strips top-level cache_control auto-cache hint', () => {
    const body = {
      cache_control: { type: 'ephemeral', ttl: '1h' },
      messages: [{ role: 'user', content: 'hi' }],
    };

    const p = extractCachePolicy(body);

    assert.equal(p.breakpointCount, 1);
    assert.equal(p.has1h, true);
    assert.equal(body.cache_control, undefined);
  });

  // Covers: empty body, null tools with a plain-string system prompt, and
  // a message whose content is null.
  it('does not throw on malformed bodies', () => {
    assert.doesNotThrow(() => extractCachePolicy({}));
    assert.doesNotThrow(() => extractCachePolicy({ tools: null, system: 'x' }));
    assert.doesNotThrow(() => extractCachePolicy({ messages: [{ role: 'user', content: null }] }));
  });
});
describe('handleMessages — cache_control round-trip into Anthropic usage shape', () => {
  // TTL hint used by the pool tests below (90 minutes, in milliseconds).
  const LONG_TTL_HINT_MS = 90 * 60 * 1000;

  /**
   * Builds a stand-in for the chat handler that is passed as the second
   * argument to handleMessages. Its handleChatCompletions always resolves
   * to a 200 chat.completion response whose `usage` is a fixed baseline
   * overlaid with `usagePatch`.
   *
   * @param {object} [usagePatch] - usage fields that override the baseline.
   * @returns {{ handleChatCompletions: Function }} the fake handler.
   */
  function fakeChat(usagePatch) {
    return {
      async handleChatCompletions(body, ctx) {
        // NOTE(review): body.__cachePolicy is expected to reach chat.js, but
        // this fake does not assert on it; only body.model is read here.
        return {
          status: 200,
          body: {
            id: 'chat_1',
            object: 'chat.completion',
            created: 1,
            model: body.model,
            choices: [
              { index: 0, message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
            ],
            usage: {
              prompt_tokens: 10,
              completion_tokens: 1,
              total_tokens: 11,
              prompt_tokens_details: { cached_tokens: 0 },
              cache_creation_input_tokens: 100,
              cache_read_input_tokens: 0,
              // Spread last so the caller's patch wins over the baseline.
              ...usagePatch,
            },
          },
        };
      },
    };
  }

  it('5m markers route creation tokens to ephemeral_5m_input_tokens', async () => {
    const request = {
      model: 'claude-sonnet-4.6',
      max_tokens: 16,
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'hi', cache_control: { type: 'ephemeral' } },
          ],
        },
      ],
    };
    const chat = fakeChat({
      cache_creation_input_tokens: 100,
      cache_creation: { ephemeral_5m_input_tokens: 100, ephemeral_1h_input_tokens: 0 },
    });

    const result = await handleMessages(request, chat);

    assert.equal(result.status, 200);
    assert.equal(result.body.usage.cache_creation_input_tokens, 100);
    assert.deepEqual(result.body.usage.cache_creation, {
      ephemeral_5m_input_tokens: 100,
      ephemeral_1h_input_tokens: 0,
    });
  });

  it('1h markers route creation tokens to ephemeral_1h_input_tokens', async () => {
    const request = {
      model: 'claude-sonnet-4.6',
      max_tokens: 16,
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'hi', cache_control: { type: 'ephemeral', ttl: '1h' } },
          ],
        },
      ],
    };
    const chat = fakeChat({
      cache_creation_input_tokens: 200,
      cache_creation: { ephemeral_5m_input_tokens: 0, ephemeral_1h_input_tokens: 200 },
    });

    const result = await handleMessages(request, chat);

    assert.equal(result.status, 200);
    assert.equal(result.body.usage.cache_creation_input_tokens, 200);
    assert.deepEqual(result.body.usage.cache_creation, {
      ephemeral_5m_input_tokens: 0,
      ephemeral_1h_input_tokens: 200,
    });
  });

  it('cascade pool entry honours ttlHintMs longer than default', () => {
    poolClear();
    try {
      const baseEntry = {
        cascadeId: 'c1',
        sessionId: 's1',
        lsPort: 12345,
        apiKey: 'k',
        createdAt: Date.now(),
      };
      // Checked in without a hint, so the pool's default TTL applies.
      poolCheckin('fp_default', { ...baseEntry });
      // Checked in with an explicit 90-minute hint as the fourth argument.
      // NOTE(review): the third argument is passed as '' — its meaning is
      // not visible from this file.
      poolCheckin('fp_1h', { ...baseEntry }, '', LONG_TTL_HINT_MS);

      // Expiry itself is not exercised: time cannot be advanced here
      // without mocking. The test only verifies that the entry handed back
      // by checkout still carries the hint it was checked in with.
      const entry = poolCheckout('fp_1h');
      assert.equal(entry?.ttlHintMs, LONG_TTL_HINT_MS);
    } finally {
      // Always reset the pool so a failed assertion cannot leak entries
      // into later tests.
      poolClear();
    }
  });

  it('cascade pool checkin preserves ttlHintMs when restoring without an explicit hint', () => {
    poolClear();
    try {
      // The hint lives on the entry itself; no hint argument is passed.
      const e = { cascadeId: 'c', sessionId: 's', lsPort: 1, apiKey: 'k', ttlHintMs: LONG_TTL_HINT_MS };
      poolCheckin('fp1', e);

      const got = poolCheckout('fp1');
      // Optional chaining keeps a missed checkout an assertion failure
      // instead of a TypeError.
      assert.equal(got?.ttlHintMs, LONG_TTL_HINT_MS);
    } finally {
      poolClear();
    }
  });

  it('emits both flat fields and nested split when no markers were sent', async () => {
    const request = {
      model: 'claude-sonnet-4.6',
      max_tokens: 16,
      messages: [{ role: 'user', content: 'hi' }],
    };

    const result = await handleMessages(request, fakeChat({ cache_creation_input_tokens: 50 }));

    assert.equal(result.status, 200);
    const u = result.body.usage;
    // Both shapes coexist; the flat total equals the split sum.
    assert.equal(u.cache_creation_input_tokens, 50);
    assert.equal(u.cache_read_input_tokens, 0);
    assert.equal(
      u.cache_creation.ephemeral_5m_input_tokens + u.cache_creation.ephemeral_1h_input_tokens,
      u.cache_creation_input_tokens,
    );
  });
});
|