W / test /cache-control.test.js
Ac66's picture
Upload folder using huggingface_hub
2b64d42 verified
import { describe, it, beforeEach } from 'node:test';
import assert from 'node:assert/strict';
import { extractCachePolicy } from '../src/handlers/messages.js';
import { handleMessages } from '../src/handlers/messages.js';
import { checkin as poolCheckin, checkout as poolCheckout, poolClear } from '../src/conversation-pool.js';
// Anthropic prompt-caching markers (cache_control: { type: 'ephemeral',
// ttl?: '5m' | '1h' }) appear on tools[], system[] blocks, and
// messages[].content[] blocks. Cascade upstream doesn't speak this
// dialect — the proxy parses, summarises, and strips them so they
// don't leak into Cascade requests, then attributes the resulting
// cache_creation tokens to ephemeral_5m or ephemeral_1h based on the
// presence of any 1h marker.
describe('extractCachePolicy — strip + summarise cache_control markers', () => {
  // Each case hands a request body to extractCachePolicy and then inspects
  // two things: the returned summary ({ breakpointCount, has1h }) and the
  // body itself, which the function is expected to edit in place.

  it('counts 5m markers across tools, system, messages and strips them', () => {
    // One marker in each of the three sections; none of them asks for '1h'.
    const body = {
      tools: [
        { name: 't1', cache_control: { type: 'ephemeral' } },
        { name: 't2' },
      ],
      system: [
        { type: 'text', text: 'sys1' },
        { type: 'text', text: 'sys2', cache_control: { type: 'ephemeral', ttl: '5m' } },
      ],
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'hello' },
            { type: 'text', text: 'tagged', cache_control: { type: 'ephemeral' } },
          ],
        },
      ],
    };

    const policy = extractCachePolicy(body);

    assert.equal(policy.breakpointCount, 3);
    assert.equal(policy.has1h, false);
    // The markers must be gone from the very objects that were passed in.
    assert.equal(body.tools[0].cache_control, undefined);
    assert.equal(body.system[1].cache_control, undefined);
    assert.equal(body.messages[0].content[1].cache_control, undefined);
  });

  it('flags has1h when any marker requests 1h ttl', () => {
    // A single '1h' marker is enough, even next to a '5m' one.
    const body = {
      system: [
        { type: 'text', text: 'a', cache_control: { type: 'ephemeral', ttl: '5m' } },
        { type: 'text', text: 'b', cache_control: { type: 'ephemeral', ttl: '1h' } },
      ],
    };

    const policy = extractCachePolicy(body);

    assert.equal(policy.breakpointCount, 2);
    assert.equal(policy.has1h, true);
  });

  it('returns zero policy and no mutation when no markers present', () => {
    const body = {
      tools: [{ name: 't' }],
      system: [{ type: 'text', text: 'x' }],
      messages: [{ role: 'user', content: 'hi' }],
    };
    // Snapshot the serialized form: that is what would travel upstream, and
    // comparing JSON ignores properties that are merely set to undefined.
    const serializedBefore = JSON.stringify(body);

    const policy = extractCachePolicy(body);

    assert.equal(policy.breakpointCount, 0);
    assert.equal(policy.has1h, false);
    assert.equal(
      JSON.stringify(body),
      serializedBefore,
      'a body without cache_control markers must be left unchanged',
    );
  });

  it('strips top-level cache_control auto-cache hint', () => {
    // cache_control directly on the request body counts as one breakpoint.
    const body = {
      cache_control: { type: 'ephemeral', ttl: '1h' },
      messages: [{ role: 'user', content: 'hi' }],
    };

    const policy = extractCachePolicy(body);

    assert.equal(policy.breakpointCount, 1);
    assert.equal(policy.has1h, true);
    assert.equal(body.cache_control, undefined);
  });

  it('does not throw on malformed bodies', () => {
    // Empty body, non-array tools/system, and null message content.
    const malformedBodies = [
      {},
      { tools: null, system: 'x' },
      { messages: [{ role: 'user', content: null }] },
    ];

    for (const body of malformedBodies) {
      assert.doesNotThrow(() => extractCachePolicy(body));
    }
  });
});
describe('handleMessages — cache_control round-trip into Anthropic usage shape', () => {
  // TTL hint used by the pool tests; deliberately longer than one hour.
  const NINETY_MINUTES_MS = 90 * 60 * 1000;

  /**
   * Build a stand-in for the chat handler that handleMessages receives as its
   * second argument. It always answers 200 with a single assistant message
   * and a usage object made of fixed defaults overlaid with `usagePatch`.
   *
   * NOTE(review): the real chat handler is expected to see
   * `body.__cachePolicy`; this fake does not assert that.
   *
   * @param {object} [usagePatch] fields merged over the default usage object
   * @returns {{ handleChatCompletions: (body: object) => Promise<object> }}
   */
  function fakeChat(usagePatch) {
    return {
      async handleChatCompletions(body) {
        return {
          status: 200,
          body: {
            id: 'chat_1',
            object: 'chat.completion',
            created: 1,
            model: body.model,
            choices: [
              { index: 0, message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' },
            ],
            usage: {
              prompt_tokens: 10,
              completion_tokens: 1,
              total_tokens: 11,
              prompt_tokens_details: { cached_tokens: 0 },
              cache_creation_input_tokens: 100,
              cache_read_input_tokens: 0,
              ...usagePatch,
            },
          },
        };
      },
    };
  }

  /**
   * Send a one-block user message tagged with `cacheControl` through
   * handleMessages, using a fake chat handler that reports `usagePatch`.
   *
   * @param {object} cacheControl cache_control marker to put on the text block
   * @param {object} usagePatch usage fields the fake upstream should report
   * @returns {Promise<object>} whatever handleMessages resolves to
   */
  function sendTaggedMessage(cacheControl, usagePatch) {
    const request = {
      model: 'claude-sonnet-4.6',
      max_tokens: 16,
      messages: [
        {
          role: 'user',
          content: [{ type: 'text', text: 'hi', cache_control: cacheControl }],
        },
      ],
    };
    return handleMessages(request, fakeChat(usagePatch));
  }

  /**
   * Run `run` against an empty pool and empty the pool again afterwards,
   * even when an assertion inside `run` throws.
   *
   * @param {() => void} run synchronous test body
   */
  function withCleanPool(run) {
    poolClear();
    try {
      run();
    } finally {
      poolClear();
    }
  }

  // In the next two tests the fake upstream already reports the 5m/1h split,
  // so they check that handleMessages carries it into the response usage.

  it('5m markers route creation tokens to ephemeral_5m_input_tokens', async () => {
    const result = await sendTaggedMessage(
      { type: 'ephemeral' },
      {
        cache_creation_input_tokens: 100,
        cache_creation: { ephemeral_5m_input_tokens: 100, ephemeral_1h_input_tokens: 0 },
      },
    );

    assert.equal(result.status, 200);
    assert.equal(result.body.usage.cache_creation_input_tokens, 100);
    assert.deepEqual(result.body.usage.cache_creation, {
      ephemeral_5m_input_tokens: 100,
      ephemeral_1h_input_tokens: 0,
    });
  });

  it('1h markers route creation tokens to ephemeral_1h_input_tokens', async () => {
    const result = await sendTaggedMessage(
      { type: 'ephemeral', ttl: '1h' },
      {
        cache_creation_input_tokens: 200,
        cache_creation: { ephemeral_5m_input_tokens: 0, ephemeral_1h_input_tokens: 200 },
      },
    );

    assert.equal(result.status, 200);
    assert.equal(result.body.usage.cache_creation_input_tokens, 200);
    assert.deepEqual(result.body.usage.cache_creation, {
      ephemeral_5m_input_tokens: 0,
      ephemeral_1h_input_tokens: 200,
    });
  });

  it('cascade pool entry honours ttlHintMs longer than default', () => {
    withCleanPool(() => {
      const baseEntry = {
        cascadeId: 'c1',
        sessionId: 's1',
        lsPort: 12345,
        apiKey: 'k',
        createdAt: Date.now(),
      };

      // One entry checked in without a hint, one with a 90-minute hint.
      poolCheckin('fp_default', { ...baseEntry });
      poolCheckin('fp_1h', { ...baseEntry }, '', NINETY_MINUTES_MS);

      // Actual expiry is not exercised here; that would need a mocked clock.
      // This only verifies that the hint given at checkin is present on the
      // entry handed back by checkout.
      const entry = poolCheckout('fp_1h');
      assert.ok(entry, 'fp_1h entry should be available for checkout');
      assert.equal(entry.ttlHintMs, NINETY_MINUTES_MS);
    });
  });

  it('cascade pool checkin preserves ttlHintMs when restoring without an explicit hint', () => {
    withCleanPool(() => {
      // The hint lives on the entry itself; checkin gets no separate hint argument.
      const restored = {
        cascadeId: 'c',
        sessionId: 's',
        lsPort: 1,
        apiKey: 'k',
        ttlHintMs: NINETY_MINUTES_MS,
      };
      poolCheckin('fp1', restored);

      const got = poolCheckout('fp1');
      assert.ok(got, 'fp1 entry should be available for checkout');
      assert.equal(got.ttlHintMs, NINETY_MINUTES_MS);
    });
  });

  it('emits both flat fields and nested split when no markers were sent', async () => {
    const request = {
      model: 'claude-sonnet-4.6',
      max_tokens: 16,
      messages: [{ role: 'user', content: 'hi' }],
    };
    // The fake upstream reports only the flat total here, no nested split.
    const result = await handleMessages(request, fakeChat({ cache_creation_input_tokens: 50 }));

    assert.equal(result.status, 200);
    const u = result.body.usage;
    // Both shapes coexist; the flat total equals the split sum.
    assert.equal(u.cache_creation_input_tokens, 50);
    assert.equal(u.cache_read_input_tokens, 0);
    // Fail with a readable message instead of a TypeError if the split is absent.
    assert.ok(
      u.cache_creation !== null && typeof u.cache_creation === 'object',
      'usage.cache_creation split should be present',
    );
    assert.equal(
      u.cache_creation.ephemeral_5m_input_tokens + u.cache_creation.ephemeral_1h_input_tokens,
      u.cache_creation_input_tokens,
    );
  });
});