// NOTE: web-viewer residue removed here (page header "W" / "File size: 7,628 Bytes" / "2b64d42" and a line-number gutter 1–198).
import { describe, it, beforeEach } from 'node:test';
import assert from 'node:assert/strict';
import { extractCachePolicy } from '../src/handlers/messages.js';
import { handleMessages } from '../src/handlers/messages.js';
import { checkin as poolCheckin, checkout as poolCheckout, poolClear } from '../src/conversation-pool.js';

// Anthropic prompt-caching markers (cache_control: { type: 'ephemeral',
// ttl?: '5m' | '1h' }) appear on tools[], system[] blocks, and
// messages[].content[] blocks. Cascade upstream doesn't speak this
// dialect — the proxy parses, summarises, and strips them so they
// don't leak into Cascade requests, then attributes the resulting
// cache_creation tokens to ephemeral_5m or ephemeral_1h based on the
// presence of any 1h marker.

describe('extractCachePolicy — strip + summarise cache_control markers', () => {
  it('counts 5m markers across tools, system, messages and strips them', () => {
    // One marked block per location; the unmarked siblings must not be counted.
    const markedTool = { name: 't1', cache_control: { type: 'ephemeral' } };
    const markedSystem = { type: 'text', text: 'sys2', cache_control: { type: 'ephemeral', ttl: '5m' } };
    const markedPart = { type: 'text', text: 'tagged', cache_control: { type: 'ephemeral' } };
    const request = {
      tools: [markedTool, { name: 't2' }],
      system: [{ type: 'text', text: 'sys1' }, markedSystem],
      messages: [
        { role: 'user', content: [{ type: 'text', text: 'hello' }, markedPart] },
      ],
    };

    const { breakpointCount, has1h } = extractCachePolicy(request);

    // Three markers, none of them asking for the 1h ttl.
    assert.equal(breakpointCount, 3);
    assert.equal(has1h, false);

    // The markers are gone from the request object that was passed in.
    const formerlyMarked = [
      request.tools[0],
      request.system[1],
      request.messages[0].content[1],
    ];
    for (const block of formerlyMarked) {
      assert.equal(block.cache_control, undefined);
    }
  });

  it('flags has1h when any marker requests 1h ttl', () => {
    // Two marked system blocks: the first asks for 5m, the second for 1h.
    const ephemeral = (ttl) => ({ type: 'ephemeral', ttl });
    const request = {
      system: [
        { type: 'text', text: 'a', cache_control: ephemeral('5m') },
        { type: 'text', text: 'b', cache_control: ephemeral('1h') },
      ],
    };

    const { breakpointCount, has1h } = extractCachePolicy(request);

    // Both markers count, and the single 1h marker is enough to set the flag.
    assert.equal(breakpointCount, 2);
    assert.equal(has1h, true);
  });

  it('returns zero policy and no mutation when no markers present', () => {
    const body = {
      tools: [{ name: 't' }],
      system: [{ type: 'text', text: 'x' }],
      messages: [{ role: 'user', content: 'hi' }],
    };
    // Deep snapshot taken before the call so an in-place change to any
    // nested block is detectable afterwards.
    const snapshot = structuredClone(body);

    const p = extractCachePolicy(body);

    assert.equal(p.breakpointCount, 0);
    assert.equal(p.has1h, false);
    // The "no mutation" half of the test title: a marker-free body must
    // come back structurally identical to what went in.
    assert.deepEqual(body, snapshot);
  });

  it('strips top-level cache_control auto-cache hint', () => {
    // Here the hint sits on the request root instead of on a tool, system
    // or content block.
    const request = {
      cache_control: { type: 'ephemeral', ttl: '1h' },
      messages: [{ role: 'user', content: 'hi' }],
    };

    const { breakpointCount, has1h } = extractCachePolicy(request);

    // The root hint is counted as one breakpoint and its ttl is honoured.
    assert.equal(breakpointCount, 1);
    assert.equal(has1h, true);
    // ...and it no longer appears on the request.
    assert.equal(request.cache_control, undefined);
  });

  it('does not throw on malformed bodies', () => {
    // Shapes that omit the expected arrays or replace them with null / a string.
    const malformedBodies = [
      {},
      { tools: null, system: 'x' },
      { messages: [{ role: 'user', content: null }] },
    ];
    for (const malformed of malformedBodies) {
      assert.doesNotThrow(() => extractCachePolicy(malformed));
    }
  });
});

describe('handleMessages — cache_control round-trip into Anthropic usage shape', () => {
  /**
   * Builds a stub chat handler for the tests in this suite.
   *
   * The stub exposes a single async `handleChatCompletions(body, ctx)` that
   * always answers with status 200 and a fixed chat-completion payload whose
   * `model` echoes `body.model`. The `usage` object starts from fixed
   * defaults (100 cache-creation tokens, 0 cache-read tokens) and is then
   * overridden key-by-key with `usagePatch`.
   *
   * NOTE(review): the original comment here said `body.__cachePolicy` must
   * reach chat.js, but the stub does not inspect it — confirm whether an
   * assertion was intended.
   *
   * @param {object} [usagePatch] - Usage fields that replace the defaults.
   * @returns {{ handleChatCompletions: Function }} The stub handler object.
   */
  function fakeChat(usagePatch) {
    const defaultUsage = {
      prompt_tokens: 10,
      completion_tokens: 1,
      total_tokens: 11,
      prompt_tokens_details: { cached_tokens: 0 },
      cache_creation_input_tokens: 100,
      cache_read_input_tokens: 0,
    };

    // `ctx` is accepted to keep the two-argument shape but is not used.
    async function handleChatCompletions(body, ctx) {
      const assistantChoice = {
        index: 0,
        message: { role: 'assistant', content: 'ok' },
        finish_reason: 'stop',
      };
      const payload = {
        id: 'chat_1',
        object: 'chat.completion',
        created: 1,
        model: body.model,
        choices: [assistantChoice],
        // Patch keys win over the defaults because they are spread last.
        usage: { ...defaultUsage, ...usagePatch },
      };
      return { status: 200, body: payload };
    }

    return { handleChatCompletions };
  }

  it('5m markers route creation tokens to ephemeral_5m_input_tokens', async () => {
    // Request whose only content block carries a marker with no ttl.
    const request = {
      model: 'claude-sonnet-4.6',
      max_tokens: 16,
      messages: [
        { role: 'user', content: [
          { type: 'text', text: 'hi', cache_control: { type: 'ephemeral' } },
        ] },
      ],
    };
    // The stub handler reports all 100 creation tokens in the 5m bucket.
    const chat = fakeChat({
      cache_creation_input_tokens: 100,
      cache_creation: { ephemeral_5m_input_tokens: 100, ephemeral_1h_input_tokens: 0 },
    });

    const response = await handleMessages(request, chat);

    assert.equal(response.status, 200);
    const { usage } = response.body;
    // Flat total and nested split must both survive the translation.
    assert.equal(usage.cache_creation_input_tokens, 100);
    assert.deepEqual(usage.cache_creation, {
      ephemeral_5m_input_tokens: 100,
      ephemeral_1h_input_tokens: 0,
    });
  });

  it('1h markers route creation tokens to ephemeral_1h_input_tokens', async () => {
    // Request whose only content block carries an explicit 1h marker.
    const request = {
      model: 'claude-sonnet-4.6',
      max_tokens: 16,
      messages: [
        { role: 'user', content: [
          { type: 'text', text: 'hi', cache_control: { type: 'ephemeral', ttl: '1h' } },
        ] },
      ],
    };
    // The stub handler reports all 200 creation tokens in the 1h bucket.
    const chat = fakeChat({
      cache_creation_input_tokens: 200,
      cache_creation: { ephemeral_5m_input_tokens: 0, ephemeral_1h_input_tokens: 200 },
    });

    const response = await handleMessages(request, chat);

    assert.equal(response.status, 200);
    const { usage } = response.body;
    // Flat total and nested split must both survive the translation.
    assert.equal(usage.cache_creation_input_tokens, 200);
    assert.deepEqual(usage.cache_creation, {
      ephemeral_5m_input_tokens: 0,
      ephemeral_1h_input_tokens: 200,
    });
  });

  it('cascade pool entry honours ttlHintMs longer than default', () => {
    // 90 minutes; the original note describes the pool default as 30 min.
    const ttlHintMs = 90 * 60 * 1000;

    poolClear();
    try {
      const baseEntry = {
        cascadeId: 'c1', sessionId: 's1', lsPort: 12345, apiKey: 'k',
        createdAt: Date.now(),
      };
      // Checked in without a hint.
      poolCheckin('fp_default', { ...baseEntry });
      // Checked in with an explicit hint as the fourth argument.
      poolCheckin('fp_1h', { ...baseEntry }, '', ttlHintMs);

      // Actual expiry cannot be observed without mocking the clock, so
      // this only verifies that the hint is recorded on the entry that
      // checkout hands back.
      const entry = poolCheckout('fp_1h');
      assert.equal(entry?.ttlHintMs, ttlHintMs);
    } finally {
      // Reset the shared pool even when an assertion above fails, so a
      // failure here cannot leak entries into later tests.
      poolClear();
    }
  });

  it('cascade pool checkin preserves ttlHintMs when restoring without an explicit hint', () => {
    const ttlHintMs = 90 * 60 * 1000;

    poolClear();
    try {
      // The hint lives on the entry itself; checkin gets no hint argument.
      const e = { cascadeId: 'c', sessionId: 's', lsPort: 1, apiKey: 'k', ttlHintMs };
      poolCheckin('fp1', e);

      const got = poolCheckout('fp1');
      assert.equal(got.ttlHintMs, ttlHintMs);
    } finally {
      // Runs even if checkout returns nothing and the line above throws,
      // so the shared pool never carries state into the next test.
      poolClear();
    }
  });

  it('emits both flat fields and nested split when no markers were sent', async () => {
    // Plain string content: no cache_control marker anywhere in the request.
    const request = {
      model: 'claude-sonnet-4.6',
      max_tokens: 16,
      messages: [{ role: 'user', content: 'hi' }],
    };
    // The stub reports only the flat creation total, with no nested split.
    const chat = fakeChat({ cache_creation_input_tokens: 50 });

    const response = await handleMessages(request, chat);

    assert.equal(response.status, 200);
    const { usage } = response.body;
    assert.equal(usage.cache_creation_input_tokens, 50);
    assert.equal(usage.cache_read_input_tokens, 0);

    // Both shapes coexist: the two nested buckets add up to the flat total.
    const {
      ephemeral_5m_input_tokens: fiveMinuteTokens,
      ephemeral_1h_input_tokens: oneHourTokens,
    } = usage.cache_creation;
    assert.equal(fiveMinuteTokens + oneHourTokens, usage.cache_creation_input_tokens);
  });
});