W
File size: 28,873 Bytes
2b64d42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
/**
 * Cascade conversation reuse pool (experimental).
 *
 * Goal: when a multi-turn chat continues a previous exchange, reuse the same
 * Windsurf `cascade_id` instead of starting a fresh one. This lets the
 * Windsurf backend keep its own per-cascade context cached — we avoid
 * resending the full history on each turn and the server responds faster.
 *
 * The key is a "state digest" of the caller-visible trajectory up to (but
 * not including) the newest user/tool result turn. v2.0.25 upgraded the key
 * from a relaxed "user text only" projection to a server-state semantic key
 * that includes assistant text + tool_calls digest, normalized system,
 * stable media digests, and (when tool-emulating) the tool schema digest.
 * This trades some hit rate for correctness: when the client's prior
 * assistant / system / tool context drifts, we miss instead of silently
 * resuming a stale upstream cascade.
 *
 * Safety rails:
 *   - Entries are pinned to a specific (apiKey, lsPort) pair. We must reuse
 *     the same LS and the same account or the cascade_id is meaningless.
 *   - A checked-out entry is removed from the pool. Concurrent second request
 *     with the same fingerprint falls back to a fresh cascade.
 *   - TTL defaults to 30 min (override with CASCADE_POOL_TTL_MS); LRU eviction
 *     at 500 entries.
 */

import { createHash } from 'crypto';

/**
 * Read a strictly positive integer from an environment variable.
 *
 * @param {string} name - Environment variable to look up.
 * @param {number} fallback - Returned when the variable is unset, empty,
 *   does not start with a base-10 integer, or is not greater than zero.
 * @returns {number} The parsed integer, or `fallback`.
 */
function positiveIntEnv(name, fallback) {
  const raw = process.env[name] || '';
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return fallback;
  return parsed;
}

// Default idle lifetime of a pooled entry in milliseconds: 30 minutes
// unless CASCADE_POOL_TTL_MS holds a positive integer.
const POOL_TTL_MS = positiveIntEnv('CASCADE_POOL_TTL_MS', 30 * 60 * 1000);
// Entry count above which prune() evicts the least recently accessed entries.
const POOL_MAX = 500;
// Embedded as `v` in every fingerprint payload.
const KEY_VERSION = 2;

// fingerprint string -> pooled cascade entry.
const _pool = new Map();

// Running counters; spread into the object returned by poolStats().
const stats = { hits: 0, misses: 0, stores: 0, evictions: 0, expired: 0 };

/**
 * Hex-encoded SHA-256 digest of the given input.
 *
 * @param {string|Buffer} s - Data fed to the hash.
 * @returns {string} 64-character lowercase hex digest.
 */
function sha256(s) {
  const hasher = createHash('sha256');
  hasher.update(s);
  return hasher.digest('hex');
}

/**
 * Truncated SHA-256 hex digest of a value's string form.
 *
 * @param {*} s - Value to digest; null/undefined are hashed as ''.
 * @param {number} [n=16] - Number of leading hex characters to keep.
 * @returns {string} The first `n` characters of the hex digest.
 */
function shortDigest(s, n = 16) {
  const text = String(s ?? '');
  const fullHex = sha256(text);
  return fullHex.slice(0, n);
}

// Client-injected meta tags whose bodies change every turn (cwd snapshot,
// todo state, current time, hook output, slash-command echo). If we hash
// these, the fingerprint drifts even when the real user text is unchanged
// and Cascade reuse silently falls back to fresh for every call
// (issue #24). Strip them before hashing.
//
// This set is the single source for both the stripping pattern built by
// buildMetaTagRe() and the unknown-tag audit in stripMetaTags().
const META_TAG_NAMES = new Set([
  'system-reminder',
  'command-message',
  'command-name',
  'command-args',
  'local-command-stdout',
  'local-command-stderr',
  'user-prompt-submit-hook',
  'analysis',
  'summary',
  'example',
]);

/**
 * Compile the global pattern that matches one complete
 * `<tag ...>...</tag>` element for any name in META_TAG_NAMES.
 *
 * Tag names are regex-escaped before being joined into the alternation,
 * and the closing tag is tied to the opening one with a backreference.
 *
 * @returns {RegExp} Pattern with the `g` flag set.
 */
function buildMetaTagRe() {
  const alternatives = [];
  for (const tagName of META_TAG_NAMES) {
    alternatives.push(tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  }
  const source = `<(${alternatives.join('|')})[^>]*>[\\s\\S]*?</\\1>`;
  return new RegExp(source, 'g');
}
let META_TAG_RE = buildMetaTagRe();

/**
 * Remove known meta-tag elements from a string and tidy the whitespace
 * they leave behind (trailing blanks before a newline, runs of three or
 * more newlines, leading/trailing whitespace).
 *
 * Any other paired XML-like tags still present afterwards are reported
 * on stderr but left in the text.
 *
 * @param {*} s - Text to clean. Non-strings and '' are returned unchanged.
 * @returns {*} The cleaned string, or `s` itself when it is not a
 *   non-empty string.
 */
function stripMetaTags(s) {
  if (typeof s !== 'string' || s === '') return s;

  const cleaned = s
    .replace(META_TAG_RE, '')
    .replace(/[ \t]+\n/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();

  // Unknown tags are caller content. Never learn them into the global
  // stripping set or fingerprints stop being a pure function of the request.
  const leftovers = cleaned.match(/<([a-z][-a-z_]*)[^>]*>[\s\S]*?<\/\1>/g) ?? [];
  const unrecognised = new Set();
  for (const fragment of leftovers) {
    const tagName = fragment.match(/^<([a-z][-a-z_]*)/)?.[1];
    if (tagName && !META_TAG_NAMES.has(tagName)) unrecognised.add(tagName);
  }
  if (unrecognised.size > 0) {
    console.error(`[META_TAG_AUDIT] Unknown XML tags in user message: ${[...unrecognised].join(', ')}`);
  }
  return cleaned;
}

// v2.0.61 (#111): normalize dynamic chunks of the system prompt that
// drift across turns (today's date, ISO timestamps, working directory,
// session UUIDs) so the same logical Claude Code session keeps the
// same cascade fingerprint instead of cache-missing every turn.
//
// Without this, Claude Code's 26KB system prompt (which embeds the
// current date / cwd / session id) hashed differently every request,
// reuse silently fell back to fresh, and the model looked like it was
// "looping" because each call started a new cascade.
//
// Patterns are conservative: only normalize tokens that are
// (a) verifiably temporal/identifier-shaped and (b) common enough in
// real Claude Code system prompts that their presence dominated the
// hash. Plain prose drift remains in the hash so genuine prompt edits
// still create a fresh cascade.
//
// Input:  `s` - system prompt text; falsy values are treated as ''.
// Output: the text with volatile fragments replaced by the placeholders
//         <ts>, <date>, <uuid>, <cwd>, <time>, <sessionid>, <epoch>,
//         <git-status>, <recent-commits>, <recent-files> and <gitsha>.
function normalizeSystemPromptForHash(s) {
  let out = String(s || '');
  // --- temporal / identifier tokens ---------------------------------
  out = out
    // ISO 8601 timestamps (with or without ms / tz)
    .replace(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?/g, '<ts>')
    // "Today's date is YYYY-MM-DD" / "Today is YYYY-MM-DD" / etc.
    .replace(/\b(Today(?:'s)?\s+(?:date|is)(?:\s+is)?\s*[:\-]?\s*)\d{4}-\d{2}-\d{2}/gi, '$1<date>')
    // Bare YYYY-MM-DD lines (date-only) when standalone
    .replace(/(?<!\d)\d{4}-\d{2}-\d{2}(?!\d|T)/g, '<date>')
    // UUIDs (8-4-4-4-12 hex): session/account ids, always
    .replace(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi, '<uuid>')
    // Working directory lines (Claude Code prepends each turn).
    //
    // v2.0.78 (audit H-3): the previous form
    //   `'$&'.replace(/[^::]+$/, ' <cwd>')`
    // was a parse-time evaluation of `'$&'.replace(...)` which
    // collapsed to the literal string `' <cwd>'`, so every match was
    // replaced by the bare placeholder, dropping the label too. Two
    // sessions whose only label difference was `Working directory:`
    // vs `cwd:` would hash identically (potential cross-session
    // reuse). Now uses a real capture group so the label is
    // preserved.
    //
    // The optional list marker is written as `[-\u2022]` ("-" or the
    // bullet U+2022). The class previously held the mis-encoded bytes of
    // the bullet, so bulleted lines were never normalized. The escape
    // keeps the pattern independent of the file's encoding.
    .replace(/(^[ \t]*[-\u2022]?\s*(?:Working\s+directory|Current\s+working\s+directory|cwd|CWD)\s*[::])[^\n]*/gim, '$1 <cwd>')
    // "Current time:" / "Time:" lines (same fix as above)
    .replace(/(^[ \t]*[-\u2022]?\s*(?:Current\s+(?:date|time)|Time)\s*[::])[^\n]*/gim, '$1 <time>')
    // Session ID lines (Claude Code 2.x emits these)
    .replace(/(^[ \t]*[-\u2022]?\s*(?:Session\s*ID|sessionId|session_id)\s*[::])[^\n]*/gim, '$1 <sessionid>')
    // Epoch timestamps in seconds (10 digits) or ms (13 digits) when
    // bare (not part of a longer number). Claude Code's status line and
    // some MCP servers emit these. 1700000000 (2023-11) to 2099999999
    // (2036) covers the realistic range without hitting other 10-digit
    // numbers like phone numbers or IDs.
    .replace(/(?<![\d.])(?:1[7-9]|20)\d{8}(?:\d{3})?(?![\d.])/g, '<epoch>');

  // --- git status / recent commits block (Claude Code 2.x) ----------
  //
  // v2.0.74 (#116 zhangzhang-bit). Claude Code prepends a `gitStatus:`
  // block to the system prompt:
  //
  //     gitStatus: This is the git status at the start of the conversation.
  //
  //     Current branch: master
  //     Main branch (you will usually use this for PRs): master
  //     Git user: dwgx
  //     Status:
  //     M src/foo.js
  //     ?? newfile.txt
  //
  //     Recent commits:
  //     abc1234 release: 2.0.73 - ...
  //     def5678 release: 2.0.72 - ...
  //
  // The body shifts every time the user commits or touches a file but
  // the labels are invariant. zhangzhang-bit's #116 log shows
  // 26892-byte system prompts hashing differently across 30 turns:
  // commit-hash diffs in this block keep total length stable while
  // changing content, so length-based detection misses it.
  //
  // Strategy: collapse the body of `Status:`, `Recent commits:`, and
  // `Recent files:` to a stable placeholder. Headings + Branch / Main
  // branch / Git user keep their literal values (those rarely move
  // and meaningfully separate caches when they do). Lookahead anchors
  // the body extent at the next labelled heading or a blank line; if
  // the block runs to end-of-input, $(?![\s\S]) catches that too
  // since plain `$` under /m only matches end-of-line in JS.
  const NEXT_HEADING = '(?:Status|Recent commits|Recent files|gitStatus|Current branch|Main branch|Git user)\\s*:';
  const blockEnd = `(?=^[ \\t]*${NEXT_HEADING}|^\\s*$|$(?![\\s\\S]))`;
  out = out.replace(
    new RegExp(`^([ \\t]*Status\\s*:)[ \\t]*\\n[\\s\\S]*?${blockEnd}`, 'gim'),
    '$1\n<git-status>\n',
  );
  out = out.replace(
    new RegExp(`^([ \\t]*Recent commits\\s*:)[ \\t]*\\n[\\s\\S]*?${blockEnd}`, 'gim'),
    '$1\n<recent-commits>\n',
  );
  out = out.replace(
    new RegExp(`^([ \\t]*Recent files\\s*:)[ \\t]*\\n[\\s\\S]*?${blockEnd}`, 'gim'),
    '$1\n<recent-files>\n',
  );

  // --- git short hashes ----------------------------------------------
  // After Recent commits is folded above we still want to catch stray
  // short hashes that callers paste inline ("see commit abc1234"). 7-12
  // hex chars with word boundaries; require at least one digit AND at
  // least one a-f letter so we skip both ordinary words like deadbeef
  // (no digits) and bare integers like 1234567890 (no hex letters).
  // Skip if wrapped in quotes (likely a literal string the user is
  // asking about; preserving it lets distinct queries hash distinctly).
  out = out.replace(/(?<![`'"\w])(?=[a-f0-9]*\d)(?=[a-f0-9]*[a-f])[a-f0-9]{7,12}(?![`'"\w])/gi, '<gitsha>');

  return out;
}

/**
 * Stable JSON: serialise a value with object keys sorted recursively, so
 * {b:1,a:2} and {a:2,b:1} produce the same string. Without this, two
 * equivalent inputs hash differently when client serialization order
 * varies.
 *
 * Primitives and null go straight through JSON.stringify; arrays keep
 * their element order.
 *
 * @param {*} v - Value to serialise.
 * @returns {string} Key-order-independent serialisation of `v`.
 */
function stableStringify(v) {
  const isContainer = v !== null && typeof v === 'object';
  if (!isContainer) return JSON.stringify(v);
  if (Array.isArray(v)) {
    return `[${v.map(stableStringify).join(',')}]`;
  }
  const members = [];
  for (const key of Object.keys(v).sort()) {
    members.push(`${JSON.stringify(key)}:${stableStringify(v[key])}`);
  }
  return `{${members.join(',')}}`;
}

/**
 * Project one content block into a typed canonical record.
 *
 * Returns { type, ...payload } where payload uses stable hashes for binary
 * media. `unhashable: true` flags blocks that cannot be stably digested;
 * projectTurns() reports such a block as an unhashable projection, which
 * disables reuse for the request rather than silently colliding distinct
 * media inputs.
 *
 * Resolution order:
 *   1. Anything with a string `text` property, or a bare string, becomes
 *      a text record (meta tags stripped).
 *   2. image_url / image / input_image blocks become an image record
 *      keyed by data-URL digest, URL, base64 digest, or file id.
 *   3. document / file / input_file blocks become a file record keyed by
 *      file id, base64 digest, or URL.
 *   4. Any other block is recorded as stable JSON of the whole part.
 *
 * @param {*} part - One element of a message's content array.
 * @returns {object} Canonical record for the block.
 */
function canonicalContentBlock(part) {
  if (typeof part?.text === 'string') return { type: 'text', text: stripMetaTags(part.text) };
  if (typeof part === 'string') return { type: 'text', text: stripMetaTags(part) };
  const type = String(part?.type || '').toLowerCase();
  // image_url block (OpenAI / Anthropic image_url style)
  if (type === 'image_url' || type === 'image' || type === 'input_image') {
    const url = part?.image_url?.url || part?.url || '';
    if (typeof url === 'string' && url.startsWith('data:')) {
      // data:<meta>,<payload> - keep the meta verbatim, digest the payload.
      const comma = url.indexOf(',');
      const meta = comma > 0 ? url.slice(5, comma) : '';
      const data = comma > 0 ? url.slice(comma + 1) : url;
      return { type: 'image', meta, hash: shortDigest(data, 16) };
    }
    if (typeof url === 'string' && url) return { type: 'image', url };
    const src = part?.source;
    // `typeof null` is 'object', so rule null out explicitly; otherwise a
    // block carrying `source: null` would throw on `src.type` below.
    if (src !== null && typeof src === 'object') {
      if (src.type === 'base64' && typeof src.data === 'string') {
        return { type: 'image', meta: src.media_type || '', hash: shortDigest(src.data, 16) };
      }
      if (src.type === 'url' && typeof src.url === 'string') {
        return { type: 'image', url: src.url };
      }
      if (typeof src.file_id === 'string') return { type: 'image', file_id: src.file_id };
    }
    return { type: 'image', unhashable: true };
  }
  // file / document block
  if (type === 'document' || type === 'file' || type === 'input_file') {
    const fileId = part?.file_id || part?.source?.file_id;
    if (typeof fileId === 'string') return { type: 'file', file_id: fileId };
    if (part?.source?.type === 'base64' && typeof part.source.data === 'string') {
      return { type: 'file', meta: part.source.media_type || '', hash: shortDigest(part.source.data, 16) };
    }
    if (typeof part?.source?.url === 'string') return { type: 'file', url: part.source.url };
    return { type: 'file', unhashable: true };
  }
  // Any other typed block: stable JSON of the whole part. Catches things
  // like { type: 'tool_use', ... } when they appear in mixed content arrays.
  return { type: type || 'unknown', json: stableStringify(part ?? '') };
}

/**
 * Normalise a message's `content` field into an array of canonical records.
 *
 * @param {*} content - A string, an array of content blocks, or anything else.
 * @returns {object[]} One record per array element; a single text record
 *   for a string; a single `json` record for any other value (null and
 *   undefined are serialised as '').
 */
function canonicaliseContent(content) {
  if (Array.isArray(content)) return content.map(canonicalContentBlock);
  if (typeof content === 'string') {
    return [{ type: 'text', text: stripMetaTags(content) }];
  }
  return [{ type: 'json', json: stableStringify(content ?? '') }];
}

/**
 * Tell whether any canonical record in the list is flagged unhashable.
 *
 * @param {*} blocks - Expected to be an array of canonical records.
 * @returns {boolean} True only when `blocks` is an array containing a
 *   record whose `unhashable` property is exactly `true`.
 */
function hasUnhashableMedia(blocks) {
  if (!Array.isArray(blocks)) return false;
  for (const block of blocks) {
    if (block?.unhashable === true) return true;
  }
  return false;
}

/**
 * Project assistant tool calls into a stable list of { name, args }.
 *
 * Both the OpenAI shape `tool_calls: [{id, function:{name, arguments}}]`
 * and the Anthropic shape `content: [{type:'tool_use', name, input}]` are
 * read, so the same logical call digests the same way. Entries from
 * `tool_calls` come first, followed by `tool_use` content parts.
 *
 * @param {*} m - Assistant message.
 * @returns {{name: string, args: string}[]} Canonical calls; `args` is a
 *   key-sorted JSON string, the raw argument string when it is not valid
 *   JSON, or '' when a `tool_calls` entry carries no arguments at all.
 */
function projectAssistantToolCalls(m) {
  // Canonical argument string for one `tool_calls` entry.
  const canonicalArgs = (call) => {
    const raw = call?.function?.arguments;
    if (typeof raw === 'string') {
      try {
        return stableStringify(JSON.parse(raw));
      } catch {
        // Not parseable: fall back to the string exactly as sent.
        return raw;
      }
    }
    if (raw !== undefined) return stableStringify(raw);
    if (call?.input !== undefined) return stableStringify(call.input);
    return '';
  };

  const fromToolCalls = Array.isArray(m?.tool_calls)
    ? Array.from(m.tool_calls, (call) => ({
      name: call?.function?.name || call?.name || '',
      args: canonicalArgs(call),
    }))
    : [];

  const fromContent = Array.isArray(m?.content)
    ? m.content
      .filter((part) => part?.type === 'tool_use')
      .map((part) => ({ name: part.name || '', args: stableStringify(part.input ?? null) }))
    : [];

  return [...fromToolCalls, ...fromContent];
}

/**
 * Reduce one chat message to the fields that take part in the fingerprint.
 *
 * - system / user: role plus canonicalised content.
 * - tool: emitted with role 'tool_result', the string `tool_call_id`
 *   (or '') and canonicalised content.
 * - assistant: whitespace-collapsed text plus the tool-call projection.
 * - anything else: the role as a string ('unknown' when falsy) plus
 *   canonicalised content.
 *
 * @param {*} m - Message object.
 * @returns {object} The projected message.
 */
function projectMessage(m) {
  const role = m?.role;
  switch (role) {
    case 'system':
    case 'user':
      return { role, content: canonicaliseContent(m.content) };

    case 'tool': {
      const toolCallId = typeof m?.tool_call_id === 'string' ? m.tool_call_id : '';
      return {
        role: 'tool_result',
        tool_call_id: toolCallId,
        content: canonicaliseContent(m.content),
      };
    }

    case 'assistant': {
      // Project to a stable text + tool_calls digest. Reasoning, metadata
      // and id fields are left out because they drift across re-renders.
      const textBlocks = canonicaliseContent(m.content).filter((b) => b.type === 'text');
      const lines = textBlocks.map((b) => (b.text || '').replace(/\s+/g, ' ').trim());
      const text = lines.join('\n').trim();
      return { role: 'assistant', text, tool_calls: projectAssistantToolCalls(m) };
    }

    default:
      // Unknown role: keep it as-is so it is neither swallowed nor
      // confused with a known projection.
      return { role: String(role || 'unknown'), content: canonicaliseContent(m?.content) };
  }
}

/**
 * Digest of every system message in the conversation.
 *
 * @param {object[]} messages - Full message list.
 * @returns {string} '' when CASCADE_REUSE_HASH_SYSTEM is '0' or there is
 *   no system message; otherwise a 32-character hex digest.
 */
function systemDigest(messages) {
  // CASCADE_REUSE_HASH_SYSTEM=0 is an explicit opt-out for callers whose
  // system prompt drifts every turn (Claude Code with `cwd` snapshots etc.)
  // and who care more about hit rate than strict isolation. Default ON
  // since the audit found that "default exclude system" caused silent
  // cross-system reuse.
  if (process.env.CASCADE_REUSE_HASH_SYSTEM === '0') return '';

  const systemMessages = messages.filter((m) => m?.role === 'system');
  if (systemMessages.length === 0) return '';

  // v2.0.61 (#111): run normalizeSystemPromptForHash over each text
  // block so dynamic chunks (today's date / cwd / session id / ISO ts /
  // UUIDs) don't drift the system fingerprint each turn.
  const normaliseBlock = (block) => (
    block?.type === 'text' && typeof block.text === 'string'
      ? { ...block, text: normalizeSystemPromptForHash(block.text) }
      : block
  );

  const normalized = [];
  for (const message of systemMessages) {
    const projected = projectMessage(message);
    if (Array.isArray(projected.content)) {
      projected.content = projected.content.map(normaliseBlock);
    }
    normalized.push(projected);
  }
  return shortDigest(stableStringify(normalized), 32);
}

/**
 * Digest of the tool-emulation context (tool schemas, tool choice,
 * preamble tier and preamble text).
 *
 * @param {object|null} [opts] - Request options. Read fields:
 *   `emulateTools`, `tools`, `toolChoice`, `preambleTier`, `toolPreamble`.
 *   A null/undefined `opts` is treated like "no tool emulation".
 * @returns {string} '' when tool emulation is off; otherwise a
 *   32-character hex digest.
 */
function toolContextDigest(opts = {}) {
  // The `= {}` default only covers `undefined`; optional chaining also
  // tolerates an explicit `null`, matching buildKeyPayload's `opts?.route`.
  if (!opts?.emulateTools) return '';
  // v2.0.61 (#111): sort tools by name before hashing so client-side
  // ordering changes (Claude Code 2.x occasionally reshuffles its 70+
  // tool list across turns) don't drift the tool fingerprint.
  const tools = (Array.isArray(opts.tools) ? opts.tools.map(t => {
    // Accept both the OpenAI wrapper ({ function: {...} }) and a flat tool.
    const fn = t?.function || t;
    return {
      name: fn?.name || '',
      description: fn?.description || '',
      parameters: fn?.parameters ?? fn?.input_schema ?? null,
    };
  }) : []).sort((a, b) => (a.name || '').localeCompare(b.name || ''));
  return shortDigest(stableStringify({
    tools,
    tool_choice: opts.toolChoice ?? null,
    preambleTier: opts.preambleTier ?? null,
    toolPreambleHash: opts.toolPreamble ? shortDigest(opts.toolPreamble, 16) : '',
  }), 32);
}

/**
 * Build the array of stable turns up to (but not including) the newest
 * user or tool turn. This is what fingerprintBefore digests: every
 * message that precedes the trailing user/tool turn.
 *
 * @param {*} messages - Full message list.
 * @returns {object[]|null} The leading slice, or null when `messages` is
 *   not an array, contains no user/tool turn, or the newest user/tool
 *   turn is the very first message (no prior turn to resume from).
 */
function priorTurnsForBefore(messages) {
  if (!Array.isArray(messages)) return null;

  // Walk backwards to the newest user/tool turn; that is the one we drop.
  let cut = messages.length - 1;
  while (cut >= 0) {
    const role = messages[cut]?.role;
    if (role === 'user' || role === 'tool') break;
    cut -= 1;
  }

  // cut < 0: no such turn. cut === 0: nothing precedes it.
  return cut > 0 ? messages.slice(0, cut) : null;
}

/**
 * Project a list of turns for hashing, skipping system messages (those
 * are digested separately).
 *
 * @param {*} turns - Messages to project.
 * @returns {{turns: object[]}|{unhashable: true}|null} null when `turns`
 *   is not an array; `{ unhashable: true }` as soon as a projected turn
 *   contains an unhashable media block; otherwise the projected turns.
 */
function projectTurns(turns) {
  if (!Array.isArray(turns)) return null;
  const out = [];
  for (let i = 0; i < turns.length; i += 1) {
    const turn = turns[i];
    if (turn?.role === 'system') continue; // system handled separately
    const record = projectMessage(turn);
    const poisoned = Array.isArray(record.content) && hasUnhashableMedia(record.content);
    if (poisoned) return { unhashable: true };
    out.push(record);
  }
  return { turns: out };
}

/**
 * Build the canonical string that a fingerprint hashes.
 *
 * @param {object} args
 * @param {*} args.messages - Full message list.
 * @param {string} [args.modelKey] - Model identifier mixed into the key.
 * @param {string} [args.callerKey] - Caller identifier mixed into the key.
 * @param {object|null} [args.opts] - Request options; `route` defaults to
 *   'chat' and the object is also passed to toolContextDigest().
 * @param {string} args.scope - 'after' hashes every turn; any other value
 *   hashes only the turns before the newest user/tool turn.
 * @returns {string|null} Key-sorted JSON payload, or null when reuse must
 *   be disabled: `messages` is not an array, there is no usable prior
 *   turn, or a turn contains unhashable media.
 */
function buildKeyPayload({ messages, modelKey, callerKey, opts, scope }) {
  // Validate before touching `messages`: systemDigest() calls
  // messages.filter() and would throw on null/undefined, whereas callers
  // are documented to get null back when reuse cannot apply.
  if (!Array.isArray(messages)) return null;
  const turnSlice = scope === 'after' ? messages : priorTurnsForBefore(messages);
  if (!turnSlice) return null;
  const projection = projectTurns(turnSlice);
  if (!projection || projection.unhashable) return null;
  // Digests are computed only once we know a payload will be produced.
  const sys = systemDigest(messages);
  const tools = toolContextDigest(opts);
  return stableStringify({
    v: KEY_VERSION,
    caller: String(callerKey || ''),
    model: String(modelKey || ''),
    route: opts?.route || 'chat',
    sys,
    tools,
    turns: projection.turns,
  });
}

/**
 * Fingerprint for "I'm about to send this newest user turn; find me a
 * cascade I can resume." Hashes everything before the newest user/tool
 * turn (including assistant text + tool_calls digest, system, tools).
 *
 * Signatures (backward-compatible):
 *   fingerprintBefore(messages)
 *   fingerprintBefore(messages, modelKey)
 *   fingerprintBefore(messages, modelKey, callerKey)
 *   fingerprintBefore(messages, modelKey, callerKey, opts)
 * where opts = { tools, toolChoice, toolPreamble, emulateTools,
 *                preambleTier, route }
 *
 * @param {object[]} messages - Full message list of the incoming request.
 * @param {string} [modelKey=''] - Model identifier mixed into the key.
 * @param {string} [callerKey=''] - Caller identifier mixed into the key.
 * @param {object} [opts={}] - Tool/route options (see above).
 * @returns {string|null} SHA-256 hex fingerprint, or null when reuse
 *   should be disabled (single-turn, unhashable media in prior history,
 *   etc.).
 */
export function fingerprintBefore(messages, modelKey = '', callerKey = '', opts = {}) {
  const keyPayload = buildKeyPayload({ scope: 'before', messages, modelKey, callerKey, opts });
  return keyPayload ? sha256(keyPayload) : null;
}

/**
 * Fingerprint for "I just finished a turn; store the cascade under the
 * key the next request will look up." Same shape as fingerprintBefore but
 * over the FULL message list (the newest user turn is included so the
 * post-turn fingerprint represents server state right after that turn).
 *
 * The payload is built by buildKeyPayload() with scope 'after', the same
 * builder fingerprintBefore uses, so the two key layouts cannot drift
 * apart. System messages are digested separately and skipped in the turn
 * list there.
 *
 * @param {object[]} messages - Entire trajectory seen so far, including
 *   the newest user/tool/assistant turn.
 * @param {string} [modelKey=''] - Model identifier mixed into the key.
 * @param {string} [callerKey=''] - Caller identifier mixed into the key.
 * @param {object} [opts={}] - Same options object as fingerprintBefore.
 * @returns {string|null} SHA-256 hex fingerprint, or null when `messages`
 *   is not a non-empty array or a turn contains unhashable media.
 */
export function fingerprintAfter(messages, modelKey = '', callerKey = '', opts = {}) {
  // An empty list is rejected here; buildKeyPayload alone would accept it.
  if (!Array.isArray(messages) || messages.length === 0) return null;
  const payload = buildKeyPayload({ messages, modelKey, callerKey, opts, scope: 'after' });
  return payload ? sha256(payload) : null;
}

/**
 * Idle lifetime that applies to one pool entry.
 *
 * @param {*} entry - Pool entry; only `ttlHintMs` is read.
 * @returns {number} The entry's `ttlHintMs` converted with Number() when
 *   that is finite and positive, otherwise POOL_TTL_MS.
 */
function effectiveTtl(entry) {
  const requested = Number(entry?.ttlHintMs);
  if (!Number.isFinite(requested) || requested <= 0) return POOL_TTL_MS;
  return requested;
}

/**
 * Drop expired entries, then evict the least recently accessed ones until
 * the pool holds at most POOL_MAX entries.
 *
 * Updates `stats.expired` and `stats.evictions` for every entry removed.
 *
 * @param {number} now - Current time in milliseconds, compared against
 *   each entry's `lastAccess`.
 */
function prune(now) {
  // Pass 1: expiry. Deleting from a Map while iterating it is safe.
  _pool.forEach((entry, key) => {
    const idleMs = now - entry.lastAccess;
    if (idleMs > effectiveTtl(entry)) {
      _pool.delete(key);
      stats.expired++;
    }
  });

  // Pass 2: size cap, oldest access first.
  const overflow = _pool.size - POOL_MAX;
  if (overflow <= 0) return;
  const oldestFirst = [..._pool].sort(([, a], [, b]) => a.lastAccess - b.lastAccess);
  for (const [key] of oldestFirst.slice(0, overflow)) {
    _pool.delete(key);
    stats.evictions++;
  }
}

/**
 * Check out a conversation if we have a matching fingerprint AND the caller
 * is willing to use the same (apiKey, lsPort) we stored. On a hit the entry
 * is removed from the pool; the caller is expected to call `checkin()` with
 * a new fingerprint on success (or just drop it on failure and a fresh
 * cascade will be created next turn).
 *
 * v2.0.25 added optional `expected` for atomic owner verification at the
 * pool boundary (MED-3). Pass `{ apiKey, lsPort, lsGeneration }` and a
 * mismatch returns null + counts a miss without leaking the entry.
 *
 * @param {string} fingerprint - Key produced by fingerprintBefore().
 * @param {string} [callerKey=''] - Caller identity; compared with the
 *   stored one only when both are non-empty.
 * @param {{apiKey?: string, lsPort?: number, lsGeneration?: *}|null}
 *   [expected=null] - Owner tuple the caller intends to use. Each field is
 *   compared only when set on both sides.
 * @returns {object|null} The pooled entry on a hit, otherwise null. Every
 *   null return increments `stats.misses`; an expired entry is also
 *   deleted and counted in `stats.expired`.
 */
export function checkout(fingerprint, callerKey = '', expected = null) {
  const miss = () => {
    stats.misses++;
    return null;
  };

  if (!fingerprint) return miss();
  const entry = _pool.get(fingerprint);
  if (!entry) return miss();

  // Validate BEFORE removing from the pool. The previous order
  // (`delete` first, then check) had a subtle leak: when a caller's
  // request fingerprinted the same as someone else's (different
  // callerKey) we deleted the rightful owner's entry on the way to
  // returning null, so the legitimate caller lost their cascade
  // resume forever. Keep the entry in place on mismatch so the
  // owner's next turn still finds it.
  if (entry.callerKey && callerKey && entry.callerKey !== callerKey) return miss();

  const idleMs = Date.now() - entry.lastAccess;
  if (idleMs > effectiveTtl(entry)) {
    _pool.delete(fingerprint);
    stats.expired++;
    return miss();
  }

  if (expected) {
    const apiKeyDiffers = expected.apiKey && entry.apiKey && expected.apiKey !== entry.apiKey;
    const portDiffers = expected.lsPort && entry.lsPort && expected.lsPort !== entry.lsPort;
    const generationDiffers = expected.lsGeneration != null
      && entry.lsGeneration != null
      && expected.lsGeneration !== entry.lsGeneration;
    // Owner mismatch: leave the entry pooled for its rightful owner.
    if (apiKeyDiffers || portDiffers || generationDiffers) return miss();
  }

  // Validated. Now remove and hand to the caller.
  _pool.delete(fingerprint);
  stats.hits++;
  return entry;
}

/**
 * Store (or restore) a conversation entry under a new fingerprint.
 *
 * `fingerprint` accepts a single string OR an array of strings. When
 * an array is given the SAME entry is indexed under each fingerprint;
 * used by v2.0.87 auto-fallback (#129 wnfilm) to keep the cascade
 * findable under both the original-model fingerprint AND the
 * fallback-model fingerprint, so the next turn from the client (under
 * the original model name) doesn't miss the pool and force the LLM to
 * re-read history from scratch.
 *
 * `ttlHintMs` (optional) extends this entry's expiry past the pool's
 * default 30 min; used to honour Anthropic prompt-caching markers that
 * request a 1h ttl. Pass `undefined` (default) to keep the existing
 * entry-level hint when restoring across turns. Pass `0` (or negative)
 * to clear any inherited hint and fall back to the default TTL; used
 * when the next request explicitly does NOT carry a 1h marker so a stale
 * 1h window doesn't outlive its source request (MED-2).
 *
 * Does nothing when `entry` is falsy or no usable fingerprint is given.
 *
 * @param {string|string[]} fingerprint - Key(s) to store under; non-string
 *   and empty array elements are ignored.
 * @param {object} entry - Conversation state to pool.
 * @param {string} [callerKey=''] - Owner; falls back to `entry.callerKey`.
 * @param {number} [ttlHintMs] - Per-entry TTL override (see above).
 * @returns {void}
 */
export function checkin(fingerprint, entry, callerKey = '', ttlHintMs) {
  if (!entry) return;

  let keys;
  if (Array.isArray(fingerprint)) {
    keys = fingerprint.filter((fp) => typeof fp === 'string' && fp);
  } else {
    keys = fingerprint ? [fingerprint] : [];
  }
  if (keys.length === 0) return;

  const now = Date.now();

  // undefined inherits the entry's own hint; any explicit value that is
  // not a finite positive number clears it.
  const hintCandidate = ttlHintMs === undefined ? entry.ttlHintMs : ttlHintMs;
  const keepHint = Number.isFinite(hintCandidate) && hintCandidate > 0;

  const template = {
    cascadeId: entry.cascadeId,
    sessionId: entry.sessionId,
    lsPort: entry.lsPort,
    lsGeneration: entry.lsGeneration,
    apiKey: entry.apiKey,
    callerKey: callerKey || entry.callerKey || '',
    stepOffset: Number.isFinite(entry.stepOffset) ? entry.stepOffset : 0,
    generatorOffset: Number.isFinite(entry.generatorOffset) ? entry.generatorOffset : 0,
    historyCoverage: entry.historyCoverage || null,
    createdAt: entry.createdAt || now,
    lastAccess: now,
  };
  if (keepHint) template.ttlHintMs = hintCandidate;

  // Each Map slot gets its own (shallow) copy so a later invalidate or
  // checkout under one key does not mutate the record held by the others.
  for (const key of keys) {
    _pool.set(key, { ...template });
  }

  // v2.0.88 (audit M-1): count once per logical checkin (one
  // conversation), not once per fingerprint slot. Otherwise the
  // dashboard's "stores" counter doubles whenever auto-fallback fires
  // and operators read pool efficiency wrong.
  stats.stores++;
  const aliasCount = keys.length - 1;
  if (aliasCount > 0) stats.aliasWrites = (stats.aliasWrites || 0) + aliasCount;
  prune(now);
}

/**
 * Drop any entries that belong to a (apiKey, lsPort, lsGeneration) tuple
 * that just went away (account removed, LS restarted, LS replaced on the
 * same port). Keeps the pool honest.
 *
 * An entry belongs to the tuple when its apiKey equals a non-empty
 * `apiKey`, or when its lsPort equals a non-empty `lsPort` and the
 * generations either match or are unknown on one side.
 *
 * @param {{apiKey?: string, lsPort?: number, lsGeneration?: *}} [target]
 * @returns {number} Number of pool slots removed.
 */
export function invalidateFor({ apiKey, lsPort, lsGeneration } = {}) {
  const belongsToTuple = (e) => {
    if (apiKey && e.apiKey === apiKey) return true;
    if (!lsPort || e.lsPort !== lsPort) return false;
    return lsGeneration == null || e.lsGeneration == null || e.lsGeneration === lsGeneration;
  };

  // v2.0.88 (audit H-2): two-pass scan. v2.0.87 dual-write checkin
  // can index the same cascadeId under multiple fingerprint slots.
  // The previous single-pass `if (lsPort) delete` would only drop the
  // first slot it scanned; if the per-port scan iterated past a sibling
  // slot under different ordering, that slot stayed in the pool
  // pointing to a now-dead cascadeId on a now-restarted LS. Next turn
  // would hit `cascade not found`, set reuseEntryDead, and force a
  // full history replay: a silent one-turn failure on every LS restart.

  // Pass 1: collect every cascadeId tied to the going-away tuple.
  const doomedCascadeIds = new Set();
  for (const e of _pool.values()) {
    if (belongsToTuple(e) && e.cascadeId) doomedCascadeIds.add(e.cascadeId);
  }

  // Pass 2: drop every slot that matches directly, plus sibling slots
  // pointing at a doomed cascadeId even if their own apiKey/lsPort
  // happens not to match (e.g. an alias slot whose apiKey was set from
  // the entry instead of the going-away account).
  let dropped = 0;
  for (const [fp, e] of _pool) {
    const isSibling = Boolean(e.cascadeId) && doomedCascadeIds.has(e.cascadeId);
    if (belongsToTuple(e) || isSibling) {
      _pool.delete(fp);
      dropped += 1;
    }
  }
  return dropped;
}

/**
 * Snapshot of pool size, limits and counters.
 *
 * @returns {object} `size`, `maxSize`, `ttlMs`, every counter in `stats`,
 *   and `hitRate`: hits as a percentage of hits + misses, formatted as a
 *   string with one decimal ('0.0' before any lookup).
 */
export function poolStats() {
  const lookups = stats.hits + stats.misses;
  const hitRate = lookups > 0
    ? ((stats.hits / lookups) * 100).toFixed(1)
    : '0.0';
  return {
    size: _pool.size,
    maxSize: POOL_MAX,
    ttlMs: POOL_TTL_MS,
    ...stats,
    hitRate,
  };
}

/**
 * Empty the pool.
 *
 * @returns {number} How many entries were held before clearing.
 */
export function poolClear() {
  const { size: removed } = _pool;
  _pool.clear();
  return removed;
}

// Background prune: without this, expired entries accumulate when there
// are no checkin() calls for a while (e.g. a quiet weekend). Runs every
// five minutes. .unref() so this timer never holds the process open past
// real work.
setInterval(() => prune(Date.now()), 5 * 60 * 1000).unref();