File size: 28,873 Bytes
2b64d42 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 | /**
* Cascade conversation reuse pool (experimental).
*
* Goal: when a multi-turn chat continues a previous exchange, reuse the same
* Windsurf `cascade_id` instead of starting a fresh one. This lets the
* Windsurf backend keep its own per-cascade context cached — we avoid
* resending the full history on each turn and the server responds faster.
*
* The key is a "state digest" of the caller-visible trajectory up to (but
* not including) the newest user/tool result turn. v2.0.25 upgraded the key
* from a relaxed "user text only" projection to a server-state semantic key
* that includes assistant text + tool_calls digest, normalized system,
* stable media digests, and (when tool-emulating) the tool schema digest.
* This trades some hit rate for correctness: when the client's prior
* assistant / system / tool context drifts, we miss instead of silently
* resuming a stale upstream cascade.
*
* Safety rails:
* - Entries are pinned to a specific (apiKey, lsPort) pair. We must reuse
* the same LS and the same account or the cascade_id is meaningless.
* - A checked-out entry is removed from the pool. Concurrent second request
* with the same fingerprint falls back to a fresh cascade.
* - TTL defaults to 30 min (override with CASCADE_POOL_TTL_MS); LRU eviction
* at 500 entries.
*/
import { createHash } from 'crypto';
/**
 * Read a strictly positive integer from an environment variable.
 *
 * @param {string} name - Environment variable to read.
 * @param {number} fallback - Value returned when the variable is unset,
 *   does not start with a base-10 integer, or is zero / negative.
 * @returns {number} The parsed integer, or `fallback`.
 */
function positiveIntEnv(name, fallback) {
  const raw = process.env[name] || '';
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return fallback;
  return parsed;
}
// Default entry lifetime in ms: 30 minutes unless CASCADE_POOL_TTL_MS holds
// a positive integer. Individual entries may override it via `ttlHintMs`.
const POOL_TTL_MS = positiveIntEnv('CASCADE_POOL_TTL_MS', 30 * 60 * 1000);
// Upper bound on pool size; prune() drops the least recently accessed
// entries once the pool grows past this.
const POOL_MAX = 500;
// Embedded in every key payload as `v`, so fingerprints built under a
// different key version never compare equal.
const KEY_VERSION = 2;
// fingerprint (string) -> entry record written by checkin().
const _pool = new Map();
// Running counters reported by poolStats().
const stats = { hits: 0, misses: 0, stores: 0, evictions: 0, expired: 0 };
/**
 * Hex-encoded SHA-256 digest of `s`.
 *
 * @param {string|Buffer} s - Data to hash.
 * @returns {string} 64-character lowercase hex digest.
 */
function sha256(s) {
  const hasher = createHash('sha256');
  hasher.update(s);
  return hasher.digest('hex');
}
/**
 * Truncated SHA-256 hex digest of a value.
 *
 * @param {*} s - Value to digest; null/undefined are hashed as the empty
 *   string, everything else is coerced with `String()`.
 * @param {number} [n=16] - Number of leading hex characters to keep.
 * @returns {string} The first `n` hex characters of the digest.
 */
function shortDigest(s, n = 16) {
  const text = String(s ?? '');
  const fullHex = sha256(text);
  return fullHex.slice(0, n);
}
// Client-injected meta tags whose bodies change every turn (cwd snapshot,
// todo state, current time, hook output, slash-command echo). If we hash
// these, the fingerprint drifts even when the real user text is unchanged
// and Cascade reuse silently falls back to fresh for every call
// (issue #24). Strip them before hashing.
//
// This set is the single source of truth: buildMetaTagRe() turns it into
// the stripping regex and stripMetaTags() audits leftover tags against it.
const META_TAG_NAMES = new Set([
'system-reminder',
'command-message',
'command-name',
'command-args',
'local-command-stdout',
'local-command-stderr',
'user-prompt-submit-hook',
'analysis',
'summary',
'example',
]);
/**
 * Compile the global regex that matches one complete meta-tag element
 * (`<name ...> ... </name>`) for any name in META_TAG_NAMES.
 *
 * Tag names are regex-escaped before being joined into the alternation, and
 * the body match is lazy so each element ends at its nearest closing tag.
 *
 * @returns {RegExp} Global regex; capture group 1 is the tag name.
 */
function buildMetaTagRe() {
  const alternatives = [];
  for (const tagName of META_TAG_NAMES) {
    alternatives.push(tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  }
  const pattern = `<(${alternatives.join('|')})[^>]*>[\\s\\S]*?</\\1>`;
  return new RegExp(pattern, 'g');
}
// Compiled once at module load and reused by stripMetaTags().
let META_TAG_RE = buildMetaTagRe();
/**
 * Remove known client-injected meta-tag elements from a string and tidy the
 * whitespace they leave behind.
 *
 * After stripping, trailing spaces/tabs before a newline are removed, runs
 * of three or more newlines collapse to two, and the result is trimmed.
 *
 * @param {*} s - Text to clean. Non-strings and empty strings are returned
 *   unchanged.
 * @returns {*} The cleaned string, or the input as-is when it is not a
 *   non-empty string.
 */
function stripMetaTags(s) {
  if (typeof s !== 'string' || !s) return s;
  let cleaned = s.replace(META_TAG_RE, '');
  cleaned = cleaned.replace(/[ \t]+\n/g, '\n');
  cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
  cleaned = cleaned.trim();
  // Audit only: unknown tags are caller content. They are logged but never
  // added to the stripping set, so the fingerprint stays a pure function of
  // the request.
  const leftovers = cleaned.match(/<([a-z][-a-z_]*)[^>]*>[\s\S]*?<\/\1>/g) ?? [];
  const unknownNames = new Set();
  for (const fragment of leftovers) {
    const tagName = fragment.match(/^<([a-z][-a-z_]*)/)?.[1];
    if (tagName && !META_TAG_NAMES.has(tagName)) unknownNames.add(tagName);
  }
  if (unknownNames.size > 0) {
    console.error(`[META_TAG_AUDIT] Unknown XML tags in user message: ${[...unknownNames].join(', ')}`);
  }
  return cleaned;
}
// v2.0.61 (#111) - normalize dynamic chunks of the system prompt that
// drift across turns (today's date, ISO timestamps, working directory,
// session UUIDs) so the same logical Claude Code session keeps the
// same cascade fingerprint instead of cache-missing every turn.
//
// Without this, Claude Code's 26KB system prompt (which embeds the
// current date / cwd / session id) hashed differently every request,
// reuse silently fell back to fresh, and the model looked like it was
// "looping" because each call started a new cascade.
//
// Patterns are conservative - only normalize tokens that are
// (a) verifiably temporal/identifier-shaped and (b) common enough in
// real Claude Code system prompts that their presence dominated the
// hash. Plain prose drift remains in the hash so genuine prompt edits
// still create a fresh cascade.
/**
 * Replace volatile tokens in a system prompt with stable placeholders.
 *
 * @param {*} s - System prompt text; falsy values are treated as ''.
 * @returns {string} The prompt with timestamps, dates, UUIDs, cwd / time /
 *   session-id lines, epoch numbers, git status blocks and short git hashes
 *   replaced by `<ts>`, `<date>`, `<uuid>`, `<cwd>`, `<time>`,
 *   `<sessionid>`, `<epoch>`, `<git-status>`, `<recent-commits>`,
 *   `<recent-files>` and `<gitsha>` respectively.
 */
function normalizeSystemPromptForHash(s) {
  let out = String(s || '');

  // --- temporal / identifier tokens ---
  //
  // The labelled-line patterns accept an optional leading "-" or bullet
  // (U+2022) and either an ASCII or fullwidth (U+FF1A) colon. Both
  // non-ASCII characters are written as \u escapes so the patterns survive
  // any re-encoding of this file.
  out = out
    // ISO 8601 timestamps (with or without ms / tz).
    .replace(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?/g, '<ts>')
    // "Today's date is YYYY-MM-DD" / "Today is YYYY-MM-DD" / etc.
    .replace(/\b(Today(?:'s)?\s+(?:date|is)(?:\s+is)?\s*[:\-]?\s*)\d{4}-\d{2}-\d{2}/gi, '$1<date>')
    // Bare YYYY-MM-DD (date-only) when not embedded in a longer number.
    .replace(/(?<!\d)\d{4}-\d{2}-\d{2}(?!\d|T)/g, '<date>')
    // UUIDs (8-4-4-4-12 hex) - session/account ids.
    .replace(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi, '<uuid>')
    // Working directory lines (Claude Code prepends each turn).
    // v2.0.78 (audit H-3): the label is kept through capture group 1 so
    // `Working directory:` and `cwd:` sessions do not hash identically.
    .replace(/(^[ \t]*[-\u2022]?\s*(?:Working\s+directory|Current\s+working\s+directory|cwd|CWD)\s*[:\uFF1A])[^\n]*/gim, '$1 <cwd>')
    // "Current time:" / "Current date:" / "Time:" lines.
    .replace(/(^[ \t]*[-\u2022]?\s*(?:Current\s+(?:date|time)|Time)\s*[:\uFF1A])[^\n]*/gim, '$1 <time>')
    // Session ID lines (Claude Code 2.x emits these).
    .replace(/(^[ \t]*[-\u2022]?\s*(?:Session\s*ID|sessionId|session_id)\s*[:\uFF1A])[^\n]*/gim, '$1 <sessionid>')
    // Epoch timestamps in seconds (10 digits) or ms (13 digits) when bare
    // (not part of a longer number). The 17..20 prefix limits matches to a
    // realistic range and avoids most other 10-digit numbers.
    .replace(/(?<![\d.])(?:1[7-9]|20)\d{8}(?:\d{3})?(?![\d.])/g, '<epoch>');

  // --- git status / recent commits block (Claude Code 2.x) ---
  //
  // v2.0.74 (#116). Claude Code prepends a `gitStatus:` block whose body
  // changes whenever the user commits or touches a file while the labels
  // stay invariant. Collapse the bodies of `Status:`, `Recent commits:` and
  // `Recent files:` to placeholders; branch / user lines keep their values.
  // A body ends at the next labelled heading, a blank line, or end of
  // input. `$(?![\s\S])` is needed for the last case because plain `$`
  // under /m also matches at every end of line.
  const NEXT_HEADING = '(?:Status|Recent commits|Recent files|gitStatus|Current branch|Main branch|Git user)\\s*:';
  const blockEnd = `(?=^[ \\t]*${NEXT_HEADING}|^\\s*$|$(?![\\s\\S]))`;
  const foldedBlocks = [
    ['Status', '<git-status>'],
    ['Recent commits', '<recent-commits>'],
    ['Recent files', '<recent-files>'],
  ];
  for (const [label, placeholder] of foldedBlocks) {
    out = out.replace(
      new RegExp(`^([ \\t]*${label}\\s*:)[ \\t]*\\n[\\s\\S]*?${blockEnd}`, 'gim'),
      `$1\n${placeholder}\n`,
    );
  }

  // --- git short hashes ---
  // Catch stray short hashes pasted inline ("see commit abc1234"): 7-12 hex
  // chars containing at least one digit AND at least one a-f letter, so
  // ordinary words like deadbeef and bare integers are skipped. Hashes
  // wrapped in quotes/backticks are left alone so distinct literal queries
  // still hash distinctly.
  out = out.replace(/(?<![`'"\w])(?=[a-f0-9]*\d)(?=[a-f0-9]*[a-f])[a-f0-9]{7,12}(?![`'"\w])/gi, '<gitsha>');
  return out;
}
/**
 * Key-order-independent JSON serialization.
 *
 * Object keys are sorted recursively so `{b:1,a:2}` and `{a:2,b:1}` produce
 * the same string; otherwise equivalent inputs would hash differently when
 * the client's serialization order varies. Array order is preserved.
 *
 * @param {*} v - Value to serialize.
 * @returns {string} Canonical JSON-like text. Primitives are passed straight
 *   to `JSON.stringify`.
 */
function stableStringify(v) {
  const isContainer = v !== null && typeof v === 'object';
  if (!isContainer) return JSON.stringify(v);
  if (Array.isArray(v)) {
    return `[${v.map(stableStringify).join(',')}]`;
  }
  const members = [];
  for (const key of Object.keys(v).sort()) {
    members.push(`${JSON.stringify(key)}:${stableStringify(v[key])}`);
  }
  return `{${members.join(',')}}`;
}
/**
 * Project one content block into a typed canonical record.
 *
 * Binary media is represented by a short stable hash of its payload rather
 * than the payload itself. Blocks that cannot be digested stably come back
 * with `unhashable: true`; the turn projection treats that as "no
 * fingerprint", which disables reuse for the request instead of silently
 * colliding distinct media inputs.
 *
 * @param {*} part - A content part: a string, or an object such as a text,
 *   image, file/document or other typed block.
 * @returns {object} `{ type, ...payload }` where payload is one of `text`,
 *   `url`, `file_id`, `meta` + `hash`, `json`, or `unhashable: true`.
 */
function canonicalContentBlock(part) {
  // Anything carrying a string `text` is treated as text, whatever its type.
  if (typeof part?.text === 'string') return { type: 'text', text: stripMetaTags(part.text) };
  if (typeof part === 'string') return { type: 'text', text: stripMetaTags(part) };
  const type = String(part?.type || '').toLowerCase();

  // Image blocks (image_url / image / input_image shapes).
  if (type === 'image_url' || type === 'image' || type === 'input_image') {
    const url = part?.image_url?.url || part?.url || '';
    if (typeof url === 'string' && url.startsWith('data:')) {
      // data:<meta>,<payload> - keep the meta verbatim, hash the payload.
      const comma = url.indexOf(',');
      const meta = comma > 0 ? url.slice(5, comma) : '';
      const data = comma > 0 ? url.slice(comma + 1) : url;
      return { type: 'image', meta, hash: shortDigest(data, 16) };
    }
    if (typeof url === 'string' && url) return { type: 'image', url };
    const src = part?.source;
    // `typeof null === 'object'`, so null must be excluded explicitly or
    // the property reads below throw on `{ source: null }`.
    if (src !== null && typeof src === 'object') {
      if (src.type === 'base64' && typeof src.data === 'string') {
        return { type: 'image', meta: src.media_type || '', hash: shortDigest(src.data, 16) };
      }
      if (src.type === 'url' && typeof src.url === 'string') {
        return { type: 'image', url: src.url };
      }
      if (typeof src.file_id === 'string') return { type: 'image', file_id: src.file_id };
    }
    return { type: 'image', unhashable: true };
  }

  // File / document blocks.
  if (type === 'document' || type === 'file' || type === 'input_file') {
    const fileId = part?.file_id || part?.source?.file_id;
    if (typeof fileId === 'string') return { type: 'file', file_id: fileId };
    if (part?.source?.type === 'base64' && typeof part.source.data === 'string') {
      return { type: 'file', meta: part.source.media_type || '', hash: shortDigest(part.source.data, 16) };
    }
    if (typeof part?.source?.url === 'string') return { type: 'file', url: part.source.url };
    return { type: 'file', unhashable: true };
  }

  // Any other typed block - stable JSON of the whole part. Catches things
  // like { type: 'tool_use', ... } when they appear in mixed content arrays.
  return { type: type || 'unknown', json: stableStringify(part ?? '') };
}
/**
 * Normalize a message's `content` into an array of canonical block records.
 *
 * @param {*} content - A string, an array of content parts, or anything else.
 * @returns {object[]} One record per array element; a single text record for
 *   a string; otherwise a single `{ type: 'json' }` record holding the stable
 *   JSON of the value (null/undefined are serialized as '').
 */
function canonicaliseContent(content) {
  if (Array.isArray(content)) {
    return content.map((part) => canonicalContentBlock(part));
  }
  if (typeof content === 'string') {
    return [{ type: 'text', text: stripMetaTags(content) }];
  }
  return [{ type: 'json', json: stableStringify(content ?? '') }];
}
/**
 * Report whether any canonical block is flagged `unhashable: true`.
 *
 * @param {*} blocks - Canonical block records; non-arrays yield false.
 * @returns {boolean} True when at least one block has `unhashable === true`.
 */
function hasUnhashableMedia(blocks) {
  if (!Array.isArray(blocks)) return false;
  for (const block of blocks) {
    if (block?.unhashable === true) return true;
  }
  return false;
}
/**
 * Project an assistant message's tool invocations into a stable list.
 *
 * Both the OpenAI shape (`tool_calls: [{ function: { name, arguments } }]`)
 * and the Anthropic shape (`content: [{ type: 'tool_use', name, input }]`)
 * map to `{ name, args }` so the same logical call digests identically.
 * Entries from `tool_calls` come first, followed by `tool_use` content parts.
 *
 * @param {object} m - Assistant message.
 * @returns {{name: string, args: string}[]} Canonical calls; `args` is
 *   stable JSON text, the raw string when it is not valid JSON, or '' when
 *   the call carries no arguments.
 */
function projectAssistantToolCalls(m) {
  // Canonical argument text for one `tool_calls` element.
  const canonicalArgs = (call) => {
    const raw = call?.function?.arguments;
    if (typeof raw === 'string') {
      // Re-serialize parsed JSON so key order and spacing do not matter;
      // keep the raw text when it does not parse.
      try {
        return stableStringify(JSON.parse(raw));
      } catch {
        return raw;
      }
    }
    if (raw !== undefined) return stableStringify(raw);
    if (call?.input !== undefined) return stableStringify(call.input);
    return '';
  };

  const projected = [];
  const openAiCalls = Array.isArray(m?.tool_calls) ? m.tool_calls : [];
  for (const call of openAiCalls) {
    const name = call?.function?.name || call?.name || '';
    projected.push({ name, args: canonicalArgs(call) });
  }
  const contentParts = Array.isArray(m?.content) ? m.content : [];
  for (const part of contentParts) {
    if (part?.type !== 'tool_use') continue;
    projected.push({ name: part.name || '', args: stableStringify(part.input ?? null) });
  }
  return projected;
}
/**
 * Project a chat message into the canonical record used for fingerprinting.
 *
 * - `system` / `user`: role plus canonical content blocks.
 * - `tool`: emitted as role `tool_result` with its `tool_call_id` ('' when
 *   not a string) and canonical content.
 * - `assistant`: whitespace-collapsed text of its text blocks joined by
 *   newlines, plus the canonical tool-call list. Other fields on the message
 *   are not included.
 * - anything else: the role as a string (or 'unknown') plus canonical
 *   content, so it is neither swallowed nor confused with a known role.
 *
 * @param {object} m - Chat message.
 * @returns {object} Canonical projection of the message.
 */
function projectMessage(m) {
  const role = m?.role;
  switch (role) {
    case 'system':
    case 'user':
      return { role, content: canonicaliseContent(m.content) };
    case 'tool': {
      const toolCallId = typeof m?.tool_call_id === 'string' ? m.tool_call_id : '';
      return {
        role: 'tool_result',
        tool_call_id: toolCallId,
        content: canonicaliseContent(m.content),
      };
    }
    case 'assistant': {
      const collapse = (block) => (block.text || '').replace(/\s+/g, ' ').trim();
      const textBlocks = canonicaliseContent(m.content).filter((block) => block.type === 'text');
      const text = textBlocks.map(collapse).join('\n').trim();
      return { role: 'assistant', text, tool_calls: projectAssistantToolCalls(m) };
    }
    default:
      return { role: String(role || 'unknown'), content: canonicaliseContent(m?.content) };
  }
}
/**
 * Digest of all system messages in a conversation.
 *
 * Setting CASCADE_REUSE_HASH_SYSTEM=0 is an explicit opt-out for callers
 * whose system prompt drifts every turn and who prefer hit rate over strict
 * isolation; hashing is on by default. Text blocks are passed through
 * normalizeSystemPromptForHash (v2.0.61, #111) so dates, cwd, session ids,
 * timestamps and UUIDs do not change the digest from turn to turn.
 *
 * @param {object[]} messages - Full message list.
 * @returns {string} 32-hex-character digest, or '' when hashing is disabled
 *   or there are no system messages.
 */
function systemDigest(messages) {
  if (process.env.CASCADE_REUSE_HASH_SYSTEM === '0') return '';
  const systemMessages = messages.filter((msg) => msg?.role === 'system');
  if (systemMessages.length === 0) return '';

  const normalizeBlock = (block) => {
    const isText = block?.type === 'text' && typeof block.text === 'string';
    return isText ? { ...block, text: normalizeSystemPromptForHash(block.text) } : block;
  };

  const projectedSystem = [];
  for (const msg of systemMessages) {
    const projected = projectMessage(msg);
    if (Array.isArray(projected.content)) {
      projected.content = projected.content.map((block) => normalizeBlock(block));
    }
    projectedSystem.push(projected);
  }
  return shortDigest(stableStringify(projectedSystem), 32);
}
/**
 * Digest of the tool context, used only when tools are being emulated.
 *
 * Tools are reduced to `{ name, description, parameters }` and sorted by
 * name before hashing (v2.0.61, #111) so client-side reordering of the tool
 * list does not change the digest.
 *
 * @param {object|null} [opts] - Request options; `null` is tolerated.
 * @param {boolean} [opts.emulateTools] - When falsy the digest is ''.
 * @param {object[]} [opts.tools] - Tool definitions, either wrapped in
 *   `{ function: {...} }` or flat; `input_schema` is accepted in place of
 *   `parameters`.
 * @param {*} [opts.toolChoice] - Included in the digest (null if absent).
 * @param {*} [opts.preambleTier] - Included in the digest (null if absent).
 * @param {string} [opts.toolPreamble] - Included as a 16-hex-char hash.
 * @returns {string} 32-hex-character digest, or '' when not emulating tools.
 */
function toolContextDigest(opts = {}) {
  // `opts` may be null: a default parameter only covers undefined, and
  // buildKeyPayload already treats opts as nullable (`opts?.route`).
  if (!opts?.emulateTools) return '';
  const tools = (Array.isArray(opts.tools) ? opts.tools.map(t => {
    const fn = t?.function || t;
    return {
      name: fn?.name || '',
      description: fn?.description || '',
      parameters: fn?.parameters ?? fn?.input_schema ?? null,
    };
  }) : []).sort((a, b) => (a.name || '').localeCompare(b.name || ''));
  return shortDigest(stableStringify({
    tools,
    tool_choice: opts.toolChoice ?? null,
    preambleTier: opts.preambleTier ?? null,
    toolPreambleHash: opts.toolPreamble ? shortDigest(opts.toolPreamble, 16) : '',
  }), 32);
}
/**
 * Return every message that precedes the newest user/tool turn.
 *
 * This is the slice the "before" fingerprint digests: everything up to, but
 * not including, the last message whose role is `user` or `tool`.
 *
 * @param {*} messages - Full message list.
 * @returns {object[]|null} The leading slice, or null when `messages` is not
 *   an array, contains no user/tool turn, or that turn is the very first
 *   message (no prior turn, so reuse is not meaningful).
 */
function priorTurnsForBefore(messages) {
  if (!Array.isArray(messages)) return null;
  // Walk backwards to the newest user/tool turn; `cut` ends at -1 if none.
  let cut = messages.length - 1;
  while (cut >= 0) {
    const role = messages[cut]?.role;
    if (role === 'user' || role === 'tool') break;
    cut -= 1;
  }
  return cut > 0 ? messages.slice(0, cut) : null;
}
/**
 * Project a list of turns into canonical records, skipping system messages
 * (those are digested separately).
 *
 * @param {*} turns - Messages to project.
 * @returns {{turns: object[]}|{unhashable: true}|null} The projected turns;
 *   `{ unhashable: true }` as soon as any turn contains media that cannot be
 *   digested; null when `turns` is not an array.
 */
function projectTurns(turns) {
  if (!Array.isArray(turns)) return null;
  const kept = [];
  for (const turn of turns) {
    if (turn?.role !== 'system') {
      const record = projectMessage(turn);
      const blocks = record.content;
      if (Array.isArray(blocks) && hasUnhashableMedia(blocks)) return { unhashable: true };
      kept.push(record);
    }
  }
  return { turns: kept };
}
/**
 * Build the canonical string that gets hashed into a pool fingerprint.
 *
 * @param {object} args
 * @param {*} args.messages - Full message list.
 * @param {string} args.modelKey - Model identifier folded into the key.
 * @param {string} args.callerKey - Caller identifier folded into the key.
 * @param {object|null} args.opts - Request options; `route` defaults to
 *   'chat', the rest feeds the tool-context digest.
 * @param {'before'|'after'} args.scope - 'after' digests every non-system
 *   message; anything else digests only the turns before the newest
 *   user/tool turn.
 * @returns {string|null} Stable JSON payload, or null when reuse should be
 *   disabled (messages is not an array, no prior turn, unhashable media).
 */
function buildKeyPayload({ messages, modelKey, callerKey, opts, scope }) {
  // Validate first: the system digest below calls messages.filter(), which
  // would throw on a non-array instead of yielding the documented null.
  if (!Array.isArray(messages)) return null;
  const sys = systemDigest(messages);
  const tools = toolContextDigest(opts);
  const turnSlice = scope === 'after' ? messages : priorTurnsForBefore(messages);
  if (!turnSlice) return null;
  const projection = projectTurns(turnSlice);
  if (!projection) return null;
  if (projection.unhashable) return null;
  return stableStringify({
    v: KEY_VERSION,
    caller: String(callerKey || ''),
    model: String(modelKey || ''),
    route: opts?.route || 'chat',
    sys,
    tools,
    turns: projection.turns,
  });
}
/**
 * Lookup fingerprint: "I'm about to send this newest user turn - find me a
 * cascade I can resume." Hashes everything before the newest user/tool turn
 * (assistant text + tool_calls digest, system digest, tool digest).
 *
 * Signatures (backward-compatible):
 *   fingerprintBefore(messages)
 *   fingerprintBefore(messages, modelKey)
 *   fingerprintBefore(messages, modelKey, callerKey)
 *   fingerprintBefore(messages, modelKey, callerKey, opts)
 * where opts = { tools, toolChoice, toolPreamble, emulateTools,
 *                preambleTier, route }
 *
 * @param {object[]} messages - Full message list including the newest turn.
 * @param {string} [modelKey=''] - Model identifier folded into the key.
 * @param {string} [callerKey=''] - Caller identifier folded into the key.
 * @param {object} [opts={}] - Tool / route options (see above).
 * @returns {string|null} SHA-256 hex fingerprint, or null when reuse should
 *   be disabled (single-turn, unhashable media in prior history, etc.).
 */
export function fingerprintBefore(messages, modelKey = '', callerKey = '', opts = {}) {
  const keyMaterial = buildKeyPayload({ scope: 'before', messages, modelKey, callerKey, opts });
  return keyMaterial ? sha256(keyMaterial) : null;
}
/**
 * Store fingerprint: "I just finished a turn - store the cascade under the
 * key the next request will look up." Same key shape as fingerprintBefore,
 * but computed over the FULL message list, so it represents the trajectory
 * as of the messages passed in.
 *
 * @param {object[]} messages - Entire trajectory seen so far.
 * @param {string} [modelKey=''] - Model identifier folded into the key.
 * @param {string} [callerKey=''] - Caller identifier folded into the key.
 * @param {object} [opts={}] - Tool / route options, as for fingerprintBefore.
 * @returns {string|null} SHA-256 hex fingerprint, or null when `messages` is
 *   not a non-empty array or contains unhashable media.
 */
export function fingerprintAfter(messages, modelKey = '', callerKey = '', opts = {}) {
  if (!Array.isArray(messages) || messages.length === 0) return null;
  // The 'after' scope digests every non-system message, i.e. the whole
  // trajectory rather than only the turns before the newest one.
  const keyMaterial = buildKeyPayload({ scope: 'after', messages, modelKey, callerKey, opts });
  return keyMaterial ? sha256(keyMaterial) : null;
}
/**
 * Time-to-live that applies to one pool entry.
 *
 * @param {object} entry - Pool entry, optionally carrying `ttlHintMs`.
 * @returns {number} The entry's own hint when it is a positive finite
 *   number, otherwise the pool-wide default POOL_TTL_MS.
 */
function effectiveTtl(entry) {
  const perEntryTtl = Number(entry?.ttlHintMs);
  if (Number.isFinite(perEntryTtl) && perEntryTtl > 0) return perEntryTtl;
  return POOL_TTL_MS;
}
/**
 * Drop expired entries, then enforce the pool size cap.
 *
 * An entry is expired when the time since its `lastAccess` exceeds its
 * effective TTL. If the pool is still larger than POOL_MAX afterwards, the
 * entries with the oldest `lastAccess` are evicted until it fits.
 *
 * @param {number} now - Current time in ms since the epoch.
 * @returns {void}
 */
function prune(now) {
  for (const [key, item] of _pool) {
    const idleFor = now - item.lastAccess;
    if (idleFor > effectiveTtl(item)) {
      _pool.delete(key);
      stats.expired++;
    }
  }
  const overflow = _pool.size - POOL_MAX;
  if (overflow <= 0) return;
  const oldestFirst = [..._pool.entries()].sort((a, b) => a[1].lastAccess - b[1].lastAccess);
  for (const [key] of oldestFirst.slice(0, overflow)) {
    _pool.delete(key);
    stats.evictions++;
  }
}
/**
 * Check out a conversation when the pool holds a matching fingerprint AND
 * the stored entry is compatible with the caller. On success the entry is
 * removed from the pool; the caller is expected to `checkin()` it again
 * under a new fingerprint after a successful turn.
 *
 * All validation happens BEFORE removal. On a caller or owner mismatch the
 * entry stays in place so its rightful owner can still find it on their next
 * turn. Only an expired entry is deleted on a failed checkout.
 *
 * @param {string} fingerprint - Key produced by fingerprintBefore().
 * @param {string} [callerKey=''] - Caller identity; compared with the
 *   entry's `callerKey` only when both are non-empty.
 * @param {{apiKey?: string, lsPort?: number, lsGeneration?: number}|null}
 *   [expected=null] - Optional owner tuple (v2.0.25, MED-3). Each field is
 *   compared only when present on both sides; a difference is a miss.
 * @returns {object|null} The stored entry, or null on a miss.
 */
export function checkout(fingerprint, callerKey = '', expected = null) {
  // Every failure path counts a miss and yields null.
  const miss = () => {
    stats.misses++;
    return null;
  };

  if (!fingerprint) return miss();
  const entry = _pool.get(fingerprint);
  if (!entry) return miss();

  const foreignCaller = entry.callerKey && callerKey && entry.callerKey !== callerKey;
  if (foreignCaller) return miss();

  const idleFor = Date.now() - entry.lastAccess;
  if (idleFor > effectiveTtl(entry)) {
    _pool.delete(fingerprint);
    stats.expired++;
    return miss();
  }

  if (expected) {
    const apiKeyDiffers = expected.apiKey && entry.apiKey && expected.apiKey !== entry.apiKey;
    if (apiKeyDiffers) return miss();
    const portDiffers = expected.lsPort && entry.lsPort && expected.lsPort !== entry.lsPort;
    if (portDiffers) return miss();
    const generationDiffers = expected.lsGeneration != null
      && entry.lsGeneration != null
      && expected.lsGeneration !== entry.lsGeneration;
    if (generationDiffers) return miss();
  }

  // Validated: hand the entry over and take it out of the pool.
  _pool.delete(fingerprint);
  stats.hits++;
  return entry;
}
/**
 * Store (or restore) a conversation entry under one or more fingerprints.
 *
 * `fingerprint` may be a single string or an array of strings. With an
 * array the same entry data is indexed under each fingerprint - used by the
 * v2.0.87 auto-fallback (#129) so the cascade stays findable under both the
 * original-model and the fallback-model fingerprint.
 *
 * `ttlHintMs` controls the per-entry expiry:
 *   - `undefined` (default): keep whatever hint `entry` already carries;
 *   - a positive finite number: use it as the new hint (e.g. a 1h window
 *     requested by a prompt-caching marker);
 *   - anything else (`null`, `0`, negative, non-finite): clear the hint so
 *     the pool default applies again (MED-2).
 *
 * Calls with a falsy `entry` or no usable fingerprint are ignored.
 *
 * @param {string|string[]} fingerprint - Key(s) to store under.
 * @param {object} entry - Conversation state (cascadeId, sessionId, lsPort,
 *   lsGeneration, apiKey, callerKey, stepOffset, generatorOffset,
 *   historyCoverage, createdAt, ttlHintMs).
 * @param {string} [callerKey=''] - Overrides `entry.callerKey` when non-empty.
 * @param {number|null} [ttlHintMs] - Per-entry TTL hint, see above.
 * @returns {void}
 */
export function checkin(fingerprint, entry, callerKey = '', ttlHintMs) {
  if (!entry) return;

  let keys;
  if (Array.isArray(fingerprint)) {
    keys = fingerprint.filter((fp) => typeof fp === 'string' && fp);
  } else {
    keys = fingerprint ? [fingerprint] : [];
  }
  if (keys.length === 0) return;

  const now = Date.now();

  let hint;
  if (ttlHintMs === undefined) {
    hint = entry.ttlHintMs;
  } else if (ttlHintMs !== null && Number.isFinite(ttlHintMs) && ttlHintMs > 0) {
    hint = ttlHintMs;
  } else {
    hint = undefined;
  }
  const keepHint = Number.isFinite(hint) && hint > 0;

  // Each fingerprint gets its own object, so mutating or removing one slot
  // can never affect the entry stored under another key.
  const makeSlot = () => ({
    cascadeId: entry.cascadeId,
    sessionId: entry.sessionId,
    lsPort: entry.lsPort,
    lsGeneration: entry.lsGeneration,
    apiKey: entry.apiKey,
    callerKey: callerKey || entry.callerKey || '',
    stepOffset: Number.isFinite(entry.stepOffset) ? entry.stepOffset : 0,
    generatorOffset: Number.isFinite(entry.generatorOffset) ? entry.generatorOffset : 0,
    historyCoverage: entry.historyCoverage || null,
    createdAt: entry.createdAt || now,
    lastAccess: now,
    ...(keepHint ? { ttlHintMs: hint } : {}),
  });
  for (const key of keys) {
    _pool.set(key, makeSlot());
  }

  // v2.0.88 (audit M-1): one logical checkin counts as one store, however
  // many fingerprint slots it wrote; extra slots are tallied as aliases.
  stats.stores++;
  const aliasCount = keys.length - 1;
  if (aliasCount > 0) stats.aliasWrites = (stats.aliasWrites || 0) + aliasCount;
  prune(now);
}
/**
 * Drop every entry that belongs to an (apiKey, lsPort, lsGeneration) tuple
 * that just went away (account removed, LS restarted, LS replaced on the
 * same port).
 *
 * An entry belongs to the tuple when its `apiKey` equals the given one, or
 * when its `lsPort` equals the given one and the generations are compatible
 * (either side missing, or equal).
 *
 * v2.0.88 (audit H-2): a checkin may index the same cascade under several
 * fingerprints, so the scan runs in two passes. Pass 1 collects the
 * cascadeIds of all matching entries; pass 2 removes every slot that either
 * matches directly or points at one of those cascadeIds.
 *
 * @param {{apiKey?: string, lsPort?: number, lsGeneration?: number}} [target]
 *   The tuple being invalidated. Falsy fields are ignored.
 * @returns {number} How many pool slots were removed.
 */
export function invalidateFor({ apiKey, lsPort, lsGeneration } = {}) {
  const belongsToTarget = (e) => {
    if (apiKey && e.apiKey === apiKey) return true;
    if (!lsPort || e.lsPort !== lsPort) return false;
    return lsGeneration == null || e.lsGeneration == null || e.lsGeneration === lsGeneration;
  };

  // Pass 1: every cascadeId tied to the going-away tuple.
  const doomedCascadeIds = new Set();
  for (const e of _pool.values()) {
    if (e.cascadeId && belongsToTarget(e)) doomedCascadeIds.add(e.cascadeId);
  }

  // Pass 2: direct matches plus sibling slots that share a doomed cascadeId.
  let dropped = 0;
  for (const [fp, e] of _pool) {
    const isSibling = Boolean(e.cascadeId) && doomedCascadeIds.has(e.cascadeId);
    if (belongsToTarget(e) || isSibling) {
      _pool.delete(fp);
      dropped += 1;
    }
  }
  return dropped;
}
/**
 * Snapshot of pool size, limits and counters.
 *
 * @returns {object} `{ size, maxSize, ttlMs, ...counters, hitRate }` where
 *   `hitRate` is the hit percentage among all checkout attempts, formatted
 *   as a string with one decimal ('0.0' before any attempt).
 */
export function poolStats() {
  const lookups = stats.hits + stats.misses;
  const hitRate = lookups > 0
    ? ((stats.hits / lookups) * 100).toFixed(1)
    : '0.0';
  return {
    size: _pool.size,
    maxSize: POOL_MAX,
    ttlMs: POOL_TTL_MS,
    ...stats,
    hitRate,
  };
}
/**
 * Remove every entry from the pool.
 *
 * @returns {number} How many entries the pool held before it was cleared.
 */
export function poolClear() {
  const removedCount = _pool.size;
  _pool.clear();
  return removedCount;
}
// Background prune, every 5 minutes - without this, expired entries
// accumulate when there are no checkin() calls for a while (e.g. a quiet
// weekend). .unref() so this timer never holds the process open past real
// work.
setInterval(() => prune(Date.now()), 5 * 60 * 1000).unref();
|