Anurag committed on
Commit
affbd4d
·
1 Parent(s): fb81013

Keep Gemini failures model scoped

Browse files
.env.example CHANGED
@@ -173,9 +173,10 @@ LLM_API_KEY_FALLBACK_ENABLED=true
173
  # overloading one key when many requests arrive in parallel.
174
  # KEY_MAX_INFLIGHT_PER_KEY=3
175
  #
176
- # Safety lease for in-flight counters (default: 5 min). Prevents abandoned
177
- # streaming/undici requests from making keys look permanently saturated.
178
- # KEY_INFLIGHT_TTL_MS=300000
 
179
  #
180
  # Max request-body bytes to inspect for model names on streaming
181
  # OpenAI-compatible Gemini calls (default: 256 KiB).
@@ -191,6 +192,7 @@ LLM_API_KEY_FALLBACK_ENABLED=true
191
  # model-scoped and per-request round-robin can burn 2-3 keys for one chat turn.
192
  # KEY_STICKY_UNTIL_FAILURE=true
193
  # KEY_STICKY_PROVIDERS=gemini
 
194
  #
195
  # Optional auto-retry count for fetch requests on retryable errors/statuses.
196
  # Default 0 means one upstream attempt per caller request; set 1-2 to opt in.
 
173
  # overloading one key when many requests arrive in parallel.
174
  # KEY_MAX_INFLIGHT_PER_KEY=3
175
  #
176
+ # Safety lease for in-flight counters (default: 30s). If a picked key gets no
177
+ # provider headers/completion/error before this, it is marked transient and
178
+ # sticky mode rotates away instead of leaving permanent pending counts.
179
+ # KEY_INFLIGHT_TTL_MS=30000
180
  #
181
  # Max request-body bytes to inspect for model names on streaming
182
  # OpenAI-compatible Gemini calls (default: 256 KiB).
 
192
  # model-scoped and per-request round-robin can burn 2-3 keys for one chat turn.
193
  # KEY_STICKY_UNTIL_FAILURE=true
194
  # KEY_STICKY_PROVIDERS=gemini
195
+ # KEY_STICKY_SCOPE=auto # auto = per-model for Gemini, provider-level for others
196
  #
197
  # Optional auto-retry count for fetch requests on retryable errors/statuses.
198
  # Default 0 means one upstream attempt per caller request; set 1-2 to opt in.
README.md CHANGED
@@ -296,11 +296,12 @@ Optional tuning:
296
  - `KEY_PERM_SUSPEND_MS` (default `57600000`) — long suspend duration for exhausted/auth-invalid keys (**capped at 16h max**).
297
  - `KEY_FAILURE_DECAY_MS` (default `900000`) — recent-failure decay window used to deprioritize keys.
298
  - `KEY_MAX_INFLIGHT_PER_KEY` (default `3`) — soft concurrent request cap per key.
299
- - `KEY_INFLIGHT_TTL_MS` (default `300000`) — safety lease for in-flight counters, preventing stale undici streams from making keys look permanently saturated.
300
  - `KEY_MODEL_SNIFF_MAX_BYTES` (default `262144`) — max request-body bytes to inspect for model names on streaming OpenAI-compatible Gemini calls.
301
  - `KEY_ERROR_BODY_SNIFF_MAX_BYTES` (default `65536`) — max error-response bytes to inspect so provider quota/rate bodies such as 403 quota errors are scoped correctly instead of being treated as permanent auth failures.
302
  - `KEY_STICKY_UNTIL_FAILURE` (default `true`) — keep sticky providers on one key until that key fails/exhausts.
303
  - `KEY_STICKY_PROVIDERS` (default `gemini`) — comma-separated provider names that should use sticky key selection instead of per-request round-robin.
 
304
  - `KEY_FETCH_MAX_RETRIES` (default `0`) — optional auto-retry count for retryable failures on **GET/HEAD/OPTIONS/POST** with a different key. Default `0` means the rotator does **not** spend extra upstream attempts for a single caller request.
305
  - `KEY_FETCH_RETRY_BASE_DELAY_MS` (default `250`) — base delay for retry backoff (respects `Retry-After`, capped to 10s).
306
  - `KEY_ROTATOR_ASSERT_NO_EXTRA_CALLS=true` — optional diagnostic warning if a single caller fetch creates more than one upstream provider attempt.
 
296
  - `KEY_PERM_SUSPEND_MS` (default `57600000`) — long suspend duration for exhausted/auth-invalid keys (**capped at 16h max**).
297
  - `KEY_FAILURE_DECAY_MS` (default `900000`) — recent-failure decay window used to deprioritize keys.
298
  - `KEY_MAX_INFLIGHT_PER_KEY` (default `3`) — soft concurrent request cap per key.
299
+ - `KEY_INFLIGHT_TTL_MS` (default `30000`) — safety lease for picked keys with no provider headers/completion/error; stale picks are marked transient so sticky keys can rotate.
300
  - `KEY_MODEL_SNIFF_MAX_BYTES` (default `262144`) — max request-body bytes to inspect for model names on streaming OpenAI-compatible Gemini calls.
301
  - `KEY_ERROR_BODY_SNIFF_MAX_BYTES` (default `65536`) — max error-response bytes to inspect so provider quota/rate bodies such as 403 quota errors are scoped correctly instead of being treated as permanent auth failures.
302
  - `KEY_STICKY_UNTIL_FAILURE` (default `true`) — keep sticky providers on one key until that key fails/exhausts.
303
  - `KEY_STICKY_PROVIDERS` (default `gemini`) — comma-separated provider names that should use sticky key selection instead of per-request round-robin.
304
+ - `KEY_STICKY_SCOPE` (default `auto`) — `auto` uses per-model sticky buckets for Gemini/per-model providers and provider-level buckets for others; set `provider` or `model` to override.
305
  - `KEY_FETCH_MAX_RETRIES` (default `0`) — optional auto-retry count for retryable failures on **GET/HEAD/OPTIONS/POST** with a different key. Default `0` means the rotator does **not** spend extra upstream attempts for a single caller request.
306
  - `KEY_FETCH_RETRY_BASE_DELAY_MS` (default `250`) — base delay for retry backoff (respects `Retry-After`, capped to 10s).
307
  - `KEY_ROTATOR_ASSERT_NO_EXTRA_CALLS=true` — optional diagnostic warning if a single caller fetch creates more than one upstream provider attempt.
env-builder.js CHANGED
@@ -612,9 +612,9 @@ const FIELDS = [
612
  "k": "KEY_INFLIGHT_TTL_MS",
613
  "lbl": "Key rotation in-flight safety lease (ms)",
614
  "type": "text",
615
- "ph": "300000",
616
  "tag": "advanced",
617
- "help": "Clears stale in-flight counters if a streaming/undici request never completes cleanly. Default: 5 minutes."
618
  },
619
  {
620
  "g": "Plugins",
@@ -656,6 +656,16 @@ const FIELDS = [
656
  "tag": "advanced",
657
  "help": "Provider names that should reuse one key until failure/quota exhaustion. Default: gemini."
658
  },
 
 
 
 
 
 
 
 
 
 
659
  {
660
  "g": "Plugins",
661
  "icon": "🔄",
 
612
  "k": "KEY_INFLIGHT_TTL_MS",
613
  "lbl": "Key rotation in-flight safety lease (ms)",
614
  "type": "text",
615
+ "ph": "30000",
616
  "tag": "advanced",
617
+ "help": "Marks picked keys stale if no provider headers/completion/error are observed. Default: 30 seconds."
618
  },
619
  {
620
  "g": "Plugins",
 
656
  "tag": "advanced",
657
  "help": "Provider names that should reuse one key until failure/quota exhaustion. Default: gemini."
658
  },
659
+ {
660
+ "g": "Plugins",
661
+ "icon": "🔄",
662
+ "k": "KEY_STICKY_SCOPE",
663
+ "lbl": "Sticky key scope (auto/provider/model)",
664
+ "type": "text",
665
+ "ph": "auto",
666
+ "tag": "advanced",
667
+ "help": "Default auto uses per-model sticky buckets for Gemini/per-model providers and provider-level buckets for others."
668
+ },
669
  {
670
  "g": "Plugins",
671
  "icon": "🔄",
key-rotator-manager.html CHANGED
@@ -5,7 +5,7 @@
5
  <meta name="viewport" content="width=device-width,initial-scale=1" />
6
  <title>HuggingClaw · API Key Rotator</title>
7
  <style>
8
- :root{color-scheme:dark;--bg:#070711;--panel:#111120;--panel2:#17172a;--line:#292945;--text:#f8f7ff;--muted:#9892b8;--soft:#c7c2e6;--good:#22c55e;--warn:#f5c542;--bad:#fb7185;--blue:#60a5fa;--violet:#a78bfa}*{box-sizing:border-box}body{margin:0;min-height:100vh;background:radial-gradient(circle at top left,#25145a 0,#070711 34%,#070711 100%);font-family:Inter,ui-sans-serif,system-ui,-apple-system,sans-serif;color:var(--text);font-size:13px}main{width:min(1280px,calc(100% - 28px));margin:0 auto;padding:28px 0 44px}.top{display:flex;align-items:flex-start;justify-content:space-between;gap:18px;margin-bottom:18px}.eyebrow{font-size:.7rem;letter-spacing:.18em;text-transform:uppercase;color:var(--muted);font-weight:900}h1{margin:6px 0 8px;font-size:clamp(1.6rem,4vw,2.7rem);line-height:1}.sub{color:var(--soft);max-width:860px;line-height:1.55}.actions{display:flex;gap:10px;flex-wrap:wrap}.btn{border:1px solid var(--line);background:rgba(255,255,255,.06);color:var(--text);border-radius:11px;padding:10px 14px;text-decoration:none;font-weight:850;cursor:pointer}.btn.primary{background:#fff;color:#050510}.btn:hover{filter:brightness(1.08)}.grid{display:grid;grid-template-columns:repeat(5,minmax(0,1fr));gap:12px;margin:18px 0}.card{background:linear-gradient(180deg,rgba(255,255,255,.055),rgba(255,255,255,.025));border:1px solid var(--line);border-radius:18px;padding:16px;box-shadow:0 18px 45px rgba(0,0,0,.22)}.metric-title{color:var(--muted);text-transform:uppercase;letter-spacing:.16em;font-size:.66rem;font-weight:900}.metric-value{font-size:1.65rem;font-weight:950;margin-top:8px}.metric-detail{color:var(--muted);margin-top:6px;line-height:1.45}.ok{color:var(--good)}.warn{color:var(--warn)}.bad{color:var(--bad)}.blue{color:var(--blue)}.layout{display:grid;grid-template-columns:340px minmax(0,1fr);gap:14px}.panel-title{display:flex;justify-content:space-between;align-items:center;gap:10px;margin-bottom:12px}.panel-title 
h2{font-size:1rem;margin:0}.pill{display:inline-flex;align-items:center;gap:6px;border:1px solid var(--line);border-radius:999px;padding:5px 9px;color:var(--soft);background:rgba(255,255,255,.035);font-size:.72rem;font-weight:850}.dot{width:7px;height:7px;border-radius:50%;background:var(--muted)}.dot.live{background:var(--good);box-shadow:0 0 15px var(--good)}.providers{display:flex;flex-direction:column;gap:9px}.provider{border:1px solid var(--line);border-radius:14px;background:rgba(0,0,0,.12);padding:12px;cursor:pointer}.provider.active{border-color:var(--blue);box-shadow:0 0 0 1px rgba(96,165,250,.25)}.provider-top{display:flex;justify-content:space-between;align-items:center;gap:12px}.provider-name{font-weight:950}.provider-meta{color:var(--muted);font-size:.78rem;margin-top:5px}.bar{height:8px;background:#22223a;border-radius:999px;overflow:hidden;margin-top:10px}.bar>span{display:block;height:100%;background:linear-gradient(90deg,var(--violet),var(--blue));border-radius:999px}.toolbar{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px}.input{flex:1;min-width:180px;border:1px solid var(--line);background:#0c0c18;color:var(--text);border-radius:11px;padding:10px 12px;outline:none}.select{border:1px solid var(--line);background:#0c0c18;color:var(--text);border-radius:11px;padding:10px 12px}.toggle{display:flex;align-items:center;gap:7px;color:var(--soft);font-weight:800}.events{display:flex;flex-direction:column;gap:9px;max-height:620px;overflow:auto;padding-right:4px}.event{border:1px solid var(--line);border-left-width:4px;border-radius:14px;background:rgba(0,0,0,.16);padding:12px;display:grid;grid-template-columns:155px minmax(0,1fr) 
auto;gap:12px}.event.pick,.event.sticky_pick,.event.pick_retry_fresh,.event.model_detected{border-left-color:var(--blue)}.event.success{border-left-color:var(--good)}.event.rate_limited,.event.auth_failed,.event.all_suspended_pick,.event.all_suspended_withheld{border-left-color:var(--warn)}.event.network_retryable,.event.transient_status,.event.saturated_reuse,.event.sticky_saturated_reuse,.event.inflight_timeout{border-left-color:var(--bad)}.time{color:var(--muted);font-variant-numeric:tabular-nums}.etype{font-weight:950}.msg{color:var(--soft);line-height:1.45;word-break:break-word}.key,.mono{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;color:#e9d5ff}.empty{padding:34px;text-align:center;color:var(--muted);border:1px dashed var(--line);border-radius:16px}.foot{color:var(--muted);margin-top:16px;line-height:1.55}.kbd{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;background:#23233a;border:1px solid #363653;border-radius:7px;padding:2px 6px;color:var(--text)}.detail-grid{display:grid;grid-template-columns:repeat(4,minmax(0,1fr));gap:10px;margin-bottom:12px}.mini{border:1px solid var(--line);border-radius:13px;padding:11px;background:rgba(0,0,0,.12)}.mini b{display:block;font-size:1.15rem;margin-top:4px}.key-table-wrap{overflow:auto}.key-table{width:100%;border-collapse:separate;border-spacing:0 8px;min-width:780px}.key-table th{color:var(--muted);font-size:.68rem;text-align:left;text-transform:uppercase;letter-spacing:.12em}.key-table td{background:rgba(0,0,0,.15);border-top:1px solid var(--line);border-bottom:1px solid var(--line);padding:10px;vertical-align:top}.key-table td:first-child{border-left:1px solid var(--line);border-radius:12px 0 0 12px}.key-table td:last-child{border-right:1px solid var(--line);border-radius:0 12px 12px 0}.status{font-weight:900}.status.used{color:var(--good)}.status.unused{color:var(--muted)}.model-chip{display:inline-flex;margin:2px 4px 2px 0;padding:4px 7px;border:1px solid 
var(--line);border-radius:999px;color:var(--soft);background:rgba(255,255,255,.035);font-size:.72rem}.section{margin-top:14px}.session-note{border:1px solid rgba(96,165,250,.28);background:rgba(96,165,250,.08);border-radius:14px;padding:12px;color:var(--soft);margin-bottom:12px}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);white-space:nowrap;border:0}@media(max-width:1050px){.grid{grid-template-columns:repeat(2,minmax(0,1fr))}.detail-grid{grid-template-columns:repeat(2,minmax(0,1fr))}.layout{grid-template-columns:1fr}.top{flex-direction:column}.event{grid-template-columns:1fr}.events{max-height:none}}@media(max-width:620px){.grid,.detail-grid{grid-template-columns:1fr}}
9
  </style>
10
  </head>
11
  <body>
@@ -56,14 +56,16 @@
56
  let state={events:[],providers:[],runtime:{routes:[]},paused:false,selected:localStorage.getItem('hc.keyRotator.provider')||''};
57
  const $=id=>document.getElementById(id), HTML_ESCAPE={'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
58
  const PICK_TYPES=['pick','sticky_pick','pick_retry_fresh','all_suspended_pick','saturated_reuse','sticky_saturated_reuse'];
 
59
  function esc(v){return String(v??'').replace(/[&<>"']/g,ch=>HTML_ESCAPE[ch]);}
60
  function eventClass(v){return String(v||'event').replace(/[^a-z0-9_-]/gi,'_');}
61
  function fmtTime(ts){const d=new Date(ts);return isNaN(d)?'—':d.toLocaleString();}
62
  function isKeyEvent(e){return e.provider&&e.slot&&(e.key||e.type==='all_suspended_withheld');}
63
  function keyId(provider,slot,key){return `${provider}::${slot||''}::${key||''}`;}
64
  function eventText(e){const bits=[];if(e.provider)bits.push(e.provider);if(e.slot)bits.push(`#${e.slot}/${e.total}`);if(e.key)bits.push(e.key);if(e.model)bits.push(`model=${e.model}`);if(e.status)bits.push(`status=${e.status}`);if(e.errorStatus)bits.push(`error=${e.errorStatus}`);if(e.errorReason)bits.push(`reason=${e.errorReason}`);if(e.errorType)bits.push(`type=${e.errorType}`);if(e.waitMs)bits.push(`wait=${Math.round(e.waitMs/1000)}s`);if(e.code)bits.push(`code=${e.code}`);if(e.errorCode)bits.push(`errorCode=${e.errorCode}`);if(e.inflight)bits.push(`inflight=${e.inflight}/${e.maxInflight||'?'}`);return bits.join(' · ');}
65
- function emptyStat(provider,slot,total,key){return{provider,slot,total,key,picks:0,success:0,rate:0,retry:0,auth:0,transient:0,last:'',models:new Map()};}
66
- function buildStats(){const stats=new Map();for(const p of state.providers){for(const k of (p.keys||[]))stats.set(keyId(p.name,k.slot,k.key),emptyStat(p.name,k.slot,k.total,k.key));}for(const e of state.events){if(!isKeyEvent(e))continue;const id=keyId(e.provider,e.slot,e.key);if(!stats.has(id))stats.set(id,emptyStat(e.provider,e.slot,e.total,e.key||'***'));const s=stats.get(id);const isPick=PICK_TYPES.includes(e.type);const isOutcome=['success','rate_limited','network_retryable','auth_failed','transient_status'].includes(e.type);if(isPick)s.picks++;if(e.type==='success')s.success++;if(e.type==='rate_limited')s.rate++;if(e.type==='network_retryable')s.retry++;if(e.type==='auth_failed')s.auth++;if(e.type==='transient_status')s.transient++;s.last=e.ts||s.last;if(e.model){let m=s.models.get(e.model);if(!m)m={picks:0,success:0,rate:0,retry:0,auth:0,transient:0,observed:false};if(e.type==='model_detected')m.observed=true;if(isPick)m.picks++;if(e.type==='success')m.success++;if(e.type==='rate_limited')m.rate++;if(e.type==='network_retryable')m.retry++;if(e.type==='auth_failed')m.auth++;if(e.type==='transient_status')m.transient++;s.models.set(e.model,m);}else if(isPick||isOutcome){s.unscoped=s.unscoped||{picks:0,success:0,rate:0,retry:0,auth:0,transient:0};if(isPick)s.unscoped.picks++;if(e.type==='success')s.unscoped.success++;if(e.type==='rate_limited')s.unscoped.rate++;if(e.type==='network_retryable')s.unscoped.retry++;if(e.type==='auth_failed')s.unscoped.auth++;if(e.type==='transient_status')s.unscoped.transient++;}}return stats;}
 
67
  function providerRows(provider,stats){return (provider?.keys||[]).map(k=>stats.get(keyId(provider.name,k.slot,k.key))||emptyStat(provider.name,k.slot,k.total,k.key));}
68
  function pendingCount(v){return Math.max(0,(v.picks||0)-((v.success||0)+(v.rate||0)+(v.retry||0)+(v.transient||0)+(v.auth||0)));}
69
  function modelChips(row){const entries=[...row.models.entries()];const un=row.unscoped||{picks:0,success:0,rate:0,retry:0,auth:0,transient:0};const unPending=pendingCount(un);if(!entries.length){const total=row.picks+row.success+row.rate+row.retry+row.transient+row.auth;return total?`<span class="model-chip">unscoped · p:${row.picks} ok:${row.success} pending:${pendingCount(row)} rl:${row.rate} retry:${row.retry+row.transient}</span>`:'<span class="model-chip">no model events yet</span>';}const chips=entries.map(([m,v])=>{const onlyObserved=v.observed&&!v.picks&&!v.success&&!v.rate&&!v.retry&&!v.transient&&!v.auth;return `<span class="model-chip">${esc(m)} · ${onlyObserved?'observed':`p:${v.picks} ok:${v.success} pending:${pendingCount(v)} rl:${v.rate} retry:${v.retry+v.transient}`}</span>`;});if(un.picks||un.success||un.rate||un.retry||un.transient||un.auth)chips.push(`<span class="model-chip">unscoped totals · p:${un.picks} ok:${un.success} pending:${unPending} rl:${un.rate} retry:${un.retry+un.transient}</span>`);return chips.join('');}
 
5
  <meta name="viewport" content="width=device-width,initial-scale=1" />
6
  <title>HuggingClaw · API Key Rotator</title>
7
  <style>
8
+ :root{color-scheme:dark;--bg:#070711;--panel:#111120;--panel2:#17172a;--line:#292945;--text:#f8f7ff;--muted:#9892b8;--soft:#c7c2e6;--good:#22c55e;--warn:#f5c542;--bad:#fb7185;--blue:#60a5fa;--violet:#a78bfa}*{box-sizing:border-box}body{margin:0;min-height:100vh;background:radial-gradient(circle at top left,#25145a 0,#070711 34%,#070711 100%);font-family:Inter,ui-sans-serif,system-ui,-apple-system,sans-serif;color:var(--text);font-size:13px}main{width:min(1280px,calc(100% - 28px));margin:0 auto;padding:28px 0 44px}.top{display:flex;align-items:flex-start;justify-content:space-between;gap:18px;margin-bottom:18px}.eyebrow{font-size:.7rem;letter-spacing:.18em;text-transform:uppercase;color:var(--muted);font-weight:900}h1{margin:6px 0 8px;font-size:clamp(1.6rem,4vw,2.7rem);line-height:1}.sub{color:var(--soft);max-width:860px;line-height:1.55}.actions{display:flex;gap:10px;flex-wrap:wrap}.btn{border:1px solid var(--line);background:rgba(255,255,255,.06);color:var(--text);border-radius:11px;padding:10px 14px;text-decoration:none;font-weight:850;cursor:pointer}.btn.primary{background:#fff;color:#050510}.btn:hover{filter:brightness(1.08)}.grid{display:grid;grid-template-columns:repeat(5,minmax(0,1fr));gap:12px;margin:18px 0}.card{background:linear-gradient(180deg,rgba(255,255,255,.055),rgba(255,255,255,.025));border:1px solid var(--line);border-radius:18px;padding:16px;box-shadow:0 18px 45px rgba(0,0,0,.22)}.metric-title{color:var(--muted);text-transform:uppercase;letter-spacing:.16em;font-size:.66rem;font-weight:900}.metric-value{font-size:1.65rem;font-weight:950;margin-top:8px}.metric-detail{color:var(--muted);margin-top:6px;line-height:1.45}.ok{color:var(--good)}.warn{color:var(--warn)}.bad{color:var(--bad)}.blue{color:var(--blue)}.layout{display:grid;grid-template-columns:340px minmax(0,1fr);gap:14px}.panel-title{display:flex;justify-content:space-between;align-items:center;gap:10px;margin-bottom:12px}.panel-title 
h2{font-size:1rem;margin:0}.pill{display:inline-flex;align-items:center;gap:6px;border:1px solid var(--line);border-radius:999px;padding:5px 9px;color:var(--soft);background:rgba(255,255,255,.035);font-size:.72rem;font-weight:850}.dot{width:7px;height:7px;border-radius:50%;background:var(--muted)}.dot.live{background:var(--good);box-shadow:0 0 15px var(--good)}.providers{display:flex;flex-direction:column;gap:9px}.provider{border:1px solid var(--line);border-radius:14px;background:rgba(0,0,0,.12);padding:12px;cursor:pointer}.provider.active{border-color:var(--blue);box-shadow:0 0 0 1px rgba(96,165,250,.25)}.provider-top{display:flex;justify-content:space-between;align-items:center;gap:12px}.provider-name{font-weight:950}.provider-meta{color:var(--muted);font-size:.78rem;margin-top:5px}.bar{height:8px;background:#22223a;border-radius:999px;overflow:hidden;margin-top:10px}.bar>span{display:block;height:100%;background:linear-gradient(90deg,var(--violet),var(--blue));border-radius:999px}.toolbar{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px}.input{flex:1;min-width:180px;border:1px solid var(--line);background:#0c0c18;color:var(--text);border-radius:11px;padding:10px 12px;outline:none}.select{border:1px solid var(--line);background:#0c0c18;color:var(--text);border-radius:11px;padding:10px 12px}.toggle{display:flex;align-items:center;gap:7px;color:var(--soft);font-weight:800}.events{display:flex;flex-direction:column;gap:9px;max-height:620px;overflow:auto;padding-right:4px}.event{border:1px solid var(--line);border-left-width:4px;border-radius:14px;background:rgba(0,0,0,.16);padding:12px;display:grid;grid-template-columns:155px minmax(0,1fr) 
auto;gap:12px}.event.pick,.event.sticky_pick,.event.pick_retry_fresh,.event.model_detected{border-left-color:var(--blue)}.event.success{border-left-color:var(--good)}.event.rate_limited,.event.auth_failed,.event.all_suspended_pick,.event.all_suspended_withheld{border-left-color:var(--warn)}.event.network_retryable,.event.transient_status,.event.saturated_reuse,.event.sticky_saturated_reuse,.event.sticky_saturated_rotate,.event.inflight_timeout{border-left-color:var(--bad)}.time{color:var(--muted);font-variant-numeric:tabular-nums}.etype{font-weight:950}.msg{color:var(--soft);line-height:1.45;word-break:break-word}.key,.mono{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;color:#e9d5ff}.empty{padding:34px;text-align:center;color:var(--muted);border:1px dashed var(--line);border-radius:16px}.foot{color:var(--muted);margin-top:16px;line-height:1.55}.kbd{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;background:#23233a;border:1px solid #363653;border-radius:7px;padding:2px 6px;color:var(--text)}.detail-grid{display:grid;grid-template-columns:repeat(4,minmax(0,1fr));gap:10px;margin-bottom:12px}.mini{border:1px solid var(--line);border-radius:13px;padding:11px;background:rgba(0,0,0,.12)}.mini b{display:block;font-size:1.15rem;margin-top:4px}.key-table-wrap{overflow:auto}.key-table{width:100%;border-collapse:separate;border-spacing:0 8px;min-width:780px}.key-table th{color:var(--muted);font-size:.68rem;text-align:left;text-transform:uppercase;letter-spacing:.12em}.key-table td{background:rgba(0,0,0,.15);border-top:1px solid var(--line);border-bottom:1px solid var(--line);padding:10px;vertical-align:top}.key-table td:first-child{border-left:1px solid var(--line);border-radius:12px 0 0 12px}.key-table td:last-child{border-right:1px solid var(--line);border-radius:0 12px 12px 0}.status{font-weight:900}.status.used{color:var(--good)}.status.unused{color:var(--muted)}.model-chip{display:inline-flex;margin:2px 4px 2px 0;padding:4px 7px;border:1px solid 
var(--line);border-radius:999px;color:var(--soft);background:rgba(255,255,255,.035);font-size:.72rem}.section{margin-top:14px}.session-note{border:1px solid rgba(96,165,250,.28);background:rgba(96,165,250,.08);border-radius:14px;padding:12px;color:var(--soft);margin-bottom:12px}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);white-space:nowrap;border:0}@media(max-width:1050px){.grid{grid-template-columns:repeat(2,minmax(0,1fr))}.detail-grid{grid-template-columns:repeat(2,minmax(0,1fr))}.layout{grid-template-columns:1fr}.top{flex-direction:column}.event{grid-template-columns:1fr}.events{max-height:none}}@media(max-width:620px){.grid,.detail-grid{grid-template-columns:1fr}}
9
  </style>
10
  </head>
11
  <body>
 
56
  let state={events:[],providers:[],runtime:{routes:[]},paused:false,selected:localStorage.getItem('hc.keyRotator.provider')||''};
57
  const $=id=>document.getElementById(id), HTML_ESCAPE={'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
58
  const PICK_TYPES=['pick','sticky_pick','pick_retry_fresh','all_suspended_pick','saturated_reuse','sticky_saturated_reuse'];
59
+ const STALE_PENDING_MS=35000;
60
  function esc(v){return String(v??'').replace(/[&<>"']/g,ch=>HTML_ESCAPE[ch]);}
61
  function eventClass(v){return String(v||'event').replace(/[^a-z0-9_-]/gi,'_');}
62
  function fmtTime(ts){const d=new Date(ts);return isNaN(d)?'—':d.toLocaleString();}
63
  function isKeyEvent(e){return e.provider&&e.slot&&(e.key||e.type==='all_suspended_withheld');}
64
  function keyId(provider,slot,key){return `${provider}::${slot||''}::${key||''}`;}
65
  function eventText(e){const bits=[];if(e.provider)bits.push(e.provider);if(e.slot)bits.push(`#${e.slot}/${e.total}`);if(e.key)bits.push(e.key);if(e.model)bits.push(`model=${e.model}`);if(e.status)bits.push(`status=${e.status}`);if(e.errorStatus)bits.push(`error=${e.errorStatus}`);if(e.errorReason)bits.push(`reason=${e.errorReason}`);if(e.errorType)bits.push(`type=${e.errorType}`);if(e.waitMs)bits.push(`wait=${Math.round(e.waitMs/1000)}s`);if(e.code)bits.push(`code=${e.code}`);if(e.errorCode)bits.push(`errorCode=${e.errorCode}`);if(e.inflight)bits.push(`inflight=${e.inflight}/${e.maxInflight||'?'}`);return bits.join(' · ');}
66
+ function emptyStat(provider,slot,total,key){return{provider,slot,total,key,picks:0,success:0,rate:0,retry:0,auth:0,transient:0,last:'',lastPick:'',models:new Map()};}
67
+ function buildStats(){const stats=new Map();for(const p of state.providers){for(const k of (p.keys||[]))stats.set(keyId(p.name,k.slot,k.key),emptyStat(p.name,k.slot,k.total,k.key));}for(const e of state.events){if(!isKeyEvent(e))continue;const id=keyId(e.provider,e.slot,e.key);if(!stats.has(id))stats.set(id,emptyStat(e.provider,e.slot,e.total,e.key||'***'));const s=stats.get(id);const isPick=PICK_TYPES.includes(e.type);const isOutcome=['success','rate_limited','network_retryable','auth_failed','transient_status','inflight_timeout'].includes(e.type);if(isPick){s.picks++;s.lastPick=e.ts||s.lastPick;}if(e.type==='success')s.success++;if(e.type==='rate_limited')s.rate++;if(e.type==='network_retryable')s.retry++;if(e.type==='auth_failed')s.auth++;if(e.type==='transient_status'||e.type==='inflight_timeout')s.transient++;s.last=e.ts||s.last;if(e.model){let m=s.models.get(e.model);if(!m)m={picks:0,success:0,rate:0,retry:0,auth:0,transient:0,last:'',lastPick:'',observed:false};if(e.type==='model_detected')m.observed=true;if(isPick){m.picks++;m.lastPick=e.ts||m.lastPick;}if(e.type==='success')m.success++;if(e.type==='rate_limited')m.rate++;if(e.type==='network_retryable')m.retry++;if(e.type==='auth_failed')m.auth++;if(e.type==='transient_status'||e.type==='inflight_timeout')m.transient++;m.last=e.ts||m.last;s.models.set(e.model,m);}else if(isPick||isOutcome){s.unscoped=s.unscoped||{picks:0,success:0,rate:0,retry:0,auth:0,transient:0,last:'',lastPick:''};if(isPick){s.unscoped.picks++;s.unscoped.lastPick=e.ts||s.unscoped.lastPick;}if(e.type==='success')s.unscoped.success++;if(e.type==='rate_limited')s.unscoped.rate++;if(e.type==='network_retryable')s.unscoped.retry++;if(e.type==='auth_failed')s.unscoped.auth++;if(e.type==='transient_status'||e.type==='inflight_timeout')s.unscoped.transient++;s.unscoped.last=e.ts||s.unscoped.last;}}const now=Date.now();for(const s of stats.values()){ageStalePending(s,now);for(const m of 
s.models.values())ageStalePending(m,now);if(s.unscoped)ageStalePending(s.unscoped,now);}return stats;}
68
+ function ageStalePending(v,now){const pending=pendingCount(v);if(!pending)return;const ts=Date.parse(v.lastPick||v.last||'');if(Number.isFinite(ts)&&now-ts>STALE_PENDING_MS){v.transient=(v.transient||0)+pending;v.stalePending=(v.stalePending||0)+pending;}}
69
  function providerRows(provider,stats){return (provider?.keys||[]).map(k=>stats.get(keyId(provider.name,k.slot,k.key))||emptyStat(provider.name,k.slot,k.total,k.key));}
70
  function pendingCount(v){return Math.max(0,(v.picks||0)-((v.success||0)+(v.rate||0)+(v.retry||0)+(v.transient||0)+(v.auth||0)));}
71
  function modelChips(row){const entries=[...row.models.entries()];const un=row.unscoped||{picks:0,success:0,rate:0,retry:0,auth:0,transient:0};const unPending=pendingCount(un);if(!entries.length){const total=row.picks+row.success+row.rate+row.retry+row.transient+row.auth;return total?`<span class="model-chip">unscoped · p:${row.picks} ok:${row.success} pending:${pendingCount(row)} rl:${row.rate} retry:${row.retry+row.transient}</span>`:'<span class="model-chip">no model events yet</span>';}const chips=entries.map(([m,v])=>{const onlyObserved=v.observed&&!v.picks&&!v.success&&!v.rate&&!v.retry&&!v.transient&&!v.auth;return `<span class="model-chip">${esc(m)} · ${onlyObserved?'observed':`p:${v.picks} ok:${v.success} pending:${pendingCount(v)} rl:${v.rate} retry:${v.retry+v.transient}`}</span>`;});if(un.picks||un.success||un.rate||un.retry||un.transient||un.auth)chips.push(`<span class="model-chip">unscoped totals · p:${un.picks} ok:${un.success} pending:${unPending} rl:${un.rate} retry:${un.retry+un.transient}</span>`);return chips.join('');}
multi-provider-key-rotator.cjs CHANGED
@@ -112,7 +112,7 @@ const EVENT_LOG_FILE = process.env.KEY_ROTATOR_EVENT_LOG_FILE || '/tmp/huggingcl
112
  const EVENT_LOG_MAX_BYTES = Math.max(64 * 1024, parseInt(process.env.KEY_ROTATOR_EVENT_LOG_MAX_BYTES || '', 10) || 1024 * 1024);
113
  const INFLIGHT_TTL_MS = Math.max(
114
  30_000,
115
- Math.min(30 * 60_000, parseInt(process.env.KEY_INFLIGHT_TTL_MS || '', 10) || 5 * 60_000),
116
  );
117
  const REQUEST_MODEL_SNIFF_MAX_BYTES = Math.max(
118
  16 * 1024,
@@ -141,6 +141,7 @@ const STICKY_PROVIDER_SET = new Set(
141
  .map(s => s.trim().toLowerCase())
142
  .filter(Boolean),
143
  );
 
144
 
145
  // Maximum ms to respect from a Retry-After header.
146
  // Old cap was 10s — too low for Gemini/Google which often returns 60s+.
@@ -440,17 +441,27 @@ function isGeminiOpenAICompatPath(pathOrUrl) {
440
  */
441
  function getKeyExpiry(p, key, model) {
442
  let expiry = p.keyState.get(key)?.blacklistedUntil ?? 0;
443
- if (p.modelKeyState && model) {
444
- const mks = p.modelKeyState.get(`${key}:${model}`);
445
  if (mks && mks.blacklistedUntil > expiry) expiry = mks.blacklistedUntil;
446
  }
447
  return expiry;
448
  }
449
 
450
  function stickyBucketForProvider(p, model) {
451
- const rawScope = String(process.env.KEY_STICKY_SCOPE || 'provider').trim().toLowerCase();
452
- const scope = rawScope === 'model' || rawScope === 'per-model' ? 'model' : 'provider';
453
- return scope === 'provider' ? '__provider__' : (model || '__default__');
 
 
 
 
 
 
 
 
 
 
454
  }
455
 
456
  function isStickyProvider(p) {
@@ -464,14 +475,17 @@ function rememberStickyKey(p, model, key) {
464
 
465
  function clearStickyKey(p, key, model) {
466
  if (!p?.stickyKeys || !key) return;
467
- if (model) {
 
468
  const bucket = stickyBucketForProvider(p, model);
469
  if (p.stickyKeys.get(bucket) === key) p.stickyKeys.delete(bucket);
470
  // Also clear the ambiguous fallback bucket if this key was selected before
471
  // a Gemini OpenAI-compatible request body revealed its model. Do not clear
472
  // other model buckets: Gemini quota failures are model-scoped.
473
- const fallbackBucket = stickyBucketForProvider(p, null);
474
- if (p.stickyKeys.get(fallbackBucket) === key) p.stickyKeys.delete(fallbackBucket);
 
 
475
  return;
476
  }
477
  for (const [bucket, stickyKey] of p.stickyKeys) {
@@ -632,14 +646,15 @@ function isActive(p, key, model) {
632
  }
633
 
634
  // ── Per-model check (gemini etc.) ──────────────────────────────────────────
635
- if (p.modelKeyState && model) {
636
- const mKey = `${key}:${model}`;
 
637
  const mks = p.modelKeyState.get(mKey);
638
  if (mks && mks.blacklistedUntil !== 0) {
639
- if (Date.now() < mks.blacklistedUntil) return false; // blocked for this model
640
  mks.blacklistedUntil = 0;
641
  if (mks.strikes > 0) mks.strikes -= 1;
642
- debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} back in pool for model=${model} (strikes now ${mks.strikes})`);
643
  }
644
  }
645
 
@@ -668,8 +683,9 @@ function recordFailure(p, key, model, retryAfterMs) {
668
  // our exponential cooldown. This prevents hammering a key before its quota resets.
669
  const serverHintMs = (typeof retryAfterMs === 'number' && retryAfterMs > 0) ? retryAfterMs : 0;
670
 
671
- if (p.modelKeyState && model) {
672
- const mKey = `${key}:${model}`;
 
673
  let mks = p.modelKeyState.get(mKey);
674
  if (!mks) { mks = makeKeyState(); p.modelKeyState.set(mKey, mks); }
675
 
@@ -689,9 +705,9 @@ function recordFailure(p, key, model, retryAfterMs) {
689
  // ★ Set blacklistedUntil FIRST so it is always written even if the log below throws.
690
  mks.blacklistedUntil = Math.max(mks.blacklistedUntil || 0, Date.now() + cooldown);
691
  if (isPerm)
692
- warn(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model} hit ${MAX_STRIKES} strikes — suspended for ${formatHours(PERM_SUSPEND_MS)}h (quota likely exhausted for this model)`);
693
  else
694
- debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model} strike ${mks.strikes}/${MAX_STRIKES} — backoff ${Math.round(cooldown / 1000)}s${serverHintMs > 0 ? ` (server-hint ${Math.round(serverHintMs/1000)}s)` : ''}`);
695
  return;
696
  }
697
 
@@ -725,15 +741,18 @@ function recordFailure(p, key, model, retryAfterMs) {
725
  * Called on transient retryable failures (non-quota/rate):
726
  * applies short cooldown without incrementing strikes.
727
  */
728
- function recordTransientFailure(p, key) {
729
- let ks = p.keyState.get(key);
730
- if (!ks) { ks = makeKeyState(); p.keyState.set(key, ks); }
 
 
 
731
  ks.lastFailureAt = Date.now();
732
  const jitter = 1 + ((Math.random() * 2 - 1) * (COOLDOWN_JITTER_PCT / 100));
733
  const cooldown = Math.max(1000, Math.round(BASE_COOLDOWN_MS * jitter));
734
  ks.blacklistedUntil = Math.max(ks.blacklistedUntil || 0, Date.now() + cooldown);
735
  const secs = Math.round(cooldown / 1000);
736
- debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} transient backoff ${secs}s (strikes unchanged)`);
737
  }
738
 
739
  /**
@@ -754,15 +773,17 @@ function recordSuccess(p, key, model) {
754
  }
755
  }
756
 
757
- // Also clear model-specific state on success
758
- if (p.modelKeyState && model) {
759
- const mKey = `${key}:${model}`;
 
 
760
  const mks = p.modelKeyState.get(mKey);
761
  if (mks && (mks.strikes > 0 || mks.blacklistedUntil > 0)) {
762
  mks.strikes = 0;
763
  mks.lastFailureAt = 0;
764
  mks.blacklistedUntil = 0;
765
- debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model} recovered — strikes reset`);
766
  }
767
  }
768
  }
@@ -802,9 +823,9 @@ function removeInFlightToken(p, key, token) {
802
  else p.inFlightTimers.delete(key);
803
  }
804
 
805
- function beginInFlight(p, key) {
806
  if (!p || !key) return null;
807
- const token = { done: false, timer: null };
808
  p.inFlight.set(key, (p.inFlight.get(key) || 0) + 1);
809
  token.timer = setTimeout(() => {
810
  if (token.done) return;
@@ -813,7 +834,14 @@ function beginInFlight(p, key) {
813
  const before = p.inFlight.get(key) || 0;
814
  if (before > 0) {
815
  const next = decrementInFlight(p, key);
816
- emitEvent('inflight_timeout', p, key, { inflightBefore: before, inflightAfter: next, ttlMs: INFLIGHT_TTL_MS });
 
 
 
 
 
 
 
817
  }
818
  }, INFLIGHT_TTL_MS);
819
  token.timer.unref?.();
@@ -874,16 +902,25 @@ function nextKey(p, model) {
874
  const stickyKey = p.stickyKeys.get(stickyBucketForProvider(p, model));
875
  if (stickyKey && p.keys.includes(stickyKey) && isActive(p, stickyKey, model)) {
876
  const inflight = p.inFlight.get(stickyKey) || 0;
877
- verbosePickLog(`[key-rotator] ${p.name}: sticky picked ${keySlot(p, stickyKey)}${keyMask(stickyKey)}${model ? ` model=${model}` : ''} inflight=${inflight + 1}/${MAX_INFLIGHT_PER_KEY}`);
878
- if (inflight >= MAX_INFLIGHT_PER_KEY) {
879
- warn(`[key-rotator] ${p.name}: sticky key saturated, still reusing ${keySlot(p, stickyKey)}${keyMask(stickyKey)}${model ? ` model=${model}` : ''} inflight=${inflight + 1}/${MAX_INFLIGHT_PER_KEY} until it fails/exhausts`);
880
- emitEvent('sticky_saturated_reuse', p, stickyKey, { model, inflight: inflight + 1, maxInflight: MAX_INFLIGHT_PER_KEY });
881
- } else {
882
  emitEvent('sticky_pick', p, stickyKey, { model, inflight: inflight + 1, maxInflight: MAX_INFLIGHT_PER_KEY });
 
883
  }
884
- return { key: stickyKey, waitMs: 0 };
 
 
 
 
 
 
 
 
 
 
 
 
885
  }
886
- if (stickyKey) clearStickyKey(p, stickyKey, model);
887
  }
888
 
889
  let bestPick = null;
@@ -907,7 +944,7 @@ function nextKey(p, model) {
907
  // Score: prefer keys with fewer recent failures and lower in-flight count.
908
  // For perModelLimits, also factor in model-specific strike count.
909
  const ks = p.keyState.get(key) || makeKeyState();
910
- const mks = (p.modelKeyState && model) ? (p.modelKeyState.get(`${key}:${model}`) || makeKeyState()) : makeKeyState();
911
  const recentFailPenalty =
912
  (ks.lastFailureAt > 0 && (Date.now() - ks.lastFailureAt) < FAILURE_DECAY_MS ? 100 : 0) +
913
  (mks.lastFailureAt > 0 && (Date.now() - mks.lastFailureAt) < FAILURE_DECAY_MS ? 100 : 0);
@@ -1047,9 +1084,11 @@ function handleStatus(p, key, status, model, retryAfterMs, errorInfo) {
1047
  }
1048
 
1049
  if (failureKind === 'transient') {
1050
- // Transient server errors are not model-specific penalise key globally.
1051
- recordTransientFailure(p, key);
1052
- clearStickyKey(p, key);
 
 
1053
  warn(`[key-rotator] ${p.name}: transient status=${status} on ${keySlot(p, key)}${keyMask(key)}`);
1054
  emitEvent('transient_status', p, key, { status, model, ...errorFields });
1055
  return;
@@ -1061,7 +1100,7 @@ function handleStatus(p, key, status, model, retryAfterMs, errorInfo) {
1061
  }
1062
  }
1063
 
1064
- function handleTransportError(p, key, err) {
1065
  if (!p || !key) return;
1066
  // Node.js 18+ undici fetch throws TypeError: "fetch failed" where the actual
1067
  // network error code lives in err.cause.code (e.g. ECONNRESET, ETIMEDOUT,
@@ -1071,12 +1110,22 @@ function handleTransportError(p, key, err) {
1071
  ? String(err.code || err.cause?.code).toUpperCase()
1072
  : '';
1073
  const name = String(err?.name || '');
 
 
 
 
 
 
 
 
 
 
1074
  const retryable = classifyRetryableFailure(undefined, code) || name === 'AbortError';
1075
  if (retryable) {
1076
- recordTransientFailure(p, key);
1077
- clearStickyKey(p, key);
1078
- warn(`[key-rotator] ${p.name}: retryable network ${name || 'Error'}${code ? ` code=${code}` : ''} on ${keySlot(p, key)}${keyMask(key)}`);
1079
- emitEvent('network_retryable', p, key, { name: name || 'Error', code });
1080
  }
1081
  }
1082
 
@@ -1614,7 +1663,7 @@ function wrapUndiciHandler(handler, provider, key, inFlightToken, getModel) {
1614
  } finally {
1615
  // User handlers may throw/rethrow; the rotator still owns the
1616
  // in-flight token and transport error classification for this key.
1617
- settle(() => { if (!statusHandled) { try { handleTransportError(provider, key, err); } catch (_) {} } });
1618
  }
1619
  };
1620
  }
@@ -1680,7 +1729,7 @@ function patchUndiciDispatch(proto, tag) {
1680
 
1681
  if (key) {
1682
  usedKey = key; usedProvider = provider; usedModel = model;
1683
- usedInFlight = beginInFlight(usedProvider, usedKey);
1684
 
1685
  const newOptions = { ...options };
1686
 
@@ -1709,7 +1758,7 @@ function patchUndiciDispatch(proto, tag) {
1709
  usedProvider,
1710
  usedKey,
1711
  () => usedModel,
1712
- (model) => { usedModel = model; },
1713
  );
1714
  const wrappedHandler = wrapUndiciHandler(handler, usedProvider, usedKey, usedInFlight, () => usedModel);
1715
  return runInRotatorRequest(() => origDispatch.call(this, newOptions, wrappedHandler));
@@ -1892,7 +1941,7 @@ function patchFetch() {
1892
  if (key) {
1893
  triedKeys.add(key);
1894
  usedKey = key;
1895
- usedInFlight = beginInFlight(provider, key);
1896
  }
1897
 
1898
  const attemptArgs = buildAttemptFetchArgs(input, init, provider, usedKey);
@@ -1923,7 +1972,7 @@ function patchFetch() {
1923
  } catch (err) {
1924
  lastErr = err;
1925
  try { endInFlight(provider, usedKey, usedInFlight); } catch (_) {}
1926
- try { handleTransportError(provider, usedKey, err); } catch (_) {}
1927
  // Node.js 18+ undici fetch: network errors are TypeError("fetch failed")
1928
  // where the real code (ECONNRESET, ETIMEDOUT, ENOTFOUND …) is in
1929
  // err.cause.code. Check that first before falling back to err.code.
@@ -1989,7 +2038,7 @@ function patchHttpModule(mod) {
1989
 
1990
  if (key) {
1991
  usedKey = key; usedProvider = provider; usedModel = model;
1992
- usedInFlight = beginInFlight(usedProvider, usedKey);
1993
  if (provider.queryParam) {
1994
  const hasOptionsArg = args[1] && typeof args[1] === 'object' && typeof args[1].on !== 'function';
1995
  const u = new URL(String(
@@ -2081,6 +2130,7 @@ function patchHttpModule(mod) {
2081
  const bodyModel = extractModelFromBody(fullBody);
2082
  if (bodyModel) {
2083
  usedModel = bodyModel;
 
2084
  promoteStickyKeyModel(usedProvider, usedKey, null, usedModel);
2085
  emitEvent('model_detected', usedProvider, usedKey, { model: usedModel, source: 'http_request_body' });
2086
  debug(`[key-rotator] ${usedProvider.name}: (http) model extracted from request body: ${usedModel}`);
@@ -2149,7 +2199,7 @@ function patchHttpModule(mod) {
2149
  req.on('error', (err) => {
2150
  try { endInFlight(usedProvider, usedKey, usedInFlight); } catch (_) {}
2151
  if (!statusHandled) {
2152
- try { handleTransportError(usedProvider, usedKey, err); } catch (_) {}
2153
  }
2154
  });
2155
  }
@@ -2181,7 +2231,7 @@ if (hasProviderKeys) {
2181
  patchUndici(); // covers OpenClaw gateway's bundled undici AI calls
2182
  startDiagnostics();
2183
 
2184
- debug(`[key-rotator] loaded — cooldown base:${BASE_COOLDOWN_MS/1000}s max-strikes:${MAX_STRIKES} perm-suspend:${formatHours(PERM_SUSPEND_MS)}h (cap 16h) max-inflight-per-key:${MAX_INFLIGHT_PER_KEY} max-retry-after:${MAX_RETRY_AFTER_MS/1000}s max-key-wait:${MAX_KEY_WAIT_MS/1000}s diagnostics:${DIAGNOSTICS_ENABLED ? 'on' : 'off'} log-level:${LOG_LEVEL} verbose-picks:${VERBOSE_PICKS ? 'on' : 'off'} suspended-last-resort:${USE_SUSPENDED_KEY_AS_LAST_RESORT ? 'on' : 'off'} per-model-providers:${providerState.filter(p => p.perModelLimits).map(p => p.name).join(',') || 'none'} model-from-body:on model-sniff-max:${REQUEST_MODEL_SNIFF_MAX_BYTES} error-sniff-max:${ERROR_BODY_SNIFF_MAX_BYTES} inflight-ttl:${INFLIGHT_TTL_MS}ms sticky-until-failure:${STICKY_UNTIL_FAILURE ? 'on' : 'off'} sticky-scope:${String(process.env.KEY_STICKY_SCOPE || 'provider').trim().toLowerCase() || 'provider'} sticky-providers:${[...STICKY_PROVIDER_SET].join(',') || 'none'} llm-fallback-providers:${LLM_FALLBACK_PROVIDER_SET ? [...LLM_FALLBACK_PROVIDER_SET].join(',') : 'all'}`);
2185
  emitEvent('rotator_loaded', null, null, {
2186
  providers: providerState.filter(p => p.keys.length).map(p => ({ name: p.name, total: p.keys.length })),
2187
  logLevel: LOG_LEVEL,
@@ -2190,7 +2240,7 @@ if (hasProviderKeys) {
2190
  modelSniffMaxBytes: REQUEST_MODEL_SNIFF_MAX_BYTES,
2191
  errorBodySniffMaxBytes: ERROR_BODY_SNIFF_MAX_BYTES,
2192
  stickyUntilFailure: STICKY_UNTIL_FAILURE,
2193
- stickyScope: String(process.env.KEY_STICKY_SCOPE || 'provider').trim().toLowerCase() || 'provider',
2194
  stickyProviders: [...STICKY_PROVIDER_SET],
2195
  llmFallbackProviders: LLM_FALLBACK_PROVIDER_SET ? [...LLM_FALLBACK_PROVIDER_SET] : ['*'],
2196
  });
 
112
  const EVENT_LOG_MAX_BYTES = Math.max(64 * 1024, parseInt(process.env.KEY_ROTATOR_EVENT_LOG_MAX_BYTES || '', 10) || 1024 * 1024);
113
  const INFLIGHT_TTL_MS = Math.max(
114
  30_000,
115
+ Math.min(30 * 60_000, parseInt(process.env.KEY_INFLIGHT_TTL_MS || '', 10) || 30_000),
116
  );
117
  const REQUEST_MODEL_SNIFF_MAX_BYTES = Math.max(
118
  16 * 1024,
 
141
  .map(s => s.trim().toLowerCase())
142
  .filter(Boolean),
143
  );
144
+ const UNKNOWN_MODEL_SCOPE = '__unknown_model__';
145
 
146
  // Maximum ms to respect from a Retry-After header.
147
  // Old cap was 10s — too low for Gemini/Google which often returns 60s+.
 
441
  */
442
  function getKeyExpiry(p, key, model) {
443
  let expiry = p.keyState.get(key)?.blacklistedUntil ?? 0;
444
+ if (p.modelKeyState) {
445
+ const mks = p.modelKeyState.get(`${key}:${scopedModelKey(model)}`);
446
  if (mks && mks.blacklistedUntil > expiry) expiry = mks.blacklistedUntil;
447
  }
448
  return expiry;
449
  }
450
 
451
  function stickyBucketForProvider(p, model) {
452
+ const rawScope = String(process.env.KEY_STICKY_SCOPE || '').trim().toLowerCase();
453
+ const scope = rawScope === 'provider'
454
+ ? 'provider'
455
+ : rawScope === 'model' || rawScope === 'per-model'
456
+ ? 'model'
457
+ : p?.perModelLimits
458
+ ? 'model'
459
+ : 'provider';
460
+ return scope === 'provider' ? '__provider__' : (model || UNKNOWN_MODEL_SCOPE);
461
+ }
462
+
463
// Returns the model name used in per-model state keys, substituting the
// shared unknown-model scope when no model name is available.
function scopedModelKey(model) {
  if (model) return model;
  return UNKNOWN_MODEL_SCOPE;
}
466
 
467
  function isStickyProvider(p) {
 
475
 
476
  function clearStickyKey(p, key, model) {
477
  if (!p?.stickyKeys || !key) return;
478
+ const hasScopedModelArg = arguments.length >= 3;
479
+ if (hasScopedModelArg) {
480
  const bucket = stickyBucketForProvider(p, model);
481
  if (p.stickyKeys.get(bucket) === key) p.stickyKeys.delete(bucket);
482
  // Also clear the ambiguous fallback bucket if this key was selected before
483
  // a Gemini OpenAI-compatible request body revealed its model. Do not clear
484
  // other model buckets: Gemini quota failures are model-scoped.
485
+ if (model) {
486
+ const fallbackBucket = stickyBucketForProvider(p, null);
487
+ if (p.stickyKeys.get(fallbackBucket) === key) p.stickyKeys.delete(fallbackBucket);
488
+ }
489
  return;
490
  }
491
  for (const [bucket, stickyKey] of p.stickyKeys) {
 
646
  }
647
 
648
  // ── Per-model check (gemini etc.) ──────────────────────────────────────────
649
+ if (p.modelKeyState) {
650
+ const scopedModel = scopedModelKey(model);
651
+ const mKey = `${key}:${scopedModel}`;
652
  const mks = p.modelKeyState.get(mKey);
653
  if (mks && mks.blacklistedUntil !== 0) {
654
+ if (Date.now() < mks.blacklistedUntil) return false; // blocked for this model/unknown-model scope
655
  mks.blacklistedUntil = 0;
656
  if (mks.strikes > 0) mks.strikes -= 1;
657
+ debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} back in pool for model=${model || 'unknown'} (strikes now ${mks.strikes})`);
658
  }
659
  }
660
 
 
683
  // our exponential cooldown. This prevents hammering a key before its quota resets.
684
  const serverHintMs = (typeof retryAfterMs === 'number' && retryAfterMs > 0) ? retryAfterMs : 0;
685
 
686
+ if (p.modelKeyState) {
687
+ const scopedModel = scopedModelKey(model);
688
+ const mKey = `${key}:${scopedModel}`;
689
  let mks = p.modelKeyState.get(mKey);
690
  if (!mks) { mks = makeKeyState(); p.modelKeyState.set(mKey, mks); }
691
 
 
705
  // ★ Set blacklistedUntil FIRST so it is always written even if the log below throws.
706
  mks.blacklistedUntil = Math.max(mks.blacklistedUntil || 0, Date.now() + cooldown);
707
  if (isPerm)
708
+ warn(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model || 'unknown'} hit ${MAX_STRIKES} strikes — suspended for ${formatHours(PERM_SUSPEND_MS)}h (quota likely exhausted for this model)`);
709
  else
710
+ debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model || 'unknown'} strike ${mks.strikes}/${MAX_STRIKES} — backoff ${Math.round(cooldown / 1000)}s${serverHintMs > 0 ? ` (server-hint ${Math.round(serverHintMs/1000)}s)` : ''}`);
711
  return;
712
  }
713
 
 
741
  * Called on transient retryable failures (non-quota/rate):
742
  * applies short cooldown without incrementing strikes.
743
  */
744
function recordTransientFailure(p, key, model = null) {
  if (!p || !key) return;

  // Providers that carry modelKeyState record the cooldown on the key+model
  // (or key+unknown-model) entry; all others record it on the key itself.
  const perModel = Boolean(p.modelKeyState);
  const states = perModel ? p.modelKeyState : p.keyState;
  const slot = perModel ? `${key}:${scopedModelKey(model)}` : key;

  let state = states.get(slot);
  if (!state) {
    state = makeKeyState();
    states.set(slot, state);
  }
  state.lastFailureAt = Date.now();

  // Base cooldown with +/- jitter, never shorter than one second.
  const jitterFactor = 1 + ((Math.random() * 2 - 1) * (COOLDOWN_JITTER_PCT / 100));
  const cooldownMs = Math.max(1000, Math.round(BASE_COOLDOWN_MS * jitterFactor));

  // Only ever extend an existing blacklist deadline; strikes are left untouched.
  state.blacklistedUntil = Math.max(state.blacklistedUntil || 0, Date.now() + cooldownMs);

  const secs = Math.round(cooldownMs / 1000);
  debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} transient backoff ${secs}s${p.modelKeyState ? ` model=${model || 'unknown'}` : ''} (strikes unchanged)`);
}
757
 
758
  /**
 
773
  }
774
  }
775
 
776
+ // Also clear model-specific state on success. If model is still unknown,
777
+ // clear only the unknown-model scope; never clear other Gemini model buckets.
778
+ if (p.modelKeyState) {
779
+ const scopedModel = scopedModelKey(model);
780
+ const mKey = `${key}:${scopedModel}`;
781
  const mks = p.modelKeyState.get(mKey);
782
  if (mks && (mks.strikes > 0 || mks.blacklistedUntil > 0)) {
783
  mks.strikes = 0;
784
  mks.lastFailureAt = 0;
785
  mks.blacklistedUntil = 0;
786
+ debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model || 'unknown'} recovered — strikes reset`);
787
  }
788
  }
789
  }
 
823
  else p.inFlightTimers.delete(key);
824
  }
825
 
826
+ function beginInFlight(p, key, model = null) {
827
  if (!p || !key) return null;
828
+ const token = { done: false, timer: null, model: model || null };
829
  p.inFlight.set(key, (p.inFlight.get(key) || 0) + 1);
830
  token.timer = setTimeout(() => {
831
  if (token.done) return;
 
834
  const before = p.inFlight.get(key) || 0;
835
  if (before > 0) {
836
  const next = decrementInFlight(p, key);
837
+ // A timeout here means the rotator saw a key pick but no provider headers,
838
+ // completion, or transport error before the TTL. For OpenClaw failover
839
+ // paths this otherwise leaves the sticky bucket pinned forever and the
840
+ // dashboard shows only "pending/no response observed". Treat it as a
841
+ // transient key failure so the next request can rotate to another key.
842
+ try { recordTransientFailure(p, key, token.model || null); } catch (_) {}
843
+ try { clearStickyKey(p, key, token.model || null); } catch (_) {}
844
+ emitEvent('inflight_timeout', p, key, { model: token.model || null, inflightBefore: before, inflightAfter: next, ttlMs: INFLIGHT_TTL_MS, classifiedAs: 'transient' });
845
  }
846
  }, INFLIGHT_TTL_MS);
847
  token.timer.unref?.();
 
902
  const stickyKey = p.stickyKeys.get(stickyBucketForProvider(p, model));
903
  if (stickyKey && p.keys.includes(stickyKey) && isActive(p, stickyKey, model)) {
904
  const inflight = p.inFlight.get(stickyKey) || 0;
905
+ if (inflight < MAX_INFLIGHT_PER_KEY) {
906
+ verbosePickLog(`[key-rotator] ${p.name}: sticky picked ${keySlot(p, stickyKey)}${keyMask(stickyKey)}${model ? ` model=${model}` : ''} inflight=${inflight + 1}/${MAX_INFLIGHT_PER_KEY}`);
 
 
 
907
  emitEvent('sticky_pick', p, stickyKey, { model, inflight: inflight + 1, maxInflight: MAX_INFLIGHT_PER_KEY });
908
+ return { key: stickyKey, waitMs: 0 };
909
  }
910
+
911
+ // Do not keep piling requests onto one sticky key when OpenClaw has not
912
+ // produced provider headers/completion/error for previous picks. That was
913
+ // the real cause of dashboards like "pick 14, pending 14, rate 0": sticky
914
+ // mode intentionally reused the same key even after it was saturated. Clear
915
+ // the bucket and let the normal active-key scan below choose another key;
916
+ // only if every key is saturated will the fallback path reuse the least-bad
917
+ // candidate.
918
+ warn(`[key-rotator] ${p.name}: sticky key saturated, rotating away from ${keySlot(p, stickyKey)}${keyMask(stickyKey)}${model ? ` model=${model}` : ''} inflight=${inflight}/${MAX_INFLIGHT_PER_KEY}`);
919
+ emitEvent('sticky_saturated_rotate', p, stickyKey, { model, inflight, maxInflight: MAX_INFLIGHT_PER_KEY });
920
+ clearStickyKey(p, stickyKey, model);
921
+ } else if (stickyKey) {
922
+ clearStickyKey(p, stickyKey, model);
923
  }
 
924
  }
925
 
926
  let bestPick = null;
 
944
  // Score: prefer keys with fewer recent failures and lower in-flight count.
945
  // For perModelLimits, also factor in model-specific strike count.
946
  const ks = p.keyState.get(key) || makeKeyState();
947
+ const mks = p.modelKeyState ? (p.modelKeyState.get(`${key}:${scopedModelKey(model)}`) || makeKeyState()) : makeKeyState();
948
  const recentFailPenalty =
949
  (ks.lastFailureAt > 0 && (Date.now() - ks.lastFailureAt) < FAILURE_DECAY_MS ? 100 : 0) +
950
  (mks.lastFailureAt > 0 && (Date.now() - mks.lastFailureAt) < FAILURE_DECAY_MS ? 100 : 0);
 
1084
  }
1085
 
1086
  if (failureKind === 'transient') {
1087
+ // For per-model providers, keep transient cooldowns scoped to the current
1088
+ // model/unknown-model bucket so one Gemini model does not suppress the key
1089
+ // for all other Gemini models.
1090
+ recordTransientFailure(p, key, model);
1091
+ clearStickyKey(p, key, model);
1092
  warn(`[key-rotator] ${p.name}: transient status=${status} on ${keySlot(p, key)}${keyMask(key)}`);
1093
  emitEvent('transient_status', p, key, { status, model, ...errorFields });
1094
  return;
 
1100
  }
1101
  }
1102
 
1103
+ function handleTransportError(p, key, err, model = null) {
1104
  if (!p || !key) return;
1105
  // Node.js 18+ undici fetch throws TypeError: "fetch failed" where the actual
1106
  // network error code lives in err.cause.code (e.g. ECONNRESET, ETIMEDOUT,
 
1110
  ? String(err.code || err.cause?.code).toUpperCase()
1111
  : '';
1112
  const name = String(err?.name || '');
1113
+ const message = String(err?.message || err?.cause?.message || '');
1114
+ const haystack = `${name} ${message}`.toLowerCase();
1115
+ const looksRateOrQuota = /rate.?limit|too.?many|quota|resource.?exhaust|usage.?limit|insufficient.?quota|capacity.?exceeded|tokens?.?per|requests?.?per|rate_limit|rate.?limited|userratelimit|dailylimit|limitexceeded/.test(haystack);
1116
+ if (looksRateOrQuota) {
1117
+ recordFailure(p, key, model, 0);
1118
+ clearStickyKey(p, key, model);
1119
+ warn(`[key-rotator] ${p.name}: transport/failover quota signal ${name || 'Error'}${code ? ` code=${code}` : ''} on ${keySlot(p, key)}${keyMask(key)}${model ? ` model=${model}` : ''}`);
1120
+ emitEvent('rate_limited', p, key, { model, name: name || 'Error', code, source: 'transport_error', message: message.slice(0, 240) });
1121
+ return;
1122
+ }
1123
  const retryable = classifyRetryableFailure(undefined, code) || name === 'AbortError';
1124
  if (retryable) {
1125
+ recordTransientFailure(p, key, model);
1126
+ clearStickyKey(p, key, model);
1127
+ warn(`[key-rotator] ${p.name}: retryable network ${name || 'Error'}${code ? ` code=${code}` : ''} on ${keySlot(p, key)}${keyMask(key)}${model ? ` model=${model}` : ''}`);
1128
+ emitEvent('network_retryable', p, key, { model, name: name || 'Error', code });
1129
  }
1130
  }
1131
 
 
1663
  } finally {
1664
  // User handlers may throw/rethrow; the rotator still owns the
1665
  // in-flight token and transport error classification for this key.
1666
+ settle(() => { if (!statusHandled) { try { handleTransportError(provider, key, err, currentModel()); } catch (_) {} } });
1667
  }
1668
  };
1669
  }
 
1729
 
1730
  if (key) {
1731
  usedKey = key; usedProvider = provider; usedModel = model;
1732
+ usedInFlight = beginInFlight(usedProvider, usedKey, usedModel);
1733
 
1734
  const newOptions = { ...options };
1735
 
 
1758
  usedProvider,
1759
  usedKey,
1760
  () => usedModel,
1761
+ (model) => { usedModel = model; if (usedInFlight) usedInFlight.model = model; },
1762
  );
1763
  const wrappedHandler = wrapUndiciHandler(handler, usedProvider, usedKey, usedInFlight, () => usedModel);
1764
  return runInRotatorRequest(() => origDispatch.call(this, newOptions, wrappedHandler));
 
1941
  if (key) {
1942
  triedKeys.add(key);
1943
  usedKey = key;
1944
+ usedInFlight = beginInFlight(provider, key, model);
1945
  }
1946
 
1947
  const attemptArgs = buildAttemptFetchArgs(input, init, provider, usedKey);
 
1972
  } catch (err) {
1973
  lastErr = err;
1974
  try { endInFlight(provider, usedKey, usedInFlight); } catch (_) {}
1975
+ try { handleTransportError(provider, usedKey, err, model); } catch (_) {}
1976
  // Node.js 18+ undici fetch: network errors are TypeError("fetch failed")
1977
  // where the real code (ECONNRESET, ETIMEDOUT, ENOTFOUND …) is in
1978
  // err.cause.code. Check that first before falling back to err.code.
 
2038
 
2039
  if (key) {
2040
  usedKey = key; usedProvider = provider; usedModel = model;
2041
+ usedInFlight = beginInFlight(usedProvider, usedKey, usedModel);
2042
  if (provider.queryParam) {
2043
  const hasOptionsArg = args[1] && typeof args[1] === 'object' && typeof args[1].on !== 'function';
2044
  const u = new URL(String(
 
2130
  const bodyModel = extractModelFromBody(fullBody);
2131
  if (bodyModel) {
2132
  usedModel = bodyModel;
2133
+ if (usedInFlight) usedInFlight.model = usedModel;
2134
  promoteStickyKeyModel(usedProvider, usedKey, null, usedModel);
2135
  emitEvent('model_detected', usedProvider, usedKey, { model: usedModel, source: 'http_request_body' });
2136
  debug(`[key-rotator] ${usedProvider.name}: (http) model extracted from request body: ${usedModel}`);
 
2199
  req.on('error', (err) => {
2200
  try { endInFlight(usedProvider, usedKey, usedInFlight); } catch (_) {}
2201
  if (!statusHandled) {
2202
+ try { handleTransportError(usedProvider, usedKey, err, usedModel); } catch (_) {}
2203
  }
2204
  });
2205
  }
 
2231
  patchUndici(); // covers OpenClaw gateway's bundled undici AI calls
2232
  startDiagnostics();
2233
 
2234
+ debug(`[key-rotator] loaded — cooldown base:${BASE_COOLDOWN_MS/1000}s max-strikes:${MAX_STRIKES} perm-suspend:${formatHours(PERM_SUSPEND_MS)}h (cap 16h) max-inflight-per-key:${MAX_INFLIGHT_PER_KEY} max-retry-after:${MAX_RETRY_AFTER_MS/1000}s max-key-wait:${MAX_KEY_WAIT_MS/1000}s diagnostics:${DIAGNOSTICS_ENABLED ? 'on' : 'off'} log-level:${LOG_LEVEL} verbose-picks:${VERBOSE_PICKS ? 'on' : 'off'} suspended-last-resort:${USE_SUSPENDED_KEY_AS_LAST_RESORT ? 'on' : 'off'} per-model-providers:${providerState.filter(p => p.perModelLimits).map(p => p.name).join(',') || 'none'} model-from-body:on model-sniff-max:${REQUEST_MODEL_SNIFF_MAX_BYTES} error-sniff-max:${ERROR_BODY_SNIFF_MAX_BYTES} inflight-ttl:${INFLIGHT_TTL_MS}ms sticky-until-failure:${STICKY_UNTIL_FAILURE ? 'on' : 'off'} sticky-scope:${String(process.env.KEY_STICKY_SCOPE || 'auto').trim().toLowerCase() || 'auto'} sticky-providers:${[...STICKY_PROVIDER_SET].join(',') || 'none'} llm-fallback-providers:${LLM_FALLBACK_PROVIDER_SET ? [...LLM_FALLBACK_PROVIDER_SET].join(',') : 'all'}`);
2235
  emitEvent('rotator_loaded', null, null, {
2236
  providers: providerState.filter(p => p.keys.length).map(p => ({ name: p.name, total: p.keys.length })),
2237
  logLevel: LOG_LEVEL,
 
2240
  modelSniffMaxBytes: REQUEST_MODEL_SNIFF_MAX_BYTES,
2241
  errorBodySniffMaxBytes: ERROR_BODY_SNIFF_MAX_BYTES,
2242
  stickyUntilFailure: STICKY_UNTIL_FAILURE,
2243
+ stickyScope: String(process.env.KEY_STICKY_SCOPE || 'auto').trim().toLowerCase() || 'auto',
2244
  stickyProviders: [...STICKY_PROVIDER_SET],
2245
  llmFallbackProviders: LLM_FALLBACK_PROVIDER_SET ? [...LLM_FALLBACK_PROVIDER_SET] : ['*'],
2246
  });