Spaces:
Running
Running
Anurag committed on
Commit ·
affbd4d
1
Parent(s): fb81013
Keep Gemini failures model scoped
Browse files- .env.example +5 -3
- README.md +2 -1
- env-builder.js +12 -2
- key-rotator-manager.html +5 -3
- multi-provider-key-rotator.cjs +103 -53
.env.example
CHANGED
|
@@ -173,9 +173,10 @@ LLM_API_KEY_FALLBACK_ENABLED=true
|
|
| 173 |
# overloading one key when many requests arrive in parallel.
|
| 174 |
# KEY_MAX_INFLIGHT_PER_KEY=3
|
| 175 |
#
|
| 176 |
-
# Safety lease for in-flight counters (default:
|
| 177 |
-
#
|
| 178 |
-
#
|
|
|
|
| 179 |
#
|
| 180 |
# Max request-body bytes to inspect for model names on streaming
|
| 181 |
# OpenAI-compatible Gemini calls (default: 256 KiB).
|
|
@@ -191,6 +192,7 @@ LLM_API_KEY_FALLBACK_ENABLED=true
|
|
| 191 |
# model-scoped and per-request round-robin can burn 2-3 keys for one chat turn.
|
| 192 |
# KEY_STICKY_UNTIL_FAILURE=true
|
| 193 |
# KEY_STICKY_PROVIDERS=gemini
|
|
|
|
| 194 |
#
|
| 195 |
# Optional auto-retry count for fetch requests on retryable errors/statuses.
|
| 196 |
# Default 0 means one upstream attempt per caller request; set 1-2 to opt in.
|
|
|
|
| 173 |
# overloading one key when many requests arrive in parallel.
|
| 174 |
# KEY_MAX_INFLIGHT_PER_KEY=3
|
| 175 |
#
|
| 176 |
+
# Safety lease for in-flight counters (default: 30s). If a picked key gets no
|
| 177 |
+
# provider headers/completion/error before this, it is marked transient and
|
| 178 |
+
# sticky mode rotates away instead of leaving permanent pending counts.
|
| 179 |
+
# KEY_INFLIGHT_TTL_MS=30000
|
| 180 |
#
|
| 181 |
# Max request-body bytes to inspect for model names on streaming
|
| 182 |
# OpenAI-compatible Gemini calls (default: 256 KiB).
|
|
|
|
| 192 |
# model-scoped and per-request round-robin can burn 2-3 keys for one chat turn.
|
| 193 |
# KEY_STICKY_UNTIL_FAILURE=true
|
| 194 |
# KEY_STICKY_PROVIDERS=gemini
|
| 195 |
+
# auto = per-model for Gemini, provider-level for others
# KEY_STICKY_SCOPE=auto
|
| 196 |
#
|
| 197 |
# Optional auto-retry count for fetch requests on retryable errors/statuses.
|
| 198 |
# Default 0 means one upstream attempt per caller request; set 1-2 to opt in.
|
README.md
CHANGED
|
@@ -296,11 +296,12 @@ Optional tuning:
|
|
| 296 |
- `KEY_PERM_SUSPEND_MS` (default `57600000`) — long suspend duration for exhausted/auth-invalid keys (**capped at 16h max**).
|
| 297 |
- `KEY_FAILURE_DECAY_MS` (default `900000`) — recent-failure decay window used to deprioritize keys.
|
| 298 |
- `KEY_MAX_INFLIGHT_PER_KEY` (default `3`) — soft concurrent request cap per key.
|
| 299 |
-
- `KEY_INFLIGHT_TTL_MS` (default `
|
| 300 |
- `KEY_MODEL_SNIFF_MAX_BYTES` (default `262144`) — max request-body bytes to inspect for model names on streaming OpenAI-compatible Gemini calls.
|
| 301 |
- `KEY_ERROR_BODY_SNIFF_MAX_BYTES` (default `65536`) — max error-response bytes to inspect so provider quota/rate bodies such as 403 quota errors are scoped correctly instead of being treated as permanent auth failures.
|
| 302 |
- `KEY_STICKY_UNTIL_FAILURE` (default `true`) — keep sticky providers on one key until that key fails/exhausts.
|
| 303 |
- `KEY_STICKY_PROVIDERS` (default `gemini`) — comma-separated provider names that should use sticky key selection instead of per-request round-robin.
|
|
|
|
| 304 |
- `KEY_FETCH_MAX_RETRIES` (default `0`) — optional auto-retry count for retryable failures on **GET/HEAD/OPTIONS/POST** with a different key. Default `0` means the rotator does **not** spend extra upstream attempts for a single caller request.
|
| 305 |
- `KEY_FETCH_RETRY_BASE_DELAY_MS` (default `250`) — base delay for retry backoff (respects `Retry-After`, capped to 10s).
|
| 306 |
- `KEY_ROTATOR_ASSERT_NO_EXTRA_CALLS=true` — optional diagnostic warning if a single caller fetch creates more than one upstream provider attempt.
|
|
|
|
| 296 |
- `KEY_PERM_SUSPEND_MS` (default `57600000`) — long suspend duration for exhausted/auth-invalid keys (**capped at 16h max**).
|
| 297 |
- `KEY_FAILURE_DECAY_MS` (default `900000`) — recent-failure decay window used to deprioritize keys.
|
| 298 |
- `KEY_MAX_INFLIGHT_PER_KEY` (default `3`) — soft concurrent request cap per key.
|
| 299 |
+
- `KEY_INFLIGHT_TTL_MS` (default `30000`) — safety lease for picked keys with no provider headers/completion/error; stale picks are marked transient so sticky keys can rotate.
|
| 300 |
- `KEY_MODEL_SNIFF_MAX_BYTES` (default `262144`) — max request-body bytes to inspect for model names on streaming OpenAI-compatible Gemini calls.
|
| 301 |
- `KEY_ERROR_BODY_SNIFF_MAX_BYTES` (default `65536`) — max error-response bytes to inspect so provider quota/rate bodies such as 403 quota errors are scoped correctly instead of being treated as permanent auth failures.
|
| 302 |
- `KEY_STICKY_UNTIL_FAILURE` (default `true`) — keep sticky providers on one key until that key fails/exhausts.
|
| 303 |
- `KEY_STICKY_PROVIDERS` (default `gemini`) — comma-separated provider names that should use sticky key selection instead of per-request round-robin.
|
| 304 |
+
- `KEY_STICKY_SCOPE` (default `auto`) — `auto` uses per-model sticky buckets for Gemini/per-model providers and provider-level buckets for others; set `provider` or `model` to override.
|
| 305 |
- `KEY_FETCH_MAX_RETRIES` (default `0`) — optional auto-retry count for retryable failures on **GET/HEAD/OPTIONS/POST** with a different key. Default `0` means the rotator does **not** spend extra upstream attempts for a single caller request.
|
| 306 |
- `KEY_FETCH_RETRY_BASE_DELAY_MS` (default `250`) — base delay for retry backoff (respects `Retry-After`, capped to 10s).
|
| 307 |
- `KEY_ROTATOR_ASSERT_NO_EXTRA_CALLS=true` — optional diagnostic warning if a single caller fetch creates more than one upstream provider attempt.
|
env-builder.js
CHANGED
|
@@ -612,9 +612,9 @@ const FIELDS = [
|
|
| 612 |
"k": "KEY_INFLIGHT_TTL_MS",
|
| 613 |
"lbl": "Key rotation in-flight safety lease (ms)",
|
| 614 |
"type": "text",
|
| 615 |
-
"ph": "
|
| 616 |
"tag": "advanced",
|
| 617 |
-
"help": "
|
| 618 |
},
|
| 619 |
{
|
| 620 |
"g": "Plugins",
|
|
@@ -656,6 +656,16 @@ const FIELDS = [
|
|
| 656 |
"tag": "advanced",
|
| 657 |
"help": "Provider names that should reuse one key until failure/quota exhaustion. Default: gemini."
|
| 658 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
{
|
| 660 |
"g": "Plugins",
|
| 661 |
"icon": "🔄",
|
|
|
|
| 612 |
"k": "KEY_INFLIGHT_TTL_MS",
|
| 613 |
"lbl": "Key rotation in-flight safety lease (ms)",
|
| 614 |
"type": "text",
|
| 615 |
+
"ph": "30000",
|
| 616 |
"tag": "advanced",
|
| 617 |
+
"help": "Marks picked keys stale if no provider headers/completion/error are observed. Default: 30 seconds."
|
| 618 |
},
|
| 619 |
{
|
| 620 |
"g": "Plugins",
|
|
|
|
| 656 |
"tag": "advanced",
|
| 657 |
"help": "Provider names that should reuse one key until failure/quota exhaustion. Default: gemini."
|
| 658 |
},
|
| 659 |
+
{
|
| 660 |
+
"g": "Plugins",
|
| 661 |
+
"icon": "🔄",
|
| 662 |
+
"k": "KEY_STICKY_SCOPE",
|
| 663 |
+
"lbl": "Sticky key scope (auto/provider/model)",
|
| 664 |
+
"type": "text",
|
| 665 |
+
"ph": "auto",
|
| 666 |
+
"tag": "advanced",
|
| 667 |
+
"help": "Default auto uses per-model sticky buckets for Gemini/per-model providers and provider-level buckets for others."
|
| 668 |
+
},
|
| 669 |
{
|
| 670 |
"g": "Plugins",
|
| 671 |
"icon": "🔄",
|
key-rotator-manager.html
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
| 6 |
<title>HuggingClaw · API Key Rotator</title>
|
| 7 |
<style>
|
| 8 |
-
:root{color-scheme:dark;--bg:#070711;--panel:#111120;--panel2:#17172a;--line:#292945;--text:#f8f7ff;--muted:#9892b8;--soft:#c7c2e6;--good:#22c55e;--warn:#f5c542;--bad:#fb7185;--blue:#60a5fa;--violet:#a78bfa}*{box-sizing:border-box}body{margin:0;min-height:100vh;background:radial-gradient(circle at top left,#25145a 0,#070711 34%,#070711 100%);font-family:Inter,ui-sans-serif,system-ui,-apple-system,sans-serif;color:var(--text);font-size:13px}main{width:min(1280px,calc(100% - 28px));margin:0 auto;padding:28px 0 44px}.top{display:flex;align-items:flex-start;justify-content:space-between;gap:18px;margin-bottom:18px}.eyebrow{font-size:.7rem;letter-spacing:.18em;text-transform:uppercase;color:var(--muted);font-weight:900}h1{margin:6px 0 8px;font-size:clamp(1.6rem,4vw,2.7rem);line-height:1}.sub{color:var(--soft);max-width:860px;line-height:1.55}.actions{display:flex;gap:10px;flex-wrap:wrap}.btn{border:1px solid var(--line);background:rgba(255,255,255,.06);color:var(--text);border-radius:11px;padding:10px 14px;text-decoration:none;font-weight:850;cursor:pointer}.btn.primary{background:#fff;color:#050510}.btn:hover{filter:brightness(1.08)}.grid{display:grid;grid-template-columns:repeat(5,minmax(0,1fr));gap:12px;margin:18px 0}.card{background:linear-gradient(180deg,rgba(255,255,255,.055),rgba(255,255,255,.025));border:1px solid var(--line);border-radius:18px;padding:16px;box-shadow:0 18px 45px rgba(0,0,0,.22)}.metric-title{color:var(--muted);text-transform:uppercase;letter-spacing:.16em;font-size:.66rem;font-weight:900}.metric-value{font-size:1.65rem;font-weight:950;margin-top:8px}.metric-detail{color:var(--muted);margin-top:6px;line-height:1.45}.ok{color:var(--good)}.warn{color:var(--warn)}.bad{color:var(--bad)}.blue{color:var(--blue)}.layout{display:grid;grid-template-columns:340px minmax(0,1fr);gap:14px}.panel-title{display:flex;justify-content:space-between;align-items:center;gap:10px;margin-bottom:12px}.panel-title 
h2{font-size:1rem;margin:0}.pill{display:inline-flex;align-items:center;gap:6px;border:1px solid var(--line);border-radius:999px;padding:5px 9px;color:var(--soft);background:rgba(255,255,255,.035);font-size:.72rem;font-weight:850}.dot{width:7px;height:7px;border-radius:50%;background:var(--muted)}.dot.live{background:var(--good);box-shadow:0 0 15px var(--good)}.providers{display:flex;flex-direction:column;gap:9px}.provider{border:1px solid var(--line);border-radius:14px;background:rgba(0,0,0,.12);padding:12px;cursor:pointer}.provider.active{border-color:var(--blue);box-shadow:0 0 0 1px rgba(96,165,250,.25)}.provider-top{display:flex;justify-content:space-between;align-items:center;gap:12px}.provider-name{font-weight:950}.provider-meta{color:var(--muted);font-size:.78rem;margin-top:5px}.bar{height:8px;background:#22223a;border-radius:999px;overflow:hidden;margin-top:10px}.bar>span{display:block;height:100%;background:linear-gradient(90deg,var(--violet),var(--blue));border-radius:999px}.toolbar{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px}.input{flex:1;min-width:180px;border:1px solid var(--line);background:#0c0c18;color:var(--text);border-radius:11px;padding:10px 12px;outline:none}.select{border:1px solid var(--line);background:#0c0c18;color:var(--text);border-radius:11px;padding:10px 12px}.toggle{display:flex;align-items:center;gap:7px;color:var(--soft);font-weight:800}.events{display:flex;flex-direction:column;gap:9px;max-height:620px;overflow:auto;padding-right:4px}.event{border:1px solid var(--line);border-left-width:4px;border-radius:14px;background:rgba(0,0,0,.16);padding:12px;display:grid;grid-template-columns:155px minmax(0,1fr) 
auto;gap:12px}.event.pick,.event.sticky_pick,.event.pick_retry_fresh,.event.model_detected{border-left-color:var(--blue)}.event.success{border-left-color:var(--good)}.event.rate_limited,.event.auth_failed,.event.all_suspended_pick,.event.all_suspended_withheld{border-left-color:var(--warn)}.event.network_retryable,.event.transient_status,.event.saturated_reuse,.event.sticky_saturated_reuse,.event.inflight_timeout{border-left-color:var(--bad)}.time{color:var(--muted);font-variant-numeric:tabular-nums}.etype{font-weight:950}.msg{color:var(--soft);line-height:1.45;word-break:break-word}.key,.mono{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;color:#e9d5ff}.empty{padding:34px;text-align:center;color:var(--muted);border:1px dashed var(--line);border-radius:16px}.foot{color:var(--muted);margin-top:16px;line-height:1.55}.kbd{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;background:#23233a;border:1px solid #363653;border-radius:7px;padding:2px 6px;color:var(--text)}.detail-grid{display:grid;grid-template-columns:repeat(4,minmax(0,1fr));gap:10px;margin-bottom:12px}.mini{border:1px solid var(--line);border-radius:13px;padding:11px;background:rgba(0,0,0,.12)}.mini b{display:block;font-size:1.15rem;margin-top:4px}.key-table-wrap{overflow:auto}.key-table{width:100%;border-collapse:separate;border-spacing:0 8px;min-width:780px}.key-table th{color:var(--muted);font-size:.68rem;text-align:left;text-transform:uppercase;letter-spacing:.12em}.key-table td{background:rgba(0,0,0,.15);border-top:1px solid var(--line);border-bottom:1px solid var(--line);padding:10px;vertical-align:top}.key-table td:first-child{border-left:1px solid var(--line);border-radius:12px 0 0 12px}.key-table td:last-child{border-right:1px solid var(--line);border-radius:0 12px 12px 0}.status{font-weight:900}.status.used{color:var(--good)}.status.unused{color:var(--muted)}.model-chip{display:inline-flex;margin:2px 4px 2px 0;padding:4px 7px;border:1px solid 
var(--line);border-radius:999px;color:var(--soft);background:rgba(255,255,255,.035);font-size:.72rem}.section{margin-top:14px}.session-note{border:1px solid rgba(96,165,250,.28);background:rgba(96,165,250,.08);border-radius:14px;padding:12px;color:var(--soft);margin-bottom:12px}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);white-space:nowrap;border:0}@media(max-width:1050px){.grid{grid-template-columns:repeat(2,minmax(0,1fr))}.detail-grid{grid-template-columns:repeat(2,minmax(0,1fr))}.layout{grid-template-columns:1fr}.top{flex-direction:column}.event{grid-template-columns:1fr}.events{max-height:none}}@media(max-width:620px){.grid,.detail-grid{grid-template-columns:1fr}}
|
| 9 |
</style>
|
| 10 |
</head>
|
| 11 |
<body>
|
|
@@ -56,14 +56,16 @@
|
|
| 56 |
let state={events:[],providers:[],runtime:{routes:[]},paused:false,selected:localStorage.getItem('hc.keyRotator.provider')||''};
|
| 57 |
const $=id=>document.getElementById(id), HTML_ESCAPE={'&':'&','<':'<','>':'>','"':'"',"'":'''};
|
| 58 |
const PICK_TYPES=['pick','sticky_pick','pick_retry_fresh','all_suspended_pick','saturated_reuse','sticky_saturated_reuse'];
|
|
|
|
| 59 |
function esc(v){return String(v??'').replace(/[&<>"']/g,ch=>HTML_ESCAPE[ch]);}
|
| 60 |
function eventClass(v){return String(v||'event').replace(/[^a-z0-9_-]/gi,'_');}
|
| 61 |
function fmtTime(ts){const d=new Date(ts);return isNaN(d)?'—':d.toLocaleString();}
|
| 62 |
function isKeyEvent(e){return e.provider&&e.slot&&(e.key||e.type==='all_suspended_withheld');}
|
| 63 |
function keyId(provider,slot,key){return `${provider}::${slot||''}::${key||''}`;}
|
| 64 |
function eventText(e){const bits=[];if(e.provider)bits.push(e.provider);if(e.slot)bits.push(`#${e.slot}/${e.total}`);if(e.key)bits.push(e.key);if(e.model)bits.push(`model=${e.model}`);if(e.status)bits.push(`status=${e.status}`);if(e.errorStatus)bits.push(`error=${e.errorStatus}`);if(e.errorReason)bits.push(`reason=${e.errorReason}`);if(e.errorType)bits.push(`type=${e.errorType}`);if(e.waitMs)bits.push(`wait=${Math.round(e.waitMs/1000)}s`);if(e.code)bits.push(`code=${e.code}`);if(e.errorCode)bits.push(`errorCode=${e.errorCode}`);if(e.inflight)bits.push(`inflight=${e.inflight}/${e.maxInflight||'?'}`);return bits.join(' · ');}
|
| 65 |
-
function emptyStat(provider,slot,total,key){return{provider,slot,total,key,picks:0,success:0,rate:0,retry:0,auth:0,transient:0,last:'',models:new Map()};}
|
| 66 |
-
function buildStats(){const stats=new Map();for(const p of state.providers){for(const k of (p.keys||[]))stats.set(keyId(p.name,k.slot,k.key),emptyStat(p.name,k.slot,k.total,k.key));}for(const e of state.events){if(!isKeyEvent(e))continue;const id=keyId(e.provider,e.slot,e.key);if(!stats.has(id))stats.set(id,emptyStat(e.provider,e.slot,e.total,e.key||'***'));const s=stats.get(id);const isPick=PICK_TYPES.includes(e.type);const isOutcome=['success','rate_limited','network_retryable','auth_failed','transient_status'].includes(e.type);if(isPick)s.picks++;if(e.type==='success')s.success++;if(e.type==='rate_limited')s.rate++;if(e.type==='network_retryable')s.retry++;if(e.type==='auth_failed')s.auth++;if(e.type==='transient_status')s.transient++;s.last=e.ts||s.last;if(e.model){let m=s.models.get(e.model);if(!m)m={picks:0,success:0,rate:0,retry:0,auth:0,transient:0,observed:false};if(e.type==='model_detected')m.observed=true;if(isPick)m.picks++;if(e.type==='success')m.success++;if(e.type==='rate_limited')m.rate++;if(e.type==='network_retryable')m.retry++;if(e.type==='auth_failed')m.auth++;if(e.type==='transient_status')m.transient++;s.models.set(e.model,m);}else if(isPick||isOutcome){s.unscoped=s.unscoped||{picks:0,success:0,rate:0,retry:0,auth:0,transient:0};if(isPick)s.unscoped.picks++;if(e.type==='success')s.unscoped.success++;if(e.type==='rate_limited')s.unscoped.rate++;if(e.type==='network_retryable')s.unscoped.retry++;if(e.type==='auth_failed')s.unscoped.auth++;if(e.type==='transient_status')s.unscoped.transient++;}}return stats;}
|
|
|
|
| 67 |
function providerRows(provider,stats){return (provider?.keys||[]).map(k=>stats.get(keyId(provider.name,k.slot,k.key))||emptyStat(provider.name,k.slot,k.total,k.key));}
|
| 68 |
function pendingCount(v){return Math.max(0,(v.picks||0)-((v.success||0)+(v.rate||0)+(v.retry||0)+(v.transient||0)+(v.auth||0)));}
|
| 69 |
function modelChips(row){const entries=[...row.models.entries()];const un=row.unscoped||{picks:0,success:0,rate:0,retry:0,auth:0,transient:0};const unPending=pendingCount(un);if(!entries.length){const total=row.picks+row.success+row.rate+row.retry+row.transient+row.auth;return total?`<span class="model-chip">unscoped · p:${row.picks} ok:${row.success} pending:${pendingCount(row)} rl:${row.rate} retry:${row.retry+row.transient}</span>`:'<span class="model-chip">no model events yet</span>';}const chips=entries.map(([m,v])=>{const onlyObserved=v.observed&&!v.picks&&!v.success&&!v.rate&&!v.retry&&!v.transient&&!v.auth;return `<span class="model-chip">${esc(m)} · ${onlyObserved?'observed':`p:${v.picks} ok:${v.success} pending:${pendingCount(v)} rl:${v.rate} retry:${v.retry+v.transient}`}</span>`;});if(un.picks||un.success||un.rate||un.retry||un.transient||un.auth)chips.push(`<span class="model-chip">unscoped totals · p:${un.picks} ok:${un.success} pending:${unPending} rl:${un.rate} retry:${un.retry+un.transient}</span>`);return chips.join('');}
|
|
|
|
| 5 |
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
| 6 |
<title>HuggingClaw · API Key Rotator</title>
|
| 7 |
<style>
|
| 8 |
+
:root{color-scheme:dark;--bg:#070711;--panel:#111120;--panel2:#17172a;--line:#292945;--text:#f8f7ff;--muted:#9892b8;--soft:#c7c2e6;--good:#22c55e;--warn:#f5c542;--bad:#fb7185;--blue:#60a5fa;--violet:#a78bfa}*{box-sizing:border-box}body{margin:0;min-height:100vh;background:radial-gradient(circle at top left,#25145a 0,#070711 34%,#070711 100%);font-family:Inter,ui-sans-serif,system-ui,-apple-system,sans-serif;color:var(--text);font-size:13px}main{width:min(1280px,calc(100% - 28px));margin:0 auto;padding:28px 0 44px}.top{display:flex;align-items:flex-start;justify-content:space-between;gap:18px;margin-bottom:18px}.eyebrow{font-size:.7rem;letter-spacing:.18em;text-transform:uppercase;color:var(--muted);font-weight:900}h1{margin:6px 0 8px;font-size:clamp(1.6rem,4vw,2.7rem);line-height:1}.sub{color:var(--soft);max-width:860px;line-height:1.55}.actions{display:flex;gap:10px;flex-wrap:wrap}.btn{border:1px solid var(--line);background:rgba(255,255,255,.06);color:var(--text);border-radius:11px;padding:10px 14px;text-decoration:none;font-weight:850;cursor:pointer}.btn.primary{background:#fff;color:#050510}.btn:hover{filter:brightness(1.08)}.grid{display:grid;grid-template-columns:repeat(5,minmax(0,1fr));gap:12px;margin:18px 0}.card{background:linear-gradient(180deg,rgba(255,255,255,.055),rgba(255,255,255,.025));border:1px solid var(--line);border-radius:18px;padding:16px;box-shadow:0 18px 45px rgba(0,0,0,.22)}.metric-title{color:var(--muted);text-transform:uppercase;letter-spacing:.16em;font-size:.66rem;font-weight:900}.metric-value{font-size:1.65rem;font-weight:950;margin-top:8px}.metric-detail{color:var(--muted);margin-top:6px;line-height:1.45}.ok{color:var(--good)}.warn{color:var(--warn)}.bad{color:var(--bad)}.blue{color:var(--blue)}.layout{display:grid;grid-template-columns:340px minmax(0,1fr);gap:14px}.panel-title{display:flex;justify-content:space-between;align-items:center;gap:10px;margin-bottom:12px}.panel-title 
h2{font-size:1rem;margin:0}.pill{display:inline-flex;align-items:center;gap:6px;border:1px solid var(--line);border-radius:999px;padding:5px 9px;color:var(--soft);background:rgba(255,255,255,.035);font-size:.72rem;font-weight:850}.dot{width:7px;height:7px;border-radius:50%;background:var(--muted)}.dot.live{background:var(--good);box-shadow:0 0 15px var(--good)}.providers{display:flex;flex-direction:column;gap:9px}.provider{border:1px solid var(--line);border-radius:14px;background:rgba(0,0,0,.12);padding:12px;cursor:pointer}.provider.active{border-color:var(--blue);box-shadow:0 0 0 1px rgba(96,165,250,.25)}.provider-top{display:flex;justify-content:space-between;align-items:center;gap:12px}.provider-name{font-weight:950}.provider-meta{color:var(--muted);font-size:.78rem;margin-top:5px}.bar{height:8px;background:#22223a;border-radius:999px;overflow:hidden;margin-top:10px}.bar>span{display:block;height:100%;background:linear-gradient(90deg,var(--violet),var(--blue));border-radius:999px}.toolbar{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px}.input{flex:1;min-width:180px;border:1px solid var(--line);background:#0c0c18;color:var(--text);border-radius:11px;padding:10px 12px;outline:none}.select{border:1px solid var(--line);background:#0c0c18;color:var(--text);border-radius:11px;padding:10px 12px}.toggle{display:flex;align-items:center;gap:7px;color:var(--soft);font-weight:800}.events{display:flex;flex-direction:column;gap:9px;max-height:620px;overflow:auto;padding-right:4px}.event{border:1px solid var(--line);border-left-width:4px;border-radius:14px;background:rgba(0,0,0,.16);padding:12px;display:grid;grid-template-columns:155px minmax(0,1fr) 
auto;gap:12px}.event.pick,.event.sticky_pick,.event.pick_retry_fresh,.event.model_detected{border-left-color:var(--blue)}.event.success{border-left-color:var(--good)}.event.rate_limited,.event.auth_failed,.event.all_suspended_pick,.event.all_suspended_withheld{border-left-color:var(--warn)}.event.network_retryable,.event.transient_status,.event.saturated_reuse,.event.sticky_saturated_reuse,.event.sticky_saturated_rotate,.event.inflight_timeout{border-left-color:var(--bad)}.time{color:var(--muted);font-variant-numeric:tabular-nums}.etype{font-weight:950}.msg{color:var(--soft);line-height:1.45;word-break:break-word}.key,.mono{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;color:#e9d5ff}.empty{padding:34px;text-align:center;color:var(--muted);border:1px dashed var(--line);border-radius:16px}.foot{color:var(--muted);margin-top:16px;line-height:1.55}.kbd{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;background:#23233a;border:1px solid #363653;border-radius:7px;padding:2px 6px;color:var(--text)}.detail-grid{display:grid;grid-template-columns:repeat(4,minmax(0,1fr));gap:10px;margin-bottom:12px}.mini{border:1px solid var(--line);border-radius:13px;padding:11px;background:rgba(0,0,0,.12)}.mini b{display:block;font-size:1.15rem;margin-top:4px}.key-table-wrap{overflow:auto}.key-table{width:100%;border-collapse:separate;border-spacing:0 8px;min-width:780px}.key-table th{color:var(--muted);font-size:.68rem;text-align:left;text-transform:uppercase;letter-spacing:.12em}.key-table td{background:rgba(0,0,0,.15);border-top:1px solid var(--line);border-bottom:1px solid var(--line);padding:10px;vertical-align:top}.key-table td:first-child{border-left:1px solid var(--line);border-radius:12px 0 0 12px}.key-table td:last-child{border-right:1px solid var(--line);border-radius:0 12px 12px 0}.status{font-weight:900}.status.used{color:var(--good)}.status.unused{color:var(--muted)}.model-chip{display:inline-flex;margin:2px 4px 2px 0;padding:4px 7px;border:1px solid 
var(--line);border-radius:999px;color:var(--soft);background:rgba(255,255,255,.035);font-size:.72rem}.section{margin-top:14px}.session-note{border:1px solid rgba(96,165,250,.28);background:rgba(96,165,250,.08);border-radius:14px;padding:12px;color:var(--soft);margin-bottom:12px}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);white-space:nowrap;border:0}@media(max-width:1050px){.grid{grid-template-columns:repeat(2,minmax(0,1fr))}.detail-grid{grid-template-columns:repeat(2,minmax(0,1fr))}.layout{grid-template-columns:1fr}.top{flex-direction:column}.event{grid-template-columns:1fr}.events{max-height:none}}@media(max-width:620px){.grid,.detail-grid{grid-template-columns:1fr}}
|
| 9 |
</style>
|
| 10 |
</head>
|
| 11 |
<body>
|
|
|
|
| 56 |
let state={events:[],providers:[],runtime:{routes:[]},paused:false,selected:localStorage.getItem('hc.keyRotator.provider')||''};
|
| 57 |
// Shorthand for looking up a DOM element by its id.
const $ = (id) => document.getElementById(id);

// Character -> HTML entity table used by esc(). Every value must be the
// *escaped* entity: mapping a character to itself would turn escaping into a
// no-op, and an unescaped quote inside the literal is a syntax error.
const HTML_ESCAPE = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};
|
| 58 |
// Event types that buildStats() tallies as a key being picked for a request.
const PICK_TYPES = [
  'pick',
  'sticky_pick',
  'pick_retry_fresh',
  'all_suspended_pick',
  'saturated_reuse',
  'sticky_saturated_reuse',
];
|
| 59 |
+
// Age (ms) of the most recent pick beyond which ageStalePending() reclassifies
// still-unresolved picks as transient instead of showing them as pending.
// NOTE(review): presumably the rotator's 30 s in-flight lease default plus a
// little slack; the value is hard-coded here, so confirm the dashboard still
// reads sensibly when KEY_INFLIGHT_TTL_MS is configured higher.
const STALE_PENDING_MS = 35 * 1000;
|
| 60 |
/**
 * Escapes a value for insertion into HTML markup.
 * null/undefined become the empty string; everything else is stringified and
 * each of & < > " ' is replaced through the HTML_ESCAPE lookup table.
 */
function esc(v) {
  const text = String(v ?? '');
  return text.replace(/[&<>"']/g, (ch) => HTML_ESCAPE[ch]);
}
|
| 61 |
/**
 * Turns an event type into a token safe to use as a CSS class name.
 * Falsy input falls back to 'event'; any character outside [a-z0-9_-]
 * (case-insensitive) is replaced with an underscore.
 */
function eventClass(v) {
  const raw = String(v || 'event');
  return raw.replace(/[^a-z0-9_-]/gi, '_');
}
|
| 62 |
/**
 * Formats a timestamp for display using the runtime's locale.
 * Returns an em dash when the value cannot be turned into a valid Date.
 */
function fmtTime(ts) {
  const when = new Date(ts);
  if (Number.isNaN(when.getTime())) return '—';
  return when.toLocaleString();
}
|
| 63 |
/**
 * Tells whether an event can be attributed to a specific key slot.
 * Requires a provider and a slot, plus either a key or the
 * 'all_suspended_withheld' type. The result is truthy/falsy rather than a
 * strict boolean: the first falsy operand, or the key / type comparison.
 */
function isKeyEvent(e) {
  if (!e.provider) return e.provider;
  if (!e.slot) return e.slot;
  return e.key || e.type === 'all_suspended_withheld';
}
|
| 64 |
/**
 * Builds the stats-map identifier for a key: "provider::slot::key".
 * A falsy slot or key contributes an empty segment.
 */
function keyId(provider, slot, key) {
  const slotPart = slot || '';
  const keyPart = key || '';
  return `${provider}::${slotPart}::${keyPart}`;
}
|
| 65 |
/**
 * Renders a one-line, ' · '-separated summary of an event.
 * Each field is included only when its value is truthy, in this fixed order:
 * provider, slot/total, key, model, status, errorStatus, errorReason,
 * errorType, waitMs (rounded to whole seconds), code, errorCode, inflight.
 */
function eventText(e) {
  // [value that gates the fragment, lazy renderer for the fragment]
  const candidates = [
    [e.provider, () => e.provider],
    [e.slot, () => `#${e.slot}/${e.total}`],
    [e.key, () => e.key],
    [e.model, () => `model=${e.model}`],
    [e.status, () => `status=${e.status}`],
    [e.errorStatus, () => `error=${e.errorStatus}`],
    [e.errorReason, () => `reason=${e.errorReason}`],
    [e.errorType, () => `type=${e.errorType}`],
    [e.waitMs, () => `wait=${Math.round(e.waitMs / 1000)}s`],
    [e.code, () => `code=${e.code}`],
    [e.errorCode, () => `errorCode=${e.errorCode}`],
    [e.inflight, () => `inflight=${e.inflight}/${e.maxInflight || '?'}`],
  ];
  return candidates
    .filter(([value]) => value)
    .map(([, render]) => render())
    .join(' · ');
}
|
| 66 |
+
/**
 * Creates a zeroed statistics record for one key slot.
 * Counters start at 0, the `last` / `lastPick` timestamps start as empty
 * strings, and `models` is a fresh Map for per-model buckets.
 */
function emptyStat(provider, slot, total, key) {
  const stat = { provider, slot, total, key };
  for (const counter of ['picks', 'success', 'rate', 'retry', 'auth', 'transient']) {
    stat[counter] = 0;
  }
  stat.last = '';
  stat.lastPick = '';
  stat.models = new Map();
  return stat;
}
|
| 67 |
+
/**
 * Aggregates state.events into per-key statistics.
 *
 * Seeds one emptyStat() record for every key listed under state.providers,
 * then folds each key-attributable event (see isKeyEvent) into:
 *   - the key's own totals,
 *   - a per-model bucket when the event names a model, or
 *   - the key's `unscoped` bucket when it does not and the event is a pick
 *     or an outcome.
 * Finally every bucket is passed through ageStalePending() with the current
 * time.
 *
 * @returns {Map<string, object>} stats keyed by keyId(provider, slot, key)
 */
function buildStats() {
  const OUTCOME_TYPES = ['success', 'rate_limited', 'network_retryable', 'auth_failed', 'transient_status', 'inflight_timeout'];

  // Fresh counter set shared by per-model and unscoped buckets.
  const newCounters = () => ({ picks: 0, success: 0, rate: 0, retry: 0, auth: 0, transient: 0, last: '', lastPick: '' });

  // Applies one event to one bucket (key totals, model bucket or unscoped).
  const tally = (bucket, e, isPick) => {
    if (isPick) {
      bucket.picks++;
      bucket.lastPick = e.ts || bucket.lastPick;
    }
    switch (e.type) {
      case 'success':
        bucket.success++;
        break;
      case 'rate_limited':
        bucket.rate++;
        break;
      case 'network_retryable':
        bucket.retry++;
        break;
      case 'auth_failed':
        bucket.auth++;
        break;
      case 'transient_status':
      case 'inflight_timeout':
        // In-flight timeouts are counted together with transient statuses.
        bucket.transient++;
        break;
      default:
        break;
    }
    bucket.last = e.ts || bucket.last;
  };

  const stats = new Map();

  // Seed every configured key so unused keys still get a row.
  for (const p of state.providers) {
    for (const k of (p.keys || [])) {
      stats.set(keyId(p.name, k.slot, k.key), emptyStat(p.name, k.slot, k.total, k.key));
    }
  }

  for (const e of state.events) {
    if (!isKeyEvent(e)) continue;

    const id = keyId(e.provider, e.slot, e.key);
    if (!stats.has(id)) {
      // Event refers to a key not present in state.providers.
      stats.set(id, emptyStat(e.provider, e.slot, e.total, e.key || '***'));
    }
    const s = stats.get(id);
    const isPick = PICK_TYPES.includes(e.type);
    const isOutcome = OUTCOME_TYPES.includes(e.type);

    tally(s, e, isPick);

    if (e.model) {
      let m = s.models.get(e.model);
      if (!m) m = { ...newCounters(), observed: false };
      if (e.type === 'model_detected') m.observed = true;
      tally(m, e, isPick);
      s.models.set(e.model, m);
    } else if (isPick || isOutcome) {
      s.unscoped = s.unscoped || newCounters();
      tally(s.unscoped, e, isPick);
    }
  }

  // Let stale unresolved picks age out of the "pending" figure.
  const now = Date.now();
  for (const s of stats.values()) {
    ageStalePending(s, now);
    for (const m of s.models.values()) ageStalePending(m, now);
    if (s.unscoped) ageStalePending(s.unscoped, now);
  }
  return stats;
}
|
| 68 |
+
/**
 * Reclassifies unresolved picks that have gone stale.
 *
 * When the bucket still has pending picks (per pendingCount) and its most
 * recent pick timestamp (falling back to its last event timestamp) is more
 * than STALE_PENDING_MS older than `now`, the pending amount is added to both
 * `transient` and `stalePending`. Buckets with nothing pending, or with an
 * unparseable timestamp, are left untouched.
 *
 * @param {object} v - stats bucket, mutated in place
 * @param {number} now - current time in epoch milliseconds
 */
function ageStalePending(v, now) {
  const pending = pendingCount(v);
  if (!pending) return;

  const pickedAt = Date.parse(v.lastPick || v.last || '');
  const isStale = Number.isFinite(pickedAt) && now - pickedAt > STALE_PENDING_MS;
  if (!isStale) return;

  v.transient = (v.transient || 0) + pending;
  v.stalePending = (v.stalePending || 0) + pending;
}
|
| 69 |
/**
 * Returns one stats row per key of the given provider, in key order.
 * Keys without an entry in `stats` get a fresh emptyStat() record; a missing
 * provider or a provider without keys yields an empty array.
 */
function providerRows(provider, stats) {
  const keys = provider?.keys || [];
  return keys.map((k) => {
    const known = stats.get(keyId(provider.name, k.slot, k.key));
    return known || emptyStat(provider.name, k.slot, k.total, k.key);
  });
}
|
| 70 |
/**
 * Number of picks in a bucket that have no recorded outcome yet:
 * picks minus (success + rate + retry + transient + auth), never below zero.
 * Missing counters are treated as 0.
 */
function pendingCount(v) {
  const picked = v.picks || 0;
  const resolved =
    (v.success || 0) + (v.rate || 0) + (v.retry || 0) + (v.transient || 0) + (v.auth || 0);
  return Math.max(0, picked - resolved);
}
|
| 71 |
/**
 * Renders the per-model "chips" HTML for one key row.
 *
 * - No per-model buckets: a single "unscoped" chip built from the row totals,
 *   or a "no model events yet" chip when the totals sum to zero.
 * - Otherwise: one chip per model (model name escaped via esc(); models that
 *   were only observed, with no counters, show "observed"), followed by an
 *   "unscoped totals" chip when the unscoped bucket has any activity.
 *
 * @param {object} row - stats record with a `models` Map and optional `unscoped`
 * @returns {string} concatenated <span class="model-chip"> markup
 */
function modelChips(row) {
  const chip = (text) => `<span class="model-chip">${text}</span>`;
  const counts = (v, pending) =>
    `p:${v.picks} ok:${v.success} pending:${pending} rl:${v.rate} retry:${v.retry + v.transient}`;

  const entries = [...row.models.entries()];
  const un = row.unscoped || { picks: 0, success: 0, rate: 0, retry: 0, auth: 0, transient: 0 };
  const unPending = pendingCount(un);

  if (!entries.length) {
    const total = row.picks + row.success + row.rate + row.retry + row.transient + row.auth;
    if (!total) return '<span class="model-chip">no model events yet</span>';
    return chip(`unscoped · ${counts(row, pendingCount(row))}`);
  }

  const chips = entries.map(([m, v]) => {
    const onlyObserved =
      v.observed && !v.picks && !v.success && !v.rate && !v.retry && !v.transient && !v.auth;
    const detail = onlyObserved ? 'observed' : counts(v, pendingCount(v));
    return chip(`${esc(m)} · ${detail}`);
  });

  const unscopedActive = un.picks || un.success || un.rate || un.retry || un.transient || un.auth;
  if (unscopedActive) chips.push(chip(`unscoped totals · ${counts(un, unPending)}`));

  return chips.join('');
}
|
multi-provider-key-rotator.cjs
CHANGED
|
@@ -112,7 +112,7 @@ const EVENT_LOG_FILE = process.env.KEY_ROTATOR_EVENT_LOG_FILE || '/tmp/huggingcl
|
|
| 112 |
const EVENT_LOG_MAX_BYTES = Math.max(64 * 1024, parseInt(process.env.KEY_ROTATOR_EVENT_LOG_MAX_BYTES || '', 10) || 1024 * 1024);
|
| 113 |
const INFLIGHT_TTL_MS = Math.max(
|
| 114 |
30_000,
|
| 115 |
-
Math.min(30 * 60_000, parseInt(process.env.KEY_INFLIGHT_TTL_MS || '', 10) ||
|
| 116 |
);
|
| 117 |
const REQUEST_MODEL_SNIFF_MAX_BYTES = Math.max(
|
| 118 |
16 * 1024,
|
|
@@ -141,6 +141,7 @@ const STICKY_PROVIDER_SET = new Set(
|
|
| 141 |
.map(s => s.trim().toLowerCase())
|
| 142 |
.filter(Boolean),
|
| 143 |
);
|
|
|
|
| 144 |
|
| 145 |
// Maximum ms to respect from a Retry-After header.
|
| 146 |
// Old cap was 10s — too low for Gemini/Google which often returns 60s+.
|
|
@@ -440,17 +441,27 @@ function isGeminiOpenAICompatPath(pathOrUrl) {
|
|
| 440 |
*/
|
| 441 |
function getKeyExpiry(p, key, model) {
|
| 442 |
let expiry = p.keyState.get(key)?.blacklistedUntil ?? 0;
|
| 443 |
-
if (p.modelKeyState
|
| 444 |
-
const mks = p.modelKeyState.get(`${key}:${model}`);
|
| 445 |
if (mks && mks.blacklistedUntil > expiry) expiry = mks.blacklistedUntil;
|
| 446 |
}
|
| 447 |
return expiry;
|
| 448 |
}
|
| 449 |
|
| 450 |
function stickyBucketForProvider(p, model) {
|
| 451 |
-
const rawScope = String(process.env.KEY_STICKY_SCOPE || '
|
| 452 |
-
const scope = rawScope === '
|
| 453 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
}
|
| 455 |
|
| 456 |
function isStickyProvider(p) {
|
|
@@ -464,14 +475,17 @@ function rememberStickyKey(p, model, key) {
|
|
| 464 |
|
| 465 |
function clearStickyKey(p, key, model) {
|
| 466 |
if (!p?.stickyKeys || !key) return;
|
| 467 |
-
|
|
|
|
| 468 |
const bucket = stickyBucketForProvider(p, model);
|
| 469 |
if (p.stickyKeys.get(bucket) === key) p.stickyKeys.delete(bucket);
|
| 470 |
// Also clear the ambiguous fallback bucket if this key was selected before
|
| 471 |
// a Gemini OpenAI-compatible request body revealed its model. Do not clear
|
| 472 |
// other model buckets: Gemini quota failures are model-scoped.
|
| 473 |
-
|
| 474 |
-
|
|
|
|
|
|
|
| 475 |
return;
|
| 476 |
}
|
| 477 |
for (const [bucket, stickyKey] of p.stickyKeys) {
|
|
@@ -632,14 +646,15 @@ function isActive(p, key, model) {
|
|
| 632 |
}
|
| 633 |
|
| 634 |
// ── Per-model check (gemini etc.) ──────────────────────────────────────────
|
| 635 |
-
if (p.modelKeyState
|
| 636 |
-
const
|
|
|
|
| 637 |
const mks = p.modelKeyState.get(mKey);
|
| 638 |
if (mks && mks.blacklistedUntil !== 0) {
|
| 639 |
-
if (Date.now() < mks.blacklistedUntil) return false; // blocked for this model
|
| 640 |
mks.blacklistedUntil = 0;
|
| 641 |
if (mks.strikes > 0) mks.strikes -= 1;
|
| 642 |
-
debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} back in pool for model=${model} (strikes now ${mks.strikes})`);
|
| 643 |
}
|
| 644 |
}
|
| 645 |
|
|
@@ -668,8 +683,9 @@ function recordFailure(p, key, model, retryAfterMs) {
|
|
| 668 |
// our exponential cooldown. This prevents hammering a key before its quota resets.
|
| 669 |
const serverHintMs = (typeof retryAfterMs === 'number' && retryAfterMs > 0) ? retryAfterMs : 0;
|
| 670 |
|
| 671 |
-
if (p.modelKeyState
|
| 672 |
-
const
|
|
|
|
| 673 |
let mks = p.modelKeyState.get(mKey);
|
| 674 |
if (!mks) { mks = makeKeyState(); p.modelKeyState.set(mKey, mks); }
|
| 675 |
|
|
@@ -689,9 +705,9 @@ function recordFailure(p, key, model, retryAfterMs) {
|
|
| 689 |
// ★ Set blacklistedUntil FIRST so it is always written even if the log below throws.
|
| 690 |
mks.blacklistedUntil = Math.max(mks.blacklistedUntil || 0, Date.now() + cooldown);
|
| 691 |
if (isPerm)
|
| 692 |
-
warn(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model} hit ${MAX_STRIKES} strikes — suspended for ${formatHours(PERM_SUSPEND_MS)}h (quota likely exhausted for this model)`);
|
| 693 |
else
|
| 694 |
-
debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model} strike ${mks.strikes}/${MAX_STRIKES} — backoff ${Math.round(cooldown / 1000)}s${serverHintMs > 0 ? ` (server-hint ${Math.round(serverHintMs/1000)}s)` : ''}`);
|
| 695 |
return;
|
| 696 |
}
|
| 697 |
|
|
@@ -725,15 +741,18 @@ function recordFailure(p, key, model, retryAfterMs) {
|
|
| 725 |
* Called on transient retryable failures (non-quota/rate):
|
| 726 |
* applies short cooldown without incrementing strikes.
|
| 727 |
*/
|
| 728 |
-
function recordTransientFailure(p, key) {
|
| 729 |
-
|
| 730 |
-
|
|
|
|
|
|
|
|
|
|
| 731 |
ks.lastFailureAt = Date.now();
|
| 732 |
const jitter = 1 + ((Math.random() * 2 - 1) * (COOLDOWN_JITTER_PCT / 100));
|
| 733 |
const cooldown = Math.max(1000, Math.round(BASE_COOLDOWN_MS * jitter));
|
| 734 |
ks.blacklistedUntil = Math.max(ks.blacklistedUntil || 0, Date.now() + cooldown);
|
| 735 |
const secs = Math.round(cooldown / 1000);
|
| 736 |
-
debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} transient backoff ${secs}s (strikes unchanged)`);
|
| 737 |
}
|
| 738 |
|
| 739 |
/**
|
|
@@ -754,15 +773,17 @@ function recordSuccess(p, key, model) {
|
|
| 754 |
}
|
| 755 |
}
|
| 756 |
|
| 757 |
-
// Also clear model-specific state on success
|
| 758 |
-
|
| 759 |
-
|
|
|
|
|
|
|
| 760 |
const mks = p.modelKeyState.get(mKey);
|
| 761 |
if (mks && (mks.strikes > 0 || mks.blacklistedUntil > 0)) {
|
| 762 |
mks.strikes = 0;
|
| 763 |
mks.lastFailureAt = 0;
|
| 764 |
mks.blacklistedUntil = 0;
|
| 765 |
-
debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model} recovered — strikes reset`);
|
| 766 |
}
|
| 767 |
}
|
| 768 |
}
|
|
@@ -802,9 +823,9 @@ function removeInFlightToken(p, key, token) {
|
|
| 802 |
else p.inFlightTimers.delete(key);
|
| 803 |
}
|
| 804 |
|
| 805 |
-
function beginInFlight(p, key) {
|
| 806 |
if (!p || !key) return null;
|
| 807 |
-
const token = { done: false, timer: null };
|
| 808 |
p.inFlight.set(key, (p.inFlight.get(key) || 0) + 1);
|
| 809 |
token.timer = setTimeout(() => {
|
| 810 |
if (token.done) return;
|
|
@@ -813,7 +834,14 @@ function beginInFlight(p, key) {
|
|
| 813 |
const before = p.inFlight.get(key) || 0;
|
| 814 |
if (before > 0) {
|
| 815 |
const next = decrementInFlight(p, key);
|
| 816 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 817 |
}
|
| 818 |
}, INFLIGHT_TTL_MS);
|
| 819 |
token.timer.unref?.();
|
|
@@ -874,16 +902,25 @@ function nextKey(p, model) {
|
|
| 874 |
const stickyKey = p.stickyKeys.get(stickyBucketForProvider(p, model));
|
| 875 |
if (stickyKey && p.keys.includes(stickyKey) && isActive(p, stickyKey, model)) {
|
| 876 |
const inflight = p.inFlight.get(stickyKey) || 0;
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
warn(`[key-rotator] ${p.name}: sticky key saturated, still reusing ${keySlot(p, stickyKey)}${keyMask(stickyKey)}${model ? ` model=${model}` : ''} inflight=${inflight + 1}/${MAX_INFLIGHT_PER_KEY} until it fails/exhausts`);
|
| 880 |
-
emitEvent('sticky_saturated_reuse', p, stickyKey, { model, inflight: inflight + 1, maxInflight: MAX_INFLIGHT_PER_KEY });
|
| 881 |
-
} else {
|
| 882 |
emitEvent('sticky_pick', p, stickyKey, { model, inflight: inflight + 1, maxInflight: MAX_INFLIGHT_PER_KEY });
|
|
|
|
| 883 |
}
|
| 884 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 885 |
}
|
| 886 |
-
if (stickyKey) clearStickyKey(p, stickyKey, model);
|
| 887 |
}
|
| 888 |
|
| 889 |
let bestPick = null;
|
|
@@ -907,7 +944,7 @@ function nextKey(p, model) {
|
|
| 907 |
// Score: prefer keys with fewer recent failures and lower in-flight count.
|
| 908 |
// For perModelLimits, also factor in model-specific strike count.
|
| 909 |
const ks = p.keyState.get(key) || makeKeyState();
|
| 910 |
-
const mks =
|
| 911 |
const recentFailPenalty =
|
| 912 |
(ks.lastFailureAt > 0 && (Date.now() - ks.lastFailureAt) < FAILURE_DECAY_MS ? 100 : 0) +
|
| 913 |
(mks.lastFailureAt > 0 && (Date.now() - mks.lastFailureAt) < FAILURE_DECAY_MS ? 100 : 0);
|
|
@@ -1047,9 +1084,11 @@ function handleStatus(p, key, status, model, retryAfterMs, errorInfo) {
|
|
| 1047 |
}
|
| 1048 |
|
| 1049 |
if (failureKind === 'transient') {
|
| 1050 |
-
//
|
| 1051 |
-
|
| 1052 |
-
|
|
|
|
|
|
|
| 1053 |
warn(`[key-rotator] ${p.name}: transient status=${status} on ${keySlot(p, key)}${keyMask(key)}`);
|
| 1054 |
emitEvent('transient_status', p, key, { status, model, ...errorFields });
|
| 1055 |
return;
|
|
@@ -1061,7 +1100,7 @@ function handleStatus(p, key, status, model, retryAfterMs, errorInfo) {
|
|
| 1061 |
}
|
| 1062 |
}
|
| 1063 |
|
| 1064 |
-
function handleTransportError(p, key, err) {
|
| 1065 |
if (!p || !key) return;
|
| 1066 |
// Node.js 18+ undici fetch throws TypeError: "fetch failed" where the actual
|
| 1067 |
// network error code lives in err.cause.code (e.g. ECONNRESET, ETIMEDOUT,
|
|
@@ -1071,12 +1110,22 @@ function handleTransportError(p, key, err) {
|
|
| 1071 |
? String(err.code || err.cause?.code).toUpperCase()
|
| 1072 |
: '';
|
| 1073 |
const name = String(err?.name || '');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1074 |
const retryable = classifyRetryableFailure(undefined, code) || name === 'AbortError';
|
| 1075 |
if (retryable) {
|
| 1076 |
-
recordTransientFailure(p, key);
|
| 1077 |
-
clearStickyKey(p, key);
|
| 1078 |
-
warn(`[key-rotator] ${p.name}: retryable network ${name || 'Error'}${code ? ` code=${code}` : ''} on ${keySlot(p, key)}${keyMask(key)}`);
|
| 1079 |
-
emitEvent('network_retryable', p, key, { name: name || 'Error', code });
|
| 1080 |
}
|
| 1081 |
}
|
| 1082 |
|
|
@@ -1614,7 +1663,7 @@ function wrapUndiciHandler(handler, provider, key, inFlightToken, getModel) {
|
|
| 1614 |
} finally {
|
| 1615 |
// User handlers may throw/rethrow; the rotator still owns the
|
| 1616 |
// in-flight token and transport error classification for this key.
|
| 1617 |
-
settle(() => { if (!statusHandled) { try { handleTransportError(provider, key, err); } catch (_) {} } });
|
| 1618 |
}
|
| 1619 |
};
|
| 1620 |
}
|
|
@@ -1680,7 +1729,7 @@ function patchUndiciDispatch(proto, tag) {
|
|
| 1680 |
|
| 1681 |
if (key) {
|
| 1682 |
usedKey = key; usedProvider = provider; usedModel = model;
|
| 1683 |
-
usedInFlight = beginInFlight(usedProvider, usedKey);
|
| 1684 |
|
| 1685 |
const newOptions = { ...options };
|
| 1686 |
|
|
@@ -1709,7 +1758,7 @@ function patchUndiciDispatch(proto, tag) {
|
|
| 1709 |
usedProvider,
|
| 1710 |
usedKey,
|
| 1711 |
() => usedModel,
|
| 1712 |
-
(model) => { usedModel = model; },
|
| 1713 |
);
|
| 1714 |
const wrappedHandler = wrapUndiciHandler(handler, usedProvider, usedKey, usedInFlight, () => usedModel);
|
| 1715 |
return runInRotatorRequest(() => origDispatch.call(this, newOptions, wrappedHandler));
|
|
@@ -1892,7 +1941,7 @@ function patchFetch() {
|
|
| 1892 |
if (key) {
|
| 1893 |
triedKeys.add(key);
|
| 1894 |
usedKey = key;
|
| 1895 |
-
usedInFlight = beginInFlight(provider, key);
|
| 1896 |
}
|
| 1897 |
|
| 1898 |
const attemptArgs = buildAttemptFetchArgs(input, init, provider, usedKey);
|
|
@@ -1923,7 +1972,7 @@ function patchFetch() {
|
|
| 1923 |
} catch (err) {
|
| 1924 |
lastErr = err;
|
| 1925 |
try { endInFlight(provider, usedKey, usedInFlight); } catch (_) {}
|
| 1926 |
-
try { handleTransportError(provider, usedKey, err); } catch (_) {}
|
| 1927 |
// Node.js 18+ undici fetch: network errors are TypeError("fetch failed")
|
| 1928 |
// where the real code (ECONNRESET, ETIMEDOUT, ENOTFOUND …) is in
|
| 1929 |
// err.cause.code. Check that first before falling back to err.code.
|
|
@@ -1989,7 +2038,7 @@ function patchHttpModule(mod) {
|
|
| 1989 |
|
| 1990 |
if (key) {
|
| 1991 |
usedKey = key; usedProvider = provider; usedModel = model;
|
| 1992 |
-
usedInFlight = beginInFlight(usedProvider, usedKey);
|
| 1993 |
if (provider.queryParam) {
|
| 1994 |
const hasOptionsArg = args[1] && typeof args[1] === 'object' && typeof args[1].on !== 'function';
|
| 1995 |
const u = new URL(String(
|
|
@@ -2081,6 +2130,7 @@ function patchHttpModule(mod) {
|
|
| 2081 |
const bodyModel = extractModelFromBody(fullBody);
|
| 2082 |
if (bodyModel) {
|
| 2083 |
usedModel = bodyModel;
|
|
|
|
| 2084 |
promoteStickyKeyModel(usedProvider, usedKey, null, usedModel);
|
| 2085 |
emitEvent('model_detected', usedProvider, usedKey, { model: usedModel, source: 'http_request_body' });
|
| 2086 |
debug(`[key-rotator] ${usedProvider.name}: (http) model extracted from request body: ${usedModel}`);
|
|
@@ -2149,7 +2199,7 @@ function patchHttpModule(mod) {
|
|
| 2149 |
req.on('error', (err) => {
|
| 2150 |
try { endInFlight(usedProvider, usedKey, usedInFlight); } catch (_) {}
|
| 2151 |
if (!statusHandled) {
|
| 2152 |
-
try { handleTransportError(usedProvider, usedKey, err); } catch (_) {}
|
| 2153 |
}
|
| 2154 |
});
|
| 2155 |
}
|
|
@@ -2181,7 +2231,7 @@ if (hasProviderKeys) {
|
|
| 2181 |
patchUndici(); // covers OpenClaw gateway's bundled undici AI calls
|
| 2182 |
startDiagnostics();
|
| 2183 |
|
| 2184 |
-
debug(`[key-rotator] loaded — cooldown base:${BASE_COOLDOWN_MS/1000}s max-strikes:${MAX_STRIKES} perm-suspend:${formatHours(PERM_SUSPEND_MS)}h (cap 16h) max-inflight-per-key:${MAX_INFLIGHT_PER_KEY} max-retry-after:${MAX_RETRY_AFTER_MS/1000}s max-key-wait:${MAX_KEY_WAIT_MS/1000}s diagnostics:${DIAGNOSTICS_ENABLED ? 'on' : 'off'} log-level:${LOG_LEVEL} verbose-picks:${VERBOSE_PICKS ? 'on' : 'off'} suspended-last-resort:${USE_SUSPENDED_KEY_AS_LAST_RESORT ? 'on' : 'off'} per-model-providers:${providerState.filter(p => p.perModelLimits).map(p => p.name).join(',') || 'none'} model-from-body:on model-sniff-max:${REQUEST_MODEL_SNIFF_MAX_BYTES} error-sniff-max:${ERROR_BODY_SNIFF_MAX_BYTES} inflight-ttl:${INFLIGHT_TTL_MS}ms sticky-until-failure:${STICKY_UNTIL_FAILURE ? 'on' : 'off'} sticky-scope:${String(process.env.KEY_STICKY_SCOPE || '
|
| 2185 |
emitEvent('rotator_loaded', null, null, {
|
| 2186 |
providers: providerState.filter(p => p.keys.length).map(p => ({ name: p.name, total: p.keys.length })),
|
| 2187 |
logLevel: LOG_LEVEL,
|
|
@@ -2190,7 +2240,7 @@ if (hasProviderKeys) {
|
|
| 2190 |
modelSniffMaxBytes: REQUEST_MODEL_SNIFF_MAX_BYTES,
|
| 2191 |
errorBodySniffMaxBytes: ERROR_BODY_SNIFF_MAX_BYTES,
|
| 2192 |
stickyUntilFailure: STICKY_UNTIL_FAILURE,
|
| 2193 |
-
stickyScope: String(process.env.KEY_STICKY_SCOPE || '
|
| 2194 |
stickyProviders: [...STICKY_PROVIDER_SET],
|
| 2195 |
llmFallbackProviders: LLM_FALLBACK_PROVIDER_SET ? [...LLM_FALLBACK_PROVIDER_SET] : ['*'],
|
| 2196 |
});
|
|
|
|
| 112 |
const EVENT_LOG_MAX_BYTES = Math.max(64 * 1024, parseInt(process.env.KEY_ROTATOR_EVENT_LOG_MAX_BYTES || '', 10) || 1024 * 1024);
|
| 113 |
const INFLIGHT_TTL_MS = Math.max(
|
| 114 |
30_000,
|
| 115 |
+
Math.min(30 * 60_000, parseInt(process.env.KEY_INFLIGHT_TTL_MS || '', 10) || 30_000),
|
| 116 |
);
|
| 117 |
const REQUEST_MODEL_SNIFF_MAX_BYTES = Math.max(
|
| 118 |
16 * 1024,
|
|
|
|
| 141 |
.map(s => s.trim().toLowerCase())
|
| 142 |
.filter(Boolean),
|
| 143 |
);
|
| 144 |
+
const UNKNOWN_MODEL_SCOPE = '__unknown_model__';
|
| 145 |
|
| 146 |
// Maximum ms to respect from a Retry-After header.
|
| 147 |
// Old cap was 10s — too low for Gemini/Google which often returns 60s+.
|
|
|
|
| 441 |
*/
|
| 442 |
function getKeyExpiry(p, key, model) {
|
| 443 |
let expiry = p.keyState.get(key)?.blacklistedUntil ?? 0;
|
| 444 |
+
if (p.modelKeyState) {
|
| 445 |
+
const mks = p.modelKeyState.get(`${key}:${scopedModelKey(model)}`);
|
| 446 |
if (mks && mks.blacklistedUntil > expiry) expiry = mks.blacklistedUntil;
|
| 447 |
}
|
| 448 |
return expiry;
|
| 449 |
}
|
| 450 |
|
| 451 |
function stickyBucketForProvider(p, model) {
|
| 452 |
+
const rawScope = String(process.env.KEY_STICKY_SCOPE || '').trim().toLowerCase();
|
| 453 |
+
const scope = rawScope === 'provider'
|
| 454 |
+
? 'provider'
|
| 455 |
+
: rawScope === 'model' || rawScope === 'per-model'
|
| 456 |
+
? 'model'
|
| 457 |
+
: p?.perModelLimits
|
| 458 |
+
? 'model'
|
| 459 |
+
: 'provider';
|
| 460 |
+
return scope === 'provider' ? '__provider__' : (model || UNKNOWN_MODEL_SCOPE);
|
| 461 |
+
}
|
| 462 |
+
|
| 463 |
+
function scopedModelKey(model) {
|
| 464 |
+
return model || UNKNOWN_MODEL_SCOPE;
|
| 465 |
}
|
| 466 |
|
| 467 |
function isStickyProvider(p) {
|
|
|
|
| 475 |
|
| 476 |
function clearStickyKey(p, key, model) {
|
| 477 |
if (!p?.stickyKeys || !key) return;
|
| 478 |
+
const hasScopedModelArg = arguments.length >= 3;
|
| 479 |
+
if (hasScopedModelArg) {
|
| 480 |
const bucket = stickyBucketForProvider(p, model);
|
| 481 |
if (p.stickyKeys.get(bucket) === key) p.stickyKeys.delete(bucket);
|
| 482 |
// Also clear the ambiguous fallback bucket if this key was selected before
|
| 483 |
// a Gemini OpenAI-compatible request body revealed its model. Do not clear
|
| 484 |
// other model buckets: Gemini quota failures are model-scoped.
|
| 485 |
+
if (model) {
|
| 486 |
+
const fallbackBucket = stickyBucketForProvider(p, null);
|
| 487 |
+
if (p.stickyKeys.get(fallbackBucket) === key) p.stickyKeys.delete(fallbackBucket);
|
| 488 |
+
}
|
| 489 |
return;
|
| 490 |
}
|
| 491 |
for (const [bucket, stickyKey] of p.stickyKeys) {
|
|
|
|
| 646 |
}
|
| 647 |
|
| 648 |
// ── Per-model check (gemini etc.) ──────────────────────────────────────────
|
| 649 |
+
if (p.modelKeyState) {
|
| 650 |
+
const scopedModel = scopedModelKey(model);
|
| 651 |
+
const mKey = `${key}:${scopedModel}`;
|
| 652 |
const mks = p.modelKeyState.get(mKey);
|
| 653 |
if (mks && mks.blacklistedUntil !== 0) {
|
| 654 |
+
if (Date.now() < mks.blacklistedUntil) return false; // blocked for this model/unknown-model scope
|
| 655 |
mks.blacklistedUntil = 0;
|
| 656 |
if (mks.strikes > 0) mks.strikes -= 1;
|
| 657 |
+
debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} back in pool for model=${model || 'unknown'} (strikes now ${mks.strikes})`);
|
| 658 |
}
|
| 659 |
}
|
| 660 |
|
|
|
|
| 683 |
// our exponential cooldown. This prevents hammering a key before its quota resets.
|
| 684 |
const serverHintMs = (typeof retryAfterMs === 'number' && retryAfterMs > 0) ? retryAfterMs : 0;
|
| 685 |
|
| 686 |
+
if (p.modelKeyState) {
|
| 687 |
+
const scopedModel = scopedModelKey(model);
|
| 688 |
+
const mKey = `${key}:${scopedModel}`;
|
| 689 |
let mks = p.modelKeyState.get(mKey);
|
| 690 |
if (!mks) { mks = makeKeyState(); p.modelKeyState.set(mKey, mks); }
|
| 691 |
|
|
|
|
| 705 |
// ★ Set blacklistedUntil FIRST so it is always written even if the log below throws.
|
| 706 |
mks.blacklistedUntil = Math.max(mks.blacklistedUntil || 0, Date.now() + cooldown);
|
| 707 |
if (isPerm)
|
| 708 |
+
warn(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model || 'unknown'} hit ${MAX_STRIKES} strikes — suspended for ${formatHours(PERM_SUSPEND_MS)}h (quota likely exhausted for this model)`);
|
| 709 |
else
|
| 710 |
+
debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model || 'unknown'} strike ${mks.strikes}/${MAX_STRIKES} — backoff ${Math.round(cooldown / 1000)}s${serverHintMs > 0 ? ` (server-hint ${Math.round(serverHintMs/1000)}s)` : ''}`);
|
| 711 |
return;
|
| 712 |
}
|
| 713 |
|
|
|
|
| 741 |
* Called on transient retryable failures (non-quota/rate):
|
| 742 |
* applies short cooldown without incrementing strikes.
|
| 743 |
*/
|
| 744 |
+
function recordTransientFailure(p, key, model = null) {
|
| 745 |
+
if (!p || !key) return;
|
| 746 |
+
const stateMap = p.modelKeyState ? p.modelKeyState : p.keyState;
|
| 747 |
+
const stateKey = p.modelKeyState ? `${key}:${scopedModelKey(model)}` : key;
|
| 748 |
+
let ks = stateMap.get(stateKey);
|
| 749 |
+
if (!ks) { ks = makeKeyState(); stateMap.set(stateKey, ks); }
|
| 750 |
ks.lastFailureAt = Date.now();
|
| 751 |
const jitter = 1 + ((Math.random() * 2 - 1) * (COOLDOWN_JITTER_PCT / 100));
|
| 752 |
const cooldown = Math.max(1000, Math.round(BASE_COOLDOWN_MS * jitter));
|
| 753 |
ks.blacklistedUntil = Math.max(ks.blacklistedUntil || 0, Date.now() + cooldown);
|
| 754 |
const secs = Math.round(cooldown / 1000);
|
| 755 |
+
debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} transient backoff ${secs}s${p.modelKeyState ? ` model=${model || 'unknown'}` : ''} (strikes unchanged)`);
|
| 756 |
}
|
| 757 |
|
| 758 |
/**
|
|
|
|
| 773 |
}
|
| 774 |
}
|
| 775 |
|
| 776 |
+
// Also clear model-specific state on success. If model is still unknown,
|
| 777 |
+
// clear only the unknown-model scope; never clear other Gemini model buckets.
|
| 778 |
+
if (p.modelKeyState) {
|
| 779 |
+
const scopedModel = scopedModelKey(model);
|
| 780 |
+
const mKey = `${key}:${scopedModel}`;
|
| 781 |
const mks = p.modelKeyState.get(mKey);
|
| 782 |
if (mks && (mks.strikes > 0 || mks.blacklistedUntil > 0)) {
|
| 783 |
mks.strikes = 0;
|
| 784 |
mks.lastFailureAt = 0;
|
| 785 |
mks.blacklistedUntil = 0;
|
| 786 |
+
debug(`[key-rotator] ${p.name}: ${keySlot(p, key)}${keyMask(key)} model=${model || 'unknown'} recovered — strikes reset`);
|
| 787 |
}
|
| 788 |
}
|
| 789 |
}
|
|
|
|
| 823 |
else p.inFlightTimers.delete(key);
|
| 824 |
}
|
| 825 |
|
| 826 |
+
function beginInFlight(p, key, model = null) {
|
| 827 |
if (!p || !key) return null;
|
| 828 |
+
const token = { done: false, timer: null, model: model || null };
|
| 829 |
p.inFlight.set(key, (p.inFlight.get(key) || 0) + 1);
|
| 830 |
token.timer = setTimeout(() => {
|
| 831 |
if (token.done) return;
|
|
|
|
| 834 |
const before = p.inFlight.get(key) || 0;
|
| 835 |
if (before > 0) {
|
| 836 |
const next = decrementInFlight(p, key);
|
| 837 |
+
// A timeout here means the rotator saw a key pick but no provider headers,
|
| 838 |
+
// completion, or transport error before the TTL. For OpenClaw failover
|
| 839 |
+
// paths this otherwise leaves the sticky bucket pinned forever and the
|
| 840 |
+
// dashboard shows only "pending/no response observed". Treat it as a
|
| 841 |
+
// transient key failure so the next request can rotate to another key.
|
| 842 |
+
try { recordTransientFailure(p, key, token.model || null); } catch (_) {}
|
| 843 |
+
try { clearStickyKey(p, key, token.model || null); } catch (_) {}
|
| 844 |
+
emitEvent('inflight_timeout', p, key, { model: token.model || null, inflightBefore: before, inflightAfter: next, ttlMs: INFLIGHT_TTL_MS, classifiedAs: 'transient' });
|
| 845 |
}
|
| 846 |
}, INFLIGHT_TTL_MS);
|
| 847 |
token.timer.unref?.();
|
|
|
|
| 902 |
const stickyKey = p.stickyKeys.get(stickyBucketForProvider(p, model));
|
| 903 |
if (stickyKey && p.keys.includes(stickyKey) && isActive(p, stickyKey, model)) {
|
| 904 |
const inflight = p.inFlight.get(stickyKey) || 0;
|
| 905 |
+
if (inflight < MAX_INFLIGHT_PER_KEY) {
|
| 906 |
+
verbosePickLog(`[key-rotator] ${p.name}: sticky picked ${keySlot(p, stickyKey)}${keyMask(stickyKey)}${model ? ` model=${model}` : ''} inflight=${inflight + 1}/${MAX_INFLIGHT_PER_KEY}`);
|
|
|
|
|
|
|
|
|
|
| 907 |
emitEvent('sticky_pick', p, stickyKey, { model, inflight: inflight + 1, maxInflight: MAX_INFLIGHT_PER_KEY });
|
| 908 |
+
return { key: stickyKey, waitMs: 0 };
|
| 909 |
}
|
| 910 |
+
|
| 911 |
+
// Do not keep piling requests onto one sticky key when OpenClaw has not
|
| 912 |
+
// produced provider headers/completion/error for previous picks. That was
|
| 913 |
+
// the real cause of dashboards like "pick 14, pending 14, rate 0": sticky
|
| 914 |
+
// mode intentionally reused the same key even after it was saturated. Clear
|
| 915 |
+
// the bucket and let the normal active-key scan below choose another key;
|
| 916 |
+
// only if every key is saturated will the fallback path reuse the least-bad
|
| 917 |
+
// candidate.
|
| 918 |
+
warn(`[key-rotator] ${p.name}: sticky key saturated, rotating away from ${keySlot(p, stickyKey)}${keyMask(stickyKey)}${model ? ` model=${model}` : ''} inflight=${inflight}/${MAX_INFLIGHT_PER_KEY}`);
|
| 919 |
+
emitEvent('sticky_saturated_rotate', p, stickyKey, { model, inflight, maxInflight: MAX_INFLIGHT_PER_KEY });
|
| 920 |
+
clearStickyKey(p, stickyKey, model);
|
| 921 |
+
} else if (stickyKey) {
|
| 922 |
+
clearStickyKey(p, stickyKey, model);
|
| 923 |
}
|
|
|
|
| 924 |
}
|
| 925 |
|
| 926 |
let bestPick = null;
|
|
|
|
| 944 |
// Score: prefer keys with fewer recent failures and lower in-flight count.
|
| 945 |
// For perModelLimits, also factor in model-specific strike count.
|
| 946 |
const ks = p.keyState.get(key) || makeKeyState();
|
| 947 |
+
const mks = p.modelKeyState ? (p.modelKeyState.get(`${key}:${scopedModelKey(model)}`) || makeKeyState()) : makeKeyState();
|
| 948 |
const recentFailPenalty =
|
| 949 |
(ks.lastFailureAt > 0 && (Date.now() - ks.lastFailureAt) < FAILURE_DECAY_MS ? 100 : 0) +
|
| 950 |
(mks.lastFailureAt > 0 && (Date.now() - mks.lastFailureAt) < FAILURE_DECAY_MS ? 100 : 0);
|
|
|
|
| 1084 |
}
|
| 1085 |
|
| 1086 |
if (failureKind === 'transient') {
|
| 1087 |
+
// For per-model providers, keep transient cooldowns scoped to the current
|
| 1088 |
+
// model/unknown-model bucket so one Gemini model does not suppress the key
|
| 1089 |
+
// for all other Gemini models.
|
| 1090 |
+
recordTransientFailure(p, key, model);
|
| 1091 |
+
clearStickyKey(p, key, model);
|
| 1092 |
warn(`[key-rotator] ${p.name}: transient status=${status} on ${keySlot(p, key)}${keyMask(key)}`);
|
| 1093 |
emitEvent('transient_status', p, key, { status, model, ...errorFields });
|
| 1094 |
return;
|
|
|
|
| 1100 |
}
|
| 1101 |
}
|
| 1102 |
|
| 1103 |
+
function handleTransportError(p, key, err, model = null) {
|
| 1104 |
if (!p || !key) return;
|
| 1105 |
// Node.js 18+ undici fetch throws TypeError: "fetch failed" where the actual
|
| 1106 |
// network error code lives in err.cause.code (e.g. ECONNRESET, ETIMEDOUT,
|
|
|
|
| 1110 |
? String(err.code || err.cause?.code).toUpperCase()
|
| 1111 |
: '';
|
| 1112 |
const name = String(err?.name || '');
|
| 1113 |
+
const message = String(err?.message || err?.cause?.message || '');
|
| 1114 |
+
const haystack = `${name} ${message}`.toLowerCase();
|
| 1115 |
+
const looksRateOrQuota = /rate.?limit|too.?many|quota|resource.?exhaust|usage.?limit|insufficient.?quota|capacity.?exceeded|tokens?.?per|requests?.?per|rate_limit|rate.?limited|userratelimit|dailylimit|limitexceeded/.test(haystack);
|
| 1116 |
+
if (looksRateOrQuota) {
|
| 1117 |
+
recordFailure(p, key, model, 0);
|
| 1118 |
+
clearStickyKey(p, key, model);
|
| 1119 |
+
warn(`[key-rotator] ${p.name}: transport/failover quota signal ${name || 'Error'}${code ? ` code=${code}` : ''} on ${keySlot(p, key)}${keyMask(key)}${model ? ` model=${model}` : ''}`);
|
| 1120 |
+
emitEvent('rate_limited', p, key, { model, name: name || 'Error', code, source: 'transport_error', message: message.slice(0, 240) });
|
| 1121 |
+
return;
|
| 1122 |
+
}
|
| 1123 |
const retryable = classifyRetryableFailure(undefined, code) || name === 'AbortError';
|
| 1124 |
if (retryable) {
|
| 1125 |
+
recordTransientFailure(p, key, model);
|
| 1126 |
+
clearStickyKey(p, key, model);
|
| 1127 |
+
warn(`[key-rotator] ${p.name}: retryable network ${name || 'Error'}${code ? ` code=${code}` : ''} on ${keySlot(p, key)}${keyMask(key)}${model ? ` model=${model}` : ''}`);
|
| 1128 |
+
emitEvent('network_retryable', p, key, { model, name: name || 'Error', code });
|
| 1129 |
}
|
| 1130 |
}
|
| 1131 |
|
|
|
|
| 1663 |
} finally {
|
| 1664 |
// User handlers may throw/rethrow; the rotator still owns the
|
| 1665 |
// in-flight token and transport error classification for this key.
|
| 1666 |
+
settle(() => { if (!statusHandled) { try { handleTransportError(provider, key, err, currentModel()); } catch (_) {} } });
|
| 1667 |
}
|
| 1668 |
};
|
| 1669 |
}
|
|
|
|
| 1729 |
|
| 1730 |
if (key) {
|
| 1731 |
usedKey = key; usedProvider = provider; usedModel = model;
|
| 1732 |
+
usedInFlight = beginInFlight(usedProvider, usedKey, usedModel);
|
| 1733 |
|
| 1734 |
const newOptions = { ...options };
|
| 1735 |
|
|
|
|
| 1758 |
usedProvider,
|
| 1759 |
usedKey,
|
| 1760 |
() => usedModel,
|
| 1761 |
+
(model) => { usedModel = model; if (usedInFlight) usedInFlight.model = model; },
|
| 1762 |
);
|
| 1763 |
const wrappedHandler = wrapUndiciHandler(handler, usedProvider, usedKey, usedInFlight, () => usedModel);
|
| 1764 |
return runInRotatorRequest(() => origDispatch.call(this, newOptions, wrappedHandler));
|
|
|
|
| 1941 |
if (key) {
|
| 1942 |
triedKeys.add(key);
|
| 1943 |
usedKey = key;
|
| 1944 |
+
usedInFlight = beginInFlight(provider, key, model);
|
| 1945 |
}
|
| 1946 |
|
| 1947 |
const attemptArgs = buildAttemptFetchArgs(input, init, provider, usedKey);
|
|
|
|
| 1972 |
} catch (err) {
|
| 1973 |
lastErr = err;
|
| 1974 |
try { endInFlight(provider, usedKey, usedInFlight); } catch (_) {}
|
| 1975 |
+
try { handleTransportError(provider, usedKey, err, model); } catch (_) {}
|
| 1976 |
// Node.js 18+ undici fetch: network errors are TypeError("fetch failed")
|
| 1977 |
// where the real code (ECONNRESET, ETIMEDOUT, ENOTFOUND …) is in
|
| 1978 |
// err.cause.code. Check that first before falling back to err.code.
|
|
|
|
| 2038 |
|
| 2039 |
if (key) {
|
| 2040 |
usedKey = key; usedProvider = provider; usedModel = model;
|
| 2041 |
+
usedInFlight = beginInFlight(usedProvider, usedKey, usedModel);
|
| 2042 |
if (provider.queryParam) {
|
| 2043 |
const hasOptionsArg = args[1] && typeof args[1] === 'object' && typeof args[1].on !== 'function';
|
| 2044 |
const u = new URL(String(
|
|
|
|
| 2130 |
const bodyModel = extractModelFromBody(fullBody);
|
| 2131 |
if (bodyModel) {
|
| 2132 |
usedModel = bodyModel;
|
| 2133 |
+
if (usedInFlight) usedInFlight.model = usedModel;
|
| 2134 |
promoteStickyKeyModel(usedProvider, usedKey, null, usedModel);
|
| 2135 |
emitEvent('model_detected', usedProvider, usedKey, { model: usedModel, source: 'http_request_body' });
|
| 2136 |
debug(`[key-rotator] ${usedProvider.name}: (http) model extracted from request body: ${usedModel}`);
|
|
|
|
| 2199 |
req.on('error', (err) => {
|
| 2200 |
try { endInFlight(usedProvider, usedKey, usedInFlight); } catch (_) {}
|
| 2201 |
if (!statusHandled) {
|
| 2202 |
+
try { handleTransportError(usedProvider, usedKey, err, usedModel); } catch (_) {}
|
| 2203 |
}
|
| 2204 |
});
|
| 2205 |
}
|
|
|
|
| 2231 |
patchUndici(); // covers OpenClaw gateway's bundled undici AI calls
|
| 2232 |
startDiagnostics();
|
| 2233 |
|
| 2234 |
+
debug(`[key-rotator] loaded — cooldown base:${BASE_COOLDOWN_MS/1000}s max-strikes:${MAX_STRIKES} perm-suspend:${formatHours(PERM_SUSPEND_MS)}h (cap 16h) max-inflight-per-key:${MAX_INFLIGHT_PER_KEY} max-retry-after:${MAX_RETRY_AFTER_MS/1000}s max-key-wait:${MAX_KEY_WAIT_MS/1000}s diagnostics:${DIAGNOSTICS_ENABLED ? 'on' : 'off'} log-level:${LOG_LEVEL} verbose-picks:${VERBOSE_PICKS ? 'on' : 'off'} suspended-last-resort:${USE_SUSPENDED_KEY_AS_LAST_RESORT ? 'on' : 'off'} per-model-providers:${providerState.filter(p => p.perModelLimits).map(p => p.name).join(',') || 'none'} model-from-body:on model-sniff-max:${REQUEST_MODEL_SNIFF_MAX_BYTES} error-sniff-max:${ERROR_BODY_SNIFF_MAX_BYTES} inflight-ttl:${INFLIGHT_TTL_MS}ms sticky-until-failure:${STICKY_UNTIL_FAILURE ? 'on' : 'off'} sticky-scope:${String(process.env.KEY_STICKY_SCOPE || 'auto').trim().toLowerCase() || 'auto'} sticky-providers:${[...STICKY_PROVIDER_SET].join(',') || 'none'} llm-fallback-providers:${LLM_FALLBACK_PROVIDER_SET ? [...LLM_FALLBACK_PROVIDER_SET].join(',') : 'all'}`);
|
| 2235 |
emitEvent('rotator_loaded', null, null, {
|
| 2236 |
providers: providerState.filter(p => p.keys.length).map(p => ({ name: p.name, total: p.keys.length })),
|
| 2237 |
logLevel: LOG_LEVEL,
|
|
|
|
| 2240 |
modelSniffMaxBytes: REQUEST_MODEL_SNIFF_MAX_BYTES,
|
| 2241 |
errorBodySniffMaxBytes: ERROR_BODY_SNIFF_MAX_BYTES,
|
| 2242 |
stickyUntilFailure: STICKY_UNTIL_FAILURE,
|
| 2243 |
+
stickyScope: String(process.env.KEY_STICKY_SCOPE || 'auto').trim().toLowerCase() || 'auto',
|
| 2244 |
stickyProviders: [...STICKY_PROVIDER_SET],
|
| 2245 |
llmFallbackProviders: LLM_FALLBACK_PROVIDER_SET ? [...LLM_FALLBACK_PROVIDER_SET] : ['*'],
|
| 2246 |
});
|