HuggingClaw Fix Bot committed on
Commit
9b55b3e
·
1 Parent(s): 03aba55

fix: key rotation infinite suspension loop, round-robin broken, strike decay, WA double-reject

Browse files

Bug A (Critical) — multi-provider-key-rotator.cjs
recordFailure() used a plain assignment for blacklistedUntil:
ks.blacklistedUntil = Date.now() + cooldown;
When a perm-suspended key is used as last-resort and receives another 429,
recordFailure() is called again (strikes stay at MAX_STRIKES, cooldown = PERM).
The assignment RESETS the timer to a fresh 16h window even if only 1h had passed.
In a 429-storm with all keys exhausted, this causes an infinite rolling 16h
suspension — keys never recover.

Fix: Math.max() so existing longer suspensions are never shortened.
ks.blacklistedUntil = Math.max(ks.blacklistedUntil || 0, Date.now() + cooldown);

Bug B (Critical) — multi-provider-key-rotator.cjs
The last-resort path in nextKey() (all keys suspended) returned a key without
updating p.idx. Normal and saturated paths both advance p.idx after picking, so
the next call starts from a different position. The last-resort path did not,
causing the same key (index 0, or whichever had the earliest expiry) to be
picked on every call — round-robin was completely broken when all keys are suspended.

Also: the scan started at 0, ignoring p.idx, so even if expiries differed the
selection was not round-robin-fair.

Fix: scan from p.idx (same as normal path) so ties in expiry are broken by
position, and advance p.idx after pick.

Bug C (Medium) — multi-provider-key-rotator.cjs
isActive() cleared blacklistedUntil on expiry but left strikes unchanged.
A key with 2 strikes that served its full 240s cooldown came back with
strikes=2. The very next 429 immediately pushed it to strike 3 = 16h
perm-suspend, regardless of how long ago the previous failures were.

Practical scenario: 3 rate-limits spread over several hours on a free-tier
Gemini key → permanent 16h suspension. That suspension is intended for truly
exhausted keys, not intermittent rate-limits.

Fix: decrement strikes by 1 in isActive() when a blacklist expires naturally
(served full cooldown). The key still needs a successful response to fully
reset, but one cooldown served = one strike forgiven.

Bug D (Minor) — wa-guardian.js
createConnection() timeout callback called ws.close() without setting
resolved=true first. ws.close() can synchronously emit an 'error' event
on some WebSocket states (readyState CONNECTING/CLOSING), which triggered
the ws.on('error') listener → if (!resolved) reject(e) — a second reject on
an already-rejected Promise. The extra reject is silently ignored by JS, but it
emitted a spurious error log line that could mask real errors.

Fix: set resolved=true in the timeout callback before calling ws.close().

Files changed (2) hide show
  1. multi-provider-key-rotator.cjs +26 -7
  2. wa-guardian.js +3 -1
multi-provider-key-rotator.cjs CHANGED
@@ -177,6 +177,12 @@ if (fallbackCount > 0)
177
  /**
178
  * Is this key currently sitting out?
179
  * Also auto-clears expired blacklists so the key re-enters the pool silently.
 
 
 
 
 
 
180
  */
181
  function isActive(p, key) {
182
  const ks = p.keyState.get(key);
@@ -184,7 +190,11 @@ function isActive(p, key) {
184
  if (ks.blacklistedUntil === 0) return true; // not blacklisted
185
  if (Date.now() >= ks.blacklistedUntil) {
186
  ks.blacklistedUntil = 0; // expired β†’ back in pool
187
- debug(`[key-rotator] ${p.name}: ...${key.slice(-6)} back in pool`);
 
 
 
 
188
  return true;
189
  }
190
  return false;
@@ -221,7 +231,11 @@ function recordFailure(p, key) {
221
  debug(`[key-rotator] ${p.name}: ...${key.slice(-6)} strike ${ks.strikes}/${MAX_STRIKES} β€” backoff ${secs}s`);
222
  }
223
 
224
- ks.blacklistedUntil = Date.now() + cooldown;
 
 
 
 
225
  }
226
 
227
  /**
@@ -338,12 +352,17 @@ function nextKey(p) {
338
  }
339
 
340
  warn(`[key-rotator] ${p.name}: all ${total} key(s) suspended β€” using soonest-recovering key`);
341
- let best = p.keys[0], bestExpiry = Infinity;
342
- for (const k of p.keys) {
343
- const exp = p.keyState.get(k)?.blacklistedUntil ?? 0;
344
- if (exp < bestExpiry) { best = k; bestExpiry = exp; }
 
 
 
 
345
  }
346
- return best;
 
347
  }
348
 
349
  // ─── Auth header injection ────────────────────────────────────────────────────
 
177
  /**
178
  * Is this key currently sitting out?
179
  * Also auto-clears expired blacklists so the key re-enters the pool silently.
180
+ * Strike decay: each time a blacklist period expires naturally (served its
181
+ * full cooldown without a success), we reduce strikes by 1. This prevents a
182
+ * key from being permanently suspended just because it was rate-limited 3 times
183
+ * over a long period (e.g. 3 × 429s spread across hours on a free-tier quota).
184
+ * A key that truly has quota exhausted will simply accumulate strikes again on
185
+ * the next requests and settle back into long suspension.
186
  */
187
  function isActive(p, key) {
188
  const ks = p.keyState.get(key);
 
190
  if (ks.blacklistedUntil === 0) return true; // not blacklisted
191
  if (Date.now() >= ks.blacklistedUntil) {
192
  ks.blacklistedUntil = 0; // expired β†’ back in pool
193
+ // Decay strikes by 1 on natural expiry so a key that served its full
194
+ // cooldown gets a partial fresh start. It still needs a success to fully
195
+ // reset, but this prevents instant perm-suspension on the very next 429.
196
+ if (ks.strikes > 0) ks.strikes -= 1;
197
+ debug(`[key-rotator] ${p.name}: ...${key.slice(-6)} back in pool (strikes now ${ks.strikes})`);
198
  return true;
199
  }
200
  return false;
 
231
  debug(`[key-rotator] ${p.name}: ...${key.slice(-6)} strike ${ks.strikes}/${MAX_STRIKES} β€” backoff ${secs}s`);
232
  }
233
 
234
+ // Use Math.max so a longer existing suspension is never shortened.
235
+ // This matters when a last-resort key (already perm-suspended) gets another
236
+ // 429 — without Math.max the timer would reset to a fresh 16 h window,
237
+ // potentially looping forever and keeping all keys in perpetual suspension.
238
+ ks.blacklistedUntil = Math.max(ks.blacklistedUntil || 0, Date.now() + cooldown);
239
  }
240
 
241
  /**
 
352
  }
353
 
354
  warn(`[key-rotator] ${p.name}: all ${total} key(s) suspended β€” using soonest-recovering key`);
355
+ // FIX: scan from p.idx (same round-robin start as normal path) so ties in
356
+ // expiry are broken by position — every key gets equal turns even when all
357
+ // are suspended with the same blacklistedUntil timestamp.
358
+ let bestIdx = -1, bestExpiry = Infinity;
359
+ for (let offset = 0; offset < total; offset++) {
360
+ const i = (p.idx + offset) % total;
361
+ const exp = p.keyState.get(p.keys[i])?.blacklistedUntil ?? 0;
362
+ if (exp < bestExpiry) { bestIdx = i; bestExpiry = exp; }
363
  }
364
+ p.idx = (bestIdx + 1) % total; // advance for next call
365
+ return p.keys[bestIdx];
366
  }
367
 
368
  // ─── Auth header injection ────────────────────────────────────────────────────
wa-guardian.js CHANGED
@@ -123,7 +123,9 @@ async function createConnection() {
123
  });
124
 
125
  ws.on("error", (e) => { if (!resolved) reject(e); });
126
- setTimeout(() => { if (!resolved) { ws.close(); reject(new Error("Timeout")); } }, 10000);
 
 
127
  });
128
  }
129
 
 
123
  });
124
 
125
  ws.on("error", (e) => { if (!resolved) reject(e); });
126
+ // FIX: set resolved=true before ws.close() so the error listener above does not
127
+ // fire a second reject when close() triggers a WebSocket error event (double-reject).
128
+ setTimeout(() => { if (!resolved) { resolved = true; ws.close(); reject(new Error("Timeout")); } }, 10000);
129
  });
130
  }
131