Spaces:
Running
Running
anurag008w committed on
Commit ·
9a6444d
1
Parent(s): 2eb43c4
fix: merge injected model providers using wildcard in start.sh
Browse files- .env.example +5 -1
- README.md +3 -1
- env-builder.js +20 -1
- multi-provider-key-rotator.cjs +51 -17
- openclaw-sync.py +18 -10
- start.sh +51 -8
.env.example
CHANGED
|
@@ -169,7 +169,7 @@ LLM_API_KEY_FALLBACK_ENABLED=true
|
|
| 169 |
# KEY_MAX_INFLIGHT_PER_KEY=3
|
| 170 |
#
|
| 171 |
# Auto-retry count for fetch requests on retryable errors/statuses.
|
| 172 |
-
# Total attempts = 1 + retries (GET/HEAD/OPTIONS
|
| 173 |
# KEY_FETCH_MAX_RETRIES=2
|
| 174 |
#
|
| 175 |
# Base delay (ms) between auto-retries. Exponential per attempt; also respects
|
|
@@ -180,6 +180,10 @@ LLM_API_KEY_FALLBACK_ENABLED=true
|
|
| 180 |
# KEY_ROTATOR_DIAGNOSTICS=true
|
| 181 |
# KEY_ROTATOR_DIAGNOSTICS_INTERVAL_MS=60000
|
| 182 |
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
# Note: This rotator does not re-send the same failed request automatically.
|
| 184 |
# It blacklists/penalizes the failed key so the *next* request prefers
|
| 185 |
# healthier keys.
|
|
|
|
| 169 |
# KEY_MAX_INFLIGHT_PER_KEY=3
|
| 170 |
#
|
| 171 |
# Auto-retry count for fetch requests on retryable errors/statuses.
|
| 172 |
+
# Total attempts = 1 + retries (GET/HEAD/OPTIONS/POST).
|
| 173 |
# KEY_FETCH_MAX_RETRIES=2
|
| 174 |
#
|
| 175 |
# Base delay (ms) between auto-retries. Exponential per attempt; also respects
|
|
|
|
| 180 |
# KEY_ROTATOR_DIAGNOSTICS=true
|
| 181 |
# KEY_ROTATOR_DIAGNOSTICS_INTERVAL_MS=60000
|
| 182 |
#
|
| 183 |
+
# Log verbosity controls for rotator internals.
|
| 184 |
+
# KEY_ROTATOR_LOG_LEVEL=info
|
| 185 |
+
# KEY_ROTATOR_VERBOSE_PICKS=false
|
| 186 |
+
#
|
| 187 |
# Note: This rotator does not re-send the same failed request automatically.
|
| 188 |
# It blacklists/penalizes the failed key so the *next* request prefers
|
| 189 |
# healthier keys.
|
README.md
CHANGED
|
@@ -280,10 +280,12 @@ Optional tuning:
|
|
| 280 |
- `KEY_PERM_SUSPEND_MS` (default `57600000`) — long suspend duration for exhausted/auth-invalid keys (**capped at 16h max**).
|
| 281 |
- `KEY_FAILURE_DECAY_MS` (default `900000`) — recent-failure decay window used to deprioritize keys.
|
| 282 |
- `KEY_MAX_INFLIGHT_PER_KEY` (default `3`) — soft concurrent request cap per key.
|
| 283 |
-
- `KEY_FETCH_MAX_RETRIES` (default `2`) — auto-retry count for retryable failures on **GET/HEAD/OPTIONS** with a different key.
|
| 284 |
- `KEY_FETCH_RETRY_BASE_DELAY_MS` (default `250`) — base delay for retry backoff (respects `Retry-After`, capped to 10s).
|
| 285 |
- `KEY_ROTATOR_DIAGNOSTICS=true` — emit periodic provider/key health snapshots.
|
| 286 |
- `KEY_ROTATOR_DIAGNOSTICS_INTERVAL_MS` (default `60000`) — diagnostics interval.
|
|
|
|
|
|
|
| 287 |
|
| 288 |
Supported per-provider variables: `ANTHROPIC_API_KEYS`, `OPENAI_API_KEYS`, `GEMINI_API_KEYS`, `DEEPSEEK_API_KEYS`, `GROQ_API_KEYS`, `MISTRAL_API_KEYS`, `OPENROUTER_API_KEYS`, `XAI_API_KEYS`, `NVIDIA_API_KEYS`, `COHERE_API_KEYS`, `TOGETHER_API_KEYS`, `CEREBRAS_API_KEYS`, and more — see `.env.example` for the full list.
|
| 289 |
|
|
|
|
| 280 |
- `KEY_PERM_SUSPEND_MS` (default `57600000`) — long suspend duration for exhausted/auth-invalid keys (**capped at 16h max**).
|
| 281 |
- `KEY_FAILURE_DECAY_MS` (default `900000`) — recent-failure decay window used to deprioritize keys.
|
| 282 |
- `KEY_MAX_INFLIGHT_PER_KEY` (default `3`) — soft concurrent request cap per key.
|
| 283 |
+
- `KEY_FETCH_MAX_RETRIES` (default `2`) — auto-retry count for retryable failures on **GET/HEAD/OPTIONS/POST** with a different key.
|
| 284 |
- `KEY_FETCH_RETRY_BASE_DELAY_MS` (default `250`) — base delay for retry backoff (respects `Retry-After`, capped to 10s).
|
| 285 |
- `KEY_ROTATOR_DIAGNOSTICS=true` — emit periodic provider/key health snapshots.
|
| 286 |
- `KEY_ROTATOR_DIAGNOSTICS_INTERVAL_MS` (default `60000`) — diagnostics interval.
|
| 287 |
+
- `KEY_ROTATOR_LOG_LEVEL` (`info`/`debug`/`silent`, default `info`) — controls rotator log verbosity.
|
| 288 |
+
- `KEY_ROTATOR_VERBOSE_PICKS` (`true`/`false`, default `false`) — enable per-request key-pick logs (best with `KEY_ROTATOR_LOG_LEVEL=debug`).
|
| 289 |
|
| 290 |
Supported per-provider variables: `ANTHROPIC_API_KEYS`, `OPENAI_API_KEYS`, `GEMINI_API_KEYS`, `DEEPSEEK_API_KEYS`, `GROQ_API_KEYS`, `MISTRAL_API_KEYS`, `OPENROUTER_API_KEYS`, `XAI_API_KEYS`, `NVIDIA_API_KEYS`, `COHERE_API_KEYS`, `TOGETHER_API_KEYS`, `CEREBRAS_API_KEYS`, and more — see `.env.example` for the full list.
|
| 291 |
|
env-builder.js
CHANGED
|
@@ -529,7 +529,7 @@ const FIELDS = [
|
|
| 529 |
"g": "Plugins",
|
| 530 |
"icon": "π",
|
| 531 |
"k": "KEY_FETCH_MAX_RETRIES",
|
| 532 |
-
"lbl": "Auto-retries for retryable failures (GET/HEAD/OPTIONS
|
| 533 |
"type": "text",
|
| 534 |
"ph": "2",
|
| 535 |
"tag": "advanced"
|
|
@@ -543,6 +543,25 @@ const FIELDS = [
|
|
| 543 |
"ph": "250",
|
| 544 |
"tag": "advanced"
|
| 545 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
{
|
| 547 |
"g": "Plugins",
|
| 548 |
"icon": "π",
|
|
|
|
| 529 |
"g": "Plugins",
|
| 530 |
"icon": "π",
|
| 531 |
"k": "KEY_FETCH_MAX_RETRIES",
|
| 532 |
+
"lbl": "Auto-retries for retryable failures (GET/HEAD/OPTIONS/POST)",
|
| 533 |
"type": "text",
|
| 534 |
"ph": "2",
|
| 535 |
"tag": "advanced"
|
|
|
|
| 543 |
"ph": "250",
|
| 544 |
"tag": "advanced"
|
| 545 |
},
|
| 546 |
+
|
| 547 |
+
{
|
| 548 |
+
"g": "Plugins",
|
| 549 |
+
"icon": "π§Ύ",
|
| 550 |
+
"k": "KEY_ROTATOR_LOG_LEVEL",
|
| 551 |
+
"lbl": "Key-rotator log level (info/debug/silent)",
|
| 552 |
+
"type": "text",
|
| 553 |
+
"ph": "info",
|
| 554 |
+
"tag": "advanced"
|
| 555 |
+
},
|
| 556 |
+
{
|
| 557 |
+
"g": "Plugins",
|
| 558 |
+
"icon": "π§Ύ",
|
| 559 |
+
"k": "KEY_ROTATOR_VERBOSE_PICKS",
|
| 560 |
+
"lbl": "Verbose per-request key pick logs (use with debug)",
|
| 561 |
+
"type": "toggle",
|
| 562 |
+
"ph": "false",
|
| 563 |
+
"tag": "advanced"
|
| 564 |
+
},
|
| 565 |
{
|
| 566 |
"g": "Plugins",
|
| 567 |
"icon": "π",
|
multi-provider-key-rotator.cjs
CHANGED
|
@@ -13,13 +13,18 @@
|
|
| 13 |
* KEY_BLACKLIST_COOLDOWN_MS base backoff ms (default 60 000)
|
| 14 |
* KEY_MAX_STRIKES failures before perm (default 3)
|
| 15 |
* LLM_API_KEY_FALLBACK_ENABLED true/false (default true)
|
|
|
|
|
|
|
| 16 |
*/
|
| 17 |
|
| 18 |
const http = require('node:http');
|
| 19 |
const https = require('node:https');
|
| 20 |
|
| 21 |
-
const
|
| 22 |
-
const
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
// βββ Config ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 25 |
|
|
@@ -175,7 +180,7 @@ function isActive(p, key) {
|
|
| 175 |
if (ks.blacklistedUntil === 0) return true; // not blacklisted
|
| 176 |
if (Date.now() >= ks.blacklistedUntil) {
|
| 177 |
ks.blacklistedUntil = 0; // expired β back in pool
|
| 178 |
-
|
| 179 |
return true;
|
| 180 |
}
|
| 181 |
return false;
|
|
@@ -209,12 +214,27 @@ function recordFailure(p, key) {
|
|
| 209 |
const jitter = 1 + ((Math.random() * 2 - 1) * (COOLDOWN_JITTER_PCT / 100));
|
| 210 |
cooldown = Math.max(1000, Math.round(cooldown * jitter));
|
| 211 |
const secs = Math.round(cooldown / 1000);
|
| 212 |
-
|
| 213 |
}
|
| 214 |
|
| 215 |
ks.blacklistedUntil = Date.now() + cooldown;
|
| 216 |
}
|
| 217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
/**
|
| 219 |
* Called on any 2xx/3xx response β resets the key's strike counter.
|
| 220 |
*/
|
|
@@ -223,12 +243,12 @@ function recordSuccess(p, key) {
|
|
| 223 |
if (ks && ks.strikes > 0) {
|
| 224 |
ks.strikes = 0;
|
| 225 |
ks.lastFailureAt = 0;
|
| 226 |
-
|
| 227 |
}
|
| 228 |
}
|
| 229 |
|
| 230 |
function classifyRetryableFailure(status, errCode) {
|
| 231 |
-
const retryableStatus = new Set([408, 425, 429, 500, 502, 503, 504,
|
| 232 |
const retryableErrorCodes = new Set([
|
| 233 |
'ECONNRESET', 'ETIMEDOUT', 'EAI_AGAIN', 'ENOTFOUND',
|
| 234 |
'ECONNREFUSED', 'EPIPE',
|
|
@@ -280,7 +300,7 @@ function nextKey(p) {
|
|
| 280 |
const inflight = p.inFlight.get(key) || 0;
|
| 281 |
if (inflight < MAX_INFLIGHT_PER_KEY) {
|
| 282 |
p.idx = (i + 1) % total; // next call starts AFTER the key we just picked
|
| 283 |
-
|
| 284 |
return key;
|
| 285 |
}
|
| 286 |
if (!bestPick) bestPick = { i, key, inflight, score: Number.POSITIVE_INFINITY };
|
|
@@ -353,10 +373,20 @@ function handleStatus(p, key, status) {
|
|
| 353 |
warn(`[key-rotator] ${p.name}: ...${key.slice(-6)} auth-failed (${status}) β suspended for ${formatHours(PERM_SUSPEND_MS)} h`);
|
| 354 |
return;
|
| 355 |
}
|
| 356 |
-
|
|
|
|
| 357 |
recordFailure(p, key);
|
| 358 |
-
warn(`[key-rotator] ${p.name}:
|
| 359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
recordSuccess(p, key);
|
| 361 |
}
|
| 362 |
}
|
|
@@ -364,9 +394,11 @@ function handleStatus(p, key, status) {
|
|
| 364 |
function handleTransportError(p, key, err) {
|
| 365 |
if (!p || !key) return;
|
| 366 |
const code = err?.code ? String(err.code).toUpperCase() : '';
|
| 367 |
-
|
|
|
|
|
|
|
| 368 |
recordFailure(p, key);
|
| 369 |
-
warn(`[key-rotator] ${p.name}: retryable network code=${code} on ...${key.slice(-6)}`);
|
| 370 |
}
|
| 371 |
}
|
| 372 |
|
|
@@ -428,7 +460,8 @@ function patchFetch() {
|
|
| 428 |
const baseRequest = new Request(input, init);
|
| 429 |
const method = String(baseRequest.method || 'GET').toUpperCase();
|
| 430 |
const replaySafe = method === 'GET' || method === 'HEAD' || method === 'OPTIONS';
|
| 431 |
-
const
|
|
|
|
| 432 |
const triedKeys = new Set();
|
| 433 |
let lastErr = null;
|
| 434 |
let lastResponse = null;
|
|
@@ -478,7 +511,7 @@ function patchFetch() {
|
|
| 478 |
10_000,
|
| 479 |
Math.max(retryAfterMs, FETCH_RETRY_BASE_DELAY_MS * Math.pow(2, attempt - 1)),
|
| 480 |
);
|
| 481 |
-
warn(`[key-rotator] ${provider.name}: fetch retry ${attempt}/${maxAttempts - 1} after status=${response.status}`);
|
| 482 |
await sleep(backoffMs);
|
| 483 |
continue;
|
| 484 |
}
|
|
@@ -488,10 +521,11 @@ function patchFetch() {
|
|
| 488 |
try { handleTransportError(provider, usedKey, err); } catch (_) {}
|
| 489 |
try { endInFlight(provider, usedKey); } catch (_) {}
|
| 490 |
const code = err?.code ? String(err.code).toUpperCase() : '';
|
| 491 |
-
const
|
|
|
|
| 492 |
if (shouldRetry) {
|
| 493 |
const backoffMs = Math.min(10_000, FETCH_RETRY_BASE_DELAY_MS * Math.pow(2, attempt - 1));
|
| 494 |
-
warn(`[key-rotator] ${provider.name}: fetch retry ${attempt}/${maxAttempts - 1} after network code=${code || 'unknown'}`);
|
| 495 |
await sleep(backoffMs);
|
| 496 |
continue;
|
| 497 |
}
|
|
@@ -578,4 +612,4 @@ patchHttpModule(http);
|
|
| 578 |
patchHttpModule(https);
|
| 579 |
startDiagnostics();
|
| 580 |
|
| 581 |
-
log(`[key-rotator] loaded β cooldown base:${BASE_COOLDOWN_MS/1000}s max-strikes:${MAX_STRIKES} perm-suspend:${formatHours(PERM_SUSPEND_MS)}h (cap 16h) max-inflight-per-key:${MAX_INFLIGHT_PER_KEY} diagnostics:${DIAGNOSTICS_ENABLED ? 'on' : 'off'}`);
|
|
|
|
| 13 |
* KEY_BLACKLIST_COOLDOWN_MS base backoff ms (default 60 000)
|
| 14 |
* KEY_MAX_STRIKES failures before perm (default 3)
|
| 15 |
* LLM_API_KEY_FALLBACK_ENABLED true/false (default true)
|
| 16 |
+
* KEY_ROTATOR_LOG_LEVEL info/debug/silent (default info)
|
| 17 |
+
* KEY_ROTATOR_VERBOSE_PICKS true/false (default false)
|
| 18 |
*/
|
| 19 |
|
| 20 |
const http = require('node:http');
|
| 21 |
const https = require('node:https');
|
| 22 |
|
| 23 |
+
const LOG_LEVEL = String(process.env.KEY_ROTATOR_LOG_LEVEL || 'info').trim().toLowerCase();
|
| 24 |
+
const VERBOSE_PICKS = /^(1|true|yes|on)$/i.test(String(process.env.KEY_ROTATOR_VERBOSE_PICKS || '').trim());
|
| 25 |
+
const log = (...a) => { if (LOG_LEVEL !== 'silent') console.error(...a); };
|
| 26 |
+
const warn = (...a) => { if (LOG_LEVEL !== 'silent') console.warn(...a); };
|
| 27 |
+
const debug = (...a) => { if (LOG_LEVEL === 'debug') console.error(...a); };
|
| 28 |
|
| 29 |
// βββ Config ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
|
|
|
|
| 180 |
if (ks.blacklistedUntil === 0) return true; // not blacklisted
|
| 181 |
if (Date.now() >= ks.blacklistedUntil) {
|
| 182 |
ks.blacklistedUntil = 0; // expired β back in pool
|
| 183 |
+
debug(`[key-rotator] ${p.name}: ...${key.slice(-6)} back in pool`);
|
| 184 |
return true;
|
| 185 |
}
|
| 186 |
return false;
|
|
|
|
| 214 |
const jitter = 1 + ((Math.random() * 2 - 1) * (COOLDOWN_JITTER_PCT / 100));
|
| 215 |
cooldown = Math.max(1000, Math.round(cooldown * jitter));
|
| 216 |
const secs = Math.round(cooldown / 1000);
|
| 217 |
+
debug(`[key-rotator] ${p.name}: ...${key.slice(-6)} strike ${ks.strikes}/${MAX_STRIKES} β backoff ${secs}s`);
|
| 218 |
}
|
| 219 |
|
| 220 |
ks.blacklistedUntil = Date.now() + cooldown;
|
| 221 |
}
|
| 222 |
|
| 223 |
+
/**
 * Register a transient, retryable failure (i.e. not a quota/rate-limit one)
 * against a key.
 *
 * The key is benched for roughly one base cooldown (with jitter) while its
 * strike counter is left untouched. An existing, later blacklist expiry is
 * never shortened.
 *
 * @param {object} p    provider record; its `keyState` map and `name` are used
 * @param {string} key  API key the failure is attributed to
 */
function recordTransientFailure(p, key) {
  const existing = p.keyState.get(key);
  const state = existing || makeKeyState();
  if (!existing) {
    // First time this key is seen: start tracking it.
    p.keyState.set(key, state);
  }

  state.lastFailureAt = Date.now();

  // Spread the cooldown by +/- COOLDOWN_JITTER_PCT percent, floor of 1s.
  const jitterFactor = 1 + ((Math.random() * 2 - 1) * (COOLDOWN_JITTER_PCT / 100));
  const cooldownMs = Math.max(1000, Math.round(BASE_COOLDOWN_MS * jitterFactor));

  // Only ever extend the blacklist window, never pull it earlier.
  const currentUntil = state.blacklistedUntil || 0;
  state.blacklistedUntil = Math.max(currentUntil, Date.now() + cooldownMs);

  const secs = Math.round(cooldownMs / 1000);
  debug(`[key-rotator] ${p.name}: ...${key.slice(-6)} transient backoff ${secs}s (strikes unchanged)`);
}
|
| 237 |
+
|
| 238 |
/**
|
| 239 |
* Called on any 2xx/3xx response β resets the key's strike counter.
|
| 240 |
*/
|
|
|
|
| 243 |
if (ks && ks.strikes > 0) {
|
| 244 |
ks.strikes = 0;
|
| 245 |
ks.lastFailureAt = 0;
|
| 246 |
+
debug(`[key-rotator] ${p.name}: ...${key.slice(-6)} recovered β strikes reset`);
|
| 247 |
}
|
| 248 |
}
|
| 249 |
|
| 250 |
function classifyRetryableFailure(status, errCode) {
|
| 251 |
+
const retryableStatus = new Set([402, 408, 425, 429, 500, 502, 503, 504, 520, 521, 522, 523, 524, 529]);
|
| 252 |
const retryableErrorCodes = new Set([
|
| 253 |
'ECONNRESET', 'ETIMEDOUT', 'EAI_AGAIN', 'ENOTFOUND',
|
| 254 |
'ECONNREFUSED', 'EPIPE',
|
|
|
|
| 300 |
const inflight = p.inFlight.get(key) || 0;
|
| 301 |
if (inflight < MAX_INFLIGHT_PER_KEY) {
|
| 302 |
p.idx = (i + 1) % total; // next call starts AFTER the key we just picked
|
| 303 |
+
if (VERBOSE_PICKS) debug(`[key-rotator] ${p.name}: picked ...${key.slice(-6)} inflight=${inflight + 1}/${MAX_INFLIGHT_PER_KEY}`);
|
| 304 |
return key;
|
| 305 |
}
|
| 306 |
if (!bestPick) bestPick = { i, key, inflight, score: Number.POSITIVE_INFINITY };
|
|
|
|
| 373 |
warn(`[key-rotator] ${p.name}: ...${key.slice(-6)} auth-failed (${status}) β suspended for ${formatHours(PERM_SUSPEND_MS)} h`);
|
| 374 |
return;
|
| 375 |
}
|
| 376 |
+
|
| 377 |
+
if (status === 429 || status === 402) {
|
| 378 |
recordFailure(p, key);
|
| 379 |
+
warn(`[key-rotator] ${p.name}: quota/rate status=${status} on ...${key.slice(-6)}`);
|
| 380 |
+
return;
|
| 381 |
+
}
|
| 382 |
+
|
| 383 |
+
if (classifyRetryableFailure(status)) {
|
| 384 |
+
recordTransientFailure(p, key);
|
| 385 |
+
warn(`[key-rotator] ${p.name}: transient status=${status} on ...${key.slice(-6)}`);
|
| 386 |
+
return;
|
| 387 |
+
}
|
| 388 |
+
|
| 389 |
+
if (status >= 200 && status < 400) {
|
| 390 |
recordSuccess(p, key);
|
| 391 |
}
|
| 392 |
}
|
|
|
|
| 394 |
function handleTransportError(p, key, err) {
|
| 395 |
if (!p || !key) return;
|
| 396 |
const code = err?.code ? String(err.code).toUpperCase() : '';
|
| 397 |
+
const name = String(err?.name || '');
|
| 398 |
+
const retryable = classifyRetryableFailure(undefined, code) || name === 'AbortError';
|
| 399 |
+
if (retryable) {
|
| 400 |
recordFailure(p, key);
|
| 401 |
+
warn(`[key-rotator] ${p.name}: retryable network ${name || 'Error'}${code ? ` code=${code}` : ''} on ...${key.slice(-6)}`);
|
| 402 |
}
|
| 403 |
}
|
| 404 |
|
|
|
|
| 460 |
const baseRequest = new Request(input, init);
|
| 461 |
const method = String(baseRequest.method || 'GET').toUpperCase();
|
| 462 |
const replaySafe = method === 'GET' || method === 'HEAD' || method === 'OPTIONS';
|
| 463 |
+
const retryEligible = replaySafe || method === 'POST';
|
| 464 |
+
const maxAttempts = retryEligible ? 1 + FETCH_MAX_RETRIES : 1;
|
| 465 |
const triedKeys = new Set();
|
| 466 |
let lastErr = null;
|
| 467 |
let lastResponse = null;
|
|
|
|
| 511 |
10_000,
|
| 512 |
Math.max(retryAfterMs, FETCH_RETRY_BASE_DELAY_MS * Math.pow(2, attempt - 1)),
|
| 513 |
);
|
| 514 |
+
warn(`[key-rotator] ${provider.name}: fetch retry ${attempt}/${maxAttempts - 1} after status=${response.status} method=${method}`);
|
| 515 |
await sleep(backoffMs);
|
| 516 |
continue;
|
| 517 |
}
|
|
|
|
| 521 |
try { handleTransportError(provider, usedKey, err); } catch (_) {}
|
| 522 |
try { endInFlight(provider, usedKey); } catch (_) {}
|
| 523 |
const code = err?.code ? String(err.code).toUpperCase() : '';
|
| 524 |
+
const isAbort = String(err?.name || '') === 'AbortError';
|
| 525 |
+
const shouldRetry = attempt < maxAttempts && (classifyRetryableFailure(undefined, code) || isAbort);
|
| 526 |
if (shouldRetry) {
|
| 527 |
const backoffMs = Math.min(10_000, FETCH_RETRY_BASE_DELAY_MS * Math.pow(2, attempt - 1));
|
| 528 |
+
warn(`[key-rotator] ${provider.name}: fetch retry ${attempt}/${maxAttempts - 1} after network ${isAbort ? 'AbortError' : `code=${code || 'unknown'}`} method=${method}`);
|
| 529 |
await sleep(backoffMs);
|
| 530 |
continue;
|
| 531 |
}
|
|
|
|
| 612 |
patchHttpModule(https);
|
| 613 |
startDiagnostics();
|
| 614 |
|
| 615 |
+
log(`[key-rotator] loaded β cooldown base:${BASE_COOLDOWN_MS/1000}s max-strikes:${MAX_STRIKES} perm-suspend:${formatHours(PERM_SUSPEND_MS)}h (cap 16h) max-inflight-per-key:${MAX_INFLIGHT_PER_KEY} diagnostics:${DIAGNOSTICS_ENABLED ? 'on' : 'off'} log-level:${LOG_LEVEL} verbose-picks:${VERBOSE_PICKS ? 'on' : 'off'}`);
|
openclaw-sync.py
CHANGED
|
@@ -140,7 +140,8 @@ def copy_state_entry_with_retry(source_path: Path, backup_path: Path, attempts:
|
|
| 140 |
continue
|
| 141 |
raise last_exc
|
| 142 |
|
| 143 |
-
def snapshot_state_into_workspace() ->
|
|
|
|
| 144 |
try:
|
| 145 |
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
| 146 |
# Atomic snapshot: copy to a staging dir first, then rename.
|
|
@@ -185,6 +186,7 @@ def snapshot_state_into_workspace() -> None:
|
|
| 185 |
# known-good version for only those entries (staging was seeded from
|
| 186 |
# previous backup). This preserves forward progress for the rest.
|
| 187 |
if skipped_entries:
|
|
|
|
| 188 |
for name, entry_exc in skipped_entries:
|
| 189 |
print(f"Warning: keeping previous state entry {name}: {entry_exc}")
|
| 190 |
print(
|
|
@@ -200,10 +202,11 @@ def snapshot_state_into_workspace() -> None:
|
|
| 200 |
if staging_dir.exists():
|
| 201 |
shutil.rmtree(staging_dir, ignore_errors=True)
|
| 202 |
print(f"Warning: could not snapshot OpenClaw state: {exc}")
|
|
|
|
| 203 |
|
| 204 |
try:
|
| 205 |
if not WHATSAPP_ENABLED:
|
| 206 |
-
return
|
| 207 |
|
| 208 |
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
| 209 |
|
|
@@ -212,16 +215,16 @@ def snapshot_state_into_workspace() -> None:
|
|
| 212 |
shutil.rmtree(WHATSAPP_BACKUP_DIR, ignore_errors=True)
|
| 213 |
print("Removed backed-up WhatsApp credentials after reset request.")
|
| 214 |
RESET_MARKER.unlink(missing_ok=True)
|
| 215 |
-
return
|
| 216 |
|
| 217 |
if not WHATSAPP_CREDS_DIR.exists():
|
| 218 |
-
return
|
| 219 |
|
| 220 |
file_count = count_files(WHATSAPP_CREDS_DIR)
|
| 221 |
if file_count < 2:
|
| 222 |
if file_count > 0:
|
| 223 |
print(f"WhatsApp backup skipped: credentials incomplete ({file_count} files).")
|
| 224 |
-
return
|
| 225 |
|
| 226 |
WHATSAPP_BACKUP_DIR.parent.mkdir(parents=True, exist_ok=True)
|
| 227 |
if WHATSAPP_BACKUP_DIR.exists():
|
|
@@ -229,6 +232,8 @@ def snapshot_state_into_workspace() -> None:
|
|
| 229 |
shutil.copytree(WHATSAPP_CREDS_DIR, WHATSAPP_BACKUP_DIR)
|
| 230 |
except Exception as exc:
|
| 231 |
print(f"Warning: could not snapshot WhatsApp state: {exc}")
|
|
|
|
|
|
|
| 232 |
|
| 233 |
|
| 234 |
def restore_embedded_state() -> None:
|
|
@@ -515,7 +520,7 @@ def _sync_once_unlocked(
|
|
| 515 |
write_status("disabled", "HF_TOKEN is not configured.")
|
| 516 |
return (last_fingerprint or "", last_marker or (0, 0, 0, ""))
|
| 517 |
|
| 518 |
-
snapshot_state_into_workspace()
|
| 519 |
repo_id = ensure_repo_exists()
|
| 520 |
current_marker = metadata_marker(WORKSPACE)
|
| 521 |
if last_marker is not None and current_marker == last_marker:
|
|
@@ -547,10 +552,13 @@ def _sync_once_unlocked(
|
|
| 547 |
commit_message=f"HuggingClaw sync {time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}",
|
| 548 |
ignore_patterns=[".git/*", ".git"],
|
| 549 |
)
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
|
|
|
|
|
|
|
|
|
| 554 |
finally:
|
| 555 |
shutil.rmtree(snapshot_dir, ignore_errors=True)
|
| 556 |
|
|
|
|
| 140 |
continue
|
| 141 |
raise last_exc
|
| 142 |
|
| 143 |
+
def snapshot_state_into_workspace() -> bool:
|
| 144 |
+
had_copy_failures = False
|
| 145 |
try:
|
| 146 |
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
| 147 |
# Atomic snapshot: copy to a staging dir first, then rename.
|
|
|
|
| 186 |
# known-good version for only those entries (staging was seeded from
|
| 187 |
# previous backup). This preserves forward progress for the rest.
|
| 188 |
if skipped_entries:
|
| 189 |
+
had_copy_failures = True
|
| 190 |
for name, entry_exc in skipped_entries:
|
| 191 |
print(f"Warning: keeping previous state entry {name}: {entry_exc}")
|
| 192 |
print(
|
|
|
|
| 202 |
if staging_dir.exists():
|
| 203 |
shutil.rmtree(staging_dir, ignore_errors=True)
|
| 204 |
print(f"Warning: could not snapshot OpenClaw state: {exc}")
|
| 205 |
+
had_copy_failures = True
|
| 206 |
|
| 207 |
try:
|
| 208 |
if not WHATSAPP_ENABLED:
|
| 209 |
+
return had_copy_failures
|
| 210 |
|
| 211 |
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
| 212 |
|
|
|
|
| 215 |
shutil.rmtree(WHATSAPP_BACKUP_DIR, ignore_errors=True)
|
| 216 |
print("Removed backed-up WhatsApp credentials after reset request.")
|
| 217 |
RESET_MARKER.unlink(missing_ok=True)
|
| 218 |
+
return had_copy_failures
|
| 219 |
|
| 220 |
if not WHATSAPP_CREDS_DIR.exists():
|
| 221 |
+
return had_copy_failures
|
| 222 |
|
| 223 |
file_count = count_files(WHATSAPP_CREDS_DIR)
|
| 224 |
if file_count < 2:
|
| 225 |
if file_count > 0:
|
| 226 |
print(f"WhatsApp backup skipped: credentials incomplete ({file_count} files).")
|
| 227 |
+
return had_copy_failures
|
| 228 |
|
| 229 |
WHATSAPP_BACKUP_DIR.parent.mkdir(parents=True, exist_ok=True)
|
| 230 |
if WHATSAPP_BACKUP_DIR.exists():
|
|
|
|
| 232 |
shutil.copytree(WHATSAPP_CREDS_DIR, WHATSAPP_BACKUP_DIR)
|
| 233 |
except Exception as exc:
|
| 234 |
print(f"Warning: could not snapshot WhatsApp state: {exc}")
|
| 235 |
+
had_copy_failures = True
|
| 236 |
+
return had_copy_failures
|
| 237 |
|
| 238 |
|
| 239 |
def restore_embedded_state() -> None:
|
|
|
|
| 520 |
write_status("disabled", "HF_TOKEN is not configured.")
|
| 521 |
return (last_fingerprint or "", last_marker or (0, 0, 0, ""))
|
| 522 |
|
| 523 |
+
had_snapshot_copy_failures = snapshot_state_into_workspace()
|
| 524 |
repo_id = ensure_repo_exists()
|
| 525 |
current_marker = metadata_marker(WORKSPACE)
|
| 526 |
if last_marker is not None and current_marker == last_marker:
|
|
|
|
| 552 |
commit_message=f"HuggingClaw sync {time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}",
|
| 553 |
ignore_patterns=[".git/*", ".git"],
|
| 554 |
)
|
| 555 |
+
if had_snapshot_copy_failures:
|
| 556 |
+
print("Warning: skipping remote prune this pass because local state snapshot had copy failures.")
|
| 557 |
+
else:
|
| 558 |
+
try:
|
| 559 |
+
prune_remote_deleted_files(repo_id, snapshot_dir)
|
| 560 |
+
except Exception as prune_exc:
|
| 561 |
+
print(f"Warning: could not prune stale remote files: {prune_exc}")
|
| 562 |
finally:
|
| 563 |
shutil.rmtree(snapshot_dir, ignore_errors=True)
|
| 564 |
|
start.sh
CHANGED
|
@@ -783,6 +783,36 @@ if [ "$WHATSAPP_ENABLED_NORMALIZED" = "true" ]; then
|
|
| 783 |
CONFIG_JSON=$(echo "$CONFIG_JSON" | jq '.channels.whatsapp = {"dmPolicy": "pairing"}')
|
| 784 |
fi
|
| 785 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 786 |
# Write config
|
| 787 |
EXISTING_CONFIG="/home/node/.openclaw/openclaw.json"
|
| 788 |
WHATSAPP_CONFIG_ENABLED=false
|
|
@@ -820,8 +850,8 @@ if [ -f "$EXISTING_CONFIG" ]; then
|
|
| 820 |
| if (($injectedModelsProviders | length) > 0) then
|
| 821 |
($injectedModelsProviders | to_entries) as $entries
|
| 822 |
| reduce $entries[] as $e (.;
|
| 823 |
-
.models.providers[$e.key]
|
| 824 |
-
|
| 825 |
)
|
| 826 |
else
|
| 827 |
.
|
|
@@ -852,16 +882,29 @@ if [ -f "$EXISTING_CONFIG" ]; then
|
|
| 852 |
"$EXISTING_CONFIG" 2>/dev/null)
|
| 853 |
|
| 854 |
if [ -n "$PATCHED" ]; then
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 858 |
else
|
| 859 |
-
echo "Patch failed
|
| 860 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 861 |
fi
|
| 862 |
else
|
| 863 |
echo "No restored config β writing fresh config..."
|
| 864 |
-
|
| 865 |
fi
|
| 866 |
chmod 600 "$EXISTING_CONFIG"
|
| 867 |
|
|
|
|
| 783 |
CONFIG_JSON=$(echo "$CONFIG_JSON" | jq '.channels.whatsapp = {"dmPolicy": "pairing"}')
|
| 784 |
fi
|
| 785 |
|
| 786 |
+
|
| 787 |
+
validate_json_file() {
|
| 788 |
+
local file="$1"
|
| 789 |
+
[ -f "$file" ] || return 1
|
| 790 |
+
jq -e . "$file" >/dev/null 2>&1
|
| 791 |
+
}
|
| 792 |
+
|
| 793 |
+
#######################################
# Write a JSON payload to a file by staging it in a sibling temp file,
# validating it with jq, and renaming it over the destination.
# Arguments: $1 - destination path
#            $2 - JSON text to write
# Outputs:   an error line on stderr when the payload is not valid JSON
# Returns:   0 on success; 1 when the temp file cannot be created or
#            written, the payload fails `jq -e .`, or the rename fails.
#            On any failure the destination is left untouched and the
#            temp file is removed.
#######################################
write_json_atomic() {
  local dest="${1:-}"
  local payload="${2:-}"
  local tmp

  [ -n "$dest" ] || return 1

  # mktemp gives an unpredictable name and creates the file mode 0600,
  # so the staged content is never readable by other users. It lives
  # next to $dest so the final mv is a same-directory rename.
  tmp="$(mktemp "${dest}.tmp.XXXXXX")" || return 1

  if ! printf '%s\n' "$payload" > "$tmp"; then
    rm -f -- "$tmp"
    return 1
  fi

  # Read via stdin so an odd path can never be parsed as a jq option.
  if ! { jq -e . < "$tmp"; } >/dev/null 2>&1; then
    echo "ERROR: refusing to write invalid JSON to $dest" >&2
    rm -f -- "$tmp"
    return 1
  fi

  if ! mv -- "$tmp" "$dest"; then
    # Do not leave the staged copy behind when the rename fails.
    rm -f -- "$tmp"
    return 1
  fi
}
|
| 806 |
+
|
| 807 |
+
#######################################
# Best-effort timestamped backup of a config file.
# Copies $1 to "$1.backup.<YYYYmmdd-HHMMSS>", preferring `cp -a` and
# falling back to a plain `cp`.
# Arguments: $1 - path of the file to back up
# Outputs:   a warning on stderr when neither copy attempt succeeds
# Returns:   always 0 - a missing source or a failed copy must not
#            abort the caller.
#######################################
backup_config_copy() {
  local src="${1:-}"
  [ -f "$src" ] || return 0

  local stamp backup
  stamp="$(date +%Y%m%d-%H%M%S)"
  backup="${src}.backup.${stamp}"

  # cp's own diagnostics are suppressed on both attempts; the single
  # warning below reports the overall failure instead.
  if cp -a -- "$src" "$backup" 2>/dev/null \
    || cp -- "$src" "$backup" 2>/dev/null; then
    return 0
  fi

  echo "WARNING: could not back up $src to $backup" >&2
  return 0
}
|
| 815 |
+
|
| 816 |
# Write config
|
| 817 |
EXISTING_CONFIG="/home/node/.openclaw/openclaw.json"
|
| 818 |
WHATSAPP_CONFIG_ENABLED=false
|
|
|
|
| 850 |
| if (($injectedModelsProviders | length) > 0) then
|
| 851 |
($injectedModelsProviders | to_entries) as $entries
|
| 852 |
| reduce $entries[] as $e (.;
|
| 853 |
+
(($desired.models.providers[$e.key] // {}) * {models: (($e.value.models // []) | unique_by(.id))}) as $desiredProvider
|
| 854 |
+
| .models.providers[$e.key] = ((.models.providers[$e.key] // {}) * $desiredProvider)
|
| 855 |
)
|
| 856 |
else
|
| 857 |
.
|
|
|
|
| 882 |
"$EXISTING_CONFIG" 2>/dev/null)
|
| 883 |
|
| 884 |
if [ -n "$PATCHED" ]; then
|
| 885 |
+
backup_config_copy "$EXISTING_CONFIG"
|
| 886 |
+
if write_json_atomic "$EXISTING_CONFIG" "$PATCHED"; then
|
| 887 |
+
echo "Config patched successfully."
|
| 888 |
+
else
|
| 889 |
+
echo "Patch produced invalid JSON β writing fresh config."
|
| 890 |
+
write_json_atomic "$EXISTING_CONFIG" "$CONFIG_JSON" || { echo "ERROR: could not write valid fallback config" >&2; exit 1; }
|
| 891 |
+
fi
|
| 892 |
else
|
| 893 |
+
echo "Patch failed."
|
| 894 |
+
# Validate only on patch failure (as requested). If restored config is invalid,
|
| 895 |
+
# quarantine it and regenerate from runtime config; otherwise keep it untouched.
|
| 896 |
+
if ! validate_json_file "$EXISTING_CONFIG"; then
|
| 897 |
+
echo "Restored config is invalid JSON β backing up and regenerating from runtime config."
|
| 898 |
+
cp "$EXISTING_CONFIG" "${EXISTING_CONFIG}.invalid.$(date +%Y%m%d-%H%M%S)" 2>/dev/null || true
|
| 899 |
+
backup_config_copy "$EXISTING_CONFIG"
|
| 900 |
+
write_json_atomic "$EXISTING_CONFIG" "$CONFIG_JSON" || { echo "ERROR: could not write valid fallback config" >&2; exit 1; }
|
| 901 |
+
else
|
| 902 |
+
echo "Patch failed but restored config is valid β keeping existing config unchanged."
|
| 903 |
+
fi
|
| 904 |
fi
|
| 905 |
else
|
| 906 |
echo "No restored config β writing fresh config..."
|
| 907 |
+
write_json_atomic "$EXISTING_CONFIG" "$CONFIG_JSON" || { echo "ERROR: could not write valid config" >&2; exit 1; }
|
| 908 |
fi
|
| 909 |
chmod 600 "$EXISTING_CONFIG"
|
| 910 |
|