titan-server / src /managers /KeyManager.ts
M-hv1's picture
Upload 3 files
c6ec6e8 verified
Raw
History Blame Contribute Delete
5.88 kB
/**
* src/managers/KeyManager.ts
*
* Manages a pool of API keys for a single LLM provider with:
* - Round-robin selection across active keys
* - Automatic per-key cooldown on HTTP 429 (configurable duration)
* - Async waiting if ALL keys are in cooldown
* - Dynamic concurrency limit recommendation: min(activeKeys × 2, 20), never below 1
*
* Usage:
* const mgr = new KeyManager(['key1', 'key2', 'key3'], 'groq');
* const key = await mgr.getKey(); // non-blocking if any key available
* // ... if 429 error ...
* mgr.blacklist(key);
*/
import { logger } from '../utils/logger';
// ─── Types ────────────────────────────────────────────────────────────────────
export type LLMProvider = 'groq' | 'gemini' | 'sambanova';
// ─── KeyManager ───────────────────────────────────────────────────────────────
/**
 * Pool of API keys for a single LLM provider.
 *
 * Keys are handed out in round-robin order. A key reported via
 * {@link KeyManager.blacklist} is skipped until `cooldownMs` has elapsed since
 * it was blacklisted; expired entries are removed lazily whenever the pool is
 * queried. When every key is cooling down, {@link KeyManager.getKey} waits
 * instead of throwing.
 */
export class KeyManager {
  /**
   * Cooldown applied when `cooldownMs` is omitted or is not a finite number.
   * 65 s — picked to sit slightly above a 60 s provider rate-limit window.
   */
  private static readonly DEFAULT_COOLDOWN_MS = 65_000;

  /** Provider label; used in log messages and in `getStatus()`. */
  private readonly provider: LLMProvider;

  /** Frozen, de-duplicated list of keys in their original order. */
  private readonly keys: readonly string[];

  /** How long (ms) a blacklisted key stays out of rotation. */
  private readonly cooldownMs: number;

  /**
   * Maps an API key → the timestamp (ms since epoch) when it was blacklisted.
   * Entries are pruned lazily by `pruneBlacklist()`.
   */
  private readonly blacklisted = new Map<string, number>();

  /**
   * Round-robin cursor within the *active* (non-blacklisted) key list.
   * It is always reduced modulo the current active-list length, so it stays
   * in range as keys enter and leave the blacklist.
   */
  private rrIndex = 0;

  /**
   * @param keys - API keys for the provider. Duplicates are collapsed (first
   *   occurrence wins) so that counts and rotation stay consistent.
   * @param provider - Provider these keys belong to.
   * @param cooldownMs - Per-key cooldown in milliseconds. A non-finite value
   *   (e.g. `NaN` from a failed env-var parse) falls back to the default,
   *   because it would otherwise keep blacklisted keys out of rotation forever.
   * @throws Error if `keys` is empty.
   */
  constructor(
    keys: string[],
    provider: LLMProvider,
    cooldownMs = KeyManager.DEFAULT_COOLDOWN_MS
  ) {
    if (keys.length === 0) {
      throw new Error(
        `KeyManager[${provider}]: received 0 API keys. ` +
          `Check your environment variable.`
      );
    }

    // A Set iterates in insertion order, so rotation order is unchanged for
    // inputs that were already unique.
    this.keys = Object.freeze([...new Set(keys)]);
    this.provider = provider;

    if (this.keys.length < keys.length) {
      logger.warn(
        `KeyManager[${provider}]: ignored ${keys.length - this.keys.length} duplicate key(s)`
      );
    }

    if (Number.isFinite(cooldownMs)) {
      this.cooldownMs = cooldownMs;
    } else {
      // NaN / ±Infinity would make `now - blacklistedAt >= cooldownMs` never
      // true, so no key would ever be restored.
      logger.warn(
        `KeyManager[${provider}]: invalid cooldownMs (${cooldownMs}); ` +
          `falling back to ${KeyManager.DEFAULT_COOLDOWN_MS}ms`
      );
      this.cooldownMs = KeyManager.DEFAULT_COOLDOWN_MS;
    }

    logger.info(
      `KeyManager[${provider}]: initialized with ${this.keys.length} key(s), ` +
        `cooldown=${this.cooldownMs}ms`
    );
  }

  // ─── Public API ─────────────────────────────────────────────────────────

  /**
   * Number of keys NOT currently in cooldown (expired cooldowns are pruned first).
   */
  get activeCount(): number {
    this.pruneBlacklist();
    return this.keys.filter(k => !this.blacklisted.has(k)).length;
  }

  /**
   * Recommended concurrency value for this provider.
   * Formula: min(activeKeys × 2, 20), with a minimum of 1.
   */
  get concurrencyLimit(): number {
    return Math.min(Math.max(this.activeCount * 2, 1), 20);
  }

  /**
   * Returns the next available API key using round-robin rotation.
   *
   * If all keys are in cooldown, this method waits asynchronously until the
   * earliest-blacklisted key is due to recover (plus a 100 ms buffer) and then
   * tries again, rather than throwing.
   *
   * @returns A key that is not in cooldown at the time of selection.
   */
  async getKey(): Promise<string> {
    for (;;) {
      this.pruneBlacklist();
      const available = this.keys.filter(k => !this.blacklisted.has(k));

      if (available.length > 0) {
        // Pick at the cursor, then advance it for the next caller.
        const key = available[this.rrIndex % available.length];
        this.rrIndex = (this.rrIndex + 1) % available.length;
        return key;
      }

      // ── All keys are in cooldown — compute the shortest wait ──────────
      const earliestBlacklist = Math.min(...this.blacklisted.values());
      const waitMs = Math.max(0, earliestBlacklist + this.cooldownMs - Date.now());
      logger.warn(
        `KeyManager[${this.provider}]: ALL ${this.keys.length} key(s) in cooldown. ` +
          `Waiting ${Math.ceil(waitMs / 1000)}s for recovery...`
      );
      await new Promise<void>(resolve => setTimeout(resolve, waitMs + 100 /* tiny buffer */));
      // Loop back: at least one key should now be prunable from the blacklist.
    }
  }

  /**
   * Marks a key as rate-limited. It will be skipped until the cooldown expires.
   *
   * Unknown keys are ignored with a warning. Blacklisting a key that is
   * already cooling down is a no-op: the cooldown is measured from the first
   * report and is not extended.
   *
   * @param key - The exact key string returned by `getKey()`
   */
  blacklist(key: string): void {
    if (!this.keys.includes(key)) {
      logger.warn(`KeyManager[${this.provider}]: attempted to blacklist unknown key — ignoring`);
      return;
    }
    if (this.blacklisted.has(key)) {
      return;
    }
    this.blacklisted.set(key, Date.now());
    // Only the last four characters are logged so the full key never reaches the logs.
    logger.warn(
      `KeyManager[${this.provider}]: key ***${key.slice(-4)} blacklisted. ` +
        `${this.activeCount} active key(s) remaining.`
    );
  }

  /**
   * Returns a snapshot of the current key pool status (for health endpoints).
   *
   * `total` counts distinct keys, and `active + coolingDown === total`.
   */
  getStatus(): { provider: LLMProvider; total: number; active: number; coolingDown: number } {
    this.pruneBlacklist();
    // Only known keys can be blacklisted and `keys` is de-duplicated, so the
    // map size is exactly the number of pool keys in cooldown.
    const coolingDown = this.blacklisted.size;
    return {
      provider: this.provider,
      total: this.keys.length,
      active: this.keys.length - coolingDown,
      coolingDown,
    };
  }

  // ─── Private Helpers ────────────────────────────────────────────────────

  /**
   * Removes keys from the blacklist whose cooldown duration has elapsed.
   * Called lazily before every key selection and status/count query.
   */
  private pruneBlacklist(): void {
    const now = Date.now();
    for (const [key, blacklistedAt] of this.blacklisted.entries()) {
      if (now - blacklistedAt >= this.cooldownMs) {
        this.blacklisted.delete(key);
        logger.info(
          `KeyManager[${this.provider}]: key ***${key.slice(-4)} restored from cooldown`
        );
      }
    }
  }
}