Spaces:
Running
Running
| /** | |
| * src/managers/KeyManager.ts | |
| * | |
| * Manages a pool of API keys for a single LLM provider with: | |
| * - Round-robin selection across active keys | |
| * - Automatic per-key cooldown on HTTP 429 (configurable duration) | |
| * - Async waiting if ALL keys are in cooldown | |
| * - Dynamic concurrency limit recommendation: min(activeKeys × 2, 20) | |
| * | |
| * Usage: | |
| * const mgr = new KeyManager(['key1', 'key2', 'key3'], 'groq'); | |
| * const key = await mgr.getKey(); // non-blocking if any key available | |
| * // ... if 429 error ... | |
| * mgr.blacklist(key); | |
| */ | |
| import { logger } from '../utils/logger'; | |
| // ─── Types ──────────────────────────────────────────────────────────────────── | |
| export type LLMProvider = 'groq' | 'gemini' | 'sambanova'; | |
| // ─── KeyManager ─────────────────────────────────────────────────────────────── | |
/**
 * Pool of API keys for a single LLM provider.
 *
 * - Hands out keys in round-robin order across the keys that are not cooling down.
 * - `blacklist(key)` puts a key into cooldown for `cooldownMs` milliseconds.
 * - `getKey()` waits asynchronously when every key is cooling down.
 * - `concurrencyLimit` recommends a concurrency value: min(activeKeys × 2, 20), at least 1.
 */
export class KeyManager {
  /**
   * Largest delay (2^31 - 1 ms) that `setTimeout` honours. Larger values are
   * coerced to 1 ms by the runtime, which would turn a long wait into a busy loop.
   */
  private static readonly MAX_TIMER_DELAY_MS = 2_147_483_647;

  private readonly provider: LLMProvider;
  private readonly keys: readonly string[];
  private readonly cooldownMs: number;

  /**
   * Maps an API key → the timestamp (ms since epoch) when it was blacklisted.
   * Expired entries are removed lazily by `pruneBlacklist()`.
   */
  private readonly blacklisted = new Map<string, number>();

  /**
   * Round-robin cursor into the list of currently *active* (non-blacklisted) keys.
   * It is always reduced modulo the current active-list length before use.
   */
  private rrIndex = 0;

  /**
   * @param keys - API keys to rotate through; must contain at least one entry.
   * @param provider - Provider label, used in log and error messages.
   * @param cooldownMs - How long a blacklisted key is skipped, in milliseconds.
   * @throws Error if `keys` is empty or `cooldownMs` is NaN / ±Infinity.
   */
  constructor(
    keys: string[],
    provider: LLMProvider,
    cooldownMs = 65_000 // 65 s — slightly above the standard 60 s Groq cooldown
  ) {
    if (keys.length === 0) {
      throw new Error(
        `KeyManager[${provider}]: received 0 API keys. ` +
          `Check your environment variable.`
      );
    }
    // A non-finite cooldown means entries never expire and getKey() could
    // never compute a meaningful wait, so reject it up front.
    if (!Number.isFinite(cooldownMs)) {
      throw new Error(
        `KeyManager[${provider}]: cooldownMs must be a finite number, received ${cooldownMs}.`
      );
    }

    // Defensive copy so later mutation of the caller's array cannot affect the pool.
    this.keys = Object.freeze([...keys]);
    this.provider = provider;
    this.cooldownMs = cooldownMs;

    logger.info(
      `KeyManager[${provider}]: initialized with ${keys.length} key(s), ` +
        `cooldown=${cooldownMs}ms`
    );
  }

  // ─── Public API ─────────────────────────────────────────────────────────

  /**
   * Number of pool entries NOT currently in cooldown.
   * Expired blacklist entries are pruned before counting.
   */
  get activeCount(): number {
    this.pruneBlacklist();
    return this.keys.filter(k => !this.blacklisted.has(k)).length;
  }

  /**
   * Recommended p-limit concurrency value for this provider.
   * Formula: min(activeKeys × 2, 20), minimum of 1.
   */
  get concurrencyLimit(): number {
    return Math.min(Math.max(this.activeCount * 2, 1), 20);
  }

  /**
   * Returns the next available API key using round-robin rotation.
   *
   * If all keys are in cooldown, this method WAITS asynchronously until the
   * earliest-blacklisted key is due to recover, then checks again, instead of
   * throwing.
   *
   * @returns A key from the pool that is not in cooldown at the time of return.
   */
  async getKey(): Promise<string> {
    for (;;) {
      this.pruneBlacklist();
      const available = this.keys.filter(k => !this.blacklisted.has(k));

      if (available.length > 0) {
        // Pick at the cursor, then advance it for the next caller.
        const key = available[this.rrIndex % available.length];
        this.rrIndex = (this.rrIndex + 1) % available.length;
        return key;
      }

      // ── All keys are in cooldown — compute the shortest wait ──────────
      // `available` is empty and the pool is non-empty, so the blacklist
      // holds at least one entry and Math.min receives at least one argument.
      const earliestBlacklist = Math.min(...this.blacklisted.values());
      const waitMs = Math.max(0, earliestBlacklist + this.cooldownMs - Date.now());

      logger.warn(
        `KeyManager[${this.provider}]: ALL ${this.keys.length} key(s) in cooldown. ` +
          `Waiting ${Math.ceil(waitMs / 1000)}s for recovery...`
      );

      // +100 ms buffer so the key has definitely expired when we re-check;
      // clamp to the timer maximum and let the loop sleep again if needed.
      const delayMs = Math.min(waitMs + 100, KeyManager.MAX_TIMER_DELAY_MS);
      await new Promise<void>(resolve => setTimeout(resolve, delayMs));
    }
  }

  /**
   * Marks a key as rate-limited. It will be skipped until the cooldown expires.
   *
   * Unknown keys are ignored with a warning. A key that is still inside its
   * cooldown keeps its original timestamp (the cooldown is not extended).
   *
   * @param key - The exact key string returned by `getKey()`
   */
  blacklist(key: string): void {
    if (!this.keys.includes(key)) {
      logger.warn(`KeyManager[${this.provider}]: attempted to blacklist unknown key — ignoring`);
      return;
    }

    // Drop expired entries first: otherwise a stale entry for this key would
    // make the new rate-limit report look like a duplicate, and the key would
    // be handed out again on the very next prune.
    this.pruneBlacklist();

    if (this.blacklisted.has(key)) {
      return;
    }

    this.blacklisted.set(key, Date.now());
    logger.warn(
      `KeyManager[${this.provider}]: key ***${key.slice(-4)} blacklisted. ` +
        `${this.activeCount} active key(s) remaining.`
    );
  }

  /**
   * Returns a snapshot of the current key pool status (for health endpoints).
   *
   * `active` is computed the same way as `activeCount`, and
   * `coolingDown` is `total - active`, so the three numbers always add up
   * even if the pool contains the same key string more than once.
   */
  getStatus(): { provider: LLMProvider; total: number; active: number; coolingDown: number } {
    const total = this.keys.length;
    const active = this.activeCount; // prunes expired entries as a side effect
    return {
      provider: this.provider,
      total,
      active,
      coolingDown: total - active,
    };
  }

  // ─── Private Helpers ────────────────────────────────────────────────────

  /**
   * Removes keys from the blacklist whose cooldown duration has elapsed
   * and logs each restoration.
   */
  private pruneBlacklist(): void {
    const now = Date.now();
    for (const [key, blacklistedAt] of this.blacklisted.entries()) {
      if (now - blacklistedAt >= this.cooldownMs) {
        this.blacklisted.delete(key);
        logger.info(
          `KeyManager[${this.provider}]: key ***${key.slice(-4)} restored from cooldown`
        );
      }
    }
  }
}