Spaces:

nhttdo
/

MedChat

Running

MedChat / src /key_manager.py

mnhat19

feat: full optimization - Groq LLM, disease cache, deploy configs

b59fc2c 6 days ago

2.55 kB

	"""
	GroqKeyManager — round-robin key rotation with immediate failover on 429.

	Usage:
	mgr = GroqKeyManager([KEY_1, KEY_2])
	key = mgr.current() # get current key
	key = mgr.rotate() # advance to next key (call on 429)
	llm = mgr.build_llm() # ChatGroq with current key (model is set in the constructor)
	"""
	import threading
	import time
	import logging
	from typing import List

	from langchain_groq import ChatGroq

	# Module-level logger named after this module; GroqKeyManager reports key loading,
	# rotation and cooldown events through it.
	logger = logging.getLogger(__name__)


	class GroqKeyManager:
	    """Round-robin Groq API key manager; mutable state is guarded by a lock.

	    Keys are cleaned once at construction (whitespace stripped, empty entries
	    dropped, duplicates removed) and then handed out in list order.
	    A rate-limited key can be put on a per-key cooldown so that ``rotate()``
	    skips it until the cooldown has elapsed.
	    """

	    def __init__(self, keys: List[str], model: str = "llama-3.3-70b-versatile"):
	        """Store the usable keys and the model name used by ``build_llm()``.

	        Args:
	            keys: Candidate API keys. ``None``/empty/whitespace-only entries are
	                ignored and repeated keys are kept only once (first occurrence).
	            model: Model name passed to every ``ChatGroq`` built by this manager.

	        Raises:
	            ValueError: If no usable key remains after cleaning.
	        """
	        stripped = (k.strip() for k in (keys or []) if k)
	        # dict.fromkeys de-duplicates while preserving order; without this a
	        # repeated key would make rotate() "fail over" onto the very same key.
	        self._keys = list(dict.fromkeys(k for k in stripped if k))
	        if not self._keys:
	            raise ValueError("GroqKeyManager: no valid API keys provided")
	        self._model = model
	        self._idx = 0
	        self._lock = threading.Lock()
	        # per-key cooldown tracking: key -> time.monotonic() deadline
	        self._cooldown: dict[str, float] = {}
	        logger.info(
	            "[KeyManager] %d Groq key(s) loaded, model=%s", len(self._keys), model
	        )

	    def current(self) -> str:
	        """Return the key at the current index without advancing it.

	        Cooldowns are not consulted here; only ``rotate()`` skips cooled-down keys.
	        """
	        with self._lock:
	            return self._keys[self._idx % len(self._keys)]

	    def rotate(self) -> str:
	        """Advance to the next key that is not on cooldown and return it.

	        The search starts at the key after the current one and wraps around, so
	        the current key itself is the last candidate. If every key is still on
	        cooldown the index is left unchanged and the current key is returned,
	        leaving any waiting to the caller's retry logic.

	        Each call advances from the current index, so two threads reacting to
	        the same failure will each move the index forward.
	        """
	        with self._lock:
	            # Monotonic clock: cooldown deadlines must not move when the system
	            # wall clock is adjusted.
	            now = time.monotonic()
	            total = len(self._keys)
	            for step in range(1, total + 1):
	                idx = (self._idx + step) % total
	                deadline = self._cooldown.get(self._keys[idx])
	                if deadline is None or now >= deadline:
	                    self._idx = idx
	                    rotated = True
	                    break
	            else:
	                rotated = False
	                idx = self._idx % total
	            key = self._keys[idx]
	        # Log after releasing the lock so slow log handlers do not block other threads.
	        if rotated:
	            logger.warning("[KeyManager] Rotated to key index %d", idx)
	        else:
	            logger.warning("[KeyManager] All keys on cooldown, returning current key")
	        return key

	    def mark_rate_limited(self, key: str, cooldown_secs: int = 62) -> None:
	        """Put ``key`` on cooldown for ``cooldown_secs`` seconds from now.

	        Args:
	            key: The API key that was rate-limited (as returned by
	                ``current()``/``rotate()``).
	            cooldown_secs: Cooldown length in seconds. The default of 62 is
	                presumably just over a one-minute rate-limit window — confirm
	                against the provider's limits.
	        """
	        deadline = time.monotonic() + cooldown_secs
	        with self._lock:
	            self._cooldown[key] = deadline
	        # Only the last characters of the key are logged, never the full secret.
	        logger.warning(
	            "[KeyManager] Key ...%s cooled down for %ss", key[-6:], cooldown_secs
	        )

	    def build_llm(self, temperature: float = 0) -> "ChatGroq":
	        """Return a new ``ChatGroq`` configured with the current key and stored model.

	        The key is read once, at call time; call this again after ``rotate()``
	        to obtain a client that uses the newly selected key.

	        Args:
	            temperature: Sampling temperature forwarded to ``ChatGroq``.
	        """
	        return ChatGroq(
	            model=self._model,
	            api_key=self.current(),
	            temperature=temperature,
	            max_tokens=800,  # cap output tokens to save quota
	        )