"""LLM wrapper, API-key pool with rate limiting, JSON helpers, and a
LangGraph file checkpointer.
Contents:

* :class:`GeminiLLM` – synchronous wrapper around ``google-genai`` with
  Pydantic-typed structured output (:meth:`GeminiLLM.call_typed`) and a
  grounded-search variant (:meth:`GeminiLLM.call_grounded`).
* :func:`pydantic_to_gemini_schema` – converts a Pydantic model to a
  ``response_schema`` dict accepted by the Gemini API.
* :class:`APIPoolManager` – round-robin Gemini keys with optional RPM/RPD
  enforcement.
* :func:`extract_and_parse_json` – measured JSON-repair fallback for the
  rare path where ``response_schema`` is unavailable.
* :class:`FileCheckpointSaver` – pickles LangGraph checkpoints to disk so
  long-running sessions survive a process restart.
"""
from __future__ import annotations
import json
import os
import pickle
import re
import time
from collections import deque
from dataclasses import dataclass, field
from datetime import date
from threading import Lock
from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar
from google import genai
from google.genai import types
from json_repair import repair_json
from langgraph.checkpoint.base import BaseCheckpointSaver
from pydantic import BaseModel, ValidationError
from config import get_settings
from logging_setup import get_logger
_logger = get_logger("utils")
_llm_logger = get_logger("llm.gemini")
_pool_logger = get_logger("utils.api_pool")
T = TypeVar("T", bound=BaseModel)
# --- Parse metrics -------------------------------------------------------------
@dataclass
class ParseMetrics:
    """Counts native-vs-fallback parses across the process.

    Native parses come from Gemini's ``response_schema``; fallback parses
    use :func:`extract_and_parse_json` (regex / ``json_repair``). A healthy
    deployment should see ``fallback_parses`` close to zero – anything
    higher is a signal the prompt or schema needs work. The eval harness
    surfaces both counters.
    """

    native_parses: int = 0    # response.parsed worked first try
    fallback_parses: int = 0  # had to invoke extract_and_parse_json
    schema_failures: int = 0  # output failed Pydantic validation altogether
    by_model: Dict[str, Dict[str, int]] = field(default_factory=dict)

    def record(self, model: str, kind: str) -> None:
        if kind == "native":
            self.native_parses += 1
        elif kind == "fallback":
            self.fallback_parses += 1
        elif kind == "failure":
            self.schema_failures += 1
        slot = self.by_model.setdefault(model, {"native": 0, "fallback": 0, "failure": 0})
        slot[kind] = slot.get(kind, 0) + 1
_parse_metrics = ParseMetrics()
def get_parse_metrics() -> ParseMetrics:
"""Return the global parse-metrics singleton (read-only-ish)."""
return _parse_metrics
# --- Debug-scope helper --------------------------------------------------------
def should_debug(scope: str, name: str) -> bool:
"""Return True when this scope/name is enabled in ``settings.debug_scopes``."""
settings = get_settings()
if not settings.debug_mode:
return False
if scope not in settings.debug_scopes:
return False
scopes_list = settings.debug_scopes[scope]
return "all" in scopes_list or name in scopes_list
# --- Filesystem logging --------------------------------------------------------
def save_to_json(data: Dict[str, Any], filename: str, subdirectory: Optional[str] = None) -> None:
"""Persist a structured payload to ``settings.log_dir`` if logging is on."""
settings = get_settings()
if settings.log_dir is None:
return
log_dir = os.path.join(settings.log_dir, subdirectory) if subdirectory else settings.log_dir
os.makedirs(log_dir, exist_ok=True)
# Filenames may contain ``:`` from ISO timestamps which is invalid on Windows.
safe_name = filename.replace(":", "-")
filepath = os.path.join(log_dir, safe_name)
with open(filepath, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, default=str)
# --- LLM abstractions ----------------------------------------------------------
class LLM:
    """Minimal LLM contract: callable returning a list with one string."""

    def __call__(self, prompt: str, **kwargs: Any) -> List[str]:  # pragma: no cover - interface
        raise NotImplementedError

    def format_prompt(self, messages: List[Dict[str, str]]) -> str:  # pragma: no cover - interface
        raise NotImplementedError
# --- Gemini schema conversion -------------------------------------------------
# Keys Gemini's response_schema either rejects or silently mishandles.
_GEMINI_DROP_KEYS = frozenset(
    {
        "additionalProperties",
        "$defs",
        "$ref",
        "$schema",
        "title",
        "default",
        "discriminator",
        "examples",
        "readOnly",
        "writeOnly",
        "definitions",
    }
)
def pydantic_to_gemini_schema(model_cls: Type[BaseModel]) -> Dict[str, Any]:
    """Convert a Pydantic model into a Gemini-safe ``response_schema`` dict.

    Pydantic's ``model_json_schema()`` emits keys (``$ref``, ``$defs``,
    ``additionalProperties``, ``title``, ``default``) that Gemini's API does
    not accept. This helper:

    1. Inlines every ``$ref`` against ``$defs``.
    2. Recursively strips the unsupported keys.
    3. Promotes ``anyOf: [X, {"type": "null"}]`` (Pydantic's idiom for
       ``Optional[X]``) into ``nullable: true`` on ``X``.

    Returns a plain ``dict`` suitable for ``GenerateContentConfig.response_schema``.
    """
    raw = model_cls.model_json_schema()
    defs = raw.get("$defs", {}) or raw.get("definitions", {}) or {}

    def _resolve(node: Any) -> Any:
        if isinstance(node, list):
            return [_resolve(n) for n in node]
        if not isinstance(node, dict):
            return node
        # Inline $ref.
        ref = node.get("$ref")
        if isinstance(ref, str) and ref.startswith("#/$defs/"):
            name = ref.rsplit("/", 1)[-1]
            target = defs.get(name)
            if target is not None:
                merged = {k: v for k, v in node.items() if k != "$ref"}
                # The referenced definition wins for type/structure; extras
                # on the wrapping node (e.g. description) are preserved.
                return _resolve({**target, **merged})
        # Collapse Optional[X] = anyOf [X, {"type": "null"}] into nullable.
        if "anyOf" in node and isinstance(node["anyOf"], list):
            non_null = [s for s in node["anyOf"] if s.get("type") != "null"]
            has_null = len(non_null) != len(node["anyOf"])
            if has_null and len(non_null) == 1:
                base = _resolve(non_null[0])
                merged = {k: v for k, v in node.items() if k != "anyOf"}
                merged.update(base)
                merged["nullable"] = True
                return _resolve(merged)
        out: Dict[str, Any] = {}
        for k, v in node.items():
            if k in _GEMINI_DROP_KEYS:
                continue
            out[k] = _resolve(v)
        return out

    sanitized = _resolve(raw)
    # Drop any residual top-level keys that may have slipped through.
    return {k: v for k, v in sanitized.items() if k not in _GEMINI_DROP_KEYS}
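
# Hedged sketch of what the conversion produces – ``Profile`` is an
# illustrative model, not one used elsewhere in this codebase, and the
# output is shown approximately:
#
#     class Profile(BaseModel):
#         name: str
#         age: Optional[int] = None
#
#     pydantic_to_gemini_schema(Profile)
#     # → {"type": "object",
#     #    "properties": {"name": {"type": "string"},
#     #                   "age": {"type": "integer", "nullable": True}},
#     #    "required": ["name"]}
#
# i.e. ``title``/``default`` are stripped and the ``anyOf``-with-null idiom
# collapses to ``nullable: True``.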
class GeminiLLM(LLM):
    """Synchronous Gemini wrapper with API-key pooling.

    Exposes three entry points:

    * :meth:`__call__` – free-text streaming call returning a single string.
    * :meth:`call_typed` – structured-output call constrained to a Pydantic
      model via Gemini's ``response_schema``.
    * :meth:`call_grounded` – single round-trip with Gemini's built-in
      ``google_search`` tool; returns text, citations, and the search
      queries the model ran.

    Every call goes through the supplied :class:`APIPoolManager` for key
    rotation and (optional) RPM/RPD enforcement.
    """

    def __init__(
        self,
        model_name: str,
        structured_output: bool = False,
        thinking_budget: int = 300,
        manager: Optional["APIPoolManager"] = None,
        **kwargs: Any,
    ) -> None:
        self.model_name = model_name
        self.structured_output = structured_output
        self.thinking_budget = thinking_budget
        self.kwargs = kwargs
        self.manager = manager
        self.is_gemma = "gemma" in model_name.lower()
        if self.is_gemma:
            # Gemma family doesn't support thinking_config or JSON response schema.
            self.structured_output = False
            self.thinking_budget = None
    def __call__(self, prompt: str, **kwargs: Any) -> List[str]:
        """Untyped streaming call. Returns ``[response_text]``.

        Backwards-compat path used by code that still parses JSON-from-text.
        Prefer :meth:`call_typed` when a Pydantic schema is available.
        """
        text, _ = self._invoke(prompt, response_schema=None, **kwargs)
        return [text]
    def call_typed(
        self,
        prompt: str,
        response_model: Type[T],
        **kwargs: Any,
    ) -> Optional[T]:
        """Call Gemini with constrained-decoded JSON matching ``response_model``.

        Returns a parsed instance of ``response_model``, or ``None`` if every
        parse strategy failed (in which case the parse-metrics ``schema_failures``
        counter is incremented so the eval harness can spot it).
        """
        text, parsed = self._invoke(prompt, response_schema=response_model, **kwargs)

        # Gemini occasionally wraps a single object in a one-element list even
        # when the schema is object-typed. Unwrap before validation.
        def _unwrap(value: Any) -> Any:
            if isinstance(value, list) and len(value) == 1 and isinstance(value[0], (dict, BaseModel)):
                return value[0]
            return value

        parsed = _unwrap(parsed)
        # Strategy 1: SDK already parsed it for us via response_schema.
        if isinstance(parsed, response_model):
            _parse_metrics.record(self.model_name, "native")
            return parsed
        # Strategy 2: SDK gave us a dict; try to validate it.
        if isinstance(parsed, dict):
            try:
                instance = response_model.model_validate(parsed)
                _parse_metrics.record(self.model_name, "native")
                return instance
            except ValidationError as e:
                _llm_logger.debug("response.parsed dict failed Pydantic validation: %s", e)
        # Strategy 3: regex / json_repair fallback on the raw text.
        try:
            data = _unwrap(extract_and_parse_json(text))
            instance = response_model.model_validate(data)
            _parse_metrics.record(self.model_name, "fallback")
            _llm_logger.warning(
                "Used JSON-repair fallback for %s on model %s – fix the prompt or schema",
                response_model.__name__,
                self.model_name,
            )
            return instance
        except Exception as e:  # noqa: BLE001 – covers ValidationError too; any failure ends strategy 3
            _parse_metrics.record(self.model_name, "failure")
            _llm_logger.error(
                "Failed to parse %s from %s response: %s",
                response_model.__name__,
                self.model_name,
                str(e),
            )
            return None
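
    # Hedged usage sketch – ``Triage`` and the model name below are
    # illustrative assumptions, not defined in this codebase:
    #
    #     class Triage(BaseModel):
    #         urgency: str
    #         rationale: str
    #
    #     llm = GeminiLLM("gemini-2.5-flash", manager=pool)
    #     result = llm.call_typed("Triage this symptom report ...", Triage)
    #     if result is None:
    #         ...  # schema_failures was incremented; retry or degrade gracefully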
    def call_grounded(
        self,
        prompt: str,
        **kwargs: Any,
    ) -> Tuple[str, List[Dict[str, str]], List[str]]:
        """Single grounded call using Gemini's built-in ``google_search`` tool.

        Gemini handles the whole search loop internally: it generates queries,
        runs them against Google Search, synthesises an answer, and returns
        ``groundingMetadata`` with the sources it relied on.

        Returns ``(text, citations, queries)`` where ``citations`` is a list
        of ``{"title": str, "uri": str}`` derived from ``grounding_chunks``
        and ``queries`` is the actual list of search strings Gemini ran
        (useful for debugging).
        """
        if self.manager is None:
            raise ValueError("APIPoolManager must be provided for rate limiting.")
        if self.is_gemma:
            raise ValueError("Gemma models do not support google_search grounding.")
        merged_kwargs = {**self.kwargs, **kwargs}
        api_key = self.manager.get_next_key(self.model_name)
        try:
            client = genai.Client(api_key=api_key)
            contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt)])]
            generate_content_config = types.GenerateContentConfig(
                tools=[types.Tool(google_search=types.GoogleSearch())],
                max_output_tokens=merged_kwargs.get("max_tokens", 5120),
                temperature=merged_kwargs.get("temperature", 0.3),
            )
            start_time = time.time()
            response = client.models.generate_content(
                model=self.model_name,
                contents=contents,
                config=generate_content_config,
            )
            completion_time = time.time()
            if self.manager.rate_limits is not None:
                self.manager.record_usage(api_key, self.model_name, completion_time)
            text = (response.text or "").strip()
            citations: List[Dict[str, str]] = []
            queries: List[str] = []
            try:
                candidate = response.candidates[0]
                gm = getattr(candidate, "grounding_metadata", None)
                if gm is not None:
                    for chunk in getattr(gm, "grounding_chunks", None) or []:
                        web = getattr(chunk, "web", None)
                        if web and getattr(web, "uri", None):
                            citations.append(
                                {"title": web.title or web.uri, "uri": web.uri}
                            )
                    queries = list(getattr(gm, "web_search_queries", None) or [])
            except (AttributeError, IndexError):
                pass
            _llm_logger.debug(
                "Grounded LLM call completed for %s using key …%s in %.2fs (%d citations, %d queries)",
                self.model_name,
                api_key[-4:],
                completion_time - start_time,
                len(citations),
                len(queries),
            )
            return text, citations, queries
        except Exception as e:  # noqa: BLE001
            _llm_logger.warning(
                "Grounded LLM call failed for %s using key …%s: %s",
                self.model_name,
                api_key[-4:],
                str(e),
            )
            return f"Error: grounded LLM call failed - {str(e)}", [], []
    def _invoke(
        self,
        prompt: str,
        response_schema: Optional[Type[BaseModel]] = None,
        **kwargs: Any,
    ) -> Tuple[str, Any]:
        """Single Gemini round-trip. Returns ``(text, response.parsed)``.

        ``parsed`` is whatever the SDK populated on ``response.parsed`` –
        usually a Pydantic instance when ``response_schema`` is supplied,
        ``None`` otherwise.
        """
        if self.manager is None:
            raise ValueError("APIPoolManager must be provided for rate limiting.")
        merged_kwargs = {**self.kwargs, **kwargs}
        api_key = self.manager.get_next_key(self.model_name)
        try:
            client = genai.Client(api_key=api_key)
            contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt)])]
            generate_content_config = self._build_config(merged_kwargs, response_schema=response_schema)
            start_time = time.time()
            # Non-streaming when we want response.parsed (the streaming API
            # doesn't populate it). Streaming for plain free-text calls.
            if response_schema is not None:
                response = client.models.generate_content(
                    model=self.model_name,
                    contents=contents,
                    config=generate_content_config,
                )
                response_text = response.text or ""
                parsed = getattr(response, "parsed", None)
            else:
                response_text = ""
                parsed = None
                for chunk in client.models.generate_content_stream(
                    model=self.model_name,
                    contents=contents,
                    config=generate_content_config,
                ):
                    if chunk.text:
                        response_text += chunk.text
            completion_time = time.time()
            if self.manager.rate_limits is not None:
                self.manager.record_usage(api_key, self.model_name, completion_time)
            _llm_logger.debug(
                "LLM call completed for %s using key …%s in %.2fs (schema=%s)",
                self.model_name,
                api_key[-4:],
                completion_time - start_time,
                response_schema.__name__ if response_schema else "none",
            )
            return response_text.strip(), parsed
        except Exception as e:  # noqa: BLE001 – broad on purpose; rotate key on any provider error
            _llm_logger.warning(
                "LLM call failed for %s using key …%s: %s",
                self.model_name,
                api_key[-4:],
                str(e),
            )
            return f"Error: LLM call failed - {str(e)}", None
    def _build_config(
        self,
        merged_kwargs: Dict[str, Any],
        response_schema: Optional[Type[BaseModel]] = None,
    ) -> types.GenerateContentConfig:
        max_tokens = merged_kwargs.get("max_tokens", 5120)
        temperature = merged_kwargs.get("temperature", 0.3)
        if self.is_gemma:
            # Gemma can't do thinking_config or response_schema.
            return types.GenerateContentConfig(
                response_mime_type="text/plain",
                max_output_tokens=max_tokens,
                temperature=temperature,
            )
        thinking_cfg = types.ThinkingConfig(thinking_budget=self.thinking_budget)
        if response_schema is not None:
            # Gemini's response_schema accepts a SUBSET of OpenAPI 3.0; passing
            # the Pydantic class directly lets the SDK emit `additionalProperties`
            # / `$ref` / `$defs` / `title` / `default`, which the API rejects
            # ("additionalProperties is not supported in the Gemini API").
            # We sanitize to a dict the API actually accepts.
            schema_dict = pydantic_to_gemini_schema(response_schema)
            return types.GenerateContentConfig(
                thinking_config=thinking_cfg,
                response_mime_type="application/json",
                response_schema=schema_dict,
                max_output_tokens=max_tokens,
                temperature=temperature,
            )
        mime = "application/json" if self.structured_output else "text/plain"
        return types.GenerateContentConfig(
            thinking_config=thinking_cfg,
            response_mime_type=mime,
            max_output_tokens=max_tokens,
            temperature=temperature,
        )
    def format_prompt(self, messages: List[Dict[str, str]]) -> str:
        prompt = ""
        for msg in messages:
            if msg["role"] == "system":
                prompt += f"System: {msg['content']}\n"
            elif msg["role"] == "user":
                prompt += f"User: {msg['content']}\n"
            elif msg["role"] == "assistant":
                prompt += f"Assistant: {msg['content']}\n"
        prompt += "Assistant:"
        return prompt
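
    # Illustrative input/output for format_prompt:
    #
    #     format_prompt([
    #         {"role": "system", "content": "Be brief."},
    #         {"role": "user", "content": "Hi"},
    #     ])
    #     # → "System: Be brief.\nUser: Hi\nAssistant:"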
# --- API key pool with optional rate limiting ----------------------------------
class APIPoolManager:
    """Round-robin Gemini API keys with per-key RPM/RPD enforcement.

    ``rate_limits`` is ``{model_name: (rpm, rpd)}``. When ``None``, the pool
    just rotates keys without any throttling.
    """

    def __init__(
        self,
        api_keys: List[str],
        rate_limits: Optional[Dict[str, Tuple[int, int]]] = None,
    ) -> None:
        self.api_keys = list(api_keys)
        self.active_keys = list(api_keys)
        self.rate_limits = rate_limits
        self.usage: Dict[str, Dict[str, Dict[str, Any]]] = {}
        self.current_index = 0
        self.lock = Lock()
        if rate_limits is not None:
            for key in api_keys:
                self.usage[key] = {}
                for model, (rpm, _rpd) in rate_limits.items():
                    self.usage[key][model] = {
                        "timestamps": deque(maxlen=max(1, rpm)),
                        "daily_requests": 0,
                        "last_day": date.today(),
                    }
    # --- internal helpers ------------------------------------------------------

    def _refresh_daily(self, key: str, model: str) -> None:
        usage = self.usage[key][model]
        today = date.today()
        if usage["last_day"] < today:
            usage["daily_requests"] = 0
            usage["last_day"] = today

    def _key_is_rpd_ok(self, key: str, model: str) -> bool:
        if self.rate_limits is None:
            return True
        self._refresh_daily(key, model)
        _, rpd = self.rate_limits[model]
        if self.usage[key][model]["daily_requests"] >= rpd:
            if key in self.active_keys:
                self.active_keys.remove(key)
            return False
        return True

    def _key_wait_info(self, key: str, model: str) -> Tuple[float, float]:
        if self.rate_limits is None:
            return 0.0, 0.0
        rpm, _ = self.rate_limits[model]
        usage = self.usage[key][model]
        now = time.time()
        timestamps = usage["timestamps"]
        while timestamps and now - timestamps[0] > 60:
            timestamps.popleft()
        wait_slot = 0.0
        if len(timestamps) >= rpm:
            oldest = timestamps[0]
            wait_slot = max(0.0, 60.0 - (now - oldest))
        wait_spacing = 0.0
        if timestamps:
            time_since_last = now - timestamps[-1]
            min_interval = 60.0 / rpm if rpm > 0 else 0.0
            wait_spacing = max(0.0, min_interval - time_since_last)
        return wait_slot, wait_spacing

    def can_use_now(self, key: str, model: str) -> bool:
        if key not in self.active_keys:
            return False
        if not self._key_is_rpd_ok(key, model):
            return False
        wait_slot, wait_spacing = self._key_wait_info(key, model)
        return wait_slot <= 0.0 and wait_spacing <= 0.0
    # --- public API ------------------------------------------------------------

    def get_next_key(self, model: str, max_sleep_once: bool = True) -> str:
        while True:
            min_wait: Optional[float] = None
            with self.lock:
                if not self.active_keys:
                    raise RuntimeError("No available API keys left due to rate limits.")
                n = len(self.active_keys)
                for i in range(n):
                    idx = (self.current_index + i) % n
                    key = self.active_keys[idx]
                    if self.can_use_now(key, model):
                        self.current_index = (idx + 1) % max(1, len(self.active_keys))
                        return key
                for key in list(self.active_keys):
                    if not self._key_is_rpd_ok(key, model):
                        continue
                    wait_slot, wait_spacing = self._key_wait_info(key, model)
                    wait = max(wait_slot, wait_spacing)
                    if min_wait is None or wait < min_wait:
                        min_wait = wait
                if min_wait is None:
                    raise RuntimeError("No available API keys left (RPD exhausted).")
            # Sleep OUTSIDE the lock: ``Lock`` is non-reentrant, so retrying
            # via a recursive call while still holding it would deadlock.
            # Loop back and rescan once the slot frees up.
            if min_wait > 0:
                _pool_logger.debug("Waiting %.2fs for next API slot", min_wait)
                time.sleep(min_wait)
    def record_usage(self, key: str, model: str, timestamp: Optional[float] = None) -> None:
        if self.rate_limits is None:
            return
        t = timestamp or time.time()
        with self.lock:
            if key not in self.active_keys:
                return
            self._refresh_daily(key, model)
            self.usage[key][model]["timestamps"].append(t)
            self.usage[key][model]["daily_requests"] += 1
            _, rpd = self.rate_limits[model]
            if self.usage[key][model]["daily_requests"] >= rpd:
                if key in self.active_keys:
                    self.active_keys.remove(key)
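
# Hedged usage sketch – the keys and limits below are illustrative
# placeholders, not real values:
#
#     pool = APIPoolManager(
#         api_keys=["KEY_A", "KEY_B"],
#         rate_limits={"gemini-2.5-flash": (10, 250)},  # (RPM, RPD)
#     )
#     key = pool.get_next_key("gemini-2.5-flash")  # blocks until a slot opens
#     ...                                          # make the API call
#     pool.record_usage(key, "gemini-2.5-flash")   # count it against RPM/RPD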
# --- Factory -------------------------------------------------------------------
def create_llm(config: dict, manager: APIPoolManager) -> LLM:
    """Instantiate an LLM from a config dict."""
    if config["type"] == "gemini":
        return GeminiLLM(
            model_name=config["model_name"],
            structured_output=config.get("structured_output", False),
            thinking_budget=config.get("thinking_budget", 300),
            manager=manager,
            **config.get("params", {}),
        )
    raise ValueError(f"Unknown LLM type: {config['type']}")
# --- JSON helpers --------------------------------------------------------------
def extract_and_parse_json(text: str) -> Dict[str, Any]:
    """Best-effort JSON extraction with a chain of fallbacks.

    Reserved for the measured fallback path – :meth:`GeminiLLM.call_typed`
    prefers Gemini's native ``response_schema`` and only falls through here
    when the SDK returns no parsed object.
    """
    try:
        return json.loads(text.strip())
    except Exception:
        pass
    fenced = re.search(r"```json\s*(.*?)\s*```", text, re.DOTALL)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except Exception:
            pass
    braces = re.search(r"\{.*\}", text, re.DOTALL)
    if braces:
        try:
            return json.loads(repair_json(braces.group(0)))
        except Exception:
            pass
    try:
        return json.loads(repair_json(text))
    except Exception as e:
        _logger.warning("All JSON parsing strategies failed: %s", str(e))
        return {
            "thought": f"JSON parsing failed: {str(e)}",
            "action": "compose_response",
            "params": {"text": f"I encountered an error processing your request. Original response: {text[:200]}..."},
            "_parse_error": True,
            "_original_text": text,
        }
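
# Illustrative inputs and which fallback catches each – outputs are
# approximate sketches, not asserted behaviour of json_repair:
#
#     extract_and_parse_json('{"a": 1}')                # direct json.loads
#     extract_and_parse_json('```json\n{"a": 1}\n```')  # fenced-block regex
#     extract_and_parse_json("Sure! {'a': 1,}")         # braces + repair_json
#     extract_and_parse_json("no json here")            # error dict, _parse_error=True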
def set_nested(d: Dict[str, Any], key: str, value: Any) -> None:
    """Assign ``value`` at a dotted-path key inside a nested dict."""
    keys = key.split(".")
    for k in keys[:-1]:
        d = d.setdefault(k, {})
    d[keys[-1]] = value
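
# Example: set_nested(m, "user_profile.name", "Ada") turns {} into
# {"user_profile": {"name": "Ada"}}; intermediate dicts are created on demand.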
def get_memory_summary(memory: Dict[str, Any], partitions: Optional[List[str]] = None) -> str:
    """Format selected memory partitions as JSON for prompt embedding."""
    if partitions is None:
        partitions = ["user_profile", "medical_history", "flags_and_assessments", "plans"]
    summary: Dict[str, Any] = {}
    for partition in partitions:
        summary[partition] = memory[partition] if partition in memory and memory[partition] else "empty"
    return json.dumps(summary, indent=2, default=str)
def update_memory_partition(memory: Dict[str, Any], partition: str, data: Any) -> None:
    """Merge ``data`` into ``memory[partition]`` (or assign when types disagree)."""
    if partition not in memory:
        memory[partition] = {}
    if isinstance(data, dict) and isinstance(memory[partition], dict):
        memory[partition].update(data)
    else:
        memory[partition] = data
    _logger.debug("Updated memory partition %r with new data", partition)
# --- Checkpointer --------------------------------------------------------------
class FileCheckpointSaver(BaseCheckpointSaver):
    """Pickle LangGraph checkpoints to ``directory/checkpoint_<thread_id>.pkl``."""

    def __init__(self, directory: str) -> None:
        self.directory = directory
        os.makedirs(directory, exist_ok=True)

    def put(self, config: Dict[str, Any], checkpoint: Dict[str, Any]) -> None:
        thread_id = config.get("configurable", {}).get("thread_id", "default")
        filepath = os.path.join(self.directory, f"checkpoint_{thread_id}.pkl")
        with open(filepath, "wb") as f:
            pickle.dump(checkpoint, f)

    def get(self, config: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        thread_id = config.get("configurable", {}).get("thread_id", "default")
        filepath = os.path.join(self.directory, f"checkpoint_{thread_id}.pkl")
        if os.path.exists(filepath):
            with open(filepath, "rb") as f:
                return pickle.load(f)
        return None
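
# Hedged usage sketch – the directory and thread_id are illustrative:
#
#     saver = FileCheckpointSaver("checkpoints")
#     cfg = {"configurable": {"thread_id": "session-42"}}
#     saver.put(cfg, {"state": ...})   # pickled to checkpoints/checkpoint_session-42.pkl
#     restored = saver.get(cfg)        # None when no checkpoint exists yet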
__all__ = [
    "APIPoolManager",
    "FileCheckpointSaver",
    "GeminiLLM",
    "LLM",
    "create_llm",
    "extract_and_parse_json",
    "get_memory_summary",
    "save_to_json",
    "set_nested",
    "should_debug",
    "update_memory_partition",
]