# Katya Beresneva — fix (commit 13317d6)
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnableConfig
from tenacity import (
retry,
stop_after_attempt,
wait_exponential,
retry_if_exception_type
)
from google.api_core import exceptions
import time
# Rate limiting settings: cap outbound LLM requests to stay under the
# provider's free-tier quota.
REQUESTS_PER_MINUTE = 15
MINIMUM_PAUSE = 60.0 / REQUESTS_PER_MINUTE  # Minimum seconds between requests
last_request_time = 0.0  # monotonic timestamp of the last request; 0.0 = none yet


def rate_limit():
    """Sleep just long enough to keep requests at most one per MINIMUM_PAUSE.

    Uses time.monotonic() instead of time.time() so that wall-clock
    adjustments (NTP sync, DST, manual changes) cannot break the pacing.
    The very first call never sleeps.
    """
    global last_request_time
    now = time.monotonic()
    if last_request_time:  # skip the wait on the first-ever request
        remaining = MINIMUM_PAUSE - (now - last_request_time)
        if remaining > 0:
            time.sleep(remaining)
    last_request_time = time.monotonic()
@retry(
    retry=retry_if_exception_type(exceptions.ResourceExhausted),
    wait=wait_exponential(multiplier=2, min=4, max=60),
    stop=stop_after_attempt(3),
)
def _call_llm_with_retry(*args, **kwargs):
    """Instantiate the Gemini chat client, pacing each attempt via the
    module rate limiter and retrying (up to 3 times, exponential backoff)
    when the quota is exhausted.

    NOTE(review): the retry/rate-limit guards here cover only client
    construction; actual API calls happen later through the returned
    instance — confirm this coverage is intended.
    """
    rate_limit()  # honor the request budget before each construction attempt
    client = ChatGoogleGenerativeAI(*args, **kwargs)
    return client
def get_llm(
    llm_provider_api_key: str,
    model_name: str = "gemini-2.0-flash-lite",  # Default model aligned with AGENT_MODEL_NAME
    *,
    temperature: float = 0.7,  # keyword-only; default preserves prior behavior
):
    """Build a rate-limited, retry-enabled Gemini chat LLM instance.

    Args:
        llm_provider_api_key: Google API key forwarded to the client.
        model_name: Gemini model identifier.
        temperature: Sampling temperature (generalized from the former
            hard-coded 0.7).

    Returns:
        The client produced by ``_call_llm_with_retry`` (a
        ``ChatGoogleGenerativeAI`` instance).
    """
    return _call_llm_with_retry(
        google_api_key=llm_provider_api_key,
        temperature=temperature,
        max_retries=3,
        model=model_name,
        convert_system_message_to_human=True,
    )