from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnableConfig
from tenacity import (
retry,
stop_after_attempt,
wait_exponential,
retry_if_exception_type
)
from google.api_core import exceptions
import time
# Rate limiting settings
REQUESTS_PER_MINUTE = 15
MINIMUM_PAUSE = 60.0 / REQUESTS_PER_MINUTE # Minimum time between requests
last_request_time = 0
def rate_limit():
"""Implement rate limiting"""
global last_request_time
current_time = time.time()
time_since_last_request = current_time - last_request_time
if time_since_last_request < MINIMUM_PAUSE:
time.sleep(MINIMUM_PAUSE - time_since_last_request)
last_request_time = time.time()
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=2, min=4, max=60),
retry=retry_if_exception_type(exceptions.ResourceExhausted)
)
def _call_llm_with_retry(*args, **kwargs):
"""Wrapper function to implement retry logic"""
rate_limit() # Apply rate limiting before each attempt
return ChatGoogleGenerativeAI(*args, **kwargs)
def get_llm(
llm_provider_api_key: str,
model_name: str = "gemini-2.0-flash-lite", # Default model aligned with AGENT_MODEL_NAME
):
"""Get a rate-limited and retry-enabled LLM instance"""
return _call_llm_with_retry(
google_api_key=llm_provider_api_key,
temperature=0.7,
max_retries=3,
model=model_name,
convert_system_message_to_human=True,
)