File size: 1,515 Bytes
b75609c
b37a35c
 
 
 
 
 
 
 
 
b75609c
b37a35c
13317d6
b37a35c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523e34e
b75609c
 
13317d6
523e34e
b37a35c
 
b75609c
523e34e
b37a35c
b75609c
b37a35c
523e34e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnableConfig
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type
)
from google.api_core import exceptions
import time

# Rate limiting settings
# Client-side throttle: maximum API requests allowed per minute.
REQUESTS_PER_MINUTE: int = 15
MINIMUM_PAUSE: float = 60.0 / REQUESTS_PER_MINUTE  # Minimum time between requests (seconds)
# time.time() of the most recent request; 0 means "no request made yet".
# Mutated by rate_limit() below — not safe across threads/processes.
last_request_time = 0

def rate_limit():
    """Block until at least MINIMUM_PAUSE seconds have passed since the last request.

    If the previous request was too recent, sleeps for the remaining portion
    of the pause window, then records the current time in the module-level
    ``last_request_time`` global.
    """
    global last_request_time
    elapsed = time.time() - last_request_time
    remaining = MINIMUM_PAUSE - elapsed
    if remaining > 0:
        time.sleep(remaining)
    last_request_time = time.time()

# Retry up to 3 attempts on quota exhaustion, with exponential backoff
# (waits grow from 4s up to a 60s cap between attempts).
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=2, min=4, max=60),
    retry=retry_if_exception_type(exceptions.ResourceExhausted)
)
def _call_llm_with_retry(*args, **kwargs):
    """Construct a ChatGoogleGenerativeAI client with throttling and retries.

    All positional and keyword arguments are forwarded unchanged to the
    ChatGoogleGenerativeAI constructor; the constructed instance is returned.

    NOTE(review): the @retry decorator and rate_limit() call wrap client
    *construction* only — API calls made later through the returned instance
    are not covered by this retry or by the rate limiter. Presumably the
    ``max_retries`` value passed by get_llm() is meant to cover per-call
    retries; confirm this split of responsibilities is intended.
    """
    rate_limit()  # Apply rate limiting before each attempt
    return ChatGoogleGenerativeAI(*args, **kwargs)

def get_llm(
    llm_provider_api_key: str,
    model_name: str = "gemini-2.0-flash-lite",  # Default model aligned with AGENT_MODEL_NAME
):
    """Build a rate-limited, retry-enabled Gemini chat-model client.

    Args:
        llm_provider_api_key: Google API key used to authenticate the client.
        model_name: Gemini model identifier to instantiate.

    Returns:
        The client produced by _call_llm_with_retry, configured with
        temperature 0.7, max_retries 3, and system-message-to-human
        conversion enabled.
    """
    client_settings = {
        "google_api_key": llm_provider_api_key,
        "model": model_name,
        "temperature": 0.7,
        "max_retries": 3,
        "convert_system_message_to_human": True,
    }
    return _call_llm_with_retry(**client_settings)