Katya Beresneva committed on
Commit
b37a35c
·
1 Parent(s): e17eeab
Files changed (2) hide show
  1. requirements.txt +2 -1
  2. utils.py +36 -2
requirements.txt CHANGED
@@ -13,4 +13,5 @@ smolagents
13
  tavily-python
14
  wikipedia-api
15
  wikipedia
16
- duckduckgo-search
 
 
13
  tavily-python
14
  wikipedia-api
15
  wikipedia
16
+ duckduckgo-search
17
+ tenacity>=8.0.0
utils.py CHANGED
@@ -1,13 +1,47 @@
1
  from langchain_google_genai import ChatGoogleGenerativeAI
 
 
 
 
 
 
 
 
 
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def get_llm(
5
  llm_provider_api_key: str,
6
  model_name: str = "gemini-2.0-flash", # Default model aligned with AGENT_MODEL_NAME
7
  ):
8
- return ChatGoogleGenerativeAI(
 
9
  google_api_key=llm_provider_api_key,
10
  temperature=0.7,
11
- max_retries=5,
12
  model=model_name,
 
13
  )
 
1
  from langchain_google_genai import ChatGoogleGenerativeAI
2
+ from langchain_core.runnables import RunnableConfig
3
+ from tenacity import (
4
+ retry,
5
+ stop_after_attempt,
6
+ wait_exponential,
7
+ retry_if_exception_type
8
+ )
9
+ from google.api_core import exceptions
10
+ import time
11
 
# --- Rate limiting settings ---
# Stay conservatively below Gemini's free-tier limit (~15 RPM per the
# original comment) to avoid ResourceExhausted errors.
REQUESTS_PER_MINUTE = 10
MINIMUM_PAUSE = 60.0 / REQUESTS_PER_MINUTE  # Minimum seconds between requests
last_request_time = 0  # Monotonic timestamp of the last request; 0 = none yet

def rate_limit():
    """Sleep just long enough to keep calls under REQUESTS_PER_MINUTE.

    Tracks the previous request time in a module-level variable, so the
    limiter is process-wide but NOT thread-safe: concurrent callers may
    race on ``last_request_time``.
    """
    global last_request_time
    # time.monotonic() instead of time.time(): wall-clock (NTP) adjustments
    # could make the elapsed interval negative or inflated, causing wrong
    # sleep durations; a monotonic clock cannot go backwards.
    now = time.monotonic()
    # The falsy check preserves the original behavior of never sleeping
    # before the very first request.
    if last_request_time and now - last_request_time < MINIMUM_PAUSE:
        time.sleep(MINIMUM_PAUSE - (now - last_request_time))
    last_request_time = time.monotonic()
25
+
# Retry up to 3 times on Gemini quota errors (ResourceExhausted), with
# exponential backoff bounded between 4 and 60 seconds.
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=2, min=4, max=60),
    retry=retry_if_exception_type(exceptions.ResourceExhausted)
)
def _call_llm_with_retry(*args, **kwargs):
    """Construct a ChatGoogleGenerativeAI client with rate limiting and retries.

    All positional and keyword arguments are forwarded unchanged to the
    ``ChatGoogleGenerativeAI`` constructor.

    NOTE(review): the retry/rate-limit here wraps *construction* of the
    client, not its API calls — instantiating ChatGoogleGenerativeAI does
    not appear to contact the API, so ResourceExhausted is unlikely to be
    raised at this point. Per-request retries seem to be delegated to the
    client's own ``max_retries`` setting (see ``get_llm``); confirm whether
    this wrapper is still needed.
    """
    rate_limit() # Apply rate limiting before each attempt
    return ChatGoogleGenerativeAI(*args, **kwargs)
35
 
def get_llm(
    llm_provider_api_key: str,
    model_name: str = "gemini-2.0-flash", # Default model aligned with AGENT_MODEL_NAME
):
    """Build a rate-limited, retry-enabled Gemini chat model.

    Args:
        llm_provider_api_key: Google API key used to authenticate.
        model_name: Identifier of the Gemini model to instantiate.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance created through
        ``_call_llm_with_retry``.
    """
    # Collect the constructor settings in one place, then forward them.
    # NOTE(review): convert_system_message_to_human is reportedly deprecated
    # in recent langchain-google-genai releases — verify against the pinned
    # version before removing it.
    llm_settings = {
        "google_api_key": llm_provider_api_key,
        "temperature": 0.7,
        "max_retries": 3,  # client-side per-request retries
        "model": model_name,
        "convert_system_message_to_human": True,
    }
    return _call_llm_with_retry(**llm_settings)