Katya Beresneva committed
Commit · b37a35c
1 Parent(s): e17eeab
fix
Browse files
- requirements.txt +2 -1
- utils.py +36 -2
requirements.txt
CHANGED
@@ -13,4 +13,5 @@ smolagents
 tavily-python
 wikipedia-api
 wikipedia
-duckduckgo-search
+duckduckgo-search
+tenacity>=8.0.0
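The tenacity>=8.0.0 pin backs the retry decorator introduced in utils.py below. A quick sanity check that the dependency resolves, as a sketch outside the commit itself:

# Sketch, not part of the commit: confirm the installed tenacity version.
from importlib.metadata import version
print(version("tenacity"))  # expected to be >= 8.0.0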
utils.py
CHANGED
@@ -1,13 +1,47 @@
 from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.runnables import RunnableConfig
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_exponential,
+    retry_if_exception_type
+)
+from google.api_core import exceptions
+import time
 
+# Rate limiting settings
+REQUESTS_PER_MINUTE = 10  # Conservative limit below the 15 RPM free tier limit
+MINIMUM_PAUSE = 60.0 / REQUESTS_PER_MINUTE  # Minimum time between requests
+last_request_time = 0
+
+def rate_limit():
+    """Implement rate limiting"""
+    global last_request_time
+    current_time = time.time()
+    time_since_last_request = current_time - last_request_time
+    if time_since_last_request < MINIMUM_PAUSE:
+        time.sleep(MINIMUM_PAUSE - time_since_last_request)
+    last_request_time = time.time()
+
+@retry(
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=2, min=4, max=60),
+    retry=retry_if_exception_type(exceptions.ResourceExhausted)
+)
+def _call_llm_with_retry(*args, **kwargs):
+    """Wrapper function to implement retry logic"""
+    rate_limit()  # Apply rate limiting before each attempt
+    return ChatGoogleGenerativeAI(*args, **kwargs)
 
 def get_llm(
     llm_provider_api_key: str,
     model_name: str = "gemini-2.0-flash",  # Default model aligned with AGENT_MODEL_NAME
 ):
-    return ChatGoogleGenerativeAI(
+    """Get a rate-limited and retry-enabled LLM instance"""
+    return _call_llm_with_retry(
         google_api_key=llm_provider_api_key,
         temperature=0.7,
-        max_retries=
+        max_retries=3,
         model=model_name,
+        convert_system_message_to_human=True,
     )
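Together, the utils.py changes space out client creation to roughly 10 requests per minute (one every 6 seconds, below the 15 RPM free-tier limit) and retry up to three times with exponential backoff (waits clamped to 4-60 seconds) when Google reports ResourceExhausted. A minimal usage sketch, not part of the commit; the environment variable name and prompt are assumptions:

# Sketch: construct the rate-limited client and make one call.
import os
from utils import get_llm

llm = get_llm(llm_provider_api_key=os.environ["GOOGLE_API_KEY"])  # assumed env var
response = llm.invoke("Hello!")
print(response.content)

Note that the tenacity decorator wraps construction of ChatGoogleGenerativeAI rather than individual generations, so its backoff applies when the client is created; retries of the generation requests themselves remain governed by the max_retries=3 passed through to the client.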