Katya Beresneva committed on
Commit
b37a35c
·
1 Parent(s): e17eeab
Files changed (2) hide show
  1. requirements.txt +2 -1
  2. utils.py +36 -2
requirements.txt CHANGED
@@ -13,4 +13,5 @@ smolagents
13
  tavily-python
14
  wikipedia-api
15
  wikipedia
16
- duckduckgo-search
 
 
13
  tavily-python
14
  wikipedia-api
15
  wikipedia
16
+ duckduckgo-search
17
+ tenacity>=8.0.0
utils.py CHANGED
@@ -1,13 +1,47 @@
1
  from langchain_google_genai import ChatGoogleGenerativeAI
 
 
 
 
 
 
 
 
 
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def get_llm(
5
  llm_provider_api_key: str,
6
  model_name: str = "gemini-2.0-flash", # Default model aligned with AGENT_MODEL_NAME
7
  ):
8
- return ChatGoogleGenerativeAI(
 
9
  google_api_key=llm_provider_api_key,
10
  temperature=0.7,
11
- max_retries=5,
12
  model=model_name,
 
13
  )
 
1
  from langchain_google_genai import ChatGoogleGenerativeAI
2
+ from langchain_core.runnables import RunnableConfig
3
+ from tenacity import (
4
+ retry,
5
+ stop_after_attempt,
6
+ wait_exponential,
7
+ retry_if_exception_type
8
+ )
9
+ from google.api_core import exceptions
10
+ import time
11
 
# --- Rate limiting settings ---
# Stay conservatively below Gemini's free-tier limit (~15 RPM per the
# original comment) to avoid ResourceExhausted errors.
REQUESTS_PER_MINUTE = 10
MINIMUM_PAUSE = 60.0 / REQUESTS_PER_MINUTE  # Minimum seconds between requests
last_request_time = 0  # Monotonic timestamp of the last request; 0 = none yet

def rate_limit():
    """Sleep just long enough to keep calls under REQUESTS_PER_MINUTE.

    Tracks the previous request time in a module-level variable, so the
    limiter is process-wide but NOT thread-safe: concurrent callers may
    race on ``last_request_time``.
    """
    global last_request_time
    # time.monotonic() instead of time.time(): wall-clock (NTP) adjustments
    # could make the elapsed interval negative or inflated, causing wrong
    # sleep durations; a monotonic clock cannot go backwards.
    now = time.monotonic()
    # The falsy check preserves the original behavior of never sleeping
    # before the very first request.
    if last_request_time and now - last_request_time < MINIMUM_PAUSE:
        time.sleep(MINIMUM_PAUSE - (now - last_request_time))
    last_request_time = time.monotonic()
25
+
# Retry up to 3 times on Gemini quota errors (ResourceExhausted), with
# exponential backoff bounded between 4 and 60 seconds.
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=2, min=4, max=60),
    retry=retry_if_exception_type(exceptions.ResourceExhausted)
)
def _call_llm_with_retry(*args, **kwargs):
    """Construct a ChatGoogleGenerativeAI client with rate limiting and retries.

    All positional and keyword arguments are forwarded unchanged to the
    ``ChatGoogleGenerativeAI`` constructor.

    NOTE(review): the retry/rate-limit here wraps *construction* of the
    client, not its API calls — instantiating ChatGoogleGenerativeAI does
    not appear to contact the API, so ResourceExhausted is unlikely to be
    raised at this point. Per-request retries seem to be delegated to the
    client's own ``max_retries`` setting (see ``get_llm``); confirm whether
    this wrapper is still needed.
    """
    rate_limit() # Apply rate limiting before each attempt
    return ChatGoogleGenerativeAI(*args, **kwargs)
35
 
def get_llm(
    llm_provider_api_key: str,
    model_name: str = "gemini-2.0-flash", # Default model aligned with AGENT_MODEL_NAME
):
    """Build a rate-limited, retry-enabled Gemini chat model.

    Args:
        llm_provider_api_key: Google API key used to authenticate.
        model_name: Identifier of the Gemini model to instantiate.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance created through
        ``_call_llm_with_retry``.
    """
    # Collect the constructor settings in one place, then forward them.
    # NOTE(review): convert_system_message_to_human is reportedly deprecated
    # in recent langchain-google-genai releases — verify against the pinned
    # version before removing it.
    llm_settings = {
        "google_api_key": llm_provider_api_key,
        "temperature": 0.7,
        "max_retries": 3,  # client-side per-request retries
        "model": model_name,
        "convert_system_message_to_human": True,
    }
    return _call_llm_with_retry(**llm_settings)