Shivangsinha committed on
Commit
567c67d
·
verified ·
1 Parent(s): 0fd1749

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -30
app.py CHANGED
@@ -6,7 +6,7 @@ import inspect
6
  import pandas as pd
7
  from smolagents import (
8
  CodeAgent,
9
- InferenceClientModel, # <-- Updated here
10
  DuckDuckGoSearchTool,
11
  WikipediaSearchTool,
12
  PythonInterpreterTool,
@@ -16,6 +16,33 @@ from smolagents import (
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # --- Basic Agent Definition ---
20
  @tool
21
  def get_current_date_time() -> str:
@@ -27,9 +54,14 @@ class BasicAgent:
27
  def __init__(self):
28
  print("BasicAgent initialized.")
29
 
30
- # Using Hugging Face's free Serverless Inference API with the updated class name
31
- self.model = InferenceClientModel( # <-- Updated here
32
- model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
 
 
 
 
 
33
  )
34
 
35
  self.tools = [
@@ -45,26 +77,18 @@ class BasicAgent:
45
  max_steps=8,
46
  additional_authorized_imports=["datetime", "re", "json", "math", "collections"],
47
  )
48
- print("BasicAgent ready with Qwen2.5-Coder-32B-Instruct (CodeAgent).")
49
 
50
  def __call__(self, question: str) -> str:
51
- print(f"Agent received question: {question[:80]}...")
52
- max_retries = 3
53
- for attempt in range(max_retries):
54
- try:
55
- answer = self.agent.run(question)
56
- print(f"Agent answer: {str(answer)[:200]}")
57
- return str(answer)
58
- except Exception as e:
59
- err = str(e)
60
- if "429" in err or "rate_limit" in err.lower() or "quota" in err.lower():
61
- wait_time = 30 * (attempt + 1)
62
- print(f"Rate limit hit, waiting {wait_time}s before retry {attempt+1}/{max_retries}...")
63
- time.sleep(wait_time)
64
- else:
65
- print(f"Agent error: {e}")
66
- return f"Error: {err}"
67
- return "Error: Rate limit exceeded after retries"
68
 
69
  # --- The rest of the code ---
70
  def run_and_submit_all(profile: gr.OAuthProfile | None):
@@ -87,7 +111,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
87
  return f"Error initializing agent: {e}", None
88
 
89
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
90
- print(f"Agent code: {agent_code}")
91
  print(f"Fetching questions from: {questions_url}")
92
 
93
  try:
@@ -95,10 +118,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
95
  response.raise_for_status()
96
  questions_data = response.json()
97
  if not questions_data:
98
- print("No questions.")
99
  return "No questions.", None
100
  except Exception as e:
101
- print(f"Error fetching questions: {e}")
102
  return f"Error fetching questions: {e}", None
103
 
104
  results_log = []
@@ -119,9 +140,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
119
  print(f"Error on task {task_id}: {e}")
120
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
121
 
122
- # Wait 10 seconds between questions to play nicely with HF inference servers
123
- time.sleep(10)
124
-
125
  if not answers_payload:
126
  return "No answers.", pd.DataFrame(results_log)
127
 
@@ -151,9 +169,10 @@ with gr.Blocks() as demo:
151
  gr.Markdown(
152
  """
153
  **Instructions:**
154
- 1. Ensure you have your `HF_TOKEN` in your Space secrets (Settings -> Secrets).
155
  2. Log in with your Hugging Face account below.
156
- 3. Click 'Run Evaluation & Submit' to start. Please be patient, as inference will take a few minutes to process 20 questions securely.
 
157
  """
158
  )
159
  gr.LoginButton()
 
6
  import pandas as pd
7
  from smolagents import (
8
  CodeAgent,
9
+ LiteLLMModel,
10
  DuckDuckGoSearchTool,
11
  WikipediaSearchTool,
12
  PythonInterpreterTool,
 
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
# --- Custom Throttled Model to fix Gemini 15 RPM Limits ---
class ThrottledGeminiModel(LiteLLMModel):
    """LiteLLMModel wrapper that paces requests to respect Gemini rate limits.

    The Gemini free tier allows 15 requests per minute; sleeping 5 seconds
    before every call keeps us at or below 12 RPM. Transient rate-limit
    errors (429 / "rate limit" / "quota") are retried with a growing
    back-off so the agent's multi-step thought process is not interrupted;
    any other exception propagates immediately.
    """

    # Pause before each request; 5 s => at most 12 requests per minute.
    THROTTLE_SECONDS = 5
    # Extra attempts made after the first rate-limited failure.
    MAX_RETRIES = 5

    def __call__(self, *args, **kwargs):
        print("Throttling: Sleeping 5s to prevent hitting Gemini's 15 RPM limit...")
        time.sleep(self.THROTTLE_SECONDS)

        # One initial attempt plus MAX_RETRIES retries for rate-limit errors.
        # Call/sleep sequence is identical to the original loop + trailing
        # "final attempt": the last attempt's rate-limit error propagates.
        for attempt in range(self.MAX_RETRIES + 1):
            try:
                return super().__call__(*args, **kwargs)
            except Exception as e:
                error_msg = str(e).lower()
                is_rate_limit = (
                    "429" in error_msg
                    or "rate limit" in error_msg
                    or "quota" in error_msg
                )
                if not is_rate_limit or attempt == self.MAX_RETRIES:
                    # Bare raise preserves the original traceback.
                    raise
                wait_time = 30 * (attempt + 1)
                print(f"Internal API Rate limit hit. Pausing for {wait_time}s (Attempt {attempt+1}/{self.MAX_RETRIES})...")
                time.sleep(wait_time)
46
  # --- Basic Agent Definition ---
47
  @tool
48
  def get_current_date_time() -> str:
 
54
  def __init__(self):
55
  print("BasicAgent initialized.")
56
 
57
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
58
+ if not gemini_api_key:
59
+ raise ValueError("GEMINI_API_KEY environment variable not set in Space Secrets.")
60
+
61
+ # Using our custom throttled wrapper
62
+ self.model = ThrottledGeminiModel(
63
+ model_id="gemini/gemini-2.0-flash-lite",
64
+ api_key=gemini_api_key,
65
  )
66
 
67
  self.tools = [
 
77
  max_steps=8,
78
  additional_authorized_imports=["datetime", "re", "json", "math", "collections"],
79
  )
80
+ print("BasicAgent ready with Throttled Gemini 2.0 Flash-Lite.")
81
 
82
def __call__(self, question: str) -> str:
    """Run the agent on one question and return its answer as a string.

    Rate-limit retries are handled inside the throttled model, so a single
    run attempt suffices here; any failure is reported as an error string.
    """
    print(f"\nAgent received question: {question[:80]}...")
    try:
        answer = self.agent.run(question)
    except Exception as e:
        print(f"Agent error processing question: {e}")
        return f"Error: {str(e)}"
    print(f"Agent answer: {str(answer)[:200]}")
    return str(answer)
 
 
 
 
 
 
 
 
92
 
93
  # --- The rest of the code ---
94
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
111
  return f"Error initializing agent: {e}", None
112
 
113
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
114
  print(f"Fetching questions from: {questions_url}")
115
 
116
  try:
 
118
  response.raise_for_status()
119
  questions_data = response.json()
120
  if not questions_data:
 
121
  return "No questions.", None
122
  except Exception as e:
 
123
  return f"Error fetching questions: {e}", None
124
 
125
  results_log = []
 
140
  print(f"Error on task {task_id}: {e}")
141
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
142
 
 
 
 
143
  if not answers_payload:
144
  return "No answers.", pd.DataFrame(results_log)
145
 
 
169
  gr.Markdown(
170
  """
171
  **Instructions:**
172
+ 1. Set `GEMINI_API_KEY` in your Space Secrets.
173
  2. Log in with your Hugging Face account below.
174
+ 3. Click 'Run Evaluation & Submit' to start.
175
+ *(Note: Because we are intentionally throttling the agent to respect Gemini's free tier limits, running all 20 questions might take around 10 to 15 minutes. Feel free to grab a coffee!)*
176
  """
177
  )
178
  gr.LoginButton()