Final_Assignment

Sleeping

App Files Community

Shivangsinha committed 15 days ago

Commit

6767692

verified ·

1 Parent(s): 0e853cf

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -50

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import os
 import time
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
 from smolagents import (
     CodeAgent,
@@ -13,37 +12,8 @@ from smolagents import (
     tool,
 )
-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Custom Throttled Model to fix Gemini 15 RPM Limits ---
-class ThrottledGeminiModel(LiteLLMModel):
-    """
-    Wraps the LiteLLMModel to automatically enforce delays between requests.
-    Gemini Free Tier allows 15 requests per minute.
-    By sleeping 5 seconds before every call, we guarantee we never exceed the limit.
-    It also catches internal 429 errors without breaking the agent's multi-step thought process.
-    """
-    def __call__(self, *args, **kwargs):
-        print("Throttling: Sleeping 5s to prevent hitting Gemini's 15 RPM limit...")
-        time.sleep(5)
-        max_retries = 5
-        for attempt in range(max_retries):
-            try:
-                return super().__call__(*args, **kwargs)
-            except Exception as e:
-                error_msg = str(e).lower()
-                if "429" in error_msg or "rate limit" in error_msg or "quota" in error_msg:
-                    wait_time = 30 * (attempt + 1)
-                    print(f"Internal API Rate limit hit. Pausing for {wait_time}s (Attempt {attempt+1}/{max_retries})...")
-                    time.sleep(wait_time)
-                else:
-                    raise e
-        # Final attempt if loop finishes without returning
-        return super().__call__(*args, **kwargs)
-# --- Basic Agent Definition ---
 @tool
 def get_current_date_time() -> str:
     """Returns the current date and time in ISO format."""
@@ -54,14 +24,15 @@ class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
-        gemini_api_key = os.getenv("GEMINI_API_KEY")
-        if not gemini_api_key:
-            raise ValueError("GEMINI_API_KEY environment variable not set in Space Secrets.")
-        self.model = ThrottledGeminiModel(
-            model_id="gemini/gemini-2.0-flash", # <-- Changed from flash-lite
-            api_key=gemini_api_key,
         )
         self.tools = [
@@ -74,23 +45,35 @@ class BasicAgent:
         self.agent = CodeAgent(
             tools=self.tools,
             model=self.model,
-            max_steps=8,
             additional_authorized_imports=["datetime", "re", "json", "math", "collections"],
         )
-        print("BasicAgent ready with Throttled Gemini 2.0 Flash-Lite.")
     def __call__(self, question: str) -> str:
         print(f"\nAgent received question: {question[:80]}...")
-        # The retry loop is now handled safely inside the ThrottledGeminiModel
-        try:
-            answer = self.agent.run(question)
-            print(f"Agent answer: {str(answer)[:200]}")
-            return str(answer)
-        except Exception as e:
-            print(f"Agent error processing question: {e}")
-            return f"Error: {str(e)}"
-# --- The rest of the code ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
     if profile:
@@ -140,6 +123,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             print(f"Error on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
     if not answers_payload:
         return "No answers.", pd.DataFrame(results_log)
@@ -169,10 +156,9 @@ with gr.Blocks() as demo:
     gr.Markdown(
         """
         **Instructions:**
-        1. Set `GEMINI_API_KEY` in your Space Secrets.
         2. Log in with your Hugging Face account below.
         3. Click 'Run Evaluation & Submit' to start.
-        *(Note: Because we are intentionally throttling the agent to respect Gemini's free tier limits, running all 20 questions might take around 10 to 15 minutes. Feel free to grab a coffee!)*
         """
     )
     gr.LoginButton()

 import time
 import gradio as gr
 import requests
 import pandas as pd
 from smolagents import (
     CodeAgent,
     tool,
 )
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 @tool
 def get_current_date_time() -> str:
     """Returns the current date and time in ISO format."""
     def __init__(self):
         print("BasicAgent initialized.")
+        # 1. Fetch the OpenRouter API Key
+        openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
+        if not openrouter_api_key:
+            raise ValueError("OPENROUTER_API_KEY environment variable not set in Space Secrets.")
+        # 2. Use LiteLLM to connect to OpenRouter's completely free Llama 3.3 70B endpoint
+        self.model = LiteLLMModel(
+            model_id="openrouter/meta-llama/llama-3.3-70b-instruct:free",
+            api_key=openrouter_api_key,
         )
         self.tools = [
         self.agent = CodeAgent(
             tools=self.tools,
             model=self.model,
+            max_steps=6,  # Reduced from 8 to save tokens and prevent quota crashes
             additional_authorized_imports=["datetime", "re", "json", "math", "collections"],
         )
+        print("BasicAgent ready with OpenRouter (Llama-3.3-70b Free).")
     def __call__(self, question: str) -> str:
         print(f"\nAgent received question: {question[:80]}...")
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                # Brief safety pause before each attempt (runs once per agent.run call, not per agent step)
+                time.sleep(2)
+                answer = self.agent.run(question)
+                print(f"Agent answer: {str(answer)[:200]}")
+                return str(answer)
+            except Exception as e:
+                err_msg = str(e).lower()
+                # If we hit a rate limit, pause and retry
+                if "429" in err_msg or "rate limit" in err_msg or "too many requests" in err_msg:
+                    wait_time = 20 * (attempt + 1)
+                    print(f"Rate limit hit! Pausing for {wait_time} seconds before retrying (Attempt {attempt+1}/{max_retries})...")
+                    time.sleep(wait_time)
+                else:
+                    print(f"Agent error processing question: {e}")
+                    return f"Error: {str(e)}"
+        return "Error: Rate limit exceeded after maximum retries."
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
     if profile:
             print(f"Error on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
+        # CRITICAL FIX: Give the API token bucket time to cool down between questions
+        print("Cooling down for 15 seconds to prevent token exhaustion...")
+        time.sleep(15)
     if not answers_payload:
         return "No answers.", pd.DataFrame(results_log)
     gr.Markdown(
         """
         **Instructions:**
+        1. Set `OPENROUTER_API_KEY` in your Space Secrets.
         2. Log in with your Hugging Face account below.
         3. Click 'Run Evaluation & Submit' to start.
         """
     )
     gr.LoginButton()