Final_Assignment

Sleeping

App Files Files Community

Shivangsinha committed on Mar 15

Commit

37790b8

verified ·

1 Parent(s): 2db6e24

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -49

app.py CHANGED Viewed

@@ -5,72 +5,130 @@ import requests
 import pandas as pd
 from smolagents import (
     CodeAgent,
     InferenceClientModel,
     DuckDuckGoSearchTool,
     WikipediaSearchTool,
     PythonInterpreterTool,
     tool,
 )
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 @tool
 def get_current_date_time() -> str:
     """Returns the current date and time in ISO format."""
     from datetime import datetime
     return datetime.now().isoformat()
-class BasicAgent:
     def __init__(self):
-        print("BasicAgent initialized.")
-        new_hf_token = os.getenv("HF_TOKEN")
-        if not new_hf_token:
-            raise ValueError("HF_TOKEN environment variable not set in Space Secrets.")
-        self.model = InferenceClientModel(
-            model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
-            token=new_hf_token,
-        )
         self.tools = [
             DuckDuckGoSearchTool(),
             WikipediaSearchTool(),
             PythonInterpreterTool(),
             get_current_date_time,
         ]
-        self.agent = CodeAgent(
-            tools=self.tools,
-            model=self.model,
-            max_steps=10,
-            # CRITICAL FIX: Added pandas and requests so the agent can download and read Excel/CSV files!
-            additional_authorized_imports=["datetime", "re", "json", "math", "collections", "pandas", "requests"],
-        )
-        print("BasicAgent ready with Hugging Face (Qwen2.5-Coder-32B).")
     def __call__(self, question: str) -> str:
         print(f"\nAgent received question: {question[:80]}...")
-        max_retries = 3
-        for attempt in range(max_retries):
-            try:
-                time.sleep(2)
-                answer = self.agent.run(question)
-                print(f"Agent answer: {str(answer)[:200]}")
-                return str(answer)
-            except Exception as e:
-                err_msg = str(e).lower()
-                if "429" in err_msg or "rate limit" in err_msg or "too many requests" in err_msg:
-                    wait_time = 20 * (attempt + 1)
-                    print(f"Rate limit hit! Pausing for {wait_time} seconds before retrying...")
-                    time.sleep(wait_time)
-                else:
-                    print(f"Agent error processing question: {e}")
-                    return f"Error: {str(e)}"
-        return "Error: Rate limit exceeded after maximum retries."
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
     if profile:
@@ -85,13 +143,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     submit_url = f"{api_url}/submit"
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
@@ -109,18 +166,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
-        # CRITICAL FIX 1: Grab the hidden file URL if the server provides one
         file_url = item.get("file_url")
         if not task_id or not question_text:
             continue
-        # CRITICAL FIX 2: Inject the file URL into the agent's prompt so it can download it
         if file_url:
             question_text += f"\n\n[IMPORTANT: This task requires analyzing an attached file. You MUST download or read it directly from this URL: {file_url} using your Python tool.]"
-        # CRITICAL FIX 3: Threaten the agent to act like a strict robot to pass the automated grader
         strict_prompt = (
             f"{question_text}\n\n"
             "CRITICAL SUBMISSION INSTRUCTIONS:\n"
@@ -129,19 +182,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             "2. DO NOT include any conversational text, explanations, or reasoning in your final output.\n"
             "3. If the answer is a name, number, or short string, output ONLY that exact string.\n"
             "4. For numbers, do not include symbols unless explicitly requested."
         )
         try:
-            # We pass the strict prompt instead of the raw question
             submitted_answer = agent(strict_prompt)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-            print(f"Error on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
-        print("Cooling down for 15 seconds to prevent token exhaustion...")
-        time.sleep(15)
     if not answers_payload:
         return "No answers.", pd.DataFrame(results_log)
@@ -163,18 +215,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(final_status)
         return final_status, pd.DataFrame(results_log)
     except Exception as e:
-        print(f"Submission error: {e}")
         return f"Submission failed: {e}", pd.DataFrame(results_log)
 # --- Build Gradio UI ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1. Ensure your `HF_TOKEN` is set.
         2. Log in below.
         3. Click 'Run Evaluation & Submit' to start.
         """
     )
     gr.LoginButton()

 import pandas as pd
 from smolagents import (
     CodeAgent,
+    LiteLLMModel,
     InferenceClientModel,
     DuckDuckGoSearchTool,
     WikipediaSearchTool,
     PythonInterpreterTool,
+    VisitWebpageTool,
     tool,
 )
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Custom Throttled Model to protect Gemini ---
class ThrottledGeminiModel(LiteLLMModel):
    """LiteLLM model that pauses before every request to protect the Gemini quota.

    The pause is applied in ``generate`` because recent smolagents agents call
    ``model.generate(...)`` directly and the base ``__call__`` merely delegates
    to it; throttling only ``__call__`` would therefore never fire. ``__call__``
    is still overridden so that older smolagents releases, whose models expose
    no ``generate`` method, keep the delay as well.
    """

    # Base delay in seconds before each request, meant to stay under 15 RPM.
    THROTTLE_SECONDS = 5

    def generate(self, *args, **kwargs):
        """Sleep for ``THROTTLE_SECONDS`` and then forward to the parent ``generate``."""
        time.sleep(self.THROTTLE_SECONDS)
        return super().generate(*args, **kwargs)

    def __call__(self, *args, **kwargs):
        """Forward to the parent ``__call__``, throttling exactly once per request."""
        # When the parent provides generate(), its __call__ routes through our
        # generate() override, which already sleeps; sleeping here too would
        # double the delay.
        if not hasattr(super(), "generate"):
            time.sleep(self.THROTTLE_SECONDS)
        return super().__call__(*args, **kwargs)
 @tool
 def get_current_date_time() -> str:
     """Returns the current date and time in ISO format."""
     # NOTE: the docstring above is left verbatim on purpose; smolagents' @tool
     # presumably uses it as the tool description shown to the model.
     import datetime as _dt

     # Naive local timestamp, rendered via isoformat().
     moment = _dt.datetime.now()
     return moment.isoformat()
class FailproofAgent:
    """Callable agent that answers a question using a cascade of LLM backends.

    Backends are registered in priority order, one per API key found in the
    environment. A question is run on the current backend; when that backend
    hits a hard quota error or exhausts its retries, the agent moves on to the
    next one.

    NOTE: ``current_model_idx`` is instance state and is never reset, so a
    backend that has been escalated past is not tried again for later
    questions handled by the same instance.
    """

    # Attempts made on one backend before escalating to the next.
    MAX_RETRIES_PER_MODEL = 3

    # Substrings (matched against the lower-cased error text) that mean the
    # backend cannot serve further requests at all.
    _QUOTA_MARKERS = ("402", "payment required", "quota", "limit 0", "spend limit")

    # Substrings that mean the backend is only temporarily throttling us.
    _RATE_LIMIT_MARKERS = ("429", "rate limit", "too many requests")

    def __init__(self):
        """Register every backend whose API key is set and build the tool list.

        Raises:
            ValueError: if none of the supported API keys is present.
        """
        print("Initializing Failproof Cascade Agent...")
        self.models = []

        # 1. Primary: Gemini 2.0 Flash (1500 daily requests, huge context)
        gemini_key = os.getenv("GEMINI_API_KEY")
        if gemini_key:
            self.models.append({
                "name": "Gemini 2.0 Flash",
                "model": ThrottledGeminiModel(model_id="gemini/gemini-2.0-flash", api_key=gemini_key)
            })

        # 2. Secondary: HF Qwen2.5-Coder (Great for code, serverless)
        # The UI instructions name the secret NEW_HF_TOKEN; HF_TOKEN is still
        # accepted so existing Space configurations keep working.
        hf_token = os.getenv("NEW_HF_TOKEN") or os.getenv("HF_TOKEN")
        if hf_token:
            self.models.append({
                "name": "Hugging Face Qwen2.5 Coder",
                "model": InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", token=hf_token)
            })

        # 3. Tertiary: Groq Llama 3.3 (Fast, smart fallback)
        groq_key = os.getenv("GROQ_API_KEY")
        if groq_key:
            self.models.append({
                "name": "Groq Llama 3.3 70B",
                "model": LiteLLMModel(model_id="groq/llama-3.3-70b-versatile", api_key=groq_key)
            })

        # 4. Emergency: OpenRouter Free Pool (Decentralized backup)
        or_key = os.getenv("OPENROUTER_API_KEY")
        if or_key:
            self.models.append({
                "name": "OpenRouter Auto-Free",
                "model": LiteLLMModel(model_id="openrouter/openrouter/free", api_key=or_key)
            })

        if not self.models:
            raise ValueError("No API keys found! Please set at least one in Space Secrets.")

        self.current_model_idx = 0
        self.tools = [
            DuckDuckGoSearchTool(),
            WikipediaSearchTool(),
            PythonInterpreterTool(),
            VisitWebpageTool(),  # Allows the agent to read inside websites
            get_current_date_time,
        ]
        print(f"Agent armed with {len(self.models)} fallback brains. Ready to go.")

    @staticmethod
    def _classify_error(err_msg: str) -> str:
        """Classify a lower-cased error message as "quota", "rate_limit" or "other".

        Quota markers are checked first, so a message that carries both a quota
        marker and a rate-limit marker is treated as a hard quota failure.
        """
        if any(marker in err_msg for marker in FailproofAgent._QUOTA_MARKERS):
            return "quota"
        if any(marker in err_msg for marker in FailproofAgent._RATE_LIMIT_MARKERS):
            return "rate_limit"
        return "other"

    def __call__(self, question: str) -> str:
        """Answer ``question``, escalating through the backends on failure.

        Args:
            question: The full prompt handed to the agent.

        Returns:
            The agent's answer as a string, or a fixed error string when every
            remaining backend has failed.
        """
        print(f"\nAgent received question: {question[:80]}...")
        last_attempt = self.MAX_RETRIES_PER_MODEL - 1

        # Keep trying models until we run out of backups
        while self.current_model_idx < len(self.models):
            current_brain = self.models[self.current_model_idx]
            print(f"🧠 USING BRAIN: {current_brain['name']}")

            # One fresh agent per backend; it is reused across that backend's retries.
            agent = CodeAgent(
                tools=self.tools,
                model=current_brain["model"],
                max_steps=7,
                additional_authorized_imports=["datetime", "re", "json", "math", "collections", "pandas", "requests"],
            )

            for attempt in range(self.MAX_RETRIES_PER_MODEL):
                try:
                    time.sleep(2)
                    answer = agent.run(question)
                    print(f"Agent answer: {str(answer)[:200]}")
                    return str(answer)
                except Exception as e:
                    err_msg = str(e).lower()
                    print(f"⚠️ Agent Error: {err_msg}")
                    kind = self._classify_error(err_msg)

                    # FATAL QUOTA ERROR: stop retrying and switch brains
                    if kind == "quota":
                        print(f"🚨 FATAL QUOTA HIT on {current_brain['name']}. Swapping to backup brain...")
                        break

                    # TEMPORARY RATE LIMIT: pause and retry the same brain.
                    # No pause after the final attempt: the brain is about to
                    # be abandoned, so waiting would only waste time.
                    if kind == "rate_limit":
                        if attempt < last_attempt:
                            wait_time = 20 * (attempt + 1)
                            print(f"⏳ Temporary rate limit. Pausing for {wait_time}s...")
                            time.sleep(wait_time)
                        continue

                    # OTHER ERRORS (Code failures, etc): Retry
                    print("Retrying due to generic error...")

            # Reaching here means this brain failed completely. Move to the next one.
            print(f"⏭️ Exhausted retries or hit hard limit on {current_brain['name']}. Escalating...")
            self.current_model_idx += 1

        return "Error: All available models exhausted their quotas or failed."
+# --- App Runner ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
     if profile:
     submit_url = f"{api_url}/submit"
     try:
+        agent = FailproofAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
         response = requests.get(questions_url, timeout=15)
     for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
         file_url = item.get("file_url")
         if not task_id or not question_text:
             continue
         if file_url:
             question_text += f"\n\n[IMPORTANT: This task requires analyzing an attached file. You MUST download or read it directly from this URL: {file_url} using your Python tool.]"
         strict_prompt = (
             f"{question_text}\n\n"
             "CRITICAL SUBMISSION INSTRUCTIONS:\n"
             "2. DO NOT include any conversational text, explanations, or reasoning in your final output.\n"
             "3. If the answer is a name, number, or short string, output ONLY that exact string.\n"
             "4. For numbers, do not include symbols unless explicitly requested."
+            "5. **ULTRATHINK** and double check the response making sure the return answer."
         )
         try:
             submitted_answer = agent(strict_prompt)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
+        print("Cooling down for 10 seconds to protect quotas...")
+        time.sleep(10)
     if not answers_payload:
         return "No answers.", pd.DataFrame(results_log)
         print(final_status)
         return final_status, pd.DataFrame(results_log)
     except Exception as e:
         return f"Submission failed: {e}", pd.DataFrame(results_log)
 # --- Build Gradio UI ---
 with gr.Blocks() as demo:
+    gr.Markdown("# The Failproof Multi-Model Agent Runner")
     gr.Markdown(
         """
         **Instructions:**
+        1. Ensure your API keys (`GEMINI_API_KEY`, `NEW_HF_TOKEN`, `GROQ_API_KEY`, etc.) are set in Space Secrets.
         2. Log in below.
         3. Click 'Run Evaluation & Submit' to start.
+        *(Watch the logs! If a model dies, it will automatically hot-swap to the next one).*
         """
     )
     gr.LoginButton()