Final_Assignment_Template

Sleeping

App Files Community

claudi47 committed on Apr 4

Commit

1ccc559

1 Parent(s): 5850246

restored default model

Browse files

Files changed (1) hide show

app.py +223 -101

app.py CHANGED Viewed

@@ -14,15 +14,16 @@ from smolagents import (
 load_dotenv()
-# (Keep Constants as is)
 # --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # Format instructions appended to every question
 # so that the agent returns exact-match-friendly
 # answers via final_answer().
 ANSWER_FORMAT_INSTRUCTIONS = """
 IMPORTANT FORMAT INSTRUCTIONS:
 Your final_answer must be as concise as possible:
 - If the answer is a number, return ONLY the number
@@ -34,18 +35,15 @@ Your final_answer must be as concise as possible:
 - If the answer is a comma separated list, apply
   the rules above to each element.
 Do NOT include explanations in your final_answer,
-just the bare answer.
-"""
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 # --------------------------------------------------
 # Custom tool: download a GAIA task file
 # --------------------------------------------------
 class GaiaFileFetcherTool(Tool):
     """Downloads the file attached to a GAIA task."""
     name = "fetch_task_file"
     description = (
         "Downloads the file attached to a GAIA task "
@@ -62,24 +60,24 @@ class GaiaFileFetcherTool(Tool):
         }
     }
     output_type = "string"
     def __init__(self, api_url: str, **kwargs):
         super().__init__(**kwargs)
         self.api_url = api_url
     def forward(self, task_id: str) -> str:
         import requests as _req
         import tempfile as _tmp
         import mimetypes as _mt
         url = f"{self.api_url}/files/{task_id}"
         resp = _req.get(url, timeout=30)
         resp.raise_for_status()
         # Derive a sensible extension from headers
         ct = resp.headers.get("Content-Type", "")
         ext = _mt.guess_extension(ct.split(";")[0]) or ""
         cd = resp.headers.get(
             "Content-Disposition", ""
         )
@@ -87,34 +85,46 @@ class GaiaFileFetcherTool(Tool):
         if "filename=" in cd:
             fname = cd.split("filename=")[-1]
             fname = fname.strip('"').strip("'")
         if not fname:
             fname = f"{task_id}{ext}"
         path = os.path.join(
             _tmp.gettempdir(), fname
         )
         with open(path, "wb") as f:
             f.write(resp.content)
         return path
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         model = InferenceClientModel(
-            model_id="Qwen/Qwen2.5-72B-Instruct",
             token=os.getenv("HF_TOKEN"),
         )
         self.file_tool = GaiaFileFetcherTool(
             api_url=DEFAULT_API_URL,
         )
         self.agent = CodeAgent(
             model=model,
             tools=[
                 DuckDuckGoSearchTool(),
-                WikipediaSearchTool(user_agent="GaiaAgent/1.0"),
                 VisitWebpageTool(),
                 self.file_tool,
             ],
@@ -156,91 +166,163 @@ class BasicAgent:
         return raw.strip()
-def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
-    and displays the results.
     """
-    # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
     if profile:
         username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     # 2. Fetch Questions
-    print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
             print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
         print(f"Response text: {response.text[:500]}")
-        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run your Agent
     results_log = []
     answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append(
-                {"task_id": task_id, "submitted_answer": submitted_answer}
             )
             results_log.append(
                 {
                     "Task ID": task_id,
                     "Question": question_text,
-                    "Submitted Answer": submitted_answer,
                 }
             )
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
             results_log.append(
                 {
                     "Task ID": task_id,
                     "Question": question_text,
-                    "Submitted Answer": f"AGENT ERROR: {e}",
                 }
             )
     if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {
@@ -248,107 +330,147 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         "agent_code": agent_code,
         "answers": answers_payload,
     }
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     # 5. Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
         try:
             error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
         except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
-# --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
-        **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        ---
-        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(
-        label="Run Status / Submission Result", lines=5, interactive=False
     )
-    # Removed max_rows=10 from DataFrame constructor
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
-    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup:  # Print repo URLs if SPACE_ID is found
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(
-            f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
-        )
     else:
-        print(
-            "ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
-        )
-    print("-" * (60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 load_dotenv()
 # --- Constants ---
+DEFAULT_API_URL = (
+    "https://agents-course-unit4-scoring.hf.space"
+)
 # Format instructions appended to every question
 # so that the agent returns exact-match-friendly
 # answers via final_answer().
 ANSWER_FORMAT_INSTRUCTIONS = """
 IMPORTANT FORMAT INSTRUCTIONS:
 Your final_answer must be as concise as possible:
 - If the answer is a number, return ONLY the number
 - If the answer is a comma separated list, apply
   the rules above to each element.
 Do NOT include explanations in your final_answer,
+just the bare answer."""
 # --------------------------------------------------
 # Custom tool: download a GAIA task file
 # --------------------------------------------------
 class GaiaFileFetcherTool(Tool):
     """Downloads the file attached to a GAIA task."""
     name = "fetch_task_file"
     description = (
         "Downloads the file attached to a GAIA task "
         }
     }
     output_type = "string"
     def __init__(self, api_url: str, **kwargs):
         super().__init__(**kwargs)
         self.api_url = api_url
     def forward(self, task_id: str) -> str:
         import requests as _req
         import tempfile as _tmp
         import mimetypes as _mt
         url = f"{self.api_url}/files/{task_id}"
         resp = _req.get(url, timeout=30)
         resp.raise_for_status()
         # Derive a sensible extension from headers
         ct = resp.headers.get("Content-Type", "")
         ext = _mt.guess_extension(ct.split(";")[0]) or ""
         cd = resp.headers.get(
             "Content-Disposition", ""
         )
         if "filename=" in cd:
             fname = cd.split("filename=")[-1]
             fname = fname.strip('"').strip("'")
         if not fname:
             fname = f"{task_id}{ext}"
         path = os.path.join(
             _tmp.gettempdir(), fname
         )
         with open(path, "wb") as f:
             f.write(resp.content)
         return path
+# --------------------------------------------------
+# Agent wrapper
+# --------------------------------------------------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         model = InferenceClientModel(
+            model_id=(
+                "Qwen/Qwen2.5-72B-Instruct"
+            ),
             token=os.getenv("HF_TOKEN"),
+            # If you have HF PRO, try a faster
+            # provider like "novita" or "hyperbolic"
+            # provider="novita",
         )
         self.file_tool = GaiaFileFetcherTool(
             api_url=DEFAULT_API_URL,
         )
         self.agent = CodeAgent(
             model=model,
             tools=[
                 DuckDuckGoSearchTool(),
+                WikipediaSearchTool(
+                    user_agent="GaiaAgent/1.0"
+                ),
                 VisitWebpageTool(),
                 self.file_tool,
             ],
         return raw.strip()
+# --------------------------------------------------
+# Gradio: run all & submit
+# --------------------------------------------------
+def run_and_submit_all(
+    profile: gr.OAuthProfile | None,
+):
     """
+    Fetches all questions, runs the agent,
+    submits answers, and displays results.
     """
+    space_id = os.getenv("SPACE_ID")
     if profile:
         username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
+        return (
+            "Please Login to Hugging Face "
+            "with the button.",
+            None,
+        )
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent
     try:
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    agent_code = (
+        f"https://huggingface.co/spaces/"
+        f"{space_id}/tree/main"
+    )
     print(agent_code)
     # 2. Fetch Questions
+    print(
+        f"Fetching questions from: {questions_url}"
+    )
     try:
+        response = requests.get(
+            questions_url, timeout=15
+        )
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
             print("Fetched questions list is empty.")
+            return (
+                "Fetched questions list is empty "
+                "or invalid format.",
+                None,
+            )
+        print(
+            f"Fetched {len(questions_data)} "
+            f"questions."
+        )
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(
+            "Error decoding JSON from questions "
+            f"endpoint: {e}"
+        )
         print(f"Response text: {response.text[:500]}")
+        return (
+            "Error decoding server response "
+            f"for questions: {e}",
+            None,
+        )
     except Exception as e:
+        print(
+            "Unexpected error fetching "
+            f"questions: {e}"
+        )
+        return (
+            "Unexpected error fetching "
+            f"questions: {e}",
+            None,
+        )
+    # 3. Run Agent on each question
     results_log = []
     answers_payload = []
+    total = len(questions_data)
+    print(f"Running agent on {total} questions...")
+    for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
+            print(
+                "Skipping item with missing "
+                f"task_id or question: {item}"
+            )
             continue
+        # Check if the question has a file
+        file_name = item.get("file_name", "")
+        has_file = bool(file_name)
+        print(
+            f"[{i+1}/{total}] Task {task_id}"
+            f"{' (has file)' if has_file else ''}"
+        )
         try:
+            submitted_answer = agent(
+                question_text,
+                task_id,
+                has_file,
+            )
             answers_payload.append(
+                {
+                    "task_id": task_id,
+                    "submitted_answer": (
+                        submitted_answer
+                    ),
+                }
             )
             results_log.append(
                 {
                     "Task ID": task_id,
                     "Question": question_text,
+                    "Submitted Answer": (
+                        submitted_answer
+                    ),
                 }
             )
         except Exception as e:
+            print(
+                f"Error on task {task_id}: {e}"
+            )
             results_log.append(
                 {
                     "Task ID": task_id,
                     "Question": question_text,
+                    "Submitted Answer": (
+                        f"AGENT ERROR: {e}"
+                    ),
                 }
             )
     if not answers_payload:
+        print(
+            "Agent did not produce any answers."
+        )
+        return (
+            "Agent did not produce any answers "
+            "to submit.",
+            pd.DataFrame(results_log),
+        )
     # 4. Prepare Submission
     submission_data = {
         "agent_code": agent_code,
         "answers": answers_payload,
     }
+    status_update = (
+        f"Agent finished. Submitting "
+        f"{len(answers_payload)} answers for "
+        f"user '{username}'..."
+    )
     print(status_update)
     # 5. Submit
+    print(
+        f"Submitting {len(answers_payload)} "
+        f"answers to: {submit_url}"
+    )
     try:
+        response = requests.post(
+            submit_url,
+            json=submission_data,
+            timeout=60,
+        )
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
+            f"Overall Score: "
+            f"{result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}"
+            f"/{result_data.get('total_attempted', '?')}"
+            f" correct)\n"
+            f"Message: "
+            f"{result_data.get('message', 'N/A')}"
         )
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
+        error_detail = (
+            "Server responded with status "
+            f"{e.response.status_code}."
+        )
         try:
             error_json = e.response.json()
+            error_detail += (
+                " Detail: "
+                f"{error_json.get('detail', e.response.text)}"
+            )
         except requests.exceptions.JSONDecodeError:
+            error_detail += (
+                f" Response: "
+                f"{e.response.text[:500]}"
+            )
+        status_message = (
+            f"Submission Failed: {error_detail}"
+        )
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.Timeout:
+        status_message = (
+            "Submission Failed: Request timed out."
+        )
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except requests.exceptions.RequestException as e:
+        status_message = (
+            f"Submission Failed: Network error - {e}"
+        )
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
+        status_message = (
+            "Unexpected error during "
+            f"submission: {e}"
+        )
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
+# --------------------------------------------------
+# Gradio UI
+# --------------------------------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
+**Instructions:**
+1. Clone this space and customise the agent.
+2. Log in with the button below.
+3. Click **Run Evaluation & Submit All Answers**.
+---
+*Processing all 20 questions will take several
+minutes. The agent uses web search, Wikipedia,
+page fetching, and file download tools.*
         """
     )
     gr.LoginButton()
+    run_button = gr.Button(
+        "Run Evaluation & Submit All Answers"
+    )
     status_output = gr.Textbox(
+        label="Run Status / Submission Result",
+        lines=5,
+        interactive=False,
+    )
+    results_table = gr.DataFrame(
+        label="Questions and Agent Answers",
+        wrap=True,
     )
+    run_button.click(
+        fn=run_and_submit_all,
+        outputs=[status_output, results_table],
+    )
 if __name__ == "__main__":
+    print(
+        "\n" + "-" * 30
+        + " App Starting "
+        + "-" * 30
+    )
+    space_host = os.getenv("SPACE_HOST")
+    space_id = os.getenv("SPACE_ID")
+    if space_host:
+        print(f"✅ SPACE_HOST: {space_host}")
     else:
+        print("ℹ️  SPACE_HOST not found.")
+    if space_id:
+        print(f"✅ SPACE_ID: {space_id}")
+    else:
+        print("ℹ️  SPACE_ID not found.")
+    print("-" * 74 + "\n")
+    print("Launching Gradio Interface...")
+    demo.launch(debug=True, share=False)