Gaia_test_ai_agent

Sleeping

App Files Files Community

kamorou committed on Jul 1

Commit

f568d06

verified ·

1 Parent(s): db2d14f

Update app.py

Browse files

Files changed (1) hide show

app.py +445 -39

app.py CHANGED Viewed

@@ -1,23 +1,445 @@
 import os
-import gradio as gr
 import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
     def __init__(self):
-        print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
@@ -40,7 +462,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -91,7 +516,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
@@ -142,19 +567,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
-        **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        ---
-        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
@@ -163,7 +582,6 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
@@ -173,24 +591,12 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
     print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
     demo.launch(debug=True, share=False)

+# import os
+# import gradio as gr
+# import requests
+# import inspect
+# import pandas as pd
+# # (Keep Constants as is)
+# # --- Constants ---
+# DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# # --- Basic Agent Definition ---
+# # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+# class BasicAgent:
+#     def __init__(self):
+#         print("BasicAgent initialized.")
+#     def __call__(self, question: str) -> str:
+#         print(f"Agent received question (first 50 chars): {question[:50]}...")
+#         fixed_answer = "This is a default answer."
+#         print(f"Agent returning fixed answer: {fixed_answer}")
+#         return fixed_answer
+# def run_and_submit_all( profile: gr.OAuthProfile | None):
+#     """
+#     Fetches all questions, runs the BasicAgent on them, submits all answers,
+#     and displays the results.
+#     """
+#     # --- Determine HF Space Runtime URL and Repo URL ---
+#     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+#     if profile:
+#         username= f"{profile.username}"
+#         print(f"User logged in: {username}")
+#     else:
+#         print("User not logged in.")
+#         return "Please Login to Hugging Face with the button.", None
+#     api_url = DEFAULT_API_URL
+#     questions_url = f"{api_url}/questions"
+#     submit_url = f"{api_url}/submit"
+#     # 1. Instantiate Agent ( modify this part to create your agent)
+#     try:
+#         agent = BasicAgent()
+#     except Exception as e:
+#         print(f"Error instantiating agent: {e}")
+#         return f"Error initializing agent: {e}", None
+#     # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
+#     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+#     print(agent_code)
+#     # 2. Fetch Questions
+#     print(f"Fetching questions from: {questions_url}")
+#     try:
+#         response = requests.get(questions_url, timeout=15)
+#         response.raise_for_status()
+#         questions_data = response.json()
+#         if not questions_data:
+#              print("Fetched questions list is empty.")
+#              return "Fetched questions list is empty or invalid format.", None
+#         print(f"Fetched {len(questions_data)} questions.")
+#     except requests.exceptions.RequestException as e:
+#         print(f"Error fetching questions: {e}")
+#         return f"Error fetching questions: {e}", None
+#     except requests.exceptions.JSONDecodeError as e:
+#          print(f"Error decoding JSON response from questions endpoint: {e}")
+#          print(f"Response text: {response.text[:500]}")
+#          return f"Error decoding server response for questions: {e}", None
+#     except Exception as e:
+#         print(f"An unexpected error occurred fetching questions: {e}")
+#         return f"An unexpected error occurred fetching questions: {e}", None
+#     # 3. Run your Agent
+#     results_log = []
+#     answers_payload = []
+#     print(f"Running agent on {len(questions_data)} questions...")
+#     for item in questions_data:
+#         task_id = item.get("task_id")
+#         question_text = item.get("question")
+#         if not task_id or question_text is None:
+#             print(f"Skipping item with missing task_id or question: {item}")
+#             continue
+#         try:
+#             submitted_answer = agent(question_text)
+#             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+#             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+#         except Exception as e:
+#              print(f"Error running agent on task {task_id}: {e}")
+#              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+#     if not answers_payload:
+#         print("Agent did not produce any answers to submit.")
+#         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+#     # 4. Prepare Submission
+#     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+#     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+#     print(status_update)
+#     # 5. Submit
+#     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+#     try:
+#         response = requests.post(submit_url, json=submission_data, timeout=60)
+#         response.raise_for_status()
+#         result_data = response.json()
+#         final_status = (
+#             f"Submission Successful!\n"
+#             f"User: {result_data.get('username')}\n"
+#             f"Overall Score: {result_data.get('score', 'N/A')}% "
+#             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+#             f"Message: {result_data.get('message', 'No message received.')}"
+#         )
+#         print("Submission successful.")
+#         results_df = pd.DataFrame(results_log)
+#         return final_status, results_df
+#     except requests.exceptions.HTTPError as e:
+#         error_detail = f"Server responded with status {e.response.status_code}."
+#         try:
+#             error_json = e.response.json()
+#             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+#         except requests.exceptions.JSONDecodeError:
+#             error_detail += f" Response: {e.response.text[:500]}"
+#         status_message = f"Submission Failed: {error_detail}"
+#         print(status_message)
+#         results_df = pd.DataFrame(results_log)
+#         return status_message, results_df
+#     except requests.exceptions.Timeout:
+#         status_message = "Submission Failed: The request timed out."
+#         print(status_message)
+#         results_df = pd.DataFrame(results_log)
+#         return status_message, results_df
+#     except requests.exceptions.RequestException as e:
+#         status_message = f"Submission Failed: Network error - {e}"
+#         print(status_message)
+#         results_df = pd.DataFrame(results_log)
+#         return status_message, results_df
+#     except Exception as e:
+#         status_message = f"An unexpected error occurred during submission: {e}"
+#         print(status_message)
+#         results_df = pd.DataFrame(results_log)
+#         return status_message, results_df
+# # --- Build Gradio Interface using Blocks ---
+# with gr.Blocks() as demo:
+#     gr.Markdown("# Basic Agent Evaluation Runner")
+#     gr.Markdown(
+#         """
+#         **Instructions:**
+#         1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+#         2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+#         3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+#         ---
+#         **Disclaimers:**
+#         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+#         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
+#         """
+#     )
+#     gr.LoginButton()
+#     run_button = gr.Button("Run Evaluation & Submit All Answers")
+#     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+#     # Removed max_rows=10 from DataFrame constructor
+#     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+#     run_button.click(
+#         fn=run_and_submit_all,
+#         outputs=[status_output, results_table]
+#     )
+# if __name__ == "__main__":
+#     print("\n" + "-"*30 + " App Starting " + "-"*30)
+#     # Check for SPACE_HOST and SPACE_ID at startup for information
+#     space_host_startup = os.getenv("SPACE_HOST")
+#     space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+#     if space_host_startup:
+#         print(f"✅ SPACE_HOST found: {space_host_startup}")
+#         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+#     else:
+#         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+#     if space_id_startup: # Print repo URLs if SPACE_ID is found
+#         print(f"✅ SPACE_ID found: {space_id_startup}")
+#         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+#         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+#     else:
+#         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+#     print("-"*(60 + len(" App Starting ")) + "\n")
+#     print("Launching Gradio Interface for Basic Agent Evaluation...")
+#     demo.launch(debug=True, share=False)
+##################################
+#
+# =================================================================================================
+#  ✅ --- ✅  FINAL ASSESSMENT AGENT - INSTRUCTOR'S VERSION ✅ --- ✅
+# =================================================================================================
+#
+#  Instructions:
+#  1. Make sure you have a requirements.txt file with all the necessary packages.
+#  2. Set your GROQ_API_KEY in the Hugging Face Space secrets.
+#  3. This code replaces the original template entirely.
+#
+# =================================================================================================
 import os
+import io
 import requests
 import inspect
 import pandas as pd
+import gradio as gr
+from contextlib import redirect_stdout
+from typing import TypedDict, Annotated, List, Union
+import operator
+# --- LangChain & LangGraph Imports ---
+from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage, AIMessage
+from langchain_core.tools import tool
+from langchain_groq import ChatGroq
+# from langchain_openai import ChatOpenAI #<-- Alternative LLM
+from langgraph.graph import StateGraph, END
+from langgraph.prebuilt import ToolExecutor
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# Directory (relative to the working directory) where the read_file tool saves downloads.
+FILES_DIR = "./files"
+# Created at import time; exist_ok=True makes repeated imports/restarts harmless.
+os.makedirs(FILES_DIR, exist_ok=True)
+#
+# ================================================================================================
+#  ✅ 1. DEFINE THE AGENT'S TOOLS
+# ================================================================================================
+#  Each tool is a simple Python function decorated with `@tool`.
+#  The docstring of the function is CRUCIAL. The LLM uses it to decide which tool to use.
+#
+@tool
+def web_search(query: str) -> str:
+    """
+    Searches the web using DuckDuckGo to find up-to-date information, facts, or answers to general questions.
+    Use this for any questions that require current event knowledge or broad-spectrum information.
+    """
+    # NOTE: the docstring above doubles as the tool description shown to the LLM,
+    # so it is kept word-for-word; implementation notes live in comments instead.
+    print(f"--- Calling Web Search Tool with query: {query} ---")
+    # Imported inside the function, so the package is only required once the tool runs.
+    from duckduckgo_search import DDGS
+    try:
+        with DDGS() as search_client:
+            # Materialise at most five hits so they can be stringified for the model.
+            hits = list(search_client.text(query, max_results=5))
+            if not hits:
+                return "No results found."
+            return str(hits)
+    except Exception as e:
+        # Any search failure is reported back as text instead of being raised.
+        return f"Error during web search: {e}"
+@tool
+def read_file(url: str) -> str:
+    """
+    Downloads a file from a given URL, saves it locally, and returns its content.
+    Use this tool when the user provides a URL to a file that needs to be inspected.
+    The file is saved in the './files/' directory. The function returns the full text content.
+    """
+    # NOTE: the docstring above is the tool description given to the LLM; keep it user-facing.
+    # Function-scope import, the same way web_search pulls in its own dependency.
+    from urllib.parse import urlparse
+    print(f"--- Calling Read File Tool with URL: {url} ---")
+    try:
+        # Name the local file after the URL *path* only, so "?query" / "#fragment" parts
+        # never leak into the filename; URLs ending in "/" get a fixed fallback name
+        # instead of an empty one (which would make open() target the directory itself).
+        remote_name = os.path.basename(urlparse(url).path) or "downloaded_file"
+        filename = os.path.join(FILES_DIR, remote_name)
+        # A timeout keeps one unresponsive server from stalling the whole evaluation run.
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()  # Raise an exception for bad status codes
+        with open(filename, 'wb') as f:
+            f.write(response.content)
+        # Try to read as text, if it fails, it might be a binary file.
+        try:
+            with open(filename, 'r', encoding='utf-8') as f:
+                content = f.read()
+        except UnicodeDecodeError:
+            return f"Successfully downloaded binary file '{filename}'. Cannot display content."
+        return f"Successfully read file '{filename}'. Content:\n\n{content}"
+    except (requests.exceptions.RequestException, OSError) as e:
+        # OSError covers local failures (unwritable path, name resolving to a directory, ...)
+        # so they are reported to the model as text, just like download errors.
+        return f"Error downloading or reading file: {e}"
+@tool
+def python_interpreter(code: str) -> str:
+    """
+    Executes a given string of Python code and returns the output from stdout.
+    Use this for complex calculations, data manipulation, or any task that can be solved with code.
+    Each call runs in its own namespace, so variables do not persist between calls. You can use libraries like pandas, requests etc.
+    Make sure to use a print() statement to capture the output.
+    """
+    # SECURITY(review): exec() on model-generated code is NOT a sandbox -- the code has the
+    # full privileges of this process (filesystem, network, secrets in os.environ).
+    # Only run this where that is acceptable, or move execution to an isolated process.
+    print(f"--- Calling Python Interpreter Tool with code:\n{code} ---")
+    output_buffer = io.StringIO()
+    # Run against a *copy* of the module globals: already-imported names (pd, requests, os, ...)
+    # stay readable, but assignments made by the generated code can no longer rebind this
+    # module's own names (e.g. `app`, `llm`, `tools`) or leak into the next call.
+    exec_namespace = dict(globals())
+    try:
+        with redirect_stdout(output_buffer):
+            exec(code, exec_namespace)
+    except (Exception, SystemExit) as e:
+        # SystemExit is included so generated code calling exit()/sys.exit() is reported as a
+        # tool error instead of propagating out of the tool.
+        captured = output_buffer.getvalue()
+        message = f"Error executing Python code: {type(e).__name__}: {e}"
+        if captured:
+            # Whatever was printed before the failure helps the model debug its own code.
+            message += f"\nOutput before the error:\n{captured}"
+        return message
+    return f"Code executed successfully. Output:\n{output_buffer.getvalue()}"
+#
+# ================================================================================================
+#  ✅ 2. CONFIGURE THE AGENT'S STATE, BRAIN (LLM), AND TOOL EXECUTOR
+# ================================================================================================
+#
+# The AgentState is the "memory" of our agent. It keeps track of the conversation history.
+class AgentState(TypedDict):
+    # Running list of conversation messages shared by every graph node.
+    # The `operator.add` metadata is list concatenation; presumably LangGraph uses it as the
+    # reducer, so a node returning {"messages": [...]} appends rather than overwrites -- confirm
+    # against the LangGraph state documentation.
+    messages: Annotated[List[BaseMessage], operator.add]
+# List of all the tools our agent can use
+tools = [web_search, read_file, python_interpreter]
+# The ToolExecutor is a helper class that runs the tools for us
+tool_executor = ToolExecutor(tools)
+# The "Brain" of our agent. We're using Groq for speed.
+# Make sure to set GROQ_API_KEY in your HF Space secrets
+# NOTE(review): this object is built at import time, so a construction failure (presumably
+# e.g. a missing GROQ_API_KEY -- confirm) surfaces on module import, outside the try/except
+# that wraps `GaiaAgent()` in run_and_submit_all.
+llm = ChatGroq(model="llama3-70b-8192", temperature=0)
+# If you want to use OpenAI instead, uncomment the line below and set OPENAI_API_KEY
+# (the matching `from langchain_openai import ChatOpenAI` import is commented out as well).
+# llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
+# We now bind the tools to the LLM. This tells the LLM what functions it can call.
+llm_with_tools = llm.bind_tools(tools)
+#
+# ================================================================================================
+#  ✅ 3. DEFINE THE LANGGRAPH NODES AND EDGES
+# ================================================================================================
+#  This is the core logic of our agent, defined as a graph.
+#
+# NODE 1: The Agent Node (call_model)
+# This node invokes the LLM to decide the next action or to give a final answer.
+def call_model(state: AgentState) -> dict:
+    """Ask the tool-enabled LLM for its next message, given the conversation so far.
+
+    Returns a partial state update: a dict whose "messages" entry is a one-element
+    list holding the model's reply.
+    """
+    print("--- Calling LLM ---")
+    history = state['messages']
+    # Wrapped in a list because graph nodes hand back state updates, and the
+    # `messages` field of AgentState holds a list of messages.
+    return {"messages": [llm_with_tools.invoke(history)]}
+# NODE 2: The Tool Node (call_tool)
+# This node executes the tool chosen by the LLM.
+def call_tool(state: AgentState) -> dict:
+    """Run every tool call requested by the latest AI message.
+
+    Returns a partial state update whose "messages" entry holds one ToolMessage per
+    requested tool call, in the order the model issued them. Each ToolMessage carries
+    the matching `tool_call_id` so the model can pair results with its requests.
+    """
+    # Function-scope import, the same way web_search pulls in its own dependency.
+    from langgraph.prebuilt import ToolInvocation
+    last_message = state['messages'][-1]  # Expected to be an AIMessage with tool calls
+    tool_messages = []
+    # The model may request several tools in a single turn; answer all of them, otherwise
+    # the unanswered tool_call ids are left dangling in the history sent back to the LLM.
+    for action in last_message.tool_calls:
+        print(f"--- Preparing to call tool: {action['name']} with args {action['args']} ---")
+        try:
+            # ToolExecutor works on ToolInvocation objects (attributes `.tool` / `.tool_input`),
+            # not on the raw tool-call dict produced by the model.
+            tool_output = tool_executor.invoke(
+                ToolInvocation(tool=action['name'], tool_input=action['args'])
+            )
+        except Exception as e:
+            # Report the failure to the model as the tool result so it can retry or change
+            # approach, instead of aborting the whole question.
+            tool_output = f"Error running tool {action['name']}: {e}"
+        tool_messages.append(ToolMessage(content=str(tool_output), tool_call_id=action['id']))
+    return {"messages": tool_messages}
+# EDGE: The Conditional Router (should_continue)
+# This function decides which node to go to next.
+def should_continue(state: AgentState) -> str:
+    """Pick the next graph step from the newest message.
+
+    Returns "action" when that message carries tool calls and "end" otherwise.
+    """
+    newest_message = state['messages'][-1]
+    # No tool calls requested: the model has produced its final answer.
+    if not newest_message.tool_calls:
+        print("--- Decision: End of process ---")
+        return "end"
+    # At least one tool call is pending, so hand over to the tool node.
+    print("--- Decision: Call a tool ---")
+    return "action"
+#
+# ================================================================================================
+#  ✅ 4. BUILD AND COMPILE THE GRAPH
+# ================================================================================================
+#
+# Overall shape: agent -> (action -> agent)* -> END.
+# The cycle only stops when should_continue returns "end"; GaiaAgent.__call__ additionally
+# passes a recursion_limit when it streams the compiled graph.
+# 1. Initialize the graph and add our state object
+workflow = StateGraph(AgentState)
+# 2. Add the two nodes we defined: 'agent' and 'action'
+workflow.add_node("agent", call_model)
+workflow.add_node("action", call_tool)
+# 3. Set the entry point of the graph. The first thing to run is the 'agent' node.
+workflow.set_entry_point("agent")
+# 4. Add the conditional edge. This controls the flow of the graph.
+workflow.add_conditional_edges(
+    "agent",          # Start from the 'agent' node
+    should_continue,  # Use our function to decide the path
+    {
+        "action": "action", # If it returns "action", go to the 'action' node
+        "end": END          # If it returns "end", finish the graph
+    }
+)
+# 5. Add a normal edge. After 'action' runs, it should always go back to 'agent'.
+workflow.add_edge('action', 'agent')
+# 6. Compile the graph into a runnable app.
+app = workflow.compile()
+#
+# ================================================================================================
+#  ✅ 5. CREATE THE AGENT CLASS THAT THE TEMPLATE USES
+# ================================================================================================
+#  This class wraps our LangGraph agent in the format expected by the evaluation script.
+#
+class GaiaAgent:
+    """Callable wrapper that answers one question by running the compiled graph `app`."""
     def __init__(self):
+        print("GaiaAgent initialized.")
+        # Keep a handle on the module-level compiled graph; no other setup is done here.
+        self.agent_app = app
     def __call__(self, question: str) -> str:
+        """Stream the graph for `question` and return the content of the last agent message."""
+        print(f"Agent received question (first 100 chars): {question[:100]}...")
+        # The graph is seeded with a single human message containing the question.
+        graph_input = {"messages": [HumanMessage(content=question)]}
+        # recursion_limit is handed to the graph as run configuration to bound the loop.
+        run_config = {"recursion_limit": 25}
+        last_update = None
+        # Only the final streamed step is kept; earlier steps are consumed and dropped.
+        for step_index, last_update in enumerate(self.agent_app.stream(graph_input, run_config)):
+            if step_index == 0:
+                print("--- Starting Agentic Loop ---")
+        # The last step is expected to come from the 'agent' node; its newest message
+        # holds the final answer text.
+        closing_message = last_update['agent']['messages'][-1]
+        final_answer = closing_message.content
+        print(f"--- Agent finished. Final Answer: {final_answer} ---")
+        return final_answer
+#
+# ================================================================================================
+#  -- DO NOT MODIFY THE CODE BELOW THIS LINE --
+#  -- This is the Gradio App and Submission Logic from the course --
+# ================================================================================================
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
+        # -------------------------------------------------------------------
+        # THIS IS THE ONLY CHANGE IN THIS FUNCTION: We now use our GaiaAgent
+        agent = GaiaAgent()
+        # -------------------------------------------------------------------
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Final Assessment")
     gr.Markdown(
         """
+        **Instructor's Note:** This space is now powered by a LangGraph agent.
+        1.  Ensure your `GROQ_API_KEY` is set in the Space secrets.
+        2.  Make sure you have a `requirements.txt` file.
+        3.  Log in below and click 'Run Evaluation'. Good luck!
         """
     )
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
+    space_id_startup = os.getenv("SPACE_ID")
+    if space_id_startup:
         print(f"✅ SPACE_ID found: {space_id_startup}")
     else:
+        print("ℹ️  SPACE_ID environment variable not found (running locally?).")
     print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for GAIA Agent Evaluation...")
     demo.launch(debug=True, share=False)