Final_Assignment_Template_Final

Sleeping

App Files Files Community

mujtabarizvi committed on May 17, 2025

Commit

3bf855f

verified ·

1 Parent(s): 370af5e

Update app.py

Browse files

Files changed (1) hide show

app.py +242 -54

app.py CHANGED Viewed

@@ -3,32 +3,214 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-# (Keep Constants as is)
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
-    # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
-        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -38,36 +220,39 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
-    except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None
     # 3. Run your Agent
     results_log = []
@@ -80,18 +265,20 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
@@ -99,7 +286,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
@@ -139,30 +326,28 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
-# --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
         2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
         3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
         fn=run_and_submit_all,
@@ -171,24 +356,27 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
     demo.launch(debug=True, share=False)

 import requests
 import inspect
 import pandas as pd
+import re # For parsing LLM output
+# --- HF Inference API for LLM ---
# The Python client class exported by `huggingface_hub` is `InferenceClient`
# (`HfInference` is the name used by the JavaScript library and does not exist
# in the Python package).
# You can choose a different model, but make sure it's good at instruction following and ReAct-style prompting.
# Zephyr-7B-beta or Mistral-7B-Instruct are good choices available on the free inference API.
# Starling-LM-7B-beta is also excellent if available and performant enough.
LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"  # or "mistralai/Mistral-7B-Instruct-v0.2"
# Ensure you have a Hugging Face token set in your space's secrets if using certain models,
# though many popular ones work without it for basic inference.
# Name: HF_TOKEN, Value: your_hf_token_here (with read access is usually enough for inference)
try:
    # Imported inside the try block on purpose: a missing or incompatible
    # `huggingface_hub` then degrades to `llm_client = None` (which the rest of
    # the app already checks for) instead of crashing the module at import time.
    from huggingface_hub import InferenceClient

    hf_token = os.getenv("HF_TOKEN")
    llm_client = InferenceClient(model=LLM_MODEL, token=hf_token)
except Exception as e:
    print(f"Error initializing InferenceClient: {e}")
    llm_client = None
+# --- Tools ---
+# 1. Search Tool (using DuckDuckGo)
+from duckduckgo_search import DDGS
def search_tool(query: str) -> str:
    """
    Searches the web using DuckDuckGo for a given query and returns the top results.
    Args:
        query (str): The search query.
    Returns:
        str: A string containing the search results.
    """
    # NOTE: this docstring is injected verbatim into the agent prompt (via
    # inspect.getdoc), so implementation details are kept in comments instead.
    print(f"Tool: search_tool, Query: {query}")

    # Reject blank queries up front rather than spending a network round trip.
    if not isinstance(query, str) or not query.strip():
        return "Error performing search: empty query."

    try:
        with DDGS() as ddgs:
            # Materialise the hits: some releases of duckduckgo_search return a
            # lazy iterator, which is always truthy and would defeat the
            # "no results" check below.
            results = list(ddgs.text(query, max_results=3) or [])  # top 3 results
        if not results:
            return "No results found for your query."
        # .get() so that a hit missing one field does not fail the whole search.
        return "\n".join(
            f"Title: {hit.get('title', '')}\n"
            f"Snippet: {hit.get('body', '')}\n"
            f"URL: {hit.get('href', '')}"
            for hit in results
        )
    except Exception as e:
        # Tool failures are reported back to the agent as text, never raised.
        print(f"Error in search_tool: {e}")
        return f"Error performing search: {str(e)}"
+# 2. Calculator Tool
def calculator_tool(expression: str) -> str:
    """
    Calculates the result of a mathematical expression.
    Args:
        expression (str): The mathematical expression to evaluate (e.g., "2+2", "100*3.14/4").
                          Supports numbers, + - * / // % **, parentheses, sqrt(x) and pi.
    Returns:
        str: The result of the calculation or an error message.
    """
    # NOTE: this docstring is injected verbatim into the agent prompt (via
    # inspect.getdoc), so implementation details are kept in comments instead.
    #
    # SECURITY: the expression is produced by an LLM that has read arbitrary web
    # content, so it is untrusted. The previous implementation called eval()
    # with empty builtins, which can still be escaped through attribute access
    # on literals. The expression is now parsed with `ast` and only a small
    # whitelist of arithmetic nodes is evaluated.
    import ast
    import math
    import operator

    print(f"Tool: calculator_tool, Expression: {expression}")

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    constants = {"pi": math.pi}
    # x ** 0.5 (not math.sqrt) keeps the earlier behaviour for negative inputs.
    functions = {"sqrt": lambda x: x ** 0.5}
    max_exponent = 1000  # guards against inputs such as 9**9**9 hanging the app

    def _evaluate(node):
        """Recursively evaluate a whitelisted arithmetic AST node."""
        if isinstance(node, ast.Expression):
            return _evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is a subclass of int; reject it along with str/bytes/None.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError(f"unsupported literal {value!r}")
            return value
        if isinstance(node, ast.BinOp):
            apply = binary_ops.get(type(node.op))
            if apply is None:
                raise ValueError(f"unsupported operator {type(node.op).__name__}")
            left = _evaluate(node.left)
            right = _evaluate(node.right)
            if isinstance(node.op, ast.Pow) and abs(right) > max_exponent:
                raise ValueError("exponent too large")
            return apply(left, right)
        if isinstance(node, ast.UnaryOp):
            apply = unary_ops.get(type(node.op))
            if apply is None:
                raise ValueError(f"unsupported operator {type(node.op).__name__}")
            return apply(_evaluate(node.operand))
        if isinstance(node, ast.Name):
            if node.id in constants:
                return constants[node.id]
            raise ValueError(f"unknown name '{node.id}'")
        if isinstance(node, ast.Call):
            target = node.func
            if isinstance(target, ast.Name) and target.id in functions and not node.keywords:
                return functions[target.id](*[_evaluate(arg) for arg in node.args])
            raise ValueError("unsupported function call")
        raise ValueError(f"unsupported expression element {type(node).__name__}")

    try:
        # strip(): ast.parse rejects leading whitespace that eval() tolerated.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(_evaluate(tree))
    except Exception as e:
        # Tool failures are reported back to the agent as text, never raised.
        print(f"Error in calculator_tool: {e}")
        return f"Error calculating: {str(e)}. Ensure the expression is valid math."
+# --- Agent Definition ---
class ReActAgent:
    """ReAct-style (Thought / Action / Observation) agent around a text-generation client.

    On every iteration the agent sends the prompt plus the accumulated
    scratchpad to the LLM, keeps only the first step of the completion, and
    then either returns the ``Final Answer:`` it contains or executes the
    requested ``Action: tool_name[input]`` and appends the real tool output as
    an ``Observation:`` line.

    Args:
        llm_client: Object exposing ``text_generation(prompt, **kwargs) -> str``.
        tools: Mapping of tool name to a callable taking one string and
            returning the observation. Each callable's docstring is shown to
            the LLM as the tool description.
        max_iterations: Maximum number of LLM calls per question.

    Raises:
        ValueError: If ``llm_client`` is ``None``.
    """

    # Static part of the prompt. It is dedented *before* the tool descriptions
    # are inserted: multi-line tool docstrings contain lines at column 0, which
    # would otherwise stop inspect.cleandoc from removing the source indentation.
    _PROMPT_HEADER = inspect.cleandoc("""
        You are a helpful and observant AI assistant. Your goal is to answer the following question accurately.
        You must use a step-by-step thinking process (Thought, Action, Observation).
        Available tools:
        {tool_descriptions}
        Use the following format:
        Question: the input question you must answer
        Thought: You should always think about what to do.
        Action: The action to take, should be one of [{tool_names}]. The input to the tool is between the brackets. For example: search_tool[query] or calculator_tool[expression].
        Observation: The result of the action.
        ... (this Thought/Action/Observation sequence can repeat up to {max_iterations} times)
        Thought: I now know the final answer.
        Final Answer: The final answer to the original input question.
        Begin!
    """)

    # Compiled once; these are used on every iteration of every question.
    _FINAL_ANSWER_RE = re.compile(r"Final Answer:\s*(.*)", re.DOTALL | re.IGNORECASE)
    _ACTION_RE = re.compile(r"Action:\s*([a-zA-Z_0-9]+)\[(.*?)\]", re.DOTALL)
    _THOUGHT_RE = re.compile(r"Thought:\s*(.*)", re.IGNORECASE)
    _OBSERVATION_MARKER = "Observation:"

    def __init__(self, llm_client, tools: dict, max_iterations=7):
        print("ReActAgent initialized.")
        if llm_client is None:
            raise ValueError("LLM client not initialized. Check HF_TOKEN and model availability.")
        self.llm = llm_client
        self.tools = tools
        self.max_iterations = max_iterations
        self.stop_pattern = "Final Answer:"
        # Tool descriptions for the prompt, taken from each tool's docstring.
        self.tool_descriptions = "\n".join(
            f"- {name}: {inspect.getdoc(func)}" for name, func in tools.items()
        )
        self.tool_names = ", ".join(tools.keys())
        # str.format does not re-scan substituted values, so braces inside a
        # tool docstring are safe here.
        self.react_prompt_header = self._PROMPT_HEADER.format(
            tool_descriptions=self.tool_descriptions,
            tool_names=self.tool_names,
            max_iterations=self.max_iterations,
        )
        # Kept for backward compatibility with code that reads this attribute;
        # the agent itself builds prompts with _build_prompt().
        self.react_prompt_template = self.react_prompt_header + "\nQuestion: {question}\n{scratchpad}"

    def _build_prompt(self, question: str, scratchpad: str) -> str:
        """Assemble the full prompt by concatenation (no str.format on dynamic text)."""
        return f"{self.react_prompt_header}\nQuestion: {question}\n{scratchpad}"

    def _first_step(self, llm_output: str) -> str:
        """Return the completion up to the first model-written ``Observation:``.

        No stop sequence is sent to the LLM, so it may keep writing past its
        Action and invent the observation and a final answer. Everything from
        the first ``Observation:`` onward is therefore discarded. If nothing
        precedes the marker, the untouched output is returned.
        """
        cut = llm_output.find(self._OBSERVATION_MARKER)
        if cut == -1:
            return llm_output
        head = llm_output[:cut]
        return head if head.strip() else llm_output

    def run_llm(self, prompt: str) -> str:
        """Call the LLM once and return the stripped completion.

        Returns an empty string when the call fails or yields nothing, so that
        callers can detect failure with a simple truthiness check.
        """
        try:
            response = self.llm.text_generation(
                prompt,
                max_new_tokens=512,  # room for a thought plus an action or answer
                temperature=0.2,     # low temperature for more deterministic steps
                do_sample=True,
            )
        except Exception as e:
            print(f"Error during LLM call: {e}")
            return ""
        return (response or "").strip()

    def __call__(self, question: str) -> str:
        """Answer ``question`` by running the ReAct loop; always returns a string."""
        print(f"ReActAgent received question (first 100 chars): {question[:100]}...")
        scratchpad = ""
        for i in range(self.max_iterations):
            print(f"\nIteration {i+1}")
            llm_output = self.run_llm(self._build_prompt(question, scratchpad))
            if not llm_output:
                print("LLM returned empty or error, stopping.")
                return "Agent Error: LLM failed to respond."

            step = self._first_step(llm_output)
            final_answer_match = self._FINAL_ANSWER_RE.search(step)
            action_match = self._ACTION_RE.search(step)

            # A final answer counts only if it is not preceded by an action
            # whose tool has not actually been run yet.
            if final_answer_match and (
                action_match is None or final_answer_match.start() < action_match.start()
            ):
                answer = final_answer_match.group(1).strip()
                print(f"Found Final Answer: {answer}")
                return answer

            if action_match is None:
                # Possibly just a thought, or malformed output. Keep it in the
                # scratchpad and let the loop continue.
                print("No valid action found in LLM output for this iteration.")
                scratchpad += step + "\n"
                continue

            # Keep the model's text up to and including the action only.
            scratchpad += step[:action_match.end()] + "\n"
            tool_name = action_match.group(1).strip()
            tool_input = action_match.group(2).strip()
            if tool_name in self.tools:
                print(f"Executing Tool: {tool_name}, Input: {tool_input}")
                try:
                    observation = str(self.tools[tool_name](tool_input))
                except Exception as e:
                    observation = f"Error executing tool {tool_name}: {e}"
                print(f"Observation: {observation[:200]}...")  # truncated for the log
                scratchpad += f"Observation: {observation}\n"
            else:
                print(f"Unknown tool: {tool_name}")
                scratchpad += f"Observation: Error - Unknown tool '{tool_name}'. Available tools: {self.tool_names}\n"

        print("Max iterations reached. Returning current scratchpad or best guess.")
        # Fallback: surface the last recorded thought, if any.
        thoughts = self._THOUGHT_RE.findall(scratchpad)
        if thoughts:
            return f"Max iterations reached. Last thought: {thoughts[-1].strip()}"
        return "Agent failed to find an answer within the iteration limit."
+# --- Constants (from template) ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Main Execution Logic (from template, modified to use ReActAgent) ---
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
+    Fetches all questions, runs the ReActAgent on them, submits all answers,
     and displays the results.
     """
+    space_id = os.getenv("SPACE_ID")
     if profile:
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent
     try:
+        available_tools = {
+            "search_tool": search_tool,
+            "calculator_tool": calculator_tool,
+        }
+        if llm_client is None: # Check if llm_client was initialized
+             return "LLM Client could not be initialized. Check logs and HF_TOKEN.", None
+        agent = ReActAgent(llm_client=llm_client, tools=available_tools)
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code not available (SPACE_ID not set)"
+    print(f"Agent code link: {agent_code}")
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
+        response = requests.get(questions_url, timeout=20) # Increased timeout
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     # 3. Run your Agent
     results_log = []
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            print(f"\n--- Processing Task ID: {task_id}, Question: {question_text[:100]}... ---")
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            print(f"Agent answer for task {task_id}: {submitted_answer[:100]}...")
         except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=120) # Increased timeout for submission
         response.raise_for_status()
         result_data = response.json()
         final_status = (
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
+# --- Build Gradio Interface using Blocks (from template) ---
 with gr.Blocks() as demo:
+    gr.Markdown("# ReAct Agent Evaluation Runner (GAIA Modified)")
     gr.Markdown(
         """
         **Instructions:**
+        1.  This Space implements a ReAct (Reasoning-Action) agent using an LLM from the Hugging Face Inference API.
         2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
         3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        4.  The agent uses a search tool (DuckDuckGo) and a calculator tool.
         ---
         **Disclaimers:**
+        * LLM responses can be slow, and running through all questions will take time.
+        * The agent's performance depends heavily on the chosen LLM and the quality of its ReAct prompting.
+        * You may need to set an `HF_TOKEN` in your Space secrets if you use a gated model or encounter rate limits.
+        * The calculator tool uses `eval()` which has security implications if not carefully managed. For this specific benchmark it is a common simplification.
         """
     )
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True) # Removed max_rows
     run_button.click(
         fn=run_and_submit_all,
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup:
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+    if llm_client is None:
+        print("⚠️ LLM Client (HfInference) was not initialized. The agent will not work.")
+        print("   Please check if you need to set the HF_TOKEN secret in your Space settings,")
+        print(f"   and ensure the model '{LLM_MODEL}' is accessible via the Inference API.")
+    else:
+        print(f"✅ LLM Client initialized with model: {LLM_MODEL}")
+    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for ReAct Agent Evaluation...")
     demo.launch(debug=True, share=False)