mujtabarizvi commited on
Commit
a170059
·
verified ·
1 Parent(s): 0d61bc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +302 -180
app.py CHANGED
@@ -1,36 +1,26 @@
1
- import gradio as gr
2
- from transformers import HfAgent, load_tool
3
- from transformers.tools import Tool
4
- from huggingface_hub import login
5
  import os
6
- import re
 
 
 
7
 
8
- # --- Configuration ---
9
- # IMPORTANT: Set your Hugging Face API token as an environment variable:
10
- # export HF_TOKEN="your_hf_token_here"
11
- # Or, you can use the login() function below if you prefer to enter it when the script runs.
12
- HF_TOKEN = os.environ.get("HF_TOKEN")
13
 
14
- # LLM for the Agent (Mixtral is a strong choice for reasoning tasks)
15
- # Ensure this model is accessible via your HF token and has an inference endpoint.
16
- # Some models might require a Pro subscription or specific endpoint configurations.
17
- LLM_ENDPOINT = "mistralai/Mixtral-8x7B-Instruct-v0.1" # A powerful open-source model
18
 
19
  # --- Hugging Face Authentication ---
20
- try:
21
- if HF_TOKEN:
22
- login(token=HF_TOKEN, add_to_git_credential=False)
23
- print("Successfully logged in to Hugging Face Hub.")
24
- else:
25
- print("HF_TOKEN environment variable not found. You might need to log in manually or set the token.")
26
- # You could uncomment the next line to force a manual login if the token isn't set,
27
- # but it's generally better to use environment variables for tokens.
28
- # login()
29
- except Exception as e:
30
- print(f"Error during Hugging Face login: {e}")
31
- print("Please ensure your HF_TOKEN is set correctly or you can log in manually.")
32
 
33
- # --- Tool Definitions ---
34
 
35
  # 1. Calculator Tool
36
  class CalculatorTool(Tool):
@@ -40,45 +30,33 @@ class CalculatorTool(Tool):
40
  "Input should be a valid mathematical expression string (e.g., '2+2', '100/5*2', '(3.14+2.71)*4'). "
41
  "Only use standard arithmetic operators (+, -, *, /) and parentheses."
42
  )
 
 
 
 
43
 
44
  def __call__(self, expression: str):
45
  try:
46
- # Basic validation to prevent unsafe expressions
47
  if not isinstance(expression, str):
48
  return "Error: Input expression must be a string."
49
 
50
- # Allow numbers, operators, parentheses, and spaces
51
- # Disallow letters or other symbols to reduce risk with eval()
52
  if not re.match(r"^[0-9\.\+\-\*\/\(\)\s]+$", expression):
53
  return "Error: Expression contains invalid characters. Only use numbers, operators (+, -, *, /), and parentheses."
54
 
55
  # Safely evaluate the expression
56
- # For truly safe evaluation, a proper math expression parser (e.g., from ast import literal_eval for simple cases, or a dedicated library)
57
- # would be better, but eval() is often used in agent examples with LLM-generated input.
58
- # The regex above provides a basic guard.
59
- result = eval(expression)
60
  return str(result)
61
  except Exception as e:
62
- # Catch any other errors during evaluation
63
  return f"Error during calculation: {str(e)}. Please ensure the expression is valid."
64
 
65
  # 2. Web Search Tool (using Hugging Face's wrapper for DuckDuckGo)
66
  # This tool needs the `duckduckgo-search` library: pip install duckduckgo-search
67
- try:
68
- search_tool = load_tool("HuggingFaceH4/duckduckgo_search",
69
- device_map="auto", # try to use GPU if available
70
- trust_remote_code=True) # Required for some community tools
71
- print("DuckDuckGo search tool loaded successfully.")
72
- except Exception as e:
73
- print(f"Error loading DuckDuckGo search tool: {e}")
74
- print("Please ensure 'duckduckgo-search' library is installed: pip install duckduckgo-search")
75
- search_tool = None
76
-
77
- # --- Agent Initialization ---
78
- # The system prompt guides the agent's behavior.
79
- # HfAgent uses a default prompt structure, but we can provide a custom system_prompt.
80
- # This prompt encourages ReAct-style reasoning.
81
- agent_system_prompt = """
82
  You are a highly capable and meticulous AI assistant. Your task is to answer user questions accurately and comprehensively.
83
  To achieve this, you have access to the following tools:
84
  {tool_descriptions}
@@ -100,146 +78,290 @@ Follow this process for each user query:
100
 
101
  Important guidelines:
102
  - **Accuracy is key:** Prioritize correctness. If you cannot find the information or are unsure, state that. Do not invent facts.
103
- - **Tool Use:** Use tools only when necessary. If the question is simple or conversational, answer directly. For factual queries requiring up-to-date information or calculations, use your tools.
104
- - **Search Effectively:** When using the search tool, formulate concise and targeted search queries. If initial results are not helpful, try rephrasing your query.
105
- - **Calculations:** For any numerical calculations, use the calculator tool to ensure accuracy, even for seemingly simple ones.
106
- - **Multi-step Reasoning:** Break down complex questions into smaller, manageable steps. Use tools sequentially if needed, using the output of one step as input for the next.
107
- - **Clarity:** Explain your thought process (the "Thought:" parts) clearly so the user can understand your reasoning. The final answer should be direct.
108
  """
109
 
110
- # Initialize tools list
111
- tools_list = []
112
- if search_tool:
113
- tools_list.append(search_tool)
114
- tools_list.append(CalculatorTool())
115
 
116
- # Initialize the HfAgent
117
- # It's crucial that the LLM (LLM_ENDPOINT) is compatible with the agent's prompting style.
118
- # Mixtral-Instruct models are generally good for this.
119
- try:
120
- if not HF_TOKEN:
121
- raise ValueError("Hugging Face token is not set. Agent initialization will likely fail.")
122
-
123
- print(f"Initializing HfAgent with LLM: {LLM_ENDPOINT}")
124
- agent = HfAgent(
125
- LLM_ENDPOINT,
126
- tools=tools_list,
127
- system_prompt=agent_system_prompt,
128
- # You might need to specify chat_prompt_template for some models,
129
- # but HfAgent often infers it or uses a default that works with instruct-tuned models.
130
- # Example: chat_prompt_template = "..." (specific to model)
131
- # Additional llm_kwargs can be passed if needed, e.g., for temperature, max_tokens
132
- additional_llm_kwargs={"temperature": 0.1, "max_new_tokens": 1500} # Adjust as needed
133
- )
134
- print("HfAgent initialized successfully.")
135
- except Exception as e:
136
- print(f"Error initializing HfAgent: {e}")
137
- print("This might be due to an invalid HF token, issues with the LLM endpoint, or model compatibility.")
138
- agent = None
139
-
140
- # --- Agent Interaction Function ---
141
- def run_gaia_agent(user_query: str):
142
- """
143
- Runs the GAIA agent with the given user query and returns the agent's thought process and final answer.
144
- """
145
- if agent is None:
146
- return "Agent initialization failed. Please check the console for errors (e.g., HF token, LLM endpoint)."
147
 
148
- print(f"\nUser Query: {user_query}")
149
-
150
- # The HfAgent's .run() method executes the ReAct loop.
151
- # It can return just the final answer or include the intermediate steps (thoughts, actions, observations).
152
- # For debugging and understanding, showing the full chat is useful.
153
- # The `run` method might produce a generator or a final string depending on how it's implemented
154
- # and if streaming is used. Here, we expect a final string output that includes the thought process.
155
-
156
- # HfAgent's `chat` method is often preferred for conversational history and richer output.
157
- # Let's try to capture the full interaction log.
158
- # The output format of HfAgent can vary. Some versions might return a string, others a list of dicts.
159
- # We aim to get a string that includes the agent's reasoning.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
- try:
162
- # Using agent.chat() which typically yields intermediate steps or returns a list of messages.
163
- # For simplicity in Gradio, we'll collect the stream if it's a generator.
164
- response_stream = agent.chat(user_query, stream=True) # stream=True for intermediate thoughts
165
-
166
- full_response = ""
167
- for output_chunk in response_stream:
168
- # output_chunk could be a string or a dict, depending on HfAgent version and setup
169
- if isinstance(output_chunk, str):
170
- full_response += output_chunk
171
- elif isinstance(output_chunk, dict) and "content" in output_chunk: # Common for message formats
172
- full_response += output_chunk["content"]
173
- # Add more conditions if the structure is different
174
-
175
- # This part is tricky as HfAgent's output structure for streaming isn't always just simple strings.
176
- # The goal is to reconstruct the thought process.
177
- # If `stream=True` gives complex objects, you might need to format them.
178
- # If `agent.run(user_query, stream=False)` returns the full thought process as a string, that's simpler.
179
- # Let's assume for now `agent.run()` without stream gives a good textual trace.
180
-
181
- # Fallback or alternative: agent.run() might give a more direct trace.
182
- # The exact method to get the full trace can depend on the HfAgent version.
183
- # Let's try agent.run() and see its output structure.
184
- # Typically, agent.run() returns the final answer, but the thought process is sent to the LLM.
185
- # To display the thought process, we might need to tap into the agent's internal logging or use a custom ReAct loop.
186
-
187
- # For this example, let's assume the HfAgent with the custom prompt will produce a string
188
- # that includes thoughts and actions when run. If not, the prompt needs to guide it to output them.
189
- # The system prompt asks it to "Explain your thought process (the "Thought:" parts) clearly".
190
-
191
- # A common way HfAgent works is that the LLM's raw output contains these "Thought:", "Action:" blocks.
192
- # The `agent.run()` or `agent.chat()` method then parses these.
193
- # If we want to *show* them, we need the raw LLM generations or a mode that exposes them.
194
 
195
- # Let's try a simpler approach: agent.run() and hope the LLM includes thoughts in its final output
196
- # as per our prompt, or that HfAgent has a way to return the trace.
197
- # The `HfAgent.run()` method is supposed to execute the full chain and return the final answer.
198
- # The intermediate steps are part of the prompt sent to the LLM.
199
- # To *display* these steps, we often need to run a more manual loop or use a callback.
200
-
201
- # For the purpose of this template, we'll rely on the LLM being verbose due to the prompt.
202
- # A more advanced setup might involve custom callbacks in HfAgent or a manual ReAct loop.
 
 
 
 
 
203
 
204
- # Let's try to get the agent's raw textual output which should include thoughts/actions
205
- # based on the prompt.
206
- final_answer_or_trace = agent.run(user_query, return_prompt=False) # return_prompt=True gives the full prompt
 
 
 
 
207
 
208
- print(f"Agent Response:\n{final_answer_or_trace}")
209
- return final_answer_or_trace
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
 
 
 
 
 
211
  except Exception as e:
212
- print(f"Error during agent execution: {e}")
213
- return f"An error occurred: {str(e)}. Please check the agent configuration and query."
214
-
215
-
216
- # --- Gradio Interface ---
217
- iface = gr.Interface(
218
- fn=run_gaia_agent,
219
- inputs=gr.Textbox(lines=3, placeholder="Enter your question for the GAIA agent...", label="Your Question"),
220
- outputs=gr.Markdown(label="Agent's Response (including thought process)", sanitize_html=False), # Using Markdown for better formatting
221
- title="🧠 GAIA Benchmark Agent 🚀",
222
- description=(
223
- "This agent uses a Large Language Model (LLM) with web search and calculator tools to answer complex questions. "
224
- "It's designed to tackle GAIA-style benchmark questions. "
225
- f"LLM: {LLM_ENDPOINT}. Tools: DuckDuckGo Search, Calculator.\n"
226
- "Enter your Hugging Face Token as an environment variable (HF_TOKEN) before running."
227
- "The agent will show its thought process and actions before providing the final answer."
228
- ),
229
- examples=[
230
- ["What is the current population of the capital of Canada, and what is the square root of that number?"],
231
- ["Who was the director of the movie 'Inception', and what is the cube of the number of Oscars it won?"],
232
- ["If a car travels 200 miles in 4 hours, what is its average speed in km/h? (1 mile = 1.60934 km)"],
233
- ["Find the birth dates of the first three US presidents. Then, calculate the average age they lived to, assuming they all died on Jan 1st of the year they passed away (this is a simplification for calculation)."]
234
- ],
235
- allow_flagging="never",
236
- theme=gr.themes.Soft() # Using a soft theme
237
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  if __name__ == "__main__":
240
- if agent is None:
241
- print("Cannot start Gradio app because agent initialization failed. See errors above.")
242
- print("Common issues: HF_TOKEN not set or invalid, LLM endpoint inaccessible, missing dependencies.")
 
 
 
 
 
 
 
 
 
 
243
  else:
244
- print("Launching Gradio Interface...")
245
- iface.launch()
 
 
 
 
 
 
 
 
1
  import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+ import re # For CalculatorTool validation
6
 
7
+ # --- Hugging Face and Agent Specific Imports ---
8
+ # MODIFIED IMPORT: HfAgent and load_tool are typically in transformers.agents
9
+ from transformers.agents import HfAgent, load_tool
10
+ from transformers.agents.tools import Tool # For custom tool definition
11
+ from huggingface_hub import login
12
 
13
+ # --- Constants ---
14
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
+ # LLM for the HfAgent (Mixtral is a strong choice for reasoning tasks)
16
+ LLM_ENDPOINT = "mistralai/Mixtral-8x7B-Instruct-v0.1"
17
 
18
  # --- Hugging Face Authentication ---
19
+ # The HF_TOKEN should be set as a secret in your Hugging Face Space settings.
20
+ # The agent initialization will attempt to use it.
21
+ HF_TOKEN = os.environ.get("HF_TOKEN")
 
 
 
 
 
 
 
 
 
22
 
23
+ # --- Tool Definitions for GAIA Agent ---
24
 
25
  # 1. Calculator Tool
26
  class CalculatorTool(Tool):
 
30
  "Input should be a valid mathematical expression string (e.g., '2+2', '100/5*2', '(3.14+2.71)*4'). "
31
  "Only use standard arithmetic operators (+, -, *, /) and parentheses."
32
  )
33
+ # Explicitly define inputs and outputs for clarity with HfAgent
34
+ inputs = {"expression": {"type": "text", "description": "The mathematical expression to evaluate."}}
35
+ output_type = "text"
36
+
37
 
38
  def __call__(self, expression: str):
39
  try:
 
40
  if not isinstance(expression, str):
41
  return "Error: Input expression must be a string."
42
 
43
+ # Basic validation to prevent unsafe expressions
 
44
  if not re.match(r"^[0-9\.\+\-\*\/\(\)\s]+$", expression):
45
  return "Error: Expression contains invalid characters. Only use numbers, operators (+, -, *, /), and parentheses."
46
 
47
  # Safely evaluate the expression
48
+ result = eval(expression) # Be cautious with eval in broader applications
 
 
 
49
  return str(result)
50
  except Exception as e:
 
51
  return f"Error during calculation: {str(e)}. Please ensure the expression is valid."
52
 
53
  # 2. Web Search Tool (using Hugging Face's wrapper for DuckDuckGo)
54
  # This tool needs the `duckduckgo-search` library: pip install duckduckgo-search
55
+ # It will be loaded within the GaiaHfAgent's __init__ method.
56
+
57
+ # --- GAIA Agent Definition ---
58
+ # This system prompt guides the HfAgent's behavior.
59
+ AGENT_SYSTEM_PROMPT = """
 
 
 
 
 
 
 
 
 
 
60
  You are a highly capable and meticulous AI assistant. Your task is to answer user questions accurately and comprehensively.
61
  To achieve this, you have access to the following tools:
62
  {tool_descriptions}
 
78
 
79
  Important guidelines:
80
  - **Accuracy is key:** Prioritize correctness. If you cannot find the information or are unsure, state that. Do not invent facts.
81
+ - **Tool Use:** Use tools only when necessary. For factual queries requiring up-to-date information or calculations, use your tools.
82
+ - **Search Effectively:** When using the search tool, formulate concise and targeted search queries.
83
+ - **Calculations:** For any numerical calculations, use the calculator tool to ensure accuracy.
84
+ - **Multi-step Reasoning:** Break down complex questions into smaller, manageable steps.
85
+ - **Clarity:** Your thought process (intermediate steps) will be logged, but the final output should be just the answer.
86
  """
87
 
88
class GaiaHfAgent:
    """Wrapper around transformers' HfAgent configured for GAIA-style questions.

    On construction it logs in to the Hugging Face Hub, loads the DuckDuckGo
    search tool and the local CalculatorTool, and initializes an HfAgent
    backed by LLM_ENDPOINT. Calling the instance with a question string runs
    the agent and returns the final textual answer.
    """

    def __init__(self):
        """Authenticate, load tools, and build the underlying HfAgent.

        Raises:
            ValueError: if the HF_TOKEN environment variable is not set.
            ConnectionError: if the Hugging Face Hub login fails.
            RuntimeError: if the HfAgent component itself fails to initialize.
        """
        print("Initializing GaiaHfAgent...")
        self.hf_agent = None  # Initialize to None

        if not HF_TOKEN:
            print("ERROR: HF_TOKEN environment variable not found. GaiaHfAgent cannot be initialized.")
            raise ValueError("HF_TOKEN is not set. Please set it as a secret in your Hugging Face Space.")

        try:
            login(token=HF_TOKEN, add_to_git_credential=False)
            print("Successfully logged in to Hugging Face Hub for GaiaHfAgent.")
        except Exception as e:
            print(f"Error during Hugging Face login for GaiaHfAgent: {e}")
            # Login failure is fatal: the agent cannot reach the LLM endpoint without it.
            raise ConnectionError(f"Hugging Face login failed: {e}")

        # Load tools
        tools_list = []
        try:
            print("Loading DuckDuckGo search tool...")
            # Note: device_map might not be relevant for all tools, especially API-based ones.
            # trust_remote_code=True is important for community tools.
            search_tool = load_tool(
                "HuggingFaceH4/duckduckgo_search",
                # device_map="auto", # Can be removed if tool doesn't use local models
                trust_remote_code=True
            )
            tools_list.append(search_tool)
            print("DuckDuckGo search tool loaded.")
        except Exception as e:
            # Search is treated as optional: the agent degrades to calculator-only.
            print(f"Error loading DuckDuckGo search tool: {e}. Search functionality will be unavailable.")
            # Optionally, you could decide to raise an error if search is critical
            # raise ToolNotAvailableError(f"Failed to load search tool: {e}")

        tools_list.append(CalculatorTool())
        print("Calculator tool prepared.")

        # NOTE(review): the `not tools_list` branch is unreachable — a
        # CalculatorTool is always appended just above, so the list is never empty.
        if not tools_list: # Check if any tool was actually loaded
            print("WARNING: No tools were successfully loaded for GaiaHfAgent. Search tool might be missing.")
        elif len(tools_list) == 1 and isinstance(tools_list[0], CalculatorTool):
            print("WARNING: Only Calculator tool was loaded. Search tool might be missing.")

        try:
            print(f"Initializing HfAgent with LLM: {LLM_ENDPOINT}")
            self.hf_agent = HfAgent(
                LLM_ENDPOINT,
                tools=tools_list,
                system_prompt=AGENT_SYSTEM_PROMPT,
                additional_llm_kwargs={"temperature": 0.1, "max_new_tokens": 1024} # Adjust as needed
            )
            print("GaiaHfAgent HfAgent component initialized successfully.")
        except Exception as e:
            print(f"CRITICAL Error initializing HfAgent component: {e}")
            # This is critical, so we should raise an error to stop execution if HfAgent fails
            raise RuntimeError(f"Failed to initialize HfAgent: {e}")

        print("GaiaHfAgent fully initialized.")

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its final answer as a string.

        Never raises: agent-side failures are returned as an
        "AGENT EXECUTION ERROR: ..." string so the caller's loop can continue.
        """
        print(f"GaiaHfAgent received question (first 100 chars): {question[:100]}...")
        if self.hf_agent is None:
            # Defensive guard; __init__ raises on failure, so this is not expected in practice.
            print("ERROR: GaiaHfAgent's HfAgent component is not initialized. Returning error message.")
            return "Error: Agent not initialized. Check logs."

        try:
            # HfAgent.run() executes the ReAct loop and returns the final answer.
            # The thought process, actions, and observations are handled internally by HfAgent
            # and typically logged by the transformers library if logging is configured.
            # For the submission, we only need the final textual answer.
            print("Running HfAgent to get the answer...")
            answer = self.hf_agent.run(question, stream=False) # stream=False to get final answer directly

            # The 'answer' from HfAgent.run() should be the final string.
            # If it's a more complex object (e.g. a dict or generator if stream=True was used),
            # you'd need to parse it here. For stream=False, it's typically the string.
            if not isinstance(answer, str):
                print(f"Warning: HfAgent returned a non-string type: {type(answer)}. Converting to string.")
                answer = str(answer)

            print(f"GaiaHfAgent returning answer (first 100 chars): {answer[:100]}...")
            return answer
        except Exception as e:
            print(f"Error during GaiaHfAgent execution for question '{question[:50]}...': {e}")
            return f"AGENT EXECUTION ERROR: {str(e)}"
173
+
174
+
175
def run_and_submit_all(profile: "gr.OAuthProfile | None"):
    """
    Fetches all questions, runs the GaiaHfAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The Gradio OAuth profile of the logged-in user, or None when
            nobody is logged in. (Annotation is quoted so the module imports
            even before gradio is available.)

    Returns:
        A (status_message, results) tuple. `results` is a pandas DataFrame of
        per-question answers, or None when the run aborts before processing.
    """
    space_id = os.getenv("SPACE_ID")
    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    if not HF_TOKEN:
        no_token_message = "ERROR: HF_TOKEN secret is not set in this Space. The agent cannot operate. Please ask the Space owner to set it."
        print(no_token_message)
        return no_token_message, None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        print("Attempting to instantiate GaiaHfAgent...")
        agent = GaiaHfAgent()
        print("GaiaHfAgent instantiated successfully.")
    except Exception as e:
        error_msg = f"Fatal Error initializing GaiaHfAgent: {e}. Cannot proceed with evaluation."
        print(error_msg)
        # Provide more specific feedback if it's a known initialization issue
        if "HF_TOKEN is not set" in str(e):
            error_msg = "Fatal Error: The HF_TOKEN secret is missing or not accessible. The agent cannot start. Please ensure it's set in the Space settings."
        elif "login failed" in str(e) or "authentication" in str(e).lower():
            error_msg = "Fatal Error: Hugging Face login failed. Check if the HF_TOKEN is valid and has 'read' permissions. The agent cannot start."
        elif "Failed to initialize HfAgent" in str(e):
            error_msg = f"Fatal Error: Core HfAgent component failed to initialize: {e}. This could be due to issues with the LLM endpoint ({LLM_ENDPOINT}) or tool setup."
        elif "ToolNotAvailableError" in str(e): # Example if you add custom tool errors
            error_msg = f"Fatal Error: A required tool for the agent failed to load: {e}"
        return error_msg, None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code link not available (SPACE_ID not set)"
    print(f"Agent code link: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=20) # Increased timeout
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # BUGFIX: requests.exceptions.JSONDecodeError is a subclass of
    # RequestException (requests >= 2.27), so it must be caught BEFORE the
    # broader handler — otherwise this branch is unreachable.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e: # Catch any other unexpected errors
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running GaiaHfAgent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            print(f"\nProcessing Task ID: {task_id}, Question: {question_text[:100]}...")
            submitted_answer = agent(question_text) # Agent's __call__ method is invoked
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
            print(f"Task ID {task_id} processed. Answer (first 100): {submitted_answer[:100]}")
        except Exception as e:
            print(f"Error running agent on task {task_id} ('{question_text[:50]}...'): {e}")
            # Log the error but continue with other questions
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT RUNTIME ERROR: {e}"})
            # Do not add to answers_payload if agent failed for this question

    if not answers_payload: # If all questions resulted in agent errors
        print("Agent did not produce any valid answers to submit (all tasks might have resulted in errors).")
        # Still return results_log to show the errors
        return "Agent did not produce any valid answers to submit. Check logs for errors.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"GaiaHfAgent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=90) # Increased timeout for submission
        response.raise_for_status() # Raises HTTPError for bad responses (4XX or 5XX)
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json() # Try to get more details from JSON response
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError: # If response is not JSON
            error_detail += f" Response: {e.response.text[:500]}" # Show first 500 chars of text response
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e: # Catch other network-related errors
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e: # Catch any other unexpected errors during submission
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
316
+
317
+
318
# --- Build Gradio Interface using Blocks ---
# Defines the evaluation UI: a login button, a single "run" button, a status
# textbox, and a results table wired to run_and_submit_all.
with gr.Blocks(theme=gr.themes.Soft()) as demo: # Added a theme
    gr.Markdown("# GAIA Benchmark Agent Runner 🚀")
    gr.Markdown(
        f"""
        **Instructions:**
        1. This Space runs a **GAIA-style Agent** using `transformers.HfAgent` with the `{LLM_ENDPOINT}` model.
        2. It uses **DuckDuckGo Search** and a **Calculator** tool.
        3. **IMPORTANT:** The Space owner must set the `HF_TOKEN` in the Space secrets for the agent to work.
        4. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        5. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        - Processing all questions can take significant time (several minutes) depending on the LLM and question complexity.
        - Ensure your `HF_TOKEN` has 'read' access.
        - The agent's performance depends on the LLM, prompt, and tool effectiveness.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=7, interactive=False) # Increased lines
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400) # Added height

    # The click handler reads the OAuth profile implicitly (inputs=None) and
    # writes the status string and answers DataFrame back to the two outputs.
    run_button.click(
        fn=run_and_submit_all,
        inputs=None, # No direct input from UI other than login profile
        outputs=[status_output, results_table],
        # api_name="run_evaluation" # Optional: if you want to expose this as an API endpoint
    )
347
 
348
  if __name__ == "__main__":
349
+ print("\n" + "-"*30 + " GAIA Agent App Starting " + "-"*30)
350
+
351
+ # Check for critical environment variables at startup
352
+ if not HF_TOKEN:
353
+ print("🔴 WARNING: HF_TOKEN environment variable is NOT SET at startup.")
354
+ print(" The agent will likely FAIL to initialize. Please set HF_TOKEN as a secret in your Space settings.")
355
+ else:
356
+ print(f"✅ HF_TOKEN found (length: {len(HF_TOKEN)}). Agent will attempt to use it.")
357
+
358
+ space_id_startup = os.getenv("SPACE_ID")
359
+ if space_id_startup:
360
+ print(f"✅ SPACE_ID found: {space_id_startup}")
361
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
362
  else:
363
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
364
+
365
+ print("-"*(60 + len(" GAIA Agent App Starting ")) + "\n")
366
+ print("Launching Gradio Interface for GAIA Agent Evaluation...")
367
+ demo.launch(debug=True, share=False) # share=False for security if not needed