mujtabarizvi commited on
Commit
a170059
·
verified ·
1 Parent(s): 0d61bc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +302 -180
app.py CHANGED
@@ -1,36 +1,26 @@
1
- import gradio as gr
2
- from transformers import HfAgent, load_tool
3
- from transformers.tools import Tool
4
- from huggingface_hub import login
5
  import os
6
- import re
 
 
 
7
 
8
- # --- Configuration ---
9
- # IMPORTANT: Set your Hugging Face API token as an environment variable:
10
- # export HF_TOKEN="your_hf_token_here"
11
- # Or, you can use the login() function below if you prefer to enter it when the script runs.
12
- HF_TOKEN = os.environ.get("HF_TOKEN")
13
 
14
- # LLM for the Agent (Mixtral is a strong choice for reasoning tasks)
15
- # Ensure this model is accessible via your HF token and has an inference endpoint.
16
- # Some models might require a Pro subscription or specific endpoint configurations.
17
- LLM_ENDPOINT = "mistralai/Mixtral-8x7B-Instruct-v0.1" # A powerful open-source model
18
 
19
  # --- Hugging Face Authentication ---
20
- try:
21
- if HF_TOKEN:
22
- login(token=HF_TOKEN, add_to_git_credential=False)
23
- print("Successfully logged in to Hugging Face Hub.")
24
- else:
25
- print("HF_TOKEN environment variable not found. You might need to log in manually or set the token.")
26
- # You could uncomment the next line to force a manual login if the token isn't set,
27
- # but it's generally better to use environment variables for tokens.
28
- # login()
29
- except Exception as e:
30
- print(f"Error during Hugging Face login: {e}")
31
- print("Please ensure your HF_TOKEN is set correctly or you can log in manually.")
32
 
33
- # --- Tool Definitions ---
34
 
35
  # 1. Calculator Tool
36
  class CalculatorTool(Tool):
@@ -40,45 +30,33 @@ class CalculatorTool(Tool):
40
  "Input should be a valid mathematical expression string (e.g., '2+2', '100/5*2', '(3.14+2.71)*4'). "
41
  "Only use standard arithmetic operators (+, -, *, /) and parentheses."
42
  )
 
 
 
 
43
 
44
  def __call__(self, expression: str):
45
  try:
46
- # Basic validation to prevent unsafe expressions
47
  if not isinstance(expression, str):
48
  return "Error: Input expression must be a string."
49
 
50
- # Allow numbers, operators, parentheses, and spaces
51
- # Disallow letters or other symbols to reduce risk with eval()
52
  if not re.match(r"^[0-9\.\+\-\*\/\(\)\s]+$", expression):
53
  return "Error: Expression contains invalid characters. Only use numbers, operators (+, -, *, /), and parentheses."
54
 
55
  # Safely evaluate the expression
56
- # For truly safe evaluation, a proper math expression parser (e.g., from ast import literal_eval for simple cases, or a dedicated library)
57
- # would be better, but eval() is often used in agent examples with LLM-generated input.
58
- # The regex above provides a basic guard.
59
- result = eval(expression)
60
  return str(result)
61
  except Exception as e:
62
- # Catch any other errors during evaluation
63
  return f"Error during calculation: {str(e)}. Please ensure the expression is valid."
64
 
65
  # 2. Web Search Tool (using Hugging Face's wrapper for DuckDuckGo)
66
  # This tool needs the `duckduckgo-search` library: pip install duckduckgo-search
67
- try:
68
- search_tool = load_tool("HuggingFaceH4/duckduckgo_search",
69
- device_map="auto", # try to use GPU if available
70
- trust_remote_code=True) # Required for some community tools
71
- print("DuckDuckGo search tool loaded successfully.")
72
- except Exception as e:
73
- print(f"Error loading DuckDuckGo search tool: {e}")
74
- print("Please ensure 'duckduckgo-search' library is installed: pip install duckduckgo-search")
75
- search_tool = None
76
-
77
- # --- Agent Initialization ---
78
- # The system prompt guides the agent's behavior.
79
- # HfAgent uses a default prompt structure, but we can provide a custom system_prompt.
80
- # This prompt encourages ReAct-style reasoning.
81
- agent_system_prompt = """
82
  You are a highly capable and meticulous AI assistant. Your task is to answer user questions accurately and comprehensively.
83
  To achieve this, you have access to the following tools:
84
  {tool_descriptions}
@@ -100,146 +78,290 @@ Follow this process for each user query:
100
 
101
  Important guidelines:
102
  - **Accuracy is key:** Prioritize correctness. If you cannot find the information or are unsure, state that. Do not invent facts.
103
- - **Tool Use:** Use tools only when necessary. If the question is simple or conversational, answer directly. For factual queries requiring up-to-date information or calculations, use your tools.
104
- - **Search Effectively:** When using the search tool, formulate concise and targeted search queries. If initial results are not helpful, try rephrasing your query.
105
- - **Calculations:** For any numerical calculations, use the calculator tool to ensure accuracy, even for seemingly simple ones.
106
- - **Multi-step Reasoning:** Break down complex questions into smaller, manageable steps. Use tools sequentially if needed, using the output of one step as input for the next.
107
- - **Clarity:** Explain your thought process (the "Thought:" parts) clearly so the user can understand your reasoning. The final answer should be direct.
108
  """
109
 
110
- # Initialize tools list
111
- tools_list = []
112
- if search_tool:
113
- tools_list.append(search_tool)
114
- tools_list.append(CalculatorTool())
115
 
116
- # Initialize the HfAgent
117
- # It's crucial that the LLM (LLM_ENDPOINT) is compatible with the agent's prompting style.
118
- # Mixtral-Instruct models are generally good for this.
119
- try:
120
- if not HF_TOKEN:
121
- raise ValueError("Hugging Face token is not set. Agent initialization will likely fail.")
122
-
123
- print(f"Initializing HfAgent with LLM: {LLM_ENDPOINT}")
124
- agent = HfAgent(
125
- LLM_ENDPOINT,
126
- tools=tools_list,
127
- system_prompt=agent_system_prompt,
128
- # You might need to specify chat_prompt_template for some models,
129
- # but HfAgent often infers it or uses a default that works with instruct-tuned models.
130
- # Example: chat_prompt_template = "..." (specific to model)
131
- # Additional llm_kwargs can be passed if needed, e.g., for temperature, max_tokens
132
- additional_llm_kwargs={"temperature": 0.1, "max_new_tokens": 1500} # Adjust as needed
133
- )
134
- print("HfAgent initialized successfully.")
135
- except Exception as e:
136
- print(f"Error initializing HfAgent: {e}")
137
- print("This might be due to an invalid HF token, issues with the LLM endpoint, or model compatibility.")
138
- agent = None
139
-
140
- # --- Agent Interaction Function ---
141
- def run_gaia_agent(user_query: str):
142
- """
143
- Runs the GAIA agent with the given user query and returns the agent's thought process and final answer.
144
- """
145
- if agent is None:
146
- return "Agent initialization failed. Please check the console for errors (e.g., HF token, LLM endpoint)."
147
 
148
- print(f"\nUser Query: {user_query}")
149
-
150
- # The HfAgent's .run() method executes the ReAct loop.
151
- # It can return just the final answer or include the intermediate steps (thoughts, actions, observations).
152
- # For debugging and understanding, showing the full chat is useful.
153
- # The `run` method might produce a generator or a final string depending on how it's implemented
154
- # and if streaming is used. Here, we expect a final string output that includes the thought process.
155
-
156
- # HfAgent's `chat` method is often preferred for conversational history and richer output.
157
- # Let's try to capture the full interaction log.
158
- # The output format of HfAgent can vary. Some versions might return a string, others a list of dicts.
159
- # We aim to get a string that includes the agent's reasoning.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
- try:
162
- # Using agent.chat() which typically yields intermediate steps or returns a list of messages.
163
- # For simplicity in Gradio, we'll collect the stream if it's a generator.
164
- response_stream = agent.chat(user_query, stream=True) # stream=True for intermediate thoughts
165
-
166
- full_response = ""
167
- for output_chunk in response_stream:
168
- # output_chunk could be a string or a dict, depending on HfAgent version and setup
169
- if isinstance(output_chunk, str):
170
- full_response += output_chunk
171
- elif isinstance(output_chunk, dict) and "content" in output_chunk: # Common for message formats
172
- full_response += output_chunk["content"]
173
- # Add more conditions if the structure is different
174
-
175
- # This part is tricky as HfAgent's output structure for streaming isn't always just simple strings.
176
- # The goal is to reconstruct the thought process.
177
- # If `stream=True` gives complex objects, you might need to format them.
178
- # If `agent.run(user_query, stream=False)` returns the full thought process as a string, that's simpler.
179
- # Let's assume for now `agent.run()` without stream gives a good textual trace.
180
-
181
- # Fallback or alternative: agent.run() might give a more direct trace.
182
- # The exact method to get the full trace can depend on the HfAgent version.
183
- # Let's try agent.run() and see its output structure.
184
- # Typically, agent.run() returns the final answer, but the thought process is sent to the LLM.
185
- # To display the thought process, we might need to tap into the agent's internal logging or use a custom ReAct loop.
186
-
187
- # For this example, let's assume the HfAgent with the custom prompt will produce a string
188
- # that includes thoughts and actions when run. If not, the prompt needs to guide it to output them.
189
- # The system prompt asks it to "Explain your thought process (the "Thought:" parts) clearly".
190
-
191
- # A common way HfAgent works is that the LLM's raw output contains these "Thought:", "Action:" blocks.
192
- # The `agent.run()` or `agent.chat()` method then parses these.
193
- # If we want to *show* them, we need the raw LLM generations or a mode that exposes them.
194
 
195
- # Let's try a simpler approach: agent.run() and hope the LLM includes thoughts in its final output
196
- # as per our prompt, or that HfAgent has a way to return the trace.
197
- # The `HfAgent.run()` method is supposed to execute the full chain and return the final answer.
198
- # The intermediate steps are part of the prompt sent to the LLM.
199
- # To *display* these steps, we often need to run a more manual loop or use a callback.
200
-
201
- # For the purpose of this template, we'll rely on the LLM being verbose due to the prompt.
202
- # A more advanced setup might involve custom callbacks in HfAgent or a manual ReAct loop.
 
 
 
 
 
203
 
204
- # Let's try to get the agent's raw textual output which should include thoughts/actions
205
- # based on the prompt.
206
- final_answer_or_trace = agent.run(user_query, return_prompt=False) # return_prompt=True gives the full prompt
 
 
 
 
207
 
208
- print(f"Agent Response:\n{final_answer_or_trace}")
209
- return final_answer_or_trace
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
 
 
 
 
 
211
  except Exception as e:
212
- print(f"Error during agent execution: {e}")
213
- return f"An error occurred: {str(e)}. Please check the agent configuration and query."
214
-
215
-
216
- # --- Gradio Interface ---
217
- iface = gr.Interface(
218
- fn=run_gaia_agent,
219
- inputs=gr.Textbox(lines=3, placeholder="Enter your question for the GAIA agent...", label="Your Question"),
220
- outputs=gr.Markdown(label="Agent's Response (including thought process)", sanitize_html=False), # Using Markdown for better formatting
221
- title="🧠 GAIA Benchmark Agent 🚀",
222
- description=(
223
- "This agent uses a Large Language Model (LLM) with web search and calculator tools to answer complex questions. "
224
- "It's designed to tackle GAIA-style benchmark questions. "
225
- f"LLM: {LLM_ENDPOINT}. Tools: DuckDuckGo Search, Calculator.\n"
226
- "Enter your Hugging Face Token as an environment variable (HF_TOKEN) before running."
227
- "The agent will show its thought process and actions before providing the final answer."
228
- ),
229
- examples=[
230
- ["What is the current population of the capital of Canada, and what is the square root of that number?"],
231
- ["Who was the director of the movie 'Inception', and what is the cube of the number of Oscars it won?"],
232
- ["If a car travels 200 miles in 4 hours, what is its average speed in km/h? (1 mile = 1.60934 km)"],
233
- ["Find the birth dates of the first three US presidents. Then, calculate the average age they lived to, assuming they all died on Jan 1st of the year they passed away (this is a simplification for calculation)."]
234
- ],
235
- allow_flagging="never",
236
- theme=gr.themes.Soft() # Using a soft theme
237
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  if __name__ == "__main__":
240
- if agent is None:
241
- print("Cannot start Gradio app because agent initialization failed. See errors above.")
242
- print("Common issues: HF_TOKEN not set or invalid, LLM endpoint inaccessible, missing dependencies.")
 
 
 
 
 
 
 
 
 
 
243
  else:
244
- print("Launching Gradio Interface...")
245
- iface.launch()
 
 
 
 
 
 
 
 
1
  import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+ import re # For CalculatorTool validation
6
 
7
+ # --- Hugging Face and Agent Specific Imports ---
8
+ # MODIFIED IMPORT: HfAgent and load_tool are typically in transformers.agents
9
+ from transformers.agents import HfAgent, load_tool
10
+ from transformers.agents.tools import Tool # For custom tool definition
11
+ from huggingface_hub import login
12
 
13
+ # --- Constants ---
14
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
+ # LLM for the HfAgent (Mixtral is a strong choice for reasoning tasks)
16
+ LLM_ENDPOINT = "mistralai/Mixtral-8x7B-Instruct-v0.1"
17
 
18
  # --- Hugging Face Authentication ---
19
+ # The HF_TOKEN should be set as a secret in your Hugging Face Space settings.
20
+ # The agent initialization will attempt to use it.
21
+ HF_TOKEN = os.environ.get("HF_TOKEN")
 
 
 
 
 
 
 
 
 
22
 
23
+ # --- Tool Definitions for GAIA Agent ---
24
 
25
  # 1. Calculator Tool
26
  class CalculatorTool(Tool):
 
30
  "Input should be a valid mathematical expression string (e.g., '2+2', '100/5*2', '(3.14+2.71)*4'). "
31
  "Only use standard arithmetic operators (+, -, *, /) and parentheses."
32
  )
33
+ # Explicitly define inputs and outputs for clarity with HfAgent
34
+ inputs = {"expression": {"type": "text", "description": "The mathematical expression to evaluate."}}
35
+ output_type = "text"
36
+
37
 
38
  def __call__(self, expression: str):
39
  try:
 
40
  if not isinstance(expression, str):
41
  return "Error: Input expression must be a string."
42
 
43
+ # Basic validation to prevent unsafe expressions
 
44
  if not re.match(r"^[0-9\.\+\-\*\/\(\)\s]+$", expression):
45
  return "Error: Expression contains invalid characters. Only use numbers, operators (+, -, *, /), and parentheses."
46
 
47
  # Safely evaluate the expression
48
+ result = eval(expression) # Be cautious with eval in broader applications
 
 
 
49
  return str(result)
50
  except Exception as e:
 
51
  return f"Error during calculation: {str(e)}. Please ensure the expression is valid."
52
 
53
  # 2. Web Search Tool (using Hugging Face's wrapper for DuckDuckGo)
54
  # This tool needs the `duckduckgo-search` library: pip install duckduckgo-search
55
+ # It will be loaded within the GaiaHfAgent's __init__ method.
56
+
57
+ # --- GAIA Agent Definition ---
58
+ # This system prompt guides the HfAgent's behavior.
59
+ AGENT_SYSTEM_PROMPT = """
 
 
 
 
 
 
 
 
 
 
60
  You are a highly capable and meticulous AI assistant. Your task is to answer user questions accurately and comprehensively.
61
  To achieve this, you have access to the following tools:
62
  {tool_descriptions}
 
78
 
79
  Important guidelines:
80
  - **Accuracy is key:** Prioritize correctness. If you cannot find the information or are unsure, state that. Do not invent facts.
81
+ - **Tool Use:** Use tools only when necessary. For factual queries requiring up-to-date information or calculations, use your tools.
82
+ - **Search Effectively:** When using the search tool, formulate concise and targeted search queries.
83
+ - **Calculations:** For any numerical calculations, use the calculator tool to ensure accuracy.
84
+ - **Multi-step Reasoning:** Break down complex questions into smaller, manageable steps.
85
+ - **Clarity:** Your thought process (intermediate steps) will be logged, but the final output should be just the answer.
86
  """
87
 
88
class GaiaHfAgent:
    """Wrapper around transformers' HfAgent configured for GAIA-style questions.

    On construction it logs in to the Hugging Face Hub, loads the DuckDuckGo
    search tool and the local CalculatorTool, and initializes an HfAgent
    backed by LLM_ENDPOINT. Calling the instance with a question string runs
    the agent and returns the final textual answer.
    """

    def __init__(self):
        """Authenticate, load tools, and build the underlying HfAgent.

        Raises:
            ValueError: if the HF_TOKEN environment variable is not set.
            ConnectionError: if the Hugging Face Hub login fails.
            RuntimeError: if the HfAgent component itself fails to initialize.
        """
        print("Initializing GaiaHfAgent...")
        self.hf_agent = None  # Initialize to None

        if not HF_TOKEN:
            print("ERROR: HF_TOKEN environment variable not found. GaiaHfAgent cannot be initialized.")
            raise ValueError("HF_TOKEN is not set. Please set it as a secret in your Hugging Face Space.")

        try:
            login(token=HF_TOKEN, add_to_git_credential=False)
            print("Successfully logged in to Hugging Face Hub for GaiaHfAgent.")
        except Exception as e:
            print(f"Error during Hugging Face login for GaiaHfAgent: {e}")
            # Login failure is fatal: the agent cannot reach the LLM endpoint without it.
            raise ConnectionError(f"Hugging Face login failed: {e}")

        # Load tools
        tools_list = []
        try:
            print("Loading DuckDuckGo search tool...")
            # Note: device_map might not be relevant for all tools, especially API-based ones.
            # trust_remote_code=True is important for community tools.
            search_tool = load_tool(
                "HuggingFaceH4/duckduckgo_search",
                # device_map="auto", # Can be removed if tool doesn't use local models
                trust_remote_code=True
            )
            tools_list.append(search_tool)
            print("DuckDuckGo search tool loaded.")
        except Exception as e:
            # Search is treated as optional: the agent degrades to calculator-only.
            print(f"Error loading DuckDuckGo search tool: {e}. Search functionality will be unavailable.")
            # Optionally, you could decide to raise an error if search is critical
            # raise ToolNotAvailableError(f"Failed to load search tool: {e}")

        tools_list.append(CalculatorTool())
        print("Calculator tool prepared.")

        # NOTE(review): the `not tools_list` branch is unreachable — a
        # CalculatorTool is always appended just above, so the list is never empty.
        if not tools_list: # Check if any tool was actually loaded
            print("WARNING: No tools were successfully loaded for GaiaHfAgent. Search tool might be missing.")
        elif len(tools_list) == 1 and isinstance(tools_list[0], CalculatorTool):
            print("WARNING: Only Calculator tool was loaded. Search tool might be missing.")

        try:
            print(f"Initializing HfAgent with LLM: {LLM_ENDPOINT}")
            self.hf_agent = HfAgent(
                LLM_ENDPOINT,
                tools=tools_list,
                system_prompt=AGENT_SYSTEM_PROMPT,
                additional_llm_kwargs={"temperature": 0.1, "max_new_tokens": 1024} # Adjust as needed
            )
            print("GaiaHfAgent HfAgent component initialized successfully.")
        except Exception as e:
            print(f"CRITICAL Error initializing HfAgent component: {e}")
            # This is critical, so we should raise an error to stop execution if HfAgent fails
            raise RuntimeError(f"Failed to initialize HfAgent: {e}")

        print("GaiaHfAgent fully initialized.")

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its final answer as a string.

        Never raises: agent-side failures are returned as an
        "AGENT EXECUTION ERROR: ..." string so the caller's loop can continue.
        """
        print(f"GaiaHfAgent received question (first 100 chars): {question[:100]}...")
        if self.hf_agent is None:
            # Defensive guard; __init__ raises on failure, so this is not expected in practice.
            print("ERROR: GaiaHfAgent's HfAgent component is not initialized. Returning error message.")
            return "Error: Agent not initialized. Check logs."

        try:
            # HfAgent.run() executes the ReAct loop and returns the final answer.
            # The thought process, actions, and observations are handled internally by HfAgent
            # and typically logged by the transformers library if logging is configured.
            # For the submission, we only need the final textual answer.
            print("Running HfAgent to get the answer...")
            answer = self.hf_agent.run(question, stream=False) # stream=False to get final answer directly

            # The 'answer' from HfAgent.run() should be the final string.
            # If it's a more complex object (e.g. a dict or generator if stream=True was used),
            # you'd need to parse it here. For stream=False, it's typically the string.
            if not isinstance(answer, str):
                print(f"Warning: HfAgent returned a non-string type: {type(answer)}. Converting to string.")
                answer = str(answer)

            print(f"GaiaHfAgent returning answer (first 100 chars): {answer[:100]}...")
            return answer
        except Exception as e:
            print(f"Error during GaiaHfAgent execution for question '{question[:50]}...': {e}")
            return f"AGENT EXECUTION ERROR: {str(e)}"
173
+
174
+
175
def run_and_submit_all(profile: "gr.OAuthProfile | None"):
    """
    Fetches all questions, runs the GaiaHfAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The Gradio OAuth profile of the logged-in user, or None when
            nobody is logged in. (Annotation is quoted so the module imports
            even before gradio is available.)

    Returns:
        A (status_message, results) tuple. `results` is a pandas DataFrame of
        per-question answers, or None when the run aborts before processing.
    """
    space_id = os.getenv("SPACE_ID")
    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    if not HF_TOKEN:
        no_token_message = "ERROR: HF_TOKEN secret is not set in this Space. The agent cannot operate. Please ask the Space owner to set it."
        print(no_token_message)
        return no_token_message, None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        print("Attempting to instantiate GaiaHfAgent...")
        agent = GaiaHfAgent()
        print("GaiaHfAgent instantiated successfully.")
    except Exception as e:
        error_msg = f"Fatal Error initializing GaiaHfAgent: {e}. Cannot proceed with evaluation."
        print(error_msg)
        # Provide more specific feedback if it's a known initialization issue
        if "HF_TOKEN is not set" in str(e):
            error_msg = "Fatal Error: The HF_TOKEN secret is missing or not accessible. The agent cannot start. Please ensure it's set in the Space settings."
        elif "login failed" in str(e) or "authentication" in str(e).lower():
            error_msg = "Fatal Error: Hugging Face login failed. Check if the HF_TOKEN is valid and has 'read' permissions. The agent cannot start."
        elif "Failed to initialize HfAgent" in str(e):
            error_msg = f"Fatal Error: Core HfAgent component failed to initialize: {e}. This could be due to issues with the LLM endpoint ({LLM_ENDPOINT}) or tool setup."
        elif "ToolNotAvailableError" in str(e): # Example if you add custom tool errors
            error_msg = f"Fatal Error: A required tool for the agent failed to load: {e}"
        return error_msg, None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code link not available (SPACE_ID not set)"
    print(f"Agent code link: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=20) # Increased timeout
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # BUGFIX: requests.exceptions.JSONDecodeError is a subclass of
    # RequestException (requests >= 2.27), so it must be caught BEFORE the
    # broader handler — otherwise this branch is unreachable.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e: # Catch any other unexpected errors
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running GaiaHfAgent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            print(f"\nProcessing Task ID: {task_id}, Question: {question_text[:100]}...")
            submitted_answer = agent(question_text) # Agent's __call__ method is invoked
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
            print(f"Task ID {task_id} processed. Answer (first 100): {submitted_answer[:100]}")
        except Exception as e:
            print(f"Error running agent on task {task_id} ('{question_text[:50]}...'): {e}")
            # Log the error but continue with other questions
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT RUNTIME ERROR: {e}"})
            # Do not add to answers_payload if agent failed for this question

    if not answers_payload: # If all questions resulted in agent errors
        print("Agent did not produce any valid answers to submit (all tasks might have resulted in errors).")
        # Still return results_log to show the errors
        return "Agent did not produce any valid answers to submit. Check logs for errors.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"GaiaHfAgent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=90) # Increased timeout for submission
        response.raise_for_status() # Raises HTTPError for bad responses (4XX or 5XX)
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json() # Try to get more details from JSON response
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError: # If response is not JSON
            error_detail += f" Response: {e.response.text[:500]}" # Show first 500 chars of text response
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e: # Catch other network-related errors
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e: # Catch any other unexpected errors during submission
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
316
+
317
+
318
# --- Build Gradio Interface using Blocks ---
# Defines the evaluation UI: a login button, a single "run" button, a status
# textbox, and a results table wired to run_and_submit_all.
with gr.Blocks(theme=gr.themes.Soft()) as demo: # Added a theme
    gr.Markdown("# GAIA Benchmark Agent Runner 🚀")
    gr.Markdown(
        f"""
        **Instructions:**
        1. This Space runs a **GAIA-style Agent** using `transformers.HfAgent` with the `{LLM_ENDPOINT}` model.
        2. It uses **DuckDuckGo Search** and a **Calculator** tool.
        3. **IMPORTANT:** The Space owner must set the `HF_TOKEN` in the Space secrets for the agent to work.
        4. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        5. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        - Processing all questions can take significant time (several minutes) depending on the LLM and question complexity.
        - Ensure your `HF_TOKEN` has 'read' access.
        - The agent's performance depends on the LLM, prompt, and tool effectiveness.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=7, interactive=False) # Increased lines
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400) # Added height

    # The click handler reads the OAuth profile implicitly (inputs=None) and
    # writes the status string and answers DataFrame back to the two outputs.
    run_button.click(
        fn=run_and_submit_all,
        inputs=None, # No direct input from UI other than login profile
        outputs=[status_output, results_table],
        # api_name="run_evaluation" # Optional: if you want to expose this as an API endpoint
    )
347
 
348
  if __name__ == "__main__":
349
+ print("\n" + "-"*30 + " GAIA Agent App Starting " + "-"*30)
350
+
351
+ # Check for critical environment variables at startup
352
+ if not HF_TOKEN:
353
+ print("🔴 WARNING: HF_TOKEN environment variable is NOT SET at startup.")
354
+ print(" The agent will likely FAIL to initialize. Please set HF_TOKEN as a secret in your Space settings.")
355
+ else:
356
+ print(f"✅ HF_TOKEN found (length: {len(HF_TOKEN)}). Agent will attempt to use it.")
357
+
358
+ space_id_startup = os.getenv("SPACE_ID")
359
+ if space_id_startup:
360
+ print(f"✅ SPACE_ID found: {space_id_startup}")
361
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
362
  else:
363
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
364
+
365
+ print("-"*(60 + len(" GAIA Agent App Starting ")) + "\n")
366
+ print("Launching Gradio Interface for GAIA Agent Evaluation...")
367
+ demo.launch(debug=True, share=False) # share=False for security if not needed