jonathan9879 committed on
Commit
e3c5ce5
·
verified ·
1 Parent(s): 4e55bbe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -183
app.py CHANGED
@@ -8,181 +8,134 @@ import re
8
  import time
9
  from google.api_core import exceptions
10
 
11
- # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
- MAX_ITERATIONS = 7 # Max steps in the ReAct loop
14
- MAX_RETRIES = 5 # NEW: Max retries for API calls
15
-
16
- # --- Tool Definitions (No changes here, kept for completeness) ---
17
 
 
18
  class WebSearchTool:
19
- """
20
- A tool to search the web using the Perplexity API.
21
- It returns a concise answer to a given query.
22
- """
23
  def __init__(self, api_key):
24
  self.api_key = api_key
25
  self.url = "https://api.perplexity.ai/chat/completions"
26
- print("WebSearchTool initialized.")
27
-
28
  def execute(self, query: str) -> str:
29
  print(f"Executing WebSearchTool with query: {query}")
30
- payload = {
31
- "model": "llama-3-sonar-small-32k-online",
32
- "messages": [
33
- # MODIFIED: A slightly better prompt for GAIA-style questions
34
- {"role": "system", "content": "You are a world-class research assistant. Answer the user's query based on verifiable public information. Be precise and comprehensive."},
35
- {"role": "user", "content": query}
36
- ]
37
- }
38
- headers = {
39
- "accept": "application/json",
40
- "content-type": "application/json",
41
- "Authorization": f"Bearer {self.api_key}"
42
- }
43
  try:
44
  response = requests.post(self.url, json=payload, headers=headers, timeout=30)
45
  response.raise_for_status()
46
- result = response.json()
47
- answer = result['choices'][0]['message']['content']
48
- print(f"WebSearchTool result: {answer[:150]}...")
49
- return answer
50
  except requests.exceptions.RequestException as e:
51
- print(f"Error calling Perplexity API: {e}")
52
  return f"Error: Could not get a response from the web search tool. {e}"
53
 
54
  class FileDownloaderTool:
55
- """
56
- A tool to download and read the content of a file associated with a task.
57
- The input should be the task_id.
58
- """
59
  def __init__(self, api_url: str):
60
  self.api_url = api_url
61
- print("FileDownloaderTool initialized.")
62
-
63
  def execute(self, task_id: str) -> str:
64
- print(f"Executing FileDownloaderTool for task_id: {task_id}")
65
  file_url = f"{self.api_url}/files/{task_id}"
66
  try:
67
  response = requests.get(file_url, timeout=20)
68
  response.raise_for_status()
69
  content = response.text
70
- print(f"FileDownloaderTool successfully read file for task {task_id}. Content length: {len(content)}")
71
- if len(content) > 5000:
72
- return f"File content (first 5000 chars):\n{content[:5000]}"
73
  return f"File content:\n{content}"
74
  except requests.exceptions.HTTPError as e:
75
- if e.response.status_code == 404:
76
- print(f"No file found for task_id: {task_id}")
77
- return "No file is associated with this task."
78
- else:
79
- print(f"HTTP error downloading file for task_id {task_id}: {e}")
80
- return f"Error: Failed to download file due to an HTTP error: {e}"
81
  except requests.exceptions.RequestException as e:
82
- print(f"Network error downloading file for task_id {task_id}: {e}")
83
  return f"Error: Failed to download file due to a network error: {e}"
84
 
85
-
86
  # --- GAIA Agent Definition ---
87
  class GAIAAgent:
88
  def __init__(self, gemini_api_key: str, pplx_api_key: str, api_url: str):
89
  print("Initializing GAIAAgent...")
90
  genai.configure(api_key=gemini_api_key)
91
  self.model = genai.GenerativeModel('gemini-1.5-flash-latest')
 
92
 
93
- self.tools = {
94
- "WebSearch": WebSearchTool(api_key=pplx_api_key),
95
- "FileDownloader": FileDownloaderTool(api_url=api_url),
96
- }
97
-
98
- # MODIFIED: A simpler prompt for the initial zero-shot check
99
  self.zero_shot_prompt_template = """
100
- You are a helpful assistant. Your job is to answer the user's question directly and concisely.
101
- Do not explain your reasoning.
102
- Do not use tools.
103
- If you can answer the question with high confidence, provide the answer.
104
- If the question requires browsing the web, accessing a file, or performing complex calculations, respond with the single word: "UNSURE".
105
 
106
  Question: {question}
107
- Answer:
108
- """
109
 
 
110
  self.react_prompt_template = """
111
  You are a helpful assistant designed to answer questions accurately.
112
 
113
  To solve the user's question, you must use a sequence of thoughts and actions.
114
  You have access to the following tools:
115
 
116
- - **WebSearch[query]**: Use this to search the internet for up-to-date information, facts, or general knowledge.
117
- - **FileDownloader[task_id]**: Use this to download and read a file associated with the current task. The task_id is '{task_id}'.
118
 
119
  Your reasoning process should follow this format:
120
 
121
  Thought: I need to figure out what information is missing. I will use a tool to find it.
122
  Action: ToolName[input for the tool]
123
  Observation: [The result from the tool will be inserted here]
124
- ... (this Thought/Action/Observation cycle can repeat multiple times)
125
 
126
  Thought: I have now gathered enough information to answer the user's question.
127
  Final Answer: The final answer to the original question.
128
 
129
  **Important Rules:**
130
- 1. The `Action` line must be *exactly* in the format `ToolName[input]`. For example: `WebSearch[When was the Eiffel Tower built?]`.
131
  2. The `task_id` for the current question is '{task_id}'. Use it ONLY with the FileDownloader tool.
132
- 3. Once you have the final answer, do not use any more tools. State the final answer clearly after "Final Answer:". Your entire response should end here.
133
 
134
  Here is the question:
135
- {question}
136
- """
137
  print("GAIAAgent initialized successfully.")
138
 
139
- # NEW: Function to handle API calls with exponential backoff
140
  def _call_gemini_api_with_backoff(self, prompt_text):
141
  retries = 0
142
  while retries < MAX_RETRIES:
143
  try:
144
- print(f"Attempt {retries + 1} to call Gemini API...")
145
  response = self.model.generate_content(prompt_text)
146
  return response.text
147
  except exceptions.ResourceExhausted as e:
148
- print(f"API Rate Limit Exceeded (429). Waiting to retry... ({e.message})")
149
- wait_time = (2 ** retries) + 1 # Exponential backoff: 2, 3, 5, 9, 17 seconds
150
  time.sleep(wait_time)
151
  retries += 1
152
  except Exception as e:
153
- print(f"An unexpected error occurred with Gemini API: {e}")
154
  return f"AGENT_ERROR: An unexpected error occurred: {e}"
155
-
156
- print("Max retries reached. Failing.")
157
  return "AGENT_ERROR: API rate limit exceeded after multiple retries."
158
 
159
  def __call__(self, question: str, task_id: str) -> str:
160
  print(f"\n{'='*20}\nProcessing Task ID: {task_id}\nQuestion: {question[:100]}...")
161
 
162
- # === NEW: Step 1 - Zero-Shot Attempt ===
163
  print("--- Step 1: Zero-Shot Attempt ---")
164
  zero_shot_prompt = self.zero_shot_prompt_template.format(question=question)
165
  zero_shot_answer = self._call_gemini_api_with_backoff(zero_shot_prompt).strip()
166
 
167
- if "AGENT_ERROR" in zero_shot_answer:
168
- return zero_shot_answer # Propagate API failure
169
 
170
  if "UNSURE" not in zero_shot_answer.upper():
171
  print(f"Zero-shot successful! Answer: {zero_shot_answer}")
172
  return zero_shot_answer
173
 
174
- # === MODIFIED: Step 2 - ReAct Loop ===
175
  print("--- Step 2: Zero-shot failed, starting ReAct loop ---")
176
- react_prompt = self.react_prompt_template.format(question=question, task_id=task_id)
 
 
 
177
 
178
  for i in range(MAX_ITERATIONS):
179
  print(f"\n--- ReAct Iteration {i+1} ---")
180
 
181
- response_text = self._call_gemini_api_with_backoff(react_prompt)
182
  print(f"LLM Response:\n{response_text}")
183
 
184
- if "AGENT_ERROR" in response_text:
185
- return response_text # Propagate API failure
186
 
187
  final_answer_match = re.search(r"Final Answer:\s*(.*)", response_text, re.DOTALL)
188
  if final_answer_match:
@@ -196,152 +149,86 @@ Here is the question:
196
  tool_input = action_match.group(2).strip()
197
 
198
  if tool_name in self.tools:
199
- print(f"Executing tool '{tool_name}' with input '{tool_input}'")
200
  tool = self.tools[tool_name]
201
  try:
202
- if tool_name == "FileDownloader":
203
- observation = tool.execute(task_id)
204
- else:
205
- observation = tool.execute(tool_input)
206
  except Exception as e:
207
  observation = f"Error executing tool: {e}"
208
-
209
- react_prompt += f"{response_text}\nObservation: {observation}\n"
210
  else:
211
- print(f"Error: Agent tried to use an unknown tool: {tool_name}")
212
- react_prompt += f"{response_text}\nObservation: Error - The tool '{tool_name}' does not exist.\n"
213
  else:
214
  print("Error: Agent did not provide a valid Action or Final Answer. Returning last response.")
215
  return response_text.strip()
216
 
217
- print("Agent reached max iterations without finding a final answer.")
218
- return "AGENT_ERROR: Agent could not determine the answer within the allowed number of steps."
219
-
220
 
 
221
  def run_and_submit_all(profile: gr.OAuthProfile | None):
222
- # This function is mostly the same, with one key change added.
223
  space_id = os.getenv("SPACE_ID")
224
-
225
- if profile:
226
- username = f"{profile.username}"
227
- print(f"User logged in: {username}")
228
- else:
229
- print("User not logged in.")
230
- return "Please Login to Hugging Face with the button.", None
231
 
232
  pplx_key = os.getenv("PPLX_API_KEY")
233
  gemini_key = os.getenv("GEMINI_API_KEY")
234
-
235
- if not pplx_key or not gemini_key:
236
- error_msg = "API keys not found in Space secrets. Please set PPLX_API_KEY and GEMINI_API_KEY."
237
- print(error_msg)
238
- return error_msg, None
239
 
240
  api_url = DEFAULT_API_URL
241
- questions_url = f"{api_url}/questions"
242
- submit_url = f"{api_url}/submit"
243
-
244
  try:
245
  agent = GAIAAgent(gemini_api_key=gemini_key, pplx_api_key=pplx_key, api_url=api_url)
246
- except Exception as e:
247
- print(f"Error instantiating agent: {e}")
248
- return f"Error initializing agent: {e}", None
249
-
250
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
251
- print(f"Agent code link: {agent_code}")
252
-
253
- try:
254
- response = requests.get(questions_url, timeout=15)
255
- response.raise_for_status()
256
- questions_data = response.json()
257
- if not questions_data:
258
- return "Fetched questions list is empty or invalid format.", None
259
- print(f"Fetched {len(questions_data)} questions.")
260
- except Exception as e:
261
- return f"Error fetching questions: {e}", None
262
 
263
- results_log = []
264
- answers_payload = []
265
- print(f"Running agent on {len(questions_data)} questions...")
266
  for item in questions_data:
267
- task_id = item.get("task_id")
268
- question_text = item.get("question")
269
- if not task_id or question_text is None:
270
- continue
271
  try:
272
  submitted_answer = agent(question_text, task_id)
273
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
274
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
275
  except Exception as e:
276
- print(f"Error running agent on task {task_id}: {e}")
277
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
278
 
279
- # NEW: Add a delay between each question to respect rate limits
280
- print(f"--- Waiting for 5 seconds before next question... ---")
281
- time.sleep(5)
282
-
283
- if not answers_payload:
284
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
285
 
 
 
 
286
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
287
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
288
- print(status_update)
289
-
290
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
291
  try:
292
- response = requests.post(submit_url, json=submission_data, timeout=120)
293
  response.raise_for_status()
294
  result_data = response.json()
295
- final_status = (
296
- f"Submission Successful!\n"
297
- f"User: {result_data.get('username')}\n"
298
- f"Overall Score: {result_data.get('score', 'N/A')}% "
299
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
300
- f"Message: {result_data.get('message', 'No message received.')}"
301
- )
302
- print("Submission successful.")
303
- results_df = pd.DataFrame(results_log)
304
- return final_status, results_df
305
- except requests.exceptions.HTTPError as e:
306
- error_detail = f"Server responded with status {e.response.status_code}."
307
- try:
308
- error_json = e.response.json()
309
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
310
- except requests.exceptions.JSONDecodeError:
311
- error_detail += f" Response: {e.response.text[:500]}"
312
- status_message = f"Submission Failed: {error_detail}"
313
- print(status_message)
314
- results_df = pd.DataFrame(results_log)
315
- return status_message, results_df
316
- except Exception as e:
317
- status_message = f"An unexpected error occurred during submission: {e}"
318
- print(status_message)
319
- results_df = pd.DataFrame(results_log)
320
- return status_message, results_df
321
-
322
- # --- Gradio Interface (No changes here) ---
323
  with gr.Blocks() as demo:
324
  gr.Markdown("# GAIA Agent Evaluation Runner")
325
- gr.Markdown(
326
- """
327
  **Instructions:**
328
  1. Ensure you have added your `PPLX_API_KEY` and `GEMINI_API_KEY` to this Space's **Settings > Secrets**.
329
  2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
330
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
331
  ---
332
  **Disclaimers:**
333
- This process will now be slower due to the added delays to respect API rate limits, but it should be much more reliable. Please be patient.
334
- """
335
- )
336
  gr.LoginButton()
337
  run_button = gr.Button("Run Evaluation & Submit All Answers")
338
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
339
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
340
- run_button.click(
341
- fn=run_and_submit_all,
342
- outputs=[status_output, results_table]
343
- )
344
 
345
  if __name__ == "__main__":
346
- print("Launching Gradio Interface for GAIA Agent Evaluation...")
347
  demo.launch(debug=True, share=False)
 
8
  import time
9
  from google.api_core import exceptions
10
 
11
+ # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
+ MAX_ITERATIONS = 7
14
+ MAX_RETRIES = 5
 
 
15
 
16
+ # --- Tool Definitions ---
17
  class WebSearchTool:
 
 
 
 
18
  def __init__(self, api_key):
19
  self.api_key = api_key
20
  self.url = "https://api.perplexity.ai/chat/completions"
 
 
21
  def execute(self, query: str) -> str:
22
  print(f"Executing WebSearchTool with query: {query}")
23
+ payload = {"model": "llama-3-sonar-small-32k-online", "messages": [{"role": "system", "content": "You are a world-class research assistant. Answer the user's query based on verifiable public information. Be precise and comprehensive."}, {"role": "user", "content": query}]}
24
+ headers = {"accept": "application/json", "content-type": "application/json", "Authorization": f"Bearer {self.api_key}"}
 
 
 
 
 
 
 
 
 
 
 
25
  try:
26
  response = requests.post(self.url, json=payload, headers=headers, timeout=30)
27
  response.raise_for_status()
28
+ return response.json()['choices'][0]['message']['content']
 
 
 
29
  except requests.exceptions.RequestException as e:
 
30
  return f"Error: Could not get a response from the web search tool. {e}"
31
 
32
  class FileDownloaderTool:
 
 
 
 
33
  def __init__(self, api_url: str):
34
  self.api_url = api_url
 
 
35
  def execute(self, task_id: str) -> str:
 
36
  file_url = f"{self.api_url}/files/{task_id}"
37
  try:
38
  response = requests.get(file_url, timeout=20)
39
  response.raise_for_status()
40
  content = response.text
41
+ if len(content) > 5000: return f"File content (first 5000 chars):\n{content[:5000]}"
 
 
42
  return f"File content:\n{content}"
43
  except requests.exceptions.HTTPError as e:
44
+ if e.response.status_code == 404: return "No file is associated with this task."
45
+ return f"Error: Failed to download file due to an HTTP error: {e}"
 
 
 
 
46
  except requests.exceptions.RequestException as e:
 
47
  return f"Error: Failed to download file due to a network error: {e}"
48
 
 
49
  # --- GAIA Agent Definition ---
50
  class GAIAAgent:
51
  def __init__(self, gemini_api_key: str, pplx_api_key: str, api_url: str):
52
  print("Initializing GAIAAgent...")
53
  genai.configure(api_key=gemini_api_key)
54
  self.model = genai.GenerativeModel('gemini-1.5-flash-latest')
55
+ self.tools = {"WebSearch": WebSearchTool(api_key=pplx_api_key), "FileDownloader": FileDownloaderTool(api_url=api_url)}
56
 
57
+ # MODIFIED: Made the zero-shot prompt even stricter to prevent conversational filler.
 
 
 
 
 
58
  self.zero_shot_prompt_template = """
59
+ You are a highly intelligent question-answering bot. Follow these instructions precisely.
60
+ 1. Analyze the user's question.
61
+ 2. If the question is simple and you are 100% certain of the answer without needing any tools, provide ONLY the answer and nothing else.
62
+ 3. If the question requires web searches, file access, or complex reasoning, respond with the single word: UNSURE.
63
+ Do not add any explanations or introductory phrases.
64
 
65
  Question: {question}
66
+ Answer:"""
 
67
 
68
+ # MODIFIED: Added a new, explicit rule for how to fail gracefully.
69
  self.react_prompt_template = """
70
  You are a helpful assistant designed to answer questions accurately.
71
 
72
  To solve the user's question, you must use a sequence of thoughts and actions.
73
  You have access to the following tools:
74
 
75
+ - **WebSearch[query]**: Use this to search the internet for up-to-date information.
76
+ - **FileDownloader[task_id]**: Use this to download a file associated with the current task. The task_id is '{task_id}'.
77
 
78
  Your reasoning process should follow this format:
79
 
80
  Thought: I need to figure out what information is missing. I will use a tool to find it.
81
  Action: ToolName[input for the tool]
82
  Observation: [The result from the tool will be inserted here]
83
+ ... (this Thought/Action/Observation cycle can repeat)
84
 
85
  Thought: I have now gathered enough information to answer the user's question.
86
  Final Answer: The final answer to the original question.
87
 
88
  **Important Rules:**
89
+ 1. The `Action` line must be *exactly* in the format `ToolName[input]`.
90
  2. The `task_id` for the current question is '{task_id}'. Use it ONLY with the FileDownloader tool.
91
+ 3. **CRITICAL RULE:** If you determine that the question cannot be answered with your tools (e.g., a required file is missing, the information is not on the web), you MUST conclude with: `Final Answer: I am unable to answer this question.` Do not make up an answer.
92
 
93
  Here is the question:
94
+ {question}"""
 
95
  print("GAIAAgent initialized successfully.")
96
 
 
97
  def _call_gemini_api_with_backoff(self, prompt_text):
98
  retries = 0
99
  while retries < MAX_RETRIES:
100
  try:
101
+ time.sleep(1) # Add a small base delay
102
  response = self.model.generate_content(prompt_text)
103
  return response.text
104
  except exceptions.ResourceExhausted as e:
105
+ wait_time = (2 ** retries)
106
+ print(f"API Rate Limit Exceeded (429). Waiting for {wait_time}s to retry...")
107
  time.sleep(wait_time)
108
  retries += 1
109
  except Exception as e:
 
110
  return f"AGENT_ERROR: An unexpected error occurred: {e}"
 
 
111
  return "AGENT_ERROR: API rate limit exceeded after multiple retries."
112
 
113
  def __call__(self, question: str, task_id: str) -> str:
114
  print(f"\n{'='*20}\nProcessing Task ID: {task_id}\nQuestion: {question[:100]}...")
115
 
 
116
  print("--- Step 1: Zero-Shot Attempt ---")
117
  zero_shot_prompt = self.zero_shot_prompt_template.format(question=question)
118
  zero_shot_answer = self._call_gemini_api_with_backoff(zero_shot_prompt).strip()
119
 
120
+ if "AGENT_ERROR" in zero_shot_answer: return zero_shot_answer
 
121
 
122
  if "UNSURE" not in zero_shot_answer.upper():
123
  print(f"Zero-shot successful! Answer: {zero_shot_answer}")
124
  return zero_shot_answer
125
 
 
126
  print("--- Step 2: Zero-shot failed, starting ReAct loop ---")
127
+
128
+ # CRITICAL FIX: Reset the prompt history for each question to prevent context bleed.
129
+ # This was the cause of the botany/bird video mix-up.
130
+ current_prompt_history = self.react_prompt_template.format(question=question, task_id=task_id)
131
 
132
  for i in range(MAX_ITERATIONS):
133
  print(f"\n--- ReAct Iteration {i+1} ---")
134
 
135
+ response_text = self._call_gemini_api_with_backoff(current_prompt_history)
136
  print(f"LLM Response:\n{response_text}")
137
 
138
+ if "AGENT_ERROR" in response_text: return response_text
 
139
 
140
  final_answer_match = re.search(r"Final Answer:\s*(.*)", response_text, re.DOTALL)
141
  if final_answer_match:
 
149
  tool_input = action_match.group(2).strip()
150
 
151
  if tool_name in self.tools:
 
152
  tool = self.tools[tool_name]
153
  try:
154
+ observation = tool.execute(task_id if tool_name == "FileDownloader" else tool_input)
 
 
 
155
  except Exception as e:
156
  observation = f"Error executing tool: {e}"
157
+ # Append the whole thought/action/observation cycle
158
+ current_prompt_history += f"\n{response_text}\nObservation: {observation}"
159
  else:
160
+ current_prompt_history += f"\n{response_text}\nObservation: Error - The tool '{tool_name}' does not exist."
 
161
  else:
162
  print("Error: Agent did not provide a valid Action or Final Answer. Returning last response.")
163
  return response_text.strip()
164
 
165
+ return "AGENT_ERROR: Agent reached max iterations."
 
 
166
 
167
+ # --- Evaluation runner: fetch questions, run the agent, submit answers ---
168
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
169
  space_id = os.getenv("SPACE_ID")
170
+ if not profile: return "Please Login to Hugging Face with the button.", None
171
+ username = f"{profile.username}"
172
+ print(f"User logged in: {username}")
 
 
 
 
173
 
174
  pplx_key = os.getenv("PPLX_API_KEY")
175
  gemini_key = os.getenv("GEMINI_API_KEY")
176
+ if not pplx_key or not gemini_key: return "API keys not found in Space secrets.", None
 
 
 
 
177
 
178
  api_url = DEFAULT_API_URL
 
 
 
179
  try:
180
  agent = GAIAAgent(gemini_api_key=gemini_key, pplx_api_key=pplx_key, api_url=api_url)
181
+ questions_data = requests.get(f"{api_url}/questions", timeout=15).json()
182
+ except Exception as e: return f"Error during setup: {e}", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
+ results_log, answers_payload = [], []
 
 
185
  for item in questions_data:
186
+ task_id, question_text = item.get("task_id"), item.get("question")
187
+ if not task_id or question_text is None: continue
 
 
188
  try:
189
  submitted_answer = agent(question_text, task_id)
190
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
191
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
192
  except Exception as e:
 
193
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
194
 
195
+ print(f"--- Waiting for 8 seconds before next question to respect rate limits ---")
196
+ time.sleep(8) # Increased delay to be safer
 
 
 
 
197
 
198
+ if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
199
+
200
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
201
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
202
+
 
 
 
203
  try:
204
+ response = requests.post(f"{api_url}/submit", json=submission_data, timeout=120)
205
  response.raise_for_status()
206
  result_data = response.json()
207
+ final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
208
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
209
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
210
+ f"Message: {result_data.get('message', 'No message received.')}")
211
+ return final_status, pd.DataFrame(results_log)
212
+ except requests.exceptions.RequestException as e:
213
+ return f"Submission Failed: {e}", pd.DataFrame(results_log)
214
+
215
+ # --- Gradio Interface ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  with gr.Blocks() as demo:
217
  gr.Markdown("# GAIA Agent Evaluation Runner")
218
+ gr.Markdown("""
 
219
  **Instructions:**
220
  1. Ensure you have added your `PPLX_API_KEY` and `GEMINI_API_KEY` to this Space's **Settings > Secrets**.
221
  2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
222
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
223
  ---
224
  **Disclaimers:**
225
+ This process is slow due to the added delays to respect API rate limits. Please be patient.
226
+ """)
 
227
  gr.LoginButton()
228
  run_button = gr.Button("Run Evaluation & Submit All Answers")
229
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
230
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
231
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
232
 
233
  if __name__ == "__main__":
 
234
  demo.launch(debug=True, share=False)