cacaprog committed on
Commit
7909c2e
·
verified ·
1 Parent(s): 643195b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +236 -45
app.py CHANGED
@@ -15,6 +15,8 @@ from tavily import TavilyClient
15
  # Load environment variables
16
  load_dotenv()
17
 
 
 
18
  class GAIAAgent:
19
  def __init__(self):
20
  print("Initializing GAIA Agent...")
@@ -36,8 +38,14 @@ class GAIAAgent:
36
  # Initialize tools
37
  self.tools = self._initialize_tools()
38
 
39
- # GAIA-specific system prompt
40
- self.system_prompt = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
 
 
 
 
 
 
41
 
42
  # Create agent
43
  self.agent = ReActAgent.from_tools(
@@ -155,31 +163,193 @@ class GAIAAgent:
155
  }
156
 
157
 
158
- def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  """
160
- Fetches all questions, runs the GAIAAgent on them, submits all answers,
161
  and displays the results.
162
  """
163
- space_id = os.getenv("SPACE_ID")
 
164
 
165
  if profile:
166
- username = f"{profile.username}"
167
  print(f"User logged in: {username}")
168
  else:
169
  print("User not logged in.")
170
  return "Please Login to Hugging Face with the button.", None
171
 
172
- api_url = "https://agents-course-unit4-scoring.hf.space"
173
  questions_url = f"{api_url}/questions"
174
  submit_url = f"{api_url}/submit"
175
 
176
- # 1. Instantiate Agent
177
  try:
178
- agent = GAIAAgent()
179
  except Exception as e:
180
  print(f"Error instantiating agent: {e}")
181
  return f"Error initializing agent: {e}", None
182
-
183
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
184
  print(agent_code)
185
 
@@ -190,12 +360,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
190
  response.raise_for_status()
191
  questions_data = response.json()
192
  if not questions_data:
193
- print("Fetched questions list is empty.")
194
- return "Fetched questions list is empty or invalid format.", None
195
  print(f"Fetched {len(questions_data)} questions.")
196
- except Exception as e:
197
  print(f"Error fetching questions: {e}")
198
  return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
199
 
200
  # 3. Run your Agent
201
  results_log = []
@@ -208,37 +385,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
208
  print(f"Skipping item with missing task_id or question: {item}")
209
  continue
210
  try:
211
- agent_response = agent(question_text)
212
- answers_payload.append({
213
- "task_id": task_id,
214
- "model_answer": agent_response["model_answer"],
215
- "reasoning_trace": agent_response["reasoning_trace"]
216
- })
217
- results_log.append({
218
- "Task ID": task_id,
219
- "Question": question_text,
220
- "Submitted Answer": agent_response["model_answer"],
221
- "Reasoning": agent_response["reasoning_trace"]
222
- })
223
  except Exception as e:
224
- print(f"Error running agent on task {task_id}: {e}")
225
- results_log.append({
226
- "Task ID": task_id,
227
- "Question": question_text,
228
- "Submitted Answer": f"AGENT ERROR: {e}",
229
- "Reasoning": f"Error occurred: {str(e)}"
230
- })
231
 
232
  if not answers_payload:
233
  print("Agent did not produce any answers to submit.")
234
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
235
 
236
  # 4. Prepare Submission
237
- submission_data = {
238
- "username": username.strip(),
239
- "agent_code": agent_code,
240
- "answers": answers_payload
241
- }
242
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
243
  print(status_update)
244
 
@@ -258,21 +417,47 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
258
  print("Submission successful.")
259
  results_df = pd.DataFrame(results_log)
260
  return final_status, results_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  except Exception as e:
262
- status_message = f"Submission Failed: {str(e)}"
263
  print(status_message)
264
  results_df = pd.DataFrame(results_log)
265
  return status_message, results_df
266
 
267
 
268
- #--- Build Gradio Interface using Blocks ---
269
  with gr.Blocks() as demo:
270
- gr.Markdown("# GAIA Agent Evaluation Runner")
271
  gr.Markdown(
272
  """
273
  **Instructions:**
274
- 1. Log in to your Hugging Face account using the button below.
275
- 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
 
 
276
  """
277
  )
278
 
@@ -281,6 +466,7 @@ with gr.Blocks() as demo:
281
  run_button = gr.Button("Run Evaluation & Submit All Answers")
282
 
283
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
284
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
285
 
286
  run_button.click(
@@ -290,19 +476,24 @@ with gr.Blocks() as demo:
290
 
291
  if __name__ == "__main__":
292
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
293
  space_host_startup = os.getenv("SPACE_HOST")
294
- space_id_startup = os.getenv("SPACE_ID")
295
 
296
  if space_host_startup:
297
  print(f"✅ SPACE_HOST found: {space_host_startup}")
298
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
 
 
299
 
300
- if space_id_startup:
301
  print(f"✅ SPACE_ID found: {space_id_startup}")
302
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
303
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
 
304
 
305
  print("-"*(60 + len(" App Starting ")) + "\n")
306
 
307
- print("Launching Gradio Interface for GAIA Agent Evaluation...")
308
  demo.launch(debug=True, share=False)
 
15
  # Load environment variables
16
  load_dotenv()
17
 
18
+
19
+ # Agent
20
  class GAIAAgent:
21
  def __init__(self):
22
  print("Initializing GAIA Agent...")
 
38
  # Initialize tools
39
  self.tools = self._initialize_tools()
40
 
41
+ # Load system prompt from file
42
+ try:
43
+ with open('system_prompt.txt', 'r') as f:
44
+ self.system_prompt = f.read()
45
+ print("✅ System prompt loaded successfully")
46
+ except Exception as e:
47
+ self.system_prompt = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
48
+ print(f"⚠️ Couldn't load system prompt: {str(e)}. Using fallback prompt.")
49
 
50
  # Create agent
51
  self.agent = ReActAgent.from_tools(
 
163
  }
164
 
165
 
166
+ #def run_and_submit_all(profile: gr.OAuthProfile | None):
167
+ # """
168
+ # Fetches all questions, runs the GAIAAgent on them, submits all answers,
169
+ # and displays the results.
170
+ # """
171
+ # space_id = os.getenv("SPACE_ID")
172
+ #
173
+ # if profile:
174
+ # username = f"{profile.username}"
175
+ # print(f"User logged in: {username}")
176
+ # else:
177
+ # print("User not logged in.")
178
+ # return "Please Login to Hugging Face with the button.", None
179
+ #
180
+ # api_url = "https://agents-course-unit4-scoring.hf.space"
181
+ # questions_url = f"{api_url}/questions"
182
+ # submit_url = f"{api_url}/submit"
183
+ #
184
+ # # 1. Instantiate Agent
185
+ # try:
186
+ # agent = GAIAAgent()
187
+ # except Exception as e:
188
+ # print(f"Error instantiating agent: {e}")
189
+ # return f"Error initializing agent: {e}", None
190
+ # # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
191
+ # agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
192
+ # print(agent_code)
193
+ #
194
+ #
195
+ # # 2. Fetch Questions
196
+ # print(f"Fetching questions from: {questions_url}")
197
+ # try:
198
+ # response = requests.get(questions_url, timeout=15)
199
+ # response.raise_for_status()
200
+ # questions_data = response.json()
201
+ # if not questions_data:
202
+ # print("Fetched questions list is empty.")
203
+ # return "Fetched questions list is empty or invalid format.", None
204
+ # print(f"Fetched {len(questions_data)} questions.")
205
+ # except requests.exceptions.RequestException as e:
206
+ # print(f"Error fetching questions: {e}")
207
+ # return f"Error fetching questions: {e}", None
208
+ # except requests.exceptions.JSONDecodeError as e:
209
+ # print(f"Error decoding JSON response from questions endpoint: {e}")
210
+ # print(f"Response text: {response.text[:500]}")
211
+ # return f"Error decoding server response for questions: {e}", None
212
+ # except Exception as e:
213
+ # print(f"An unexpected error occurred fetching questions: {e}")
214
+ # return f"An unexpected error occurred fetching questions: {e}", None
215
+ #
216
+ # # 3. Run your Agent
217
+ # results_log = []
218
+ # answers_payload = []
219
+ # print(f"Running agent on {len(questions_data)} questions...")
220
+ # for item in questions_data:
221
+ # task_id = item.get("task_id")
222
+ # question_text = item.get("question")
223
+ # if not task_id or question_text is None:
224
+ # print(f"Skipping item with missing task_id or question: {item}")
225
+ # continue
226
+ # try:
227
+ # agent_response = agent(question_text)
228
+ # answers_payload.append({
229
+ # "task_id": task_id,
230
+ # "model_answer": agent_response["model_answer"],
231
+ # "reasoning_trace": agent_response["reasoning_trace"]
232
+ # })
233
+ # results_log.append({
234
+ # "Task ID": task_id,
235
+ # "Question": question_text,
236
+ # "Submitted Answer": agent_response["model_answer"],
237
+ # "Reasoning": agent_response["reasoning_trace"]
238
+ # })
239
+ # except Exception as e:
240
+ # print(f"Error running agent on task {task_id}: {e}")
241
+ # results_log.append({
242
+ # "Task ID": task_id,
243
+ # "Question": question_text,
244
+ # "Submitted Answer": f"AGENT ERROR: {e}",
245
+ # "Reasoning": f"Error occurred: {str(e)}"
246
+ # })
247
+ #
248
+ # if not answers_payload:
249
+ # print("Agent did not produce any answers to submit.")
250
+ # return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
251
+ #
252
+ # # 4. Prepare Submission
253
+ # submission_data = {
254
+ # "username": username.strip(),
255
+ # "agent_code": agent_code,
256
+ # "answers": answers_payload
257
+ # }
258
+ # status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
259
+ # print(status_update)
260
+ #
261
+ # # 5. Submit
262
+ # print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
263
+ # try:
264
+ # response = requests.post(submit_url, json=submission_data, timeout=60)
265
+ # response.raise_for_status()
266
+ # result_data = response.json()
267
+ # final_status = (
268
+ # f"Submission Successful!\n"
269
+ # f"User: {result_data.get('username')}\n"
270
+ # f"Overall Score: {result_data.get('score', 'N/A')}% "
271
+ # f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
272
+ # f"Message: {result_data.get('message', 'No message received.')}"
273
+ # )
274
+ # print("Submission successful.")
275
+ # results_df = pd.DataFrame(results_log)
276
+ # return final_status, results_df
277
+ # except Exception as e:
278
+ # status_message = f"Submission Failed: {str(e)}"
279
+ # print(status_message)
280
+ # results_df = pd.DataFrame(results_log)
281
+ # return status_message, results_df
282
+ #
283
+ #
284
+ ##--- Build Gradio Interface using Blocks ---
285
+ #with gr.Blocks() as demo:
286
+ # gr.Markdown("# GAIA Agent Evaluation Runner")
287
+ # gr.Markdown(
288
+ # """
289
+ # **Instructions:**
290
+ # 1. Log in to your Hugging Face account using the button below.
291
+ # 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
292
+ # """
293
+ # )
294
+ #
295
+ # gr.LoginButton()
296
+ #
297
+ # run_button = gr.Button("Run Evaluation & Submit All Answers")
298
+ #
299
+ # status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
300
+ # results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
301
+ #
302
+ # run_button.click(
303
+ # fn=run_and_submit_all,
304
+ # outputs=[status_output, results_table]
305
+ # )
306
+ #
307
+ #if __name__ == "__main__":
308
+ # print("\n" + "-"*30 + " App Starting " + "-"*30)
309
+ # space_host_startup = os.getenv("SPACE_HOST")
310
+ # space_id_startup = os.getenv("SPACE_ID")
311
+ #
312
+ # if space_host_startup:
313
+ # print(f"✅ SPACE_HOST found: {space_host_startup}")
314
+ # print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
315
+ #
316
+ # if space_id_startup:
317
+ # print(f"✅ SPACE_ID found: {space_id_startup}")
318
+ # print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
319
+ # print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
320
+ #
321
+ # print("-"*(60 + len(" App Starting ")) + "\n")
322
+ #
323
+ # print("Launching Gradio Interface for GAIA Agent Evaluation...")
324
+ # demo.launch(debug=True, share=True)
325
+
326
+
327
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
328
  """
329
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
330
  and displays the results.
331
  """
332
+ # --- Determine HF Space Runtime URL and Repo URL ---
333
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
334
 
335
  if profile:
336
+ username= f"{profile.username}"
337
  print(f"User logged in: {username}")
338
  else:
339
  print("User not logged in.")
340
  return "Please Login to Hugging Face with the button.", None
341
 
342
+ api_url = DEFAULT_API_URL
343
  questions_url = f"{api_url}/questions"
344
  submit_url = f"{api_url}/submit"
345
 
346
+ # 1. Instantiate Agent ( modify this part to create your agent)
347
  try:
348
+ agent = BasicAgent()
349
  except Exception as e:
350
  print(f"Error instantiating agent: {e}")
351
  return f"Error initializing agent: {e}", None
352
+ # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
353
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
354
  print(agent_code)
355
 
 
360
  response.raise_for_status()
361
  questions_data = response.json()
362
  if not questions_data:
363
+ print("Fetched questions list is empty.")
364
+ return "Fetched questions list is empty or invalid format.", None
365
  print(f"Fetched {len(questions_data)} questions.")
366
+ except requests.exceptions.RequestException as e:
367
  print(f"Error fetching questions: {e}")
368
  return f"Error fetching questions: {e}", None
369
+ except requests.exceptions.JSONDecodeError as e:
370
+ print(f"Error decoding JSON response from questions endpoint: {e}")
371
+ print(f"Response text: {response.text[:500]}")
372
+ return f"Error decoding server response for questions: {e}", None
373
+ except Exception as e:
374
+ print(f"An unexpected error occurred fetching questions: {e}")
375
+ return f"An unexpected error occurred fetching questions: {e}", None
376
 
377
  # 3. Run your Agent
378
  results_log = []
 
385
  print(f"Skipping item with missing task_id or question: {item}")
386
  continue
387
  try:
388
+ submitted_answer = agent(question_text)
389
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
390
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
391
  except Exception as e:
392
+ print(f"Error running agent on task {task_id}: {e}")
393
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
394
 
395
  if not answers_payload:
396
  print("Agent did not produce any answers to submit.")
397
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
398
 
399
  # 4. Prepare Submission
400
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
401
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
402
  print(status_update)
403
 
 
417
  print("Submission successful.")
418
  results_df = pd.DataFrame(results_log)
419
  return final_status, results_df
420
+ except requests.exceptions.HTTPError as e:
421
+ error_detail = f"Server responded with status {e.response.status_code}."
422
+ try:
423
+ error_json = e.response.json()
424
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
425
+ except requests.exceptions.JSONDecodeError:
426
+ error_detail += f" Response: {e.response.text[:500]}"
427
+ status_message = f"Submission Failed: {error_detail}"
428
+ print(status_message)
429
+ results_df = pd.DataFrame(results_log)
430
+ return status_message, results_df
431
+ except requests.exceptions.Timeout:
432
+ status_message = "Submission Failed: The request timed out."
433
+ print(status_message)
434
+ results_df = pd.DataFrame(results_log)
435
+ return status_message, results_df
436
+ except requests.exceptions.RequestException as e:
437
+ status_message = f"Submission Failed: Network error - {e}"
438
+ print(status_message)
439
+ results_df = pd.DataFrame(results_log)
440
+ return status_message, results_df
441
  except Exception as e:
442
+ status_message = f"An unexpected error occurred during submission: {e}"
443
  print(status_message)
444
  results_df = pd.DataFrame(results_log)
445
  return status_message, results_df
446
 
447
 
448
+ # --- Build Gradio Interface using Blocks ---
449
  with gr.Blocks() as demo:
450
+ gr.Markdown("# Basic Agent Evaluation Runner")
451
  gr.Markdown(
452
  """
453
  **Instructions:**
454
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
455
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
456
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
457
+ ---
458
+ **Disclaimers:**
459
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
460
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
461
  """
462
  )
463
 
 
466
  run_button = gr.Button("Run Evaluation & Submit All Answers")
467
 
468
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
469
+ # Removed max_rows=10 from DataFrame constructor
470
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
471
 
472
  run_button.click(
 
476
 
477
  if __name__ == "__main__":
478
  print("\n" + "-"*30 + " App Starting " + "-"*30)
479
+ # Check for SPACE_HOST and SPACE_ID at startup for information
480
  space_host_startup = os.getenv("SPACE_HOST")
481
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
482
 
483
  if space_host_startup:
484
  print(f"✅ SPACE_HOST found: {space_host_startup}")
485
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
486
+ else:
487
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
488
 
489
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
490
  print(f"✅ SPACE_ID found: {space_id_startup}")
491
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
492
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
493
+ else:
494
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
495
 
496
  print("-"*(60 + len(" App Starting ")) + "\n")
497
 
498
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
499
  demo.launch(debug=True, share=False)