mabelwang21 committed on
Commit
3cc0589
·
1 Parent(s): 2c6f69a

quick check accuracy

Browse files
Files changed (1) hide show
  1. test_agent.py +49 -15
test_agent.py CHANGED
@@ -1,6 +1,6 @@
1
  import json
2
  from pathlib import Path
3
- from agent import MyAgent
4
 
5
 
6
  def test_agent(
@@ -8,11 +8,17 @@ def test_agent(
8
  max_tests: int = 5,
9
  ):
10
  """
11
- Load up to max_tests questions from the GAIA metadata JSONL file
12
- and run them through MyAgent for a quick functionality check.
13
  """
14
- # Initialize agent
15
- agent = MyAgent()
 
 
 
 
 
 
16
 
17
  metadata_file = Path(metadata_path)
18
  if not metadata_file.exists():
@@ -29,10 +35,9 @@ def test_agent(
29
  print(f"Invalid JSON on line {i+1}")
30
  continue
31
 
32
- # Support both 'task_id' and 'id'
33
  task_id = meta.get("task_id") or meta.get("id") or ""
34
- # Support both 'question' and 'text'
35
  question = meta.get("Question") or meta.get("text") or ""
 
36
 
37
  print(f"--- Test {i+1}/{max_tests}: Task ID {task_id} ---")
38
  print(f"Question: {question}")
@@ -42,19 +47,48 @@ def test_agent(
42
  continue
43
 
44
  try:
45
- # If there's a file_name field, pass it to agent.run
46
- file_arg = None
47
- if meta.get("file_name"):
48
- file_arg = meta.get("file_name")
49
- # Call agent with question and optional file
50
  if file_arg:
51
- answer = agent.run(question, file_paths=[file_arg])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  else:
53
- answer = agent.run(question)
54
- print(f"Answer: {answer}\n")
 
55
  except Exception as e:
56
  print(f"Error running agent on question '{question}': {e}\n")
57
 
 
 
 
 
 
 
 
 
58
 
59
  if __name__ == "__main__":
60
  import argparse
 
1
  import json
2
  from pathlib import Path
3
+ from agent3 import MyAgent
4
 
5
 
6
  def test_agent(
 
8
  max_tests: int = 5,
9
  ):
10
  """
11
+ Load up to max_tests questions from the GAIA metadata JSONL file,
12
+ run them through MyAgent, and compare with the correct answer.
13
  """
14
+ try:
15
+ agent = MyAgent()
16
+ except Exception as e:
17
+ print(f"Error initializing agent: {e}")
18
+ return
19
+
20
+ correct_count = 0
21
+ total_count = 0
22
 
23
  metadata_file = Path(metadata_path)
24
  if not metadata_file.exists():
 
35
  print(f"Invalid JSON on line {i+1}")
36
  continue
37
 
 
38
  task_id = meta.get("task_id") or meta.get("id") or ""
 
39
  question = meta.get("Question") or meta.get("text") or ""
40
+ correct_answer = meta.get("Final answer") or meta.get("final answer") or meta.get("Answer") or ""
41
 
42
  print(f"--- Test {i+1}/{max_tests}: Task ID {task_id} ---")
43
  print(f"Question: {question}")
 
47
  continue
48
 
49
  try:
50
+ file_arg = meta.get("file_name")
 
 
 
 
51
  if file_arg:
52
+ try:
53
+ answer = agent.run(question, file_paths=[file_arg])
54
+ except Exception as e:
55
+ import traceback
56
+ print(f"Error running agent with file: {e}")
57
+ print(traceback.format_exc())
58
+ continue
59
+ else:
60
+ try:
61
+ answer = agent.run(question)
62
+ except Exception as e:
63
+ import traceback
64
+ print(f"Error running agent: {e}")
65
+ print(traceback.format_exc())
66
+ continue
67
+ print(f"Agent Answer: {answer}")
68
+ print(f"Correct Answer: {correct_answer}")
69
+
70
+ # Normalize for comparison
71
+ def normalize(s):
72
+ return str(s).strip().lower()
73
+
74
+ if normalize(answer) == normalize(correct_answer):
75
+ print("✅ MATCH\n")
76
+ correct_count += 1
77
  else:
78
+ print("❌ NO MATCH\n")
79
+ total_count += 1
80
+
81
  except Exception as e:
82
  print(f"Error running agent on question '{question}': {e}\n")
83
 
84
+ print(f"=== Final Results ===")
85
+ print(f"Total Tests: {total_count}")
86
+ print(f"Correct Answers: {correct_count}")
87
+ if total_count > 0:
88
+ print(f"Accuracy: {correct_count / total_count * 100:.2f}%")
89
+ else:
90
+ print("No valid tests run.")
91
+
92
 
93
  if __name__ == "__main__":
94
  import argparse