alisamak committed on
Commit
1abcd94
·
verified ·
1 Parent(s): b8c3c10

Update evaluate_agent.py

Browse files
Files changed (1) hide show
  1. evaluate_agent.py +31 -47
evaluate_agent.py CHANGED
@@ -8,22 +8,22 @@ def test_questions():
8
  ),
9
  "expected_keywords": ["clarify", "incomplete", "missing", "please provide", "need more information"]
10
  },
11
- # {
12
- # "task_id": "q7",
13
- # "question": (
14
- # "Given this table defining * on the set S = {a, b, c, d, e}\n\n"
15
- # "|*|a|b|c|d|e|\n"
16
- # "|---|---|---|---|---|---|\n"
17
- # "|a|a|b|c|b|d|\n"
18
- # "|b|b|c|a|e|c|\n"
19
- # "|c|c|a|b|b|a|\n"
20
- # "|d|b|e|b|e|d|\n"
21
- # "|e|d|b|a|d|c|\n\n"
22
- # "Provide the subset of S involved in any possible counter-examples that prove * is not commutative. "
23
- # "Provide your answer as a comma-separated list of the elements in the set in alphabetical order."
24
- # ),
25
- # "expected_keywords": ["b, e"]
26
- # },
27
  {
28
  "task_id": "q3",
29
  "question": (
@@ -45,14 +45,14 @@ def test_questions():
45
  # "broccoli", "celery", "green beans", "lettuce", "sweet potatoes", "zucchini"
46
  # ]
47
  # },
48
- # {
49
- # "task_id": "q2",
50
- # "question": (
51
- # "How many studio albums did Mercedes Sosa release between 2000 and 2009 (inclusive)? "
52
- # "Use Wikipedia to find the answer."
53
- # ),
54
- # "expected_keywords": ["3", "three"]
55
- # },
56
  # {
57
  # "task_id": "q3",
58
  # "question": (
@@ -61,29 +61,13 @@ def test_questions():
61
  # ),
62
  # "expected_keywords": ["14", "fourteen"]
63
  # },
64
- # {
65
- # "task_id": "q4",
66
- # "question": (
67
- # "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?"
68
- # ),
69
- # "expected_keywords": ["FunkMonk"]
70
- # },
71
- # {
72
- # "task_id": "q5",
73
- # "question": (
74
- # "Given this table defining * on the set S = {a, b, c, d, e}\n\n"
75
- # "|*|a|b|c|d|e|\n"
76
- # "|---|---|---|---|---|---|\n"
77
- # "|a|a|b|c|b|d|\n"
78
- # "|b|b|c|a|e|c|\n"
79
- # "|c|c|a|b|b|a|\n"
80
- # "|d|b|e|b|e|d|\n"
81
- # "|e|d|b|a|d|c|\n\n"
82
- # "provide the subset of S involved in any possible counter-examples that prove * is not commutative. "
83
- # "Provide your answer as a comma separated list of the elements in the set in alphabetical order."
84
- # ),
85
- # "expected_keywords": ["b,c,d,e", "b,c,d,e"] # accepted formatted answer
86
- # }
87
  ]
88
 
89
 
 
8
  ),
9
  "expected_keywords": ["clarify", "incomplete", "missing", "please provide", "need more information"]
10
  },
11
+ {
12
+ "task_id": "q7",
13
+ "question": (
14
+ "Given this table defining * on the set S = {a, b, c, d, e}\n\n"
15
+ "|*|a|b|c|d|e|\n"
16
+ "|---|---|---|---|---|---|\n"
17
+ "|a|a|b|c|b|d|\n"
18
+ "|b|b|c|a|e|c|\n"
19
+ "|c|c|a|b|b|a|\n"
20
+ "|d|b|e|b|e|d|\n"
21
+ "|e|d|b|a|d|c|\n\n"
22
+ "Provide the subset of S involved in any possible counter-examples that prove * is not commutative. "
23
+ "Provide your answer as a comma-separated list of the elements in the set in alphabetical order."
24
+ ),
25
+ "expected_keywords": ["b, e"]
26
+ },
27
  {
28
  "task_id": "q3",
29
  "question": (
 
45
  # "broccoli", "celery", "green beans", "lettuce", "sweet potatoes", "zucchini"
46
  # ]
47
  # },
48
+ {
49
+ "task_id": "q2",
50
+ "question": (
51
+ "How many studio albums did Mercedes Sosa release between 2000 and 2009 (inclusive)? "
52
+ "Use Wikipedia to find the answer."
53
+ ),
54
+ "expected_keywords": ["3", "three"]
55
+ },
56
  # {
57
  # "task_id": "q3",
58
  # "question": (
 
61
  # ),
62
  # "expected_keywords": ["14", "fourteen"]
63
  # },
64
+ {
65
+ "task_id": "q4",
66
+ "question": (
67
+ "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?"
68
+ ),
69
+ "expected_keywords": ["FunkMonk"]
70
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  ]
72
 
73