alisamak committed on
Commit
ea2f226
·
verified ·
1 Parent(s): d283dcd

Update evaluate_agent.py

Browse files
Files changed (1) hide show
  1. evaluate_agent.py +14 -5
evaluate_agent.py CHANGED
@@ -1,11 +1,20 @@
1
  def test_questions():
2
  return [
3
  {
4
- "task_id": "q5",
5
- "question": (
6
- "How many Olympic Games were held between 1960 and 1980? Use Wikipedia to find the answer."
7
- ),
8
- "expected_keywords": ["6", "six"]
 
 
 
 
 
 
 
 
 
9
  },
10
  {
11
  "task_id": "q3",
 
1
  def test_questions():
2
  return [
3
  {
4
+ "task_id": "q7",
5
+ "question": (
6
+ "Given this table defining * on the set S = {a, b, c, d, e}\n\n"
7
+ "|*|a|b|c|d|e|\n"
8
+ "|---|---|---|---|---|---|\n"
9
+ "|a|a|b|c|b|d|\n"
10
+ "|b|b|c|a|e|c|\n"
11
+ "|c|c|a|b|b|a|\n"
12
+ "|d|b|e|b|e|d|\n"
13
+ "|e|d|b|a|d|c|\n\n"
14
+ "Provide the subset of S involved in any possible counter-examples that prove * is not commutative. "
15
+ "Provide your answer as a comma-separated list of the elements in the set in alphabetical order."
16
+ ),
17
+ "expected_keywords": ["a, b, c, d, e"]
18
  },
19
  {
20
  "task_id": "q3",