alisamak committed on
Commit
fd4610a
·
verified ·
1 Parent(s): 333dc2b

Update evaluate_agent.py

Browse files
Files changed (1) hide show
  1. evaluate_agent.py +29 -29
evaluate_agent.py CHANGED
@@ -16,35 +16,35 @@ def test_questions():
16
  ),
17
  "expected_keywords": ["b, e"]
18
  },
19
- {
20
- "task_id": "q3",
21
- "question": (
22
- "'.rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI'"
23
- ),
24
- "expected_keywords": ["right"]
25
- },
26
- {
27
- "task_id": "q10",
28
- "question": (
29
- "I’m organizing a grocery list and only want to include true vegetables "
30
- "(not fruits, even if they’re used as vegetables in cooking). "
31
- "From the following list, which items are true vegetables?\n\n"
32
- "milk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, "
33
- "green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, "
34
- "zucchini, lettuce, peanuts"
35
- ),
36
- "expected_keywords": [
37
- "broccoli", "celery", "green beans", "lettuce", "sweet potatoes", "zucchini"
38
- ]
39
- },
40
- {
41
- "task_id": "q2",
42
- "question": (
43
- "How many studio albums did Mercedes Sosa release between 2000 and 2009 (inclusive)? "
44
- "Use Wikipedia to find the answer."
45
- ),
46
- "expected_keywords": ["3", "three"]
47
- },
48
  # {
49
  # "task_id": "q3",
50
  # "question": (
 
16
  ),
17
  "expected_keywords": ["b, e"]
18
  },
19
+ # {
20
+ # "task_id": "q3",
21
+ # "question": (
22
+ # "'.rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI'"
23
+ # ),
24
+ # "expected_keywords": ["right"]
25
+ # },
26
+ # {
27
+ # "task_id": "q10",
28
+ # "question": (
29
+ # "I’m organizing a grocery list and only want to include true vegetables "
30
+ # "(not fruits, even if they’re used as vegetables in cooking). "
31
+ # "From the following list, which items are true vegetables?\n\n"
32
+ # "milk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, "
33
+ # "green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, "
34
+ # "zucchini, lettuce, peanuts"
35
+ # ),
36
+ # "expected_keywords": [
37
+ # "broccoli", "celery", "green beans", "lettuce", "sweet potatoes", "zucchini"
38
+ # ]
39
+ # },
40
+ # {
41
+ # "task_id": "q2",
42
+ # "question": (
43
+ # "How many studio albums did Mercedes Sosa release between 2000 and 2009 (inclusive)? "
44
+ # "Use Wikipedia to find the answer."
45
+ # ),
46
+ # "expected_keywords": ["3", "three"]
47
+ # },
48
  # {
49
  # "task_id": "q3",
50
  # "question": (