Spaces:
Runtime error
Runtime error
Update evaluate_agent.py
Browse files- evaluate_agent.py +29 -29
evaluate_agent.py
CHANGED
|
@@ -16,35 +16,35 @@ def test_questions():
|
|
| 16 |
),
|
| 17 |
"expected_keywords": ["b, e"]
|
| 18 |
},
|
| 19 |
-
{
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
},
|
| 26 |
-
{
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
},
|
| 40 |
-
{
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
},
|
| 48 |
# {
|
| 49 |
# "task_id": "q3",
|
| 50 |
# "question": (
|
|
|
|
| 16 |
),
|
| 17 |
"expected_keywords": ["b, e"]
|
| 18 |
},
|
| 19 |
+
# {
|
| 20 |
+
# "task_id": "q3",
|
| 21 |
+
# "question": (
|
| 22 |
+
# "'.rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI'"
|
| 23 |
+
# ),
|
| 24 |
+
# "expected_keywords": ["right"]
|
| 25 |
+
# },
|
| 26 |
+
# {
|
| 27 |
+
# "task_id": "q10",
|
| 28 |
+
# "question": (
|
| 29 |
+
# "I’m organizing a grocery list and only want to include true vegetables "
|
| 30 |
+
# "(not fruits, even if they’re used as vegetables in cooking). "
|
| 31 |
+
# "From the following list, which items are true vegetables?\n\n"
|
| 32 |
+
# "milk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, "
|
| 33 |
+
# "green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, "
|
| 34 |
+
# "zucchini, lettuce, peanuts"
|
| 35 |
+
# ),
|
| 36 |
+
# "expected_keywords": [
|
| 37 |
+
# "broccoli", "celery", "green beans", "lettuce", "sweet potatoes", "zucchini"
|
| 38 |
+
# ]
|
| 39 |
+
# },
|
| 40 |
+
# {
|
| 41 |
+
# "task_id": "q2",
|
| 42 |
+
# "question": (
|
| 43 |
+
# "How many studio albums did Mercedes Sosa release between 2000 and 2009 (inclusive)? "
|
| 44 |
+
# "Use Wikipedia to find the answer."
|
| 45 |
+
# ),
|
| 46 |
+
# "expected_keywords": ["3", "three"]
|
| 47 |
+
# },
|
| 48 |
# {
|
| 49 |
# "task_id": "q3",
|
| 50 |
# "question": (
|