Spaces:
Sleeping
Sleeping
Rodrigo Ferreira Rodrigues
committed on
Commit
·
3d6fc0c
1
Parent(s):
9d0a322
Correcting input types
Browse files
- keywords_evaluate.py +3 -2
- tests.py +3 -3
keywords_evaluate.py
CHANGED
|
@@ -47,7 +47,7 @@ Examples:
|
|
| 47 |
Here is an exemple on how to use the metric:
|
| 48 |
|
| 49 |
>>> metric = evaluate.load("rfr2003/keywords_evaluate")
|
| 50 |
-
>>> results = metric.compute(generations=["yes", "no"], golds=[["yes"], ["yes"]], keywords=
|
| 51 |
>>> print(results)
|
| 52 |
{'accuracy': 0.5}
|
| 53 |
"""
|
|
@@ -77,7 +77,7 @@ class Keywords_evaluate(evaluate.Metric):
|
|
| 77 |
# TODO: Download external resources if needed
|
| 78 |
pass
|
| 79 |
|
| 80 |
-
def _compute(self, generations, golds, keywords=
|
| 81 |
'''Calculate Accuracy scores between model generations and golden answers where the task is to generate the good(s) keyword(s) among a list of them. If strict is True, we expect to find all the expected keywords generated, if not we want only one'''
|
| 82 |
assert len(generations) == len(golds)
|
| 83 |
assert isinstance(golds, list)
|
|
@@ -85,6 +85,7 @@ class Keywords_evaluate(evaluate.Metric):
|
|
| 85 |
correct, total = 0, 0
|
| 86 |
|
| 87 |
if keywords:
|
|
|
|
| 88 |
pattern = r"\b(" + "|".join(map(re.escape, keywords)) + r")\b"
|
| 89 |
|
| 90 |
else:
|
|
|
|
| 47 |
Here is an exemple on how to use the metric:
|
| 48 |
|
| 49 |
>>> metric = evaluate.load("rfr2003/keywords_evaluate")
|
| 50 |
+
>>> results = metric.compute(generations=["yes", "no"], golds=[["yes"], ["yes"]], keywords=['yes', 'no'])
|
| 51 |
>>> print(results)
|
| 52 |
{'accuracy': 0.5}
|
| 53 |
"""
|
|
|
|
| 77 |
# TODO: Download external resources if needed
|
| 78 |
pass
|
| 79 |
|
| 80 |
+
def _compute(self, generations, golds, keywords=['yes', 'no'], strict=True):
|
| 81 |
'''Calculate Accuracy scores between model generations and golden answers where the task is to generate the good(s) keyword(s) among a list of them. If strict is True, we expect to find all the expected keywords generated, if not we want only one'''
|
| 82 |
assert len(generations) == len(golds)
|
| 83 |
assert isinstance(golds, list)
|
|
|
|
| 85 |
correct, total = 0, 0
|
| 86 |
|
| 87 |
if keywords:
|
| 88 |
+
keywords = set(keywords)
|
| 89 |
pattern = r"\b(" + "|".join(map(re.escape, keywords)) + r")\b"
|
| 90 |
|
| 91 |
else:
|
tests.py
CHANGED
|
@@ -2,21 +2,21 @@ test_cases = [
|
|
| 2 |
{
|
| 3 |
"generations": ["yes", "no"],
|
| 4 |
"golds": [["yes"], ["yes"]],
|
| 5 |
-
"keywords":
|
| 6 |
"strict": True,
|
| 7 |
"result": {"accuracy": 1.0}
|
| 8 |
},
|
| 9 |
{
|
| 10 |
"generations": ["[up, left]", "[right]"],
|
| 11 |
"golds": [['up', 'left'], ['right', 'down']],
|
| 12 |
-
"keywords":
|
| 13 |
"strict": True,
|
| 14 |
"result": {"accuracy": 0.5}
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"generations": ["[up, left]", "[right]"],
|
| 18 |
"golds": [['up', 'left'], ['right', 'down']],
|
| 19 |
-
"keywords":
|
| 20 |
"strict": False,
|
| 21 |
"result": {"accuracy": 1.0}
|
| 22 |
}
|
|
|
|
| 2 |
{
|
| 3 |
"generations": ["yes", "no"],
|
| 4 |
"golds": [["yes"], ["yes"]],
|
| 5 |
+
"keywords": ['yes', 'no']
|
| 6 |
"strict": True,
|
| 7 |
"result": {"accuracy": 1.0}
|
| 8 |
},
|
| 9 |
{
|
| 10 |
"generations": ["[up, left]", "[right]"],
|
| 11 |
"golds": [['up', 'left'], ['right', 'down']],
|
| 12 |
+
"keywords": ['up', 'left', 'right', 'down']
|
| 13 |
"strict": True,
|
| 14 |
"result": {"accuracy": 0.5}
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"generations": ["[up, left]", "[right]"],
|
| 18 |
"golds": [['up', 'left'], ['right', 'down']],
|
| 19 |
+
"keywords": ['up', 'left', 'right', 'down']
|
| 20 |
"strict": False,
|
| 21 |
"result": {"accuracy": 1.0}
|
| 22 |
}
|