Spaces:
Sleeping
Sleeping
| from timebench_eval import TimebenchEval | |
| import pytest | |
| from conftest import ( | |
| PREDICTION_1, | |
| PREDICTION_2, | |
| PREDICTION_3, | |
| PREDICTION_4, | |
| PREDICTION_5, | |
| ) | |
def test_eval(prediction, reference, task, expected_metrics):
    """Check the metrics computed for a single prediction/reference pair.

    The prediction and reference are each wrapped in a one-element list,
    passed to ``TimebenchEval()._compute`` together with ``task``, and the
    result is compared for equality against ``expected_metrics``.

    NOTE(review): the four arguments are presumably supplied by a
    ``pytest.mark.parametrize`` decorator or by fixtures that are not
    visible in this view -- confirm before relying on this test.
    """
    evaluator = TimebenchEval()
    computed = evaluator._compute([prediction], [reference], task)
    assert computed == expected_metrics
def test_eval_many():
    """Check per-example metrics when two predictions are scored at once.

    ``PREDICTION_3`` and ``PREDICTION_4`` are scored against the references
    ``"unanswerable"`` and ``"Cardiff City"`` for the ``"MenatQA"`` task; the
    result must equal a dict whose ``exact_match`` and ``f1`` entries are
    both ``[1, 1]``.
    """
    predictions = [PREDICTION_3, PREDICTION_4]
    references = ["unanswerable", "Cardiff City"]
    expected = {
        "exact_match": [1, 1],
        "f1": [1, 1],
    }

    computed = TimebenchEval()._compute(predictions, references, "MenatQA")
    assert computed == expected