| """TODO: Add a description here.""" |
|
|
| import evaluate |
| import datasets |
| from collections import Counter |
| import numpy as np |


_CITATION = """\
@InProceedings{huggingface:module,
    title = {A great new module},
    authors={huggingface, Inc.},
    year={2020}
}
"""


_DESCRIPTION = """\
This module calculates the unigram precision, recall, and F1 score.
"""


_KWARGS_DESCRIPTION = """
Calculates unigram precision, recall, and F1 between tokenized predictions and references.
Args:
    predictions: list of lists of predicted token ids (int).
    references: list of lists of reference token ids (int).
Returns:
    precision: the unigram precision.
    recall: the unigram recall.
    f1: the unigram F1 score.
Examples:

    >>> my_new_module = evaluate.load("ckb/unigram")
    >>> results = my_new_module.compute(references=[[0, 1]], predictions=[[0, 1]])
    >>> print(results)
    {'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class unigram(evaluate.Metric):
    """Unigram precision, recall, and F1 evaluation module."""

    def _info(self):
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features({
                'predictions': datasets.Sequence(datasets.Value('int64')),
                'references': datasets.Sequence(datasets.Value('int64')),
            }),
            homepage="http://module.homepage",
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"]
        )
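
    # A sketch of the unigram definitions implemented below, where |overlap|
    # counts tokens shared by the prediction and the reference (with multiplicity):
    #   precision = |overlap| / len(prediction)
    #   recall    = |overlap| / len(reference)
    #   f1        = 2 * precision * recall / (precision + recall)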
    def _prec_recall_f1_score(self, pred_items, gold_items):
        """
        Compute precision, recall, and F1 given gold and predicted items.
        :param pred_items: iterable of predicted token ids
        :param gold_items: iterable of gold (reference) token ids
        :return: np.array of [precision, recall, f1]
        """
        common = Counter(gold_items) & Counter(pred_items)
        num_same = sum(common.values())
        if num_same == 0:
            # Return an array (not a tuple) so scores can be summed element-wise in _compute.
            return np.zeros(3)
        precision = 1.0 * num_same / len(pred_items)
        recall = 1.0 * num_same / len(gold_items)
        f1 = (2 * precision * recall) / (precision + recall)
        return np.array([precision, recall, f1])
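
        # Worked example (illustrative token ids, not from the original code):
        #   pred_items = [0, 1, 1], gold_items = [1, 2]  ->  num_same = 1
        #   precision = 1/3, recall = 1/2, f1 = 2 * (1/3 * 1/2) / (1/3 + 1/2) = 0.4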

    def _compute(self, predictions, references):
        """Average the unigram precision, recall, and F1 over all prediction/reference pairs."""
        score = sum(
            self._prec_recall_f1_score(pred, ref)
            for pred, ref in zip(predictions, references)
        ) / float(len(predictions))
        return {
            "precision": float(score[0]),
            "recall": float(score[1]),
            "f1": float(score[2]),
        }
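

if __name__ == "__main__":
    # Minimal local smoke test (a sketch; assumes the metric class can be
    # instantiated directly instead of being loaded through evaluate.load).
    metric = unigram()
    results = metric.compute(predictions=[[0, 1]], references=[[0, 1]])
    # Perfect overlap, so precision, recall, and f1 should all be 1.0.
    print(results)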