| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
| import evaluate |
| import datasets |
| from .tokenizer_13a import Tokenizer13a |
|
|
|
|
|
|
# BibTeX entries for the two papers behind this module: the
# Expectation-Adjusted-Distinct (EAD) paper (Liu et al., ACL 2022) and the
# original distinct-n paper (Li et al., NAACL 2016).
_CITATION = """\
@inproceedings{liu-etal-2022-rethinking,
    title = "Rethinking and Refining the Distinct Metric",
    author = "Liu, Siyang  and
      Sabour, Sahand  and
      Zheng, Yinhe  and
      Ke, Pei  and
      Zhu, Xiaoyan  and
      Huang, Minlie",
    booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
    year = "2022",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.acl-short.86",
    doi = "10.18653/v1/2022.acl-short.86",
}

@inproceedings{li-etal-2016-diversity,
    title = "A Diversity-Promoting Objective Function for Neural Conversation Models",
    author = "Li, Jiwei  and
      Galley, Michel  and
      Brockett, Chris  and
      Gao, Jianfeng  and
      Dolan, Bill",
    booktitle = "Proceedings of the 2016 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies",
    year = "2016",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/N16-1014",
    doi = "10.18653/v1/N16-1014",
}
"""
| |
|
|
# Shown by `evaluate` as the module description; "Figure below" refers to a
# figure on the module's homepage/README, not to anything in this file.
_DESCRIPTION = """\
Distinct metric is to calculate corpus-level diversity of language. We provide two versions of distinct score. Expectation-Adjusted-Distinct (EAD) is the default one, which removes
the biases of the original distinct score on lengthier sentences (see Figure below). Distinct is the original version.

"""
|
|
|
|
|
|
| _KWARGS_DESCRIPTION = """ |
| Calculates how good are predictions given some references, using certain scores |
| Args: |
| predictions: list of sentecnes. Each prediction should be a string. |
| Returns: |
| Expectation-Adjusted-Distinct |
| Distinct-1 |
| Distinct-2 |
| Distinct-3 |
| Examples: |
| Examples should be written in doctest format, and should illustrate how |
| to use the function. |
| |
| >>> my_new_module = evaluate.load("lsy641/distinct") |
| >>> results = my_new_module.compute(references=["Hi.", "I'm sorry to hear that", "I don't know"], vocab_size=50257) |
| >>> print(results) |
| |
| |
| >>> dataset = ["This is my friend jack", "I'm sorry to hear that", "But you know I am the one who always support you", "Welcome to our family"] |
| >>> results = my_new_module.compute(references=["Hi.", "I'm sorry to hear that", "I don't know"], dataForVocabCal = dataset) |
| >>> print(results) |
| |
| |
| >>> results = my_new_module.compute(references=["Hi.", "I'm sorry to hear that", "I don't know"], mode="Distinct") |
| >>> print(results) |
| |
| """ |
|
|
| |
| BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt" |
|
|
|
|
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class distinct(evaluate.Measurement):
    """Corpus-level diversity measurement: Distinct-n and Expectation-Adjusted-Distinct (EAD)."""

    def _info(self):
        # Metadata consumed by the `evaluate` framework; the only declared
        # input feature is `predictions` (a list of strings).
        return evaluate.MeasurementInfo(
            module_type="measurement",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features({
                'predictions': datasets.Value('string')
            }),
            homepage="https://huggingface.co/spaces/lsy641/distinct",
            codebase_urls=["https://github.com/lsy641/Expectation-Adjusted-Distinct/tree/main"],
            reference_urls=["https://aclanthology.org/2022.acl-short.86/"]
        )

    def _download_and_prepare(self, dl_manager):
        """Optional: download external resources useful to compute the scores"""

    def _compute(self, predictions, dataForVocabCal=None, vocab_size=None, tokenizer="white_space", mode="Expectation-Adjusted-Distinct"):
        """Return diversity scores for *predictions*.

        Args:
            predictions: list of strings to score.
            dataForVocabCal: optional list of strings; the vocabulary size is
                taken as the number of unique tokens in this data. Takes
                precedence over ``vocab_size`` when both are given.
            vocab_size: optional int used by the EAD formula when
                ``dataForVocabCal`` is not supplied.
            tokenizer: ``"white_space"`` (default) for NLTK's
                ``WhitespaceTokenizer``, or any object exposing
                ``tokenize(str) -> iterable of tokens``.
            mode: ``"Expectation-Adjusted-Distinct"`` (default) or
                ``"Distinct"``.

        Returns:
            dict with ``"Distinct-1"``, ``"Distinct-2"``, ``"Distinct-3"`` and,
            in EAD mode, ``"Expectation-Adjusted-Distinct"``.

        Raises:
            ValueError: unknown ``mode``; EAD mode without either
                ``vocab_size`` or ``dataForVocabCal``; or ``predictions``
                yields no tokens at all (would otherwise divide by zero).
            TypeError: ``dataForVocabCal`` is not a list of strings.
        """
        import warnings

        from nltk.tokenize import WhitespaceTokenizer
        from nltk.util import ngrams

        # Fail fast on an unrecognized mode; the original silently returned
        # None after doing all the work.
        if mode not in ("Expectation-Adjusted-Distinct", "Distinct"):
            raise ValueError(f"Unknown mode {mode!r}; expected 'Expectation-Adjusted-Distinct' or 'Distinct'.")

        if mode == "Expectation-Adjusted-Distinct":
            if vocab_size is None and dataForVocabCal is None:
                raise ValueError("Either vocab_size or dataForVocabCal needs to be specified when using mode 'Expectation-Adjusted-Distinct'. See https://github.com/lsy641/Expectation-Adjusted-Distinct/blob/main/EAD.ipynb for vocab_size specification. \n Or use mode='Distinct' to get original version of distinct score.")
            if vocab_size is not None and dataForVocabCal is not None:
                # The original `raise Warning(...)`d here, aborting even though
                # the message promises to continue with dataForVocabCal.
                warnings.warn("We've detected that both vocab_size and dataForVocabCal are specified. We will use dataForVocabCal.")

        if tokenizer == "white_space":
            tokenizer = WhitespaceTokenizer()

        if mode == "Expectation-Adjusted-Distinct" and dataForVocabCal is not None:
            if not (isinstance(dataForVocabCal, list) and dataForVocabCal and isinstance(dataForVocabCal[0], str)):
                raise TypeError("Argument dataForVocabCal should be a list of strings")
            vocab = set()
            for sentence in dataForVocabCal:
                vocab |= set(tokenizer.tokenize(sentence))
            vocab_size = len(vocab)

        distinct_1grams, distinct_2grams, distinct_3grams = set(), set(), set()
        # Only the counts are needed, so track them instead of keeping the
        # full token lists the original accumulated.
        total_1grams = total_2grams = total_3grams = 0

        for prediction in predictions:
            # Tokenize once per prediction (the original tokenized each
            # prediction three times and left a debug print in place).
            tokens = list(tokenizer.tokenize(prediction))
            grams_2 = list(ngrams(tokens, 2, pad_left=True, left_pad_symbol='<s>'))
            grams_3 = list(ngrams(tokens, 3, pad_left=True, left_pad_symbol='<s>'))
            distinct_1grams.update(tokens)
            distinct_2grams.update(grams_2)
            distinct_3grams.update(grams_3)
            total_1grams += len(tokens)
            total_2grams += len(grams_2)
            total_3grams += len(grams_3)

        if total_1grams == 0:
            # Empty corpus (or all-empty strings) would divide by zero below.
            raise ValueError("predictions must contain at least one non-empty string")

        scores = {
            "Distinct-1": len(distinct_1grams) / total_1grams,
            "Distinct-2": len(distinct_2grams) / total_2grams,
            "Distinct-3": len(distinct_3grams) / total_3grams,
        }
        if mode == "Expectation-Adjusted-Distinct":
            # EAD (Liu et al., 2022): distinct unigram count normalized by the
            # expected number of distinct tokens drawn uniformly from a
            # vocabulary of size V over `total_1grams` draws.
            expectation = vocab_size * (1 - ((vocab_size - 1) / vocab_size) ** total_1grams)
            scores = {"Expectation-Adjusted-Distinct": len(distinct_1grams) / expectation, **scores}
        return scores