| from typing import Dict, List |
|
|
| import datasets |
| import evaluate |
|
|
| from .reused import bleuFromMaps, splitPuncts |
|
|
# BibTeX entries supplied as the `citation` field of the metric info:
# the commit-message-generation evaluation study (Tao et al., 2021),
# the original BLEU paper (Papineni et al., 2002), and the ORANGE paper
# (Lin and Och, 2004).
# NOTE(review): the `booktitle` field of the Papineni entry is empty --
# confirm whether that is intentional before changing the string.
_CITATION = """\
@inproceedings{tao2021evaluation,
title={On the Evaluation of Commit Message Generation Models: An Experimental Study},
author={Tao, Wei and Wang, Yanlin and Shi, Ensheng and Du, Lun and Han, Shi and Zhang, Hongyu and Zhang, Dongmei and Zhang, Wenqiang},
booktitle={2021 IEEE International Conference on Software Maintenance and Evolution (ICSME)},
pages={126--136},
year={2021},
organization={IEEE}
}
@inproceedings{Papineni02bleu:a,
author = {Kishore Papineni and Salim Roukos and Todd Ward and Wei-jing Zhu},
title = {BLEU: a Method for Automatic Evaluation of Machine Translation},
booktitle = {},
year = {2002},
pages = {311--318}
}
@inproceedings{lin-och-2004-orange,
title = "{ORANGE}: a Method for Evaluating Automatic Evaluation Metrics for Machine Translation",
author = "Lin, Chin-Yew and
Och, Franz Josef",
booktitle = "{COLING} 2004: Proceedings of the 20th International Conference on Computational Linguistics",
month = "aug 23{--}aug 27",
year = "2004",
address = "Geneva, Switzerland",
publisher = "COLING",
url = "https://www.aclweb.org/anthology/C04-1072",
pages = "501--507",
}
"""
|
|
# Human-readable summary of the metric, supplied as the `description`
# field of the metric info returned by `BNorm._info`.
_DESCRIPTION = """\
B-Norm is a variation of BLEU. It uses smoothing by Lin and Och, 2004 and does some additional preprocessing steps.
It was recommended for evaluation of commit message generation approaches in the
"On the Evaluation of Commit Message Generation Models: An Experimental Study" paper accepted to ICSME 2021.
This class uses implementation provided in the replication package.
"""
|
|
|
|
class BNorm(evaluate.Metric):
    """B-Norm metric, a BLEU variation for commit message generation.

    The scoring itself is delegated to ``bleuFromMaps`` and ``splitPuncts``,
    both imported from the sibling ``reused`` module.
    """

    def _info(self):
        """Return the metric metadata: description, citation, input schema and code URL."""
        # Both inputs are declared as plain string values.
        input_schema = datasets.Features(
            {
                "predictions": datasets.Value("string", id="sequence"),
                "references": datasets.Value("string", id="sequence"),
            }
        )
        source_urls = [
            "https://github.com/DeepSoftwareAnalytics/CommitMsgEmpirical/blob/main/metrics/B-Norm.py"
        ]
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            features=input_schema,
            codebase_urls=source_urls,
        )

    def _compute(self, predictions: List[str], references: List[str]) -> Dict[str, float]:
        """Compute the B-Norm score for the given predictions and references.

        Each message is stripped, lower-cased and passed through
        ``splitPuncts``; the results are keyed by their position in the
        input list and handed to ``bleuFromMaps``.

        Args:
            predictions: Generated messages.
            references: Reference messages; entries are matched with
                predictions through their list position.

        Returns:
            A dict with the single key ``"b_norm"`` holding the first element
            of the ``bleuFromMaps`` result divided by 100
            (presumably rescaling a 0-100 score to 0-1 -- TODO confirm).
        """

        def to_indexed_map(messages):
            # One single-element list per message, keyed by list position.
            indexed = {}
            for position, message in enumerate(messages):
                cleaned = message.strip().lower()
                indexed[position] = [splitPuncts(cleaned)]
            return indexed

        # Predictions are processed before references, as in the original.
        hypothesis_map = to_indexed_map(predictions)
        reference_map = to_indexed_map(references)

        scores = bleuFromMaps(reference_map, hypothesis_map)
        return {"b_norm": scores[0] / 100.0}