Spaces:

saridormi
/

b_norm

Sleeping

App Files Files Community

b_norm / b_norm.py

saridormi

try to fix naming errors

b529fa6 over 1 year ago

raw

history blame contribute delete

2.63 kB

	from typing import Dict, List

	import datasets
	import evaluate

	from .reused import bleuFromMaps, splitPuncts

	# BibTeX entries handed to `evaluate.MetricInfo(citation=...)` in `BNorm._info`.
	# Three works are cited, in this order:
	#   * tao2021evaluation   - the ICSME 2021 commit-message-generation study,
	#   * Papineni02bleu:a    - the BLEU paper,
	#   * lin-och-2004-orange - the ORANGE paper by Lin and Och (COLING 2004).
	# NOTE(review): the BLEU entry below has an empty `booktitle` field; it is
	# kept as-is because this string is emitted verbatim at runtime.
	_CITATION = """\
	@inproceedings{tao2021evaluation,
	title={On the Evaluation of Commit Message Generation Models: An Experimental Study},
	author={Tao, Wei and Wang, Yanlin and Shi, Ensheng and Du, Lun and Han, Shi and Zhang, Hongyu and Zhang, Dongmei and Zhang, Wenqiang},
	booktitle={2021 IEEE International Conference on Software Maintenance and Evolution (ICSME)},
	pages={126--136},
	year={2021},
	organization={IEEE}
	}
	@inproceedings{Papineni02bleu:a,
	author = {Kishore Papineni and Salim Roukos and Todd Ward and Wei-jing Zhu},
	title = {BLEU: a Method for Automatic Evaluation of Machine Translation},
	booktitle = {},
	year = {2002},
	pages = {311--318}
	}
	@inproceedings{lin-och-2004-orange,
	title = "{ORANGE}: a Method for Evaluating Automatic Evaluation Metrics for Machine Translation",
	author = "Lin, Chin-Yew and
	Och, Franz Josef",
	booktitle = "{COLING} 2004: Proceedings of the 20th International Conference on Computational Linguistics",
	month = "aug 23{--}aug 27",
	year = "2004",
	address = "Geneva, Switzerland",
	publisher = "COLING",
	url = "https://www.aclweb.org/anthology/C04-1072",
	pages = "501--507",
	}
	"""

	# Human-readable summary of the metric, handed to
	# `evaluate.MetricInfo(description=...)` in `BNorm._info`.
	_DESCRIPTION = """\
	B-Norm is a variation of BLEU. It uses smoothing by Lin and Och, 2004 and does some additional preprocessing steps.
	It was recommended for evaluation of commit message generation approaches in the
	"On the Evaluation of Commit Message Generation Models: An Experimental Study" paper accepted to ICSME 2021.
	This class uses implementation provided in the replication package.
	"""


	class BNorm(evaluate.Metric):
	    """`evaluate` metric wrapper that reports a single ``b_norm`` score.

	    The scoring itself is delegated to ``bleuFromMaps`` and ``splitPuncts``
	    imported from the sibling ``reused`` module; this class only declares the
	    metric metadata and adapts the inputs to the shape those helpers receive.
	    """

	    def _info(self):
	        """Return the metric metadata (description, citation, input schema).

	        Both ``predictions`` and ``references`` are declared as plain string
	        values, i.e. one prediction string and one reference string per
	        example.
	        """
	        input_schema = datasets.Features(
	            {
	                "predictions": datasets.Value("string", id="sequence"),
	                "references": datasets.Value("string", id="sequence"),
	            }
	        )
	        return evaluate.MetricInfo(
	            description=_DESCRIPTION,
	            citation=_CITATION,
	            features=input_schema,
	            codebase_urls=["https://github.com/DeepSoftwareAnalytics/CommitMsgEmpirical/blob/main/metrics/B-Norm.py"],
	        )

	    def _compute(self, predictions: List[str], references: List[str]) -> Dict[str, float]:  # type: ignore[override]
	        """Score ``predictions`` against ``references``.

	        Args:
	            predictions: Generated strings, one per example.
	            references: Reference strings, one per example; the entry at
	                position ``i`` is keyed with the same index as prediction ``i``.

	        Returns:
	            A dict with the single key ``"b_norm"``. Its value is the first
	            element returned by ``bleuFromMaps`` divided by 100 (presumably a
	            percentage rescaled to a fraction - confirm against ``reused``).
	        """

	        def _as_entry(text):
	            # Trim, lowercase, then let `splitPuncts` preprocess the text; each
	            # map value is a one-element list holding the processed string.
	            return [splitPuncts(text.strip().lower())]

	        # Keys are the positional indices, so entry i of one map pairs with
	        # entry i of the other.
	        prediction_map = dict(enumerate(map(_as_entry, predictions)))
	        gold_map = dict(enumerate(map(_as_entry, references)))

	        scores = bleuFromMaps(gold_map, prediction_map)
	        return {"b_norm": scores[0] / 100.0}