Spaces:

symanto
/

regression_evaluator

Sleeping

regression_evaluator / regression_evaluator.py

José Ángel González

float casting

019cdb7 over 1 year ago

4.93 kB

	import evaluate
	from datasets import Features, Value
	from scipy.stats import kendalltau, pearsonr, spearmanr
	from sklearn.metrics import (
	max_error,
	mean_absolute_error,
	mean_absolute_percentage_error,
	mean_squared_error,
	r2_score,
	)

	_CITATION = """
	@article{scikit-learn,
	title={Scikit-learn: Machine Learning in {P}ython},
	author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
	and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
	and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
	Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
	journal={Journal of Machine Learning Research},
	volume={12},
	pages={2825--2830},
	year={2011}
	}

	@article{2020SciPy-NMeth,
	author = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and
	Haberland, Matt and Reddy, Tyler and Cournapeau, David and
	Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and
	Bright, Jonathan and {van der Walt}, St{\'e}fan J. and
	Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and
	Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and
	Kern, Robert and Larson, Eric and Carey, C J and
	Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and
	{VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and
	Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and
	Harris, Charles R. and Archibald, Anne M. and
	Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and
	{van Mulbregt}, Paul and {SciPy 1.0 Contributors}},
	title = {{{SciPy} 1.0: Fundamental Algorithms for Scientific
	Computing in Python}},
	journal = {Nature Methods},
	year = {2020},
	volume = {17},
	pages = {261--272},
	adsurl = {https://rdcu.be/b08Wh},
	doi = {10.1038/s41592-019-0686-2},
	}
	"""


	# Human-readable summary of the metric; passed to
	# evaluate.MetricInfo(description=...) in RegressionEvaluator._info.
	_DESCRIPTION = """
	This evaluator computes multiple regression metrics to assess the performance of a model. Metrics calculated include: mean absolute error (MAE),
	mean absolute percentage error (MAPE), mean squared error (MSE), R-squared (R2), max error (ME), Pearson, Spearman and Kendall Tau correlation measures.
	"""

	_KWARGS_DESCRIPTION = """
	Args:
	predictions (`list` of `float`): Predicted values.
	references (`list` of `float`): Ground truth values.
	Returns:
	Returns: a dict containing:
	mean_absolute_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html
	mean_absolute_performance_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_percentage_error.html
	mean_squared_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html
	r2_score (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html
	max_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.max_error.html
	pearson_correlation (Tuple[float, float]): the first value being the score and the second one the p-value
	(https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html)
	spearman_correlation (Tuple[float, float]): the first value being the score and the second one the p-value
	(https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html)
	kendall_tau_correlation (Tuple[float, float]): the first value being the score and the second one the p-value
	(https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html)
	"""


	class RegressionEvaluator(evaluate.Metric):
	    """Bundle of regression metrics exposed as a single `evaluate` metric.

	    Combines five scikit-learn error measures (MAE, MAPE, MSE, max error,
	    R2) with three SciPy correlation tests (Pearson, Spearman, Kendall tau)
	    and reports them all in one result dict.
	    """

	    def _info(self):
	        """Return the metric metadata and input schema.

	        Both inputs, `predictions` and `references`, are declared as scalar
	        `float` features.
	        """
	        return evaluate.MetricInfo(
	            description=_DESCRIPTION,
	            citation=_CITATION,
	            inputs_description=_KWARGS_DESCRIPTION,
	            features=Features(
	                {"predictions": Value("float"), "references": Value("float")}
	            ),
	        )

	    def _compute(self, predictions, references):
	        """Compute every error and correlation measure for the given values.

	        Args:
	            predictions: Predicted values.
	            references: Ground truth values.

	        Returns:
	            dict keyed by the `__name__` of each metric function. Error
	            metrics map to a single float; correlation measures map to a
	            `(score, p_value)` tuple of floats.
	        """
	        error_fns = (
	            mean_absolute_error,
	            mean_absolute_percentage_error,
	            mean_squared_error,
	            max_error,
	            r2_score,
	        )
	        correlation_fns = (pearsonr, spearmanr, kendalltau)

	        # Every function is called as fn(references, predictions): ground
	        # truth first, predictions second. Results are cast to built-in
	        # floats because the libraries may hand back NumPy scalar types.
	        results = {
	            fn.__name__: float(fn(references, predictions)) for fn in error_fns
	        }

	        # Correlation measures come with a p-value.
	        for fn in correlation_fns:
	            output = fn(references, predictions)
	            # Read the result positionally instead of via `.statistic` /
	            # `.pvalue`: SciPy releases before 1.9 return plain tuples (or
	            # namedtuples with a `.correlation` field), while newer result
	            # objects still support tuple indexing.
	            score, p_value = output[0], output[1]
	            results[fn.__name__] = (float(score), float(p_value))

	        return results