Pyannote (models, models_onnx)

8c838e7 verified 2 months ago

13.5 kB

	#!/usr/bin/env python
	# encoding: utf-8

	# The MIT License (MIT)

	# Copyright (c) 2012- CNRS

	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:

	# The above copyright notice and this permission notice shall be included in
	# all copies or substantial portions of the Software.

	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	# SOFTWARE.

	# AUTHORS
	# Hervé BREDIN - http://herve.niderb.fr
	from typing import List, Union, Optional, Set, Tuple

	import warnings
	import numpy as np
	import pandas as pd
	import scipy.stats
	from pyannote_audio_utils.core import Annotation, Timeline

	from pyannote_audio_utils.metrics.types import Details, MetricComponents


	class BaseMetric:
	"""
	:class:`BaseMetric` is the base class for most pyannote_audio_utils evaluation metrics.

	Attributes
	----------
	name : str
	Human-readable name of the metric (eg. 'diarization error rate')
	"""

	@classmethod
	def metric_name(cls) -> str:
	raise NotImplementedError(
	cls.__name__ + " is missing a 'metric_name' class method. "
	"It should return the name of the metric as string."
	)

	@classmethod
	def metric_components(cls) -> MetricComponents:
	raise NotImplementedError(
	cls.__name__ + " is missing a 'metric_components' class method. "
	"It should return the list of names of metric components."
	)

	def __init__(self, **kwargs):
	super(BaseMetric, self).__init__()
	self.metric_name_ = self.__class__.metric_name()
	self.components_: Set[str] = set(self.__class__.metric_components())
	self.reset()

	def init_components(self):
	return {value: 0.0 for value in self.components_}

	def reset(self):
	"""Reset accumulated components and metric values"""
	self.accumulated_: Details = dict()
	self.results_: List = list()
	for value in self.components_:
	self.accumulated_[value] = 0.0

	@property
	def name(self):
	"""Metric name."""
	return self.metric_name()

	# TODO: use joblib/locky to allow parallel processing?
	# TODO: signature could be something like __call__(self, reference_iterator, hypothesis_iterator, ...)

	def __call__(self, reference: Union[Timeline, Annotation],
	hypothesis: Union[Timeline, Annotation],
	detailed: bool = False, uri: Optional[str] = None, **kwargs):
	"""Compute metric value and accumulate components

	Parameters
	----------
	reference : type depends on the metric
	Manual `reference`
	hypothesis : type depends on the metric
	Evaluated `hypothesis`
	uri : optional
	Override uri.
	detailed : bool, optional
	By default (False), return metric value only.
	Set `detailed` to True to return dictionary where keys are
	components names and values are component values

	Returns
	-------
	value : float (if `detailed` is False)
	Metric value
	components : dict (if `detailed` is True)
	`components` updated with metric value
	"""

	# compute metric components
	components = self.compute_components(reference, hypothesis, **kwargs)

	# compute rate based on components
	components[self.metric_name_] = self.compute_metric(components)

	# keep track of this computation
	uri = uri or getattr(reference, "uri", "NA")
	self.results_.append((uri, components))

	# accumulate components
	for name in self.components_:
	self.accumulated_[name] += components[name]

	if detailed:
	return components

	return components[self.metric_name_]

	def report(self, display: bool = False) -> pd.DataFrame:
	"""Evaluation report

	Parameters
	----------
	display : bool, optional
	Set to True to print the report to stdout.

	Returns
	-------
	report : pandas.DataFrame
	Dataframe with one column per metric component, one row per
	evaluated item, and one final row for accumulated results.
	"""

	report = []
	uris = []

	percent = "total" in self.metric_components()

	for uri, components in self.results_:
	row = {}
	if percent:
	total = components["total"]
	for key, value in components.items():
	if key == self.name:
	row[key, "%"] = 100 * value
	elif key == "total":
	row[key, ""] = value
	else:
	row[key, ""] = value
	if percent:
	if total > 0:
	row[key, "%"] = 100 * value / total
	else:
	row[key, "%"] = np.NaN

	report.append(row)
	uris.append(uri)

	row = {}
	components = self.accumulated_

	if percent:
	total = components["total"]

	for key, value in components.items():
	if key == self.name:
	row[key, "%"] = 100 * value
	elif key == "total":
	row[key, ""] = value
	else:
	row[key, ""] = value
	if percent:
	if total > 0:
	row[key, "%"] = 100 * value / total
	else:
	row[key, "%"] = np.NaN

	row[self.name, "%"] = 100 * abs(self)
	report.append(row)
	uris.append("TOTAL")

	df = pd.DataFrame(report)

	df["item"] = uris
	df = df.set_index("item")

	df.columns = pd.MultiIndex.from_tuples(df.columns)

	df = df[[self.name] + self.metric_components()]

	if display:
	print(
	df.to_string(
	index=True,
	sparsify=False,
	justify="right",
	float_format=lambda f: "{0:.2f}".format(f),
	)
	)

	return df

	def __str__(self):
	report = self.report(display=False)
	return report.to_string(
	sparsify=False, float_format=lambda f: "{0:.2f}".format(f)
	)

	def __abs__(self):
	"""Compute metric value from accumulated components"""
	return self.compute_metric(self.accumulated_)

	def __getitem__(self, component: str) -> Union[float, Details]:
	"""Get value of accumulated `component`.

	Parameters
	----------
	component : str
	Name of `component`

	Returns
	-------
	value : type depends on the metric
	Value of accumulated `component`

	"""
	if component == slice(None, None, None):
	return dict(self.accumulated_)
	else:
	return self.accumulated_[component]

	def __iter__(self):
	"""Iterator over the accumulated (uri, value)"""
	for uri, component in self.results_:
	yield uri, component

	def compute_components(self,
	reference: Union[Timeline, Annotation],
	hypothesis: Union[Timeline, Annotation],
	**kwargs) -> Details:
	"""Compute metric components

	Parameters
	----------
	reference : type depends on the metric
	Manual `reference`
	hypothesis : same as `reference`
	Evaluated `hypothesis`

	Returns
	-------
	components : dict
	Dictionary where keys are component names and values are component
	values

	"""
	raise NotImplementedError(
	self.__class__.__name__ + " is missing a 'compute_components' method."
	"It should return a dictionary where keys are component names "
	"and values are component values."
	)

	def compute_metric(self, components: Details):
	"""Compute metric value from computed `components`

	Parameters
	----------
	components : dict
	Dictionary where keys are components names and values are component
	values

	Returns
	-------
	value : type depends on the metric
	Metric value
	"""
	raise NotImplementedError(
	self.__class__.__name__ + " is missing a 'compute_metric' method. "
	"It should return the actual value of the metric based "
	"on the precomputed component dictionary given as input."
	)

	def confidence_interval(self, alpha: float = 0.9) \
	-> Tuple[float, Tuple[float, float]]:
	"""Compute confidence interval on accumulated metric values

	Parameters
	----------
	alpha : float, optional
	Probability that the returned confidence interval contains
	the true metric value.

	Returns
	-------
	(center, (lower, upper))
	with center the mean of the conditional pdf of the metric value
	and (lower, upper) is a confidence interval centered on the median,
	containing the estimate to a probability alpha.

	See Also:
	---------
	scipy.stats.bayes_mvs

	"""

	values = [r[self.metric_name_] for _, r in self.results_]

	if len(values) == 0:
	raise ValueError("Please evaluate a bunch of files before computing confidence interval.")

	elif len(values) == 1:
	warnings.warn("Cannot compute a reliable confidence interval out of just one file.")
	center = lower = upper = values[0]
	return center, (lower, upper)

	else:
	return scipy.stats.bayes_mvs(values, alpha=alpha)[0]


	PRECISION_NAME = "precision"
	PRECISION_RETRIEVED = "# retrieved"
	PRECISION_RELEVANT_RETRIEVED = "# relevant retrieved"


	class Precision(BaseMetric):
	"""
	:class:`Precision` is a base class for precision-like evaluation metrics.

	It defines two components '# retrieved' and '# relevant retrieved' and the
	compute_metric() method to compute the actual precision:

	Precision = # retrieved / # relevant retrieved

	Inheriting classes must implement compute_components().
	"""

	@classmethod
	def metric_name(cls):
	return PRECISION_NAME

	@classmethod
	def metric_components(cls) -> MetricComponents:
	return [PRECISION_RETRIEVED, PRECISION_RELEVANT_RETRIEVED]

	def compute_metric(self, components: Details) -> float:
	"""Compute precision from `components`"""
	numerator = components[PRECISION_RELEVANT_RETRIEVED]
	denominator = components[PRECISION_RETRIEVED]
	if denominator == 0.0:
	if numerator == 0:
	return 1.0
	else:
	raise ValueError("")
	else:
	return numerator / denominator


	RECALL_NAME = "recall"
	RECALL_RELEVANT = "# relevant"
	RECALL_RELEVANT_RETRIEVED = "# relevant retrieved"


	class Recall(BaseMetric):
	"""
	:class:`Recall` is a base class for recall-like evaluation metrics.

	It defines two components '# relevant' and '# relevant retrieved' and the
	compute_metric() method to compute the actual recall:

	Recall = # relevant retrieved / # relevant

	Inheriting classes must implement compute_components().
	"""

	@classmethod
	def metric_name(cls):
	return RECALL_NAME

	@classmethod
	def metric_components(cls) -> MetricComponents:
	return [RECALL_RELEVANT, RECALL_RELEVANT_RETRIEVED]

	def compute_metric(self, components: Details) -> float:
	"""Compute recall from `components`"""
	numerator = components[RECALL_RELEVANT_RETRIEVED]
	denominator = components[RECALL_RELEVANT]
	if denominator == 0.0:
	if numerator == 0:
	return 1.0
	else:
	raise ValueError("")
	else:
	return numerator / denominator


	def f_measure(precision: float, recall: float, beta=1.0) -> float:
	"""Compute f-measure

	f-measure is defined as follows:
	F(P, R, b) = (1+b²).P.R / (b².P + R)

	where P is `precision`, R is `recall` and b is `beta`
	"""
	if precision + recall == 0.0:
	return 0
	return (1 + beta * beta) * precision * recall / (beta * beta * precision + recall)