File size: 13,459 Bytes

8c838e7

#!/usr/bin/env python
# encoding: utf-8

# The MIT License (MIT)

# Copyright (c) 2012- CNRS

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# AUTHORS
# Hervé BREDIN - http://herve.niderb.fr
from typing import List, Union, Optional, Set, Tuple

import warnings
import numpy as np
import pandas as pd
import scipy.stats
from pyannote_audio_utils.core import Annotation, Timeline

from pyannote_audio_utils.metrics.types import Details, MetricComponents


class BaseMetric:
    """
    :class:`BaseMetric` is the base class for most pyannote_audio_utils evaluation metrics.

    Attributes
    ----------
    name : str
        Human-readable name of the metric (eg. 'diarization error rate')
    """

    @classmethod
    def metric_name(cls) -> str:
        raise NotImplementedError(
            cls.__name__ + " is missing a 'metric_name' class method. "
                           "It should return the name of the metric as string."
        )

    @classmethod
    def metric_components(cls) -> MetricComponents:
        raise NotImplementedError(
            cls.__name__ + " is missing a 'metric_components' class method. "
                           "It should return the list of names of metric components."
        )

    def __init__(self, **kwargs):
        super(BaseMetric, self).__init__()
        self.metric_name_ = self.__class__.metric_name()
        self.components_: Set[str] = set(self.__class__.metric_components())
        self.reset()

    def init_components(self):
        return {value: 0.0 for value in self.components_}

    def reset(self):
        """Reset accumulated components and metric values"""
        self.accumulated_: Details = dict()
        self.results_: List = list()
        for value in self.components_:
            self.accumulated_[value] = 0.0

    @property
    def name(self):
        """Metric name."""
        return self.metric_name()

    # TODO: use joblib/locky to allow parallel processing?
    # TODO: signature could be something like __call__(self, reference_iterator, hypothesis_iterator, ...)

    def __call__(self, reference: Union[Timeline, Annotation],
                 hypothesis: Union[Timeline, Annotation],
                 detailed: bool = False, uri: Optional[str] = None, **kwargs):
        """Compute metric value and accumulate components

        Parameters
        ----------
        reference : type depends on the metric
            Manual `reference`
        hypothesis : type depends on the metric
            Evaluated `hypothesis`
        uri : optional
            Override uri.
        detailed : bool, optional
            By default (False), return metric value only.
            Set `detailed` to True to return dictionary where keys are
            components names and values are component values

        Returns
        -------
        value : float (if `detailed` is False)
            Metric value
        components : dict (if `detailed` is True)
            `components` updated with metric value
        """

        # compute metric components
        components = self.compute_components(reference, hypothesis, **kwargs)

        # compute rate based on components
        components[self.metric_name_] = self.compute_metric(components)

        # keep track of this computation
        uri = uri or getattr(reference, "uri", "NA")
        self.results_.append((uri, components))

        # accumulate components
        for name in self.components_:
            self.accumulated_[name] += components[name]

        if detailed:
            return components

        return components[self.metric_name_]

    def report(self, display: bool = False) -> pd.DataFrame:
        """Evaluation report

        Parameters
        ----------
        display : bool, optional
            Set to True to print the report to stdout.

        Returns
        -------
        report : pandas.DataFrame
            Dataframe with one column per metric component, one row per
            evaluated item, and one final row for accumulated results.
        """

        report = []
        uris = []

        percent = "total" in self.metric_components()

        for uri, components in self.results_:
            row = {}
            if percent:
                total = components["total"]
            for key, value in components.items():
                if key == self.name:
                    row[key, "%"] = 100 * value
                elif key == "total":
                    row[key, ""] = value
                else:
                    row[key, ""] = value
                    if percent:
                        if total > 0:
                            row[key, "%"] = 100 * value / total
                        else:
                            row[key, "%"] = np.NaN

            report.append(row)
            uris.append(uri)

        row = {}
        components = self.accumulated_

        if percent:
            total = components["total"]

        for key, value in components.items():
            if key == self.name:
                row[key, "%"] = 100 * value
            elif key == "total":
                row[key, ""] = value
            else:
                row[key, ""] = value
                if percent:
                    if total > 0:
                        row[key, "%"] = 100 * value / total
                    else:
                        row[key, "%"] = np.NaN

        row[self.name, "%"] = 100 * abs(self)
        report.append(row)
        uris.append("TOTAL")

        df = pd.DataFrame(report)

        df["item"] = uris
        df = df.set_index("item")

        df.columns = pd.MultiIndex.from_tuples(df.columns)

        df = df[[self.name] + self.metric_components()]

        if display:
            print(
                df.to_string(
                    index=True,
                    sparsify=False,
                    justify="right",
                    float_format=lambda f: "{0:.2f}".format(f),
                )
            )

        return df

    def __str__(self):
        report = self.report(display=False)
        return report.to_string(
            sparsify=False, float_format=lambda f: "{0:.2f}".format(f)
        )

    def __abs__(self):
        """Compute metric value from accumulated components"""
        return self.compute_metric(self.accumulated_)

    def __getitem__(self, component: str) -> Union[float, Details]:
        """Get value of accumulated `component`.

        Parameters
        ----------
        component : str
            Name of `component`

        Returns
        -------
        value : type depends on the metric
            Value of accumulated `component`

        """
        if component == slice(None, None, None):
            return dict(self.accumulated_)
        else:
            return self.accumulated_[component]

    def __iter__(self):
        """Iterator over the accumulated (uri, value)"""
        for uri, component in self.results_:
            yield uri, component

    def compute_components(self,
                           reference: Union[Timeline, Annotation],
                           hypothesis: Union[Timeline, Annotation],
                           **kwargs) -> Details:
        """Compute metric components

        Parameters
        ----------
        reference : type depends on the metric
            Manual `reference`
        hypothesis : same as `reference`
            Evaluated `hypothesis`

        Returns
        -------
        components : dict
            Dictionary where keys are component names and values are component
            values

        """
        raise NotImplementedError(
            self.__class__.__name__ + " is missing a 'compute_components' method."
                                      "It should return a dictionary where keys are component names "
                                      "and values are component values."
        )

    def compute_metric(self, components: Details):
        """Compute metric value from computed `components`

        Parameters
        ----------
        components : dict
            Dictionary where keys are components names and values are component
            values

        Returns
        -------
        value : type depends on the metric
            Metric value
        """
        raise NotImplementedError(
            self.__class__.__name__ + " is missing a 'compute_metric' method. "
                                      "It should return the actual value of the metric based "
                                      "on the precomputed component dictionary given as input."
        )

    def confidence_interval(self, alpha: float = 0.9) \
            -> Tuple[float, Tuple[float, float]]:
        """Compute confidence interval on accumulated metric values

        Parameters
        ----------
        alpha : float, optional
            Probability that the returned confidence interval contains
            the true metric value.

        Returns
        -------
        (center, (lower, upper))
            with center the mean of the conditional pdf of the metric value
            and (lower, upper) is a confidence interval centered on the median,
            containing the estimate to a probability alpha.

        See Also:
        ---------
        scipy.stats.bayes_mvs

        """

        values = [r[self.metric_name_] for _, r in self.results_]

        if len(values) == 0:
            raise ValueError("Please evaluate a bunch of files before computing confidence interval.")
        
        elif len(values) == 1:
            warnings.warn("Cannot compute a reliable confidence interval out of just one file.")
            center = lower = upper = values[0]
            return center, (lower, upper)
        
        else:
            return scipy.stats.bayes_mvs(values, alpha=alpha)[0]


PRECISION_NAME = "precision"
PRECISION_RETRIEVED = "# retrieved"
PRECISION_RELEVANT_RETRIEVED = "# relevant retrieved"


class Precision(BaseMetric):
    """
    :class:`Precision` is a base class for precision-like evaluation metrics.

    It defines two components '# retrieved' and '# relevant retrieved' and the
    compute_metric() method to compute the actual precision:

        Precision = # retrieved / # relevant retrieved

    Inheriting classes must implement compute_components().
    """

    @classmethod
    def metric_name(cls):
        return PRECISION_NAME

    @classmethod
    def metric_components(cls) -> MetricComponents:
        return [PRECISION_RETRIEVED, PRECISION_RELEVANT_RETRIEVED]

    def compute_metric(self, components: Details) -> float:
        """Compute precision from `components`"""
        numerator = components[PRECISION_RELEVANT_RETRIEVED]
        denominator = components[PRECISION_RETRIEVED]
        if denominator == 0.0:
            if numerator == 0:
                return 1.0
            else:
                raise ValueError("")
        else:
            return numerator / denominator


RECALL_NAME = "recall"
RECALL_RELEVANT = "# relevant"
RECALL_RELEVANT_RETRIEVED = "# relevant retrieved"


class Recall(BaseMetric):
    """
    :class:`Recall` is a base class for recall-like evaluation metrics.

    It defines two components '# relevant' and '# relevant retrieved' and the
    compute_metric() method to compute the actual recall:

        Recall = # relevant retrieved / # relevant

    Inheriting classes must implement compute_components().
    """

    @classmethod
    def metric_name(cls):
        return RECALL_NAME

    @classmethod
    def metric_components(cls) -> MetricComponents:
        return [RECALL_RELEVANT, RECALL_RELEVANT_RETRIEVED]

    def compute_metric(self, components: Details) -> float:
        """Compute recall from `components`"""
        numerator = components[RECALL_RELEVANT_RETRIEVED]
        denominator = components[RECALL_RELEVANT]
        if denominator == 0.0:
            if numerator == 0:
                return 1.0
            else:
                raise ValueError("")
        else:
            return numerator / denominator


def f_measure(precision: float, recall: float, beta=1.0) -> float:
    """Compute f-measure

    f-measure is defined as follows:
        F(P, R, b) = (1+b²).P.R / (b².P + R)

    where P is `precision`, R is `recall` and b is `beta`
    """
    if precision + recall == 0.0:
        return 0
    return (1 + beta * beta) * precision * recall / (beta * beta * precision + recall)