File size: 10,687 Bytes

8c838e7

#!/usr/bin/env python
# encoding: utf-8

# The MIT License (MIT)

# Copyright (c) 2016-2020 CNRS

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# AUTHORS
# Hervé BREDIN - http://herve.niderb.fr


import warnings
import pandas as pd
from pyannote_audio_utils.core import Segment, Timeline, Annotation

from typing import Text

DatabaseName = Text
PathTemplate = Text


def get_unique_identifier(item):
    """Return unique item identifier

    The complete format is {database}/{uri}_{channel}:
    * prefixed by "{database}/" only when `item` has a 'database' key.
    * suffixed by "_{channel}" only when `item` has a 'channel' key.

    Parameters
    ----------
    item : dict
        Item as yielded by pyannote_audio_utils.database protocols

    Returns
    -------
    identifier : str
        Unique item identifier
    """

    IDENTIFIER = ""

    # {database}/{uri}_{channel}
    database = item.get("database", None)
    if database is not None:
        IDENTIFIER += f"{database}/"
    IDENTIFIER += item["uri"]
    channel = item.get("channel", None)
    if channel is not None:
        IDENTIFIER += f"_{channel:d}"

    return IDENTIFIER


# This function is used in custom.py
def get_annotated(current_file):
    """Get part of the file that is annotated.

        Parameters
        ----------
        current_file : `dict`
            File generated by a `pyannote_audio_utils.database` protocol.

        Returns
        -------
        annotated : `pyannote_audio_utils.core.Timeline`
            Part of the file that is annotated. Defaults to
            `current_file["annotated"]`. When it does not exist, try to use the
            full audio extent. When that fails, use "annotation" extent.
    """

    # if protocol provides 'annotated' key, use it
    if "annotated" in current_file:
        annotated = current_file["annotated"]
        return annotated

    # if it does not, but does provide 'audio' key
    # try and use wav duration

    if "duration" in current_file:
        try:
            duration = current_file["duration"]
        except ImportError:
            pass
        else:
            annotated = Timeline([Segment(0, duration)])
            msg = '"annotated" was approximated by [0, audio duration].'
            warnings.warn(msg)
            return annotated

    extent = current_file["annotation"].get_timeline().extent()
    annotated = Timeline([extent])

    msg = (
        '"annotated" was approximated by "annotation" extent. '
        'Please provide "annotated" directly, or at the very '
        'least, use a "duration" preprocessor.'
    )
    warnings.warn(msg)

    return annotated


def get_label_identifier(label, current_file):
    """Return unique label identifier

    Parameters
    ----------
    label : str
        Database-internal label
    current_file
        Yielded by pyannote_audio_utils.database protocols

    Returns
    -------
    unique_label : str
        Global label
    """

    # TODO. when the "true" name of a person is used,
    # do not preprend database name.
    database = current_file["database"]
    return database + "|" + label


def load_rttm(file_rttm, keep_type="SPEAKER"):
    """Load RTTM file

    Parameter
    ---------
    file_rttm : `str`
        Path to RTTM file.
    keep_type : str, optional
        Only keep lines with this type (field #1 in RTTM specs).
        Defaults to "SPEAKER".

    Returns
    -------
    annotations : `dict`
        Speaker diarization as a {uri: pyannote_audio_utils.core.Annotation} dictionary.
    """

    names = [
        "type",
        "uri",
        "NA2",
        "start",
        "duration",
        "NA3",
        "NA4",
        "speaker",
        "NA5",
        "NA6",
    ]
    dtype = {"uri": str, "start": float, "duration": float, "speaker": str}
    data = pd.read_csv(
        file_rttm,
        names=names,
        dtype=dtype,
        # delim_whitespace=True,
        sep='\s+',
        keep_default_na=True,
    )

    annotations = dict()
    for uri, turns in data.groupby("uri"):
        annotation = Annotation(uri=uri)
        for i, turn in turns.iterrows():
            if turn.type != keep_type:
                continue
            segment = Segment(turn.start, turn.start + turn.duration)
            annotation[segment, i] = turn.speaker
        annotations[uri] = annotation

    return annotations


def load_stm(file_stm):
    """Load STM file (speaker-info only)

    Parameter
    ---------
    file_stm : str
        Path to STM file

    Returns
    -------
    annotations : `dict`
        Speaker diarization as a {uri: pyannote_audio_utils.core.Annotation} dictionary.
    """

    dtype = {"uri": str, "speaker": str, "start": float, "end": float}
    data = pd.read_csv(
        file_stm,
        # delim_whitespace=True,
        sep='\s+',
        usecols=[0, 2, 3, 4],
        dtype=dtype,
        names=list(dtype),
    )

    annotations = dict()
    for uri, turns in data.groupby("uri"):
        annotation = Annotation(uri=uri)
        for i, turn in turns.iterrows():
            segment = Segment(turn.start, turn.end)
            annotation[segment, i] = turn.speaker
        annotations[uri] = annotation

    return annotations


def load_mdtm(file_mdtm):
    """Load MDTM file

    Parameter
    ---------
    file_mdtm : `str`
        Path to MDTM file.

    Returns
    -------
    annotations : `dict`
        Speaker diarization as a {uri: pyannote_audio_utils.core.Annotation} dictionary.
    """

    names = ["uri", "NA1", "start", "duration", "NA2", "NA3", "NA4", "speaker"]
    dtype = {"uri": str, "start": float, "duration": float, "speaker": str}
    data = pd.read_csv(
        file_mdtm,
        names=names,
        dtype=dtype,
        # delim_whitespace=True,
        sep='\s+',
        keep_default_na=False,
    )

    annotations = dict()
    for uri, turns in data.groupby("uri"):
        annotation = Annotation(uri=uri)
        for i, turn in turns.iterrows():
            segment = Segment(turn.start, turn.start + turn.duration)
            annotation[segment, i] = turn.speaker
        annotations[uri] = annotation

    return annotations


def load_uem(file_uem):
    """Load UEM file

    Parameter
    ---------
    file_uem : `str`
        Path to UEM file.

    Returns
    -------
    timelines : `dict`
        Evaluation map as a {uri: pyannote_audio_utils.core.Timeline} dictionary.
    """

    names = ["uri", "NA1", "start", "end"]
    dtype = {"uri": str, "start": float, "end": float}
    data = pd.read_csv(file_uem, names=names, dtype=dtype, sep='\s+',)

    timelines = dict()
    for uri, parts in data.groupby("uri"):
        segments = [Segment(part.start, part.end) for i, part in parts.iterrows()]
        timelines[uri] = Timeline(segments=segments, uri=uri)

    return timelines


def load_lab(path, uri: str = None) -> Annotation:
    """Load LAB file

    Parameter
    ---------
    file_lab : `str`
        Path to LAB file

    Returns
    -------
    data : `pyannote_audio_utils.core.Annotation`
    """

    names = ["start", "end", "label"]
    dtype = {"start": float, "end": float, "label": str}
    data = pd.read_csv(path, names=names, dtype=dtype, sep='\s+',)

    annotation = Annotation(uri=uri)
    for i, turn in data.iterrows():
        segment = Segment(turn.start, turn.end)
        annotation[segment, i] = turn.label

    return annotation


def load_lst(file_lst):
    """Load LST file

    LST files provide a list of URIs (one line per URI)

    Parameter
    ---------
    file_lst : `str`
        Path to LST file.

    Returns
    -------
    uris : `list`
        List or uris
    """

    with open(file_lst, mode="r") as fp:
        lines = fp.readlines()
    return [line.strip() for line in lines]


def load_mapping(mapping_txt):
    """Load mapping file

    Parameter
    ---------
    mapping_txt : `str`
        Path to mapping file

    Returns
    -------
    mapping : `dict`
        {1st field: 2nd field} dictionary
    """

    with open(mapping_txt, mode="r") as fp:
        lines = fp.readlines()

    mapping = dict()
    for line in lines:
        key, value, *left = line.strip().split()
        mapping[key] = value

    return mapping


class LabelMapper(object):
    """Label mapper for use as pyannote_audio_utils.database preprocessor

    Parameters
    ----------
    mapping : `dict`
        Mapping dictionary as used in `Annotation.rename_labels()`.
    keep_missing : `bool`, optional
        In case a label has no mapping, a `ValueError` will be raised.
        Set "keep_missing" to True to keep those labels unchanged instead.

    Usage
    -----
    >>> mapping = {'Hadrien': 'MAL', 'Marvin': 'MAL',
    ...            'Wassim': 'CHI', 'Herve': 'GOD'}
    >>> preprocessors = {'annotation': LabelMapper(mapping=mapping)}
    >>> protocol = registry.get_protocol('AMI.SpeakerDiarization.MixHeadset',
                                preprocessors=preprocessors)

    """

    def __init__(self, mapping, keep_missing=False):
        self.mapping = mapping
        self.keep_missing = keep_missing

    def __call__(self, current_file):

        if not self.keep_missing:
            missing = set(current_file["annotation"].labels()) - set(self.mapping)
            if missing and not self.keep_missing:
                label = missing.pop()
                msg = (
                    f'No mapping found for label "{label}". Set "keep_missing" '
                    f"to True to keep labels with no mapping."
                )
                raise ValueError(msg)

        return current_file["annotation"].rename_labels(mapping=self.mapping)