# File size: 11,195 Bytes
# 8c838e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
#!/usr/bin/env python
# encoding: utf-8

# The MIT License (MIT)

# Copyright (c) 2014-2019 CNRS

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# AUTHORS
# Hervé BREDIN - http://herve.niderb.fr


"""
########
Features
########

See :class:`pyannote_audio_utils.core.SlidingWindowFeature` for the complete reference.
"""
import numbers
import warnings
from typing import Tuple, Optional, Union, Iterator, List, Text

import numpy as np

from pyannote_audio_utils.core.utils.types import Alignment
from .segment import Segment
from .segment import SlidingWindow
from .timeline import Timeline


class SlidingWindowFeature(np.lib.mixins.NDArrayOperatorsMixin):
    """Periodic feature vectors

    Pairs a numpy array, whose first axis indexes frames, with the sliding
    window that gives the temporal position of each frame. Arithmetic and
    numpy ufuncs are supported through ``NDArrayOperatorsMixin`` and
    ``__array_ufunc__``: they are applied to the underlying ``data`` and the
    result is wrapped again with the same sliding window and labels.

    Parameters
    ----------
    data : (n_frames, n_features) numpy array
    sliding_window : SlidingWindow
    labels : list, optional
        Textual description of each dimension.
    """

    def __init__(
        self,
        data: np.ndarray,
        sliding_window: SlidingWindow,
        labels: Optional[List[Text]] = None,
    ):
        self.sliding_window: SlidingWindow = sliding_window
        self.data = data
        self.labels = labels
        # cursor used by the (stateful) __iter__ / __next__ protocol
        self.__i: int = -1

    def __len__(self):
        """Number of feature vectors"""
        return self.data.shape[0]

    @property
    def extent(self):
        """Extent of all frames

        Result of ``sliding_window.range_to_segment(0, len(self))``.
        """
        return self.sliding_window.range_to_segment(0, len(self))

    @property
    def dimension(self):
        """Dimension of feature vectors"""
        return self.data.shape[1]

    def getNumber(self):
        """Deprecated: use ``len(features)`` instead."""
        # stacklevel=2 so that the warning points at the caller's line
        warnings.warn(
            "This is deprecated in favor of `__len__`",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.data.shape[0]

    def getDimension(self):
        """Deprecated: use the ``dimension`` property instead."""
        warnings.warn(
            "This is deprecated in favor of `dimension` property",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.dimension

    def getExtent(self):
        """Deprecated: use the ``extent`` property instead."""
        warnings.warn(
            "This is deprecated in favor of `extent` property",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.extent

    def __getitem__(self, i: int) -> np.ndarray:
        """Get ith feature vector"""
        return self.data[i]

    def __iter__(self):
        """Restart iteration over (window, feature vector) pairs

        The instance is its own iterator: the cursor is stored on the
        instance, so two simultaneous iterations over the same instance
        share (and disturb) each other's position.
        """
        self.__i = -1
        return self

    def __next__(self) -> Tuple[Segment, np.ndarray]:
        """Return the next (window, feature vector) pair"""
        self.__i += 1
        try:
            return self.sliding_window[self.__i], self.data[self.__i]
        except IndexError:
            # running past the last frame ends the iteration
            raise StopIteration()

    def next(self):
        """Alias of ``__next__``."""
        return self.__next__()

    def iterfeatures(
        self, window: Optional[bool] = False
    ) -> Iterator[Union[Tuple[np.ndarray, Segment], np.ndarray]]:
        """Feature vector iterator

        Parameters
        ----------
        window : bool, optional
            When True, yield both feature vector and corresponding window.
            Default is to only yield feature vector

        """
        for i, vector in enumerate(self.data):
            if window:
                yield vector, self.sliding_window[i]
            else:
                yield vector

    def crop(
        self,
        focus: Union[Segment, Timeline],
        mode: Alignment = "loose",
        fixed: Optional[float] = None,
        return_data: bool = True,
    ) -> Union[np.ndarray, "SlidingWindowFeature"]:
        """Extract frames

        Parameters
        ----------
        focus : Segment or Timeline
        mode : {'loose', 'strict', 'center'}, optional
            In 'strict' mode, only frames fully included in 'focus' support are
            returned. In 'loose' mode, any intersecting frames are returned. In
            'center' mode, first and last frames are chosen to be the ones
            whose centers are the closest to 'focus' start and end times.
            Defaults to 'loose'.
        fixed : float, optional
            Overrides `Segment` 'focus' duration and ensures that the number of
            returned frames is fixed (which might otherwise not be the case
            because of rounding errors).
        return_data : bool, optional
            Return a numpy array (default). For `Segment` 'focus', setting it
            to False will return a `SlidingWindowFeature` instance.

        Returns
        -------
        data : `numpy.ndarray` or `SlidingWindowFeature`
            Frame features. When 'focus' does not overlap any frame, the
            result is empty (zero frames) and keeps the dtype of the
            original data.

        Raises
        ------
        ValueError
            If `return_data` is False and either `focus` is not a `Segment`
            or `fixed` is set.

        See also
        --------
        SlidingWindow.crop

        """

        if (not return_data) and (not isinstance(focus, Segment)):
            msg = (
                '"focus" must be a "Segment" instance when "return_data" '
                "is set to False."
            )
            raise ValueError(msg)

        if (not return_data) and (fixed is not None):
            msg = '"fixed" cannot be set when "return_data" is set to False.'
            raise ValueError(msg)

        ranges = self.sliding_window.crop(
            focus, mode=mode, fixed=fixed, return_ranges=True
        )

        # total number of samples in features
        n_samples = self.data.shape[0]

        # 1 for vector features (e.g. MFCC in pyannote_audio_utils.audio)
        # 2 for matrix features (e.g. grey-level frames in pyannote_audio_utils.video)
        # 3 for 3rd order tensor (e.g. RBG frames in pyannote_audio_utils.video)
        n_dimensions = len(self.data.shape) - 1

        # clip ranges
        clipped_ranges, repeat_first, repeat_last = [], 0, 0
        # first requested frame index, kept to anchor an empty result in time
        first_requested = None
        for start, end in ranges:
            if first_requested is None:
                first_requested = start
            # count number of requested samples before first sample
            repeat_first += min(end, 0) - min(start, 0)
            # count number of requested samples after last sample
            repeat_last += max(end, n_samples) - max(start, n_samples)
            # if all requested samples are out of bounds, skip
            if end < 0 or start >= n_samples:
                continue
            # keep track of clipped ranges
            clipped_ranges.append([max(start, 0), min(end, n_samples)])

        if clipped_ranges:
            data = np.vstack([self.data[start:end, :] for start, end in clipped_ranges])
        else:
            # if all ranges are out of bounds, just return empty data
            # (same dtype as the features, so that padding or stacking it
            # later does not silently upcast to float64)
            shape = (0,) + self.data.shape[1:]
            data = np.empty(shape, dtype=self.data.dtype)

        # corner case when "fixed" duration cropping is requested:
        # correct number of samples even with out-of-bounds indices
        if fixed is not None:
            data = np.vstack(
                [
                    # repeat first sample as many times as needed
                    np.tile(self.data[0], (repeat_first,) + (1,) * n_dimensions),
                    data,
                    # repeat last sample as many times as needed
                    np.tile(
                        self.data[n_samples - 1], (repeat_last,) + (1,) * n_dimensions
                    ),
                ]
            )

        # return data
        if return_data:
            return data

        # index of the frame the returned features start at
        if clipped_ranges:
            first_index = clipped_ranges[0][0]
        else:
            # nothing overlaps: anchor the (empty) result on the valid frame
            # closest to the requested one instead of failing on
            # `clipped_ranges[0]`
            requested = 0 if first_requested is None else first_requested
            first_index = min(max(requested, 0), max(n_samples - 1, 0))

        # wrap data in a SlidingWindowFeature and return
        sliding_window = SlidingWindow(
            start=self.sliding_window[first_index].start,
            duration=self.sliding_window.duration,
            step=self.sliding_window.step,
        )

        return SlidingWindowFeature(data, sliding_window, labels=self.labels)

    def _repr_png_(self):
        """PNG representation, or None (with a warning) without matplotlib"""
        from .notebook import MATPLOTLIB_IS_AVAILABLE, MATPLOTLIB_WARNING

        if not MATPLOTLIB_IS_AVAILABLE:
            warnings.warn(MATPLOTLIB_WARNING.format(klass=self.__class__.__name__))
            return None

        from .notebook import repr_feature

        return repr_feature(self)

    # operand types (besides SlidingWindowFeature) accepted by __array_ufunc__
    _HANDLED_TYPES = (np.ndarray, numbers.Number)

    def __array__(self, dtype=None, copy: Optional[bool] = None) -> np.ndarray:
        """Convert to a numpy array (numpy ``__array__`` protocol)

        Parameters
        ----------
        dtype : data-type, optional
            Requested dtype. The underlying data is converted when it does
            not already have this dtype.
        copy : bool, optional
            True forces a copy; False forbids one. With the default (None),
            the underlying array itself is returned unless a dtype conversion
            is needed.

        Returns
        -------
        data : numpy.ndarray

        Raises
        ------
        ValueError
            If `copy` is False but a dtype conversion (hence a copy) is needed.
        """
        data = self.data
        if dtype is not None and np.dtype(dtype) != data.dtype:
            if copy is False:
                raise ValueError(
                    "Unable to avoid copy while converting features to the "
                    "requested dtype."
                )
            # astype always allocates a new array
            return data.astype(dtype)
        return data.copy() if copy else data

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        """Apply a numpy ufunc to the underlying data and re-wrap the result"""
        out = kwargs.get("out", ())
        for x in inputs + out:
            # Only support operations with instances of _HANDLED_TYPES.
            # Use SlidingWindowFeature instead of type(self) for isinstance to
            # allow subclasses that don't override __array_ufunc__ to
            # handle SlidingWindowFeature objects.
            if not isinstance(x, self._HANDLED_TYPES + (SlidingWindowFeature,)):
                return NotImplemented

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(
            x.data if isinstance(x, SlidingWindowFeature) else x for x in inputs
        )
        if out:
            kwargs["out"] = tuple(
                x.data if isinstance(x, SlidingWindowFeature) else x for x in out
            )
        data = getattr(ufunc, method)(*inputs, **kwargs)

        if type(data) is tuple:
            # multiple return values
            return tuple(
                type(self)(x, self.sliding_window, labels=self.labels) for x in data
            )
        elif method == "at":
            # no return value
            return None
        else:
            # one return value
            return type(self)(data, self.sliding_window, labels=self.labels)

    def align(self, to: "SlidingWindowFeature") -> "SlidingWindowFeature":
        """Align features by linear temporal interpolation

        Each feature dimension is interpolated independently (with
        `numpy.interp`) from the centers of this instance's frames to the
        centers of the frames of `to`.

        Parameters
        ----------
        to : SlidingWindowFeature
            Features to align with.

        Returns
        -------
        aligned : SlidingWindowFeature
            Aligned features
        """

        # center time of each of our own frames
        old_start = self.sliding_window.start
        old_step = self.sliding_window.step
        old_duration = self.sliding_window.duration
        old_samples = len(self)
        old_t = old_start + 0.5 * old_duration + np.arange(old_samples) * old_step

        # center time of each target frame
        new_start = to.sliding_window.start
        new_step = to.sliding_window.step
        new_duration = to.sliding_window.duration
        new_samples = len(to)
        new_t = new_start + 0.5 * new_duration + np.arange(new_samples) * new_step

        # self.data.T iterates over feature dimensions, one 1-D series each
        new_data = np.hstack(
            [
                np.interp(new_t, old_t, old_data)[:, np.newaxis]
                for old_data in self.data.T
            ]
        )
        return SlidingWindowFeature(new_data, to.sliding_window, labels=self.labels)


if __name__ == "__main__":
    # When executed as a script, run the doctests found in this module.
    from doctest import testmod

    testmod()