Prompt48 commited on
Commit
497f2ee
·
verified ·
1 Parent(s): ec4b898

Upload edit\Qwen3-TTS-test\.venv\Lib\site-packages\librosa\onset.py with huggingface_hub

Browse files
edit//Qwen3-TTS-test//.venv//Lib//site-packages//librosa//onset.py ADDED
@@ -0,0 +1,641 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Onset detection
5
+ ===============
6
+ .. autosummary::
7
+ :toctree: generated/
8
+
9
+ onset_detect
10
+ onset_backtrack
11
+ onset_strength
12
+ onset_strength_multi
13
+ """
14
+
15
+ import numpy as np
16
+ import scipy
17
+
18
+ from ._cache import cache
19
+ from . import core
20
+ from . import util
21
+ from .util.exceptions import ParameterError
22
+
23
+ from .feature.spectral import melspectrogram
24
+ from typing import Any, Callable, Optional, Union, Sequence
25
+
26
+ __all__ = ["onset_detect", "onset_strength", "onset_strength_multi", "onset_backtrack"]
27
+
28
+
29
def onset_detect(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    hop_length: int = 512,
    backtrack: bool = False,
    energy: Optional[np.ndarray] = None,
    units: str = "frames",
    normalize: bool = True,
    sparse: bool = True,
    **kwargs: Any,
) -> np.ndarray:
    """Locate note onset events by picking peaks in an onset strength envelope.

    The `peak_pick` parameters were chosen by large-scale hyper-parameter
    optimization over the dataset provided by [#]_.

    .. [#] https://github.com/CPJKU/onset_db

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)]
        audio time-series. Multi-channel is supported.

    sr : number > 0 [scalar]
        sampling rate of ``y``

    onset_envelope : np.ndarray [shape=(..., m)]
        (optional) pre-computed onset strength envelope

    hop_length : int > 0 [scalar]
        hop length (in samples)

    units : {'frames', 'samples', 'time'}
        The units to encode detected onset events in.
        By default, 'frames' are used.

        This is only consulted when ``sparse=True``.

    backtrack : bool
        If ``True``, detected onset events are backtracked to the nearest
        preceding minimum of ``energy``.

        This is primarily useful when using onsets as slice points for segmentation.

        .. note:: backtracking is only supported if ``sparse=True``.

    energy : np.ndarray [shape=(m,)] (optional)
        An energy function to use for backtracking detected onset events.
        If none is provided, then ``onset_envelope`` is used.

    normalize : bool
        If ``True`` (default), normalize the onset envelope to have minimum of 0 and
        maximum of 1 prior to detection.  This is helpful for standardizing the
        parameters of `librosa.util.peak_pick`.

        Otherwise, the onset envelope is left unnormalized.

    sparse : bool
        If ``True`` (default), detections are returned as an array of frames,
        samples, or time indices (as specified by ``units=``).

        If ``False``, detections are encoded as a dense boolean array where
        ``onsets[n]`` is True if there's an onset at frame index ``n``.

        .. note:: multi-channel input is only supported if ``sparse=False``.

    **kwargs : additional keyword arguments
        Additional parameters for peak picking.

        See `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets : np.ndarray [shape=(n_onsets,) or onset_envelope.shape]
        estimated positions of detected onsets, in whichever units
        are specified.  By default, frame indices.

        If `sparse=False`, `onsets[..., n]` indicates an onset
        detection at frame index `n`.

        .. note::
            If no onset strength could be detected, onset_detect returns
            an empty array (sparse=True) or all-False array (sparse=False).

    Raises
    ------
    ParameterError
        if neither ``y`` nor ``onset_envelope`` are provided

        or if ``units`` is not one of 'frames', 'samples', or 'time'

        or if ``backtrack=True`` is combined with ``sparse=False``

    See Also
    --------
    onset_strength : compute onset strength per-frame
    onset_backtrack : backtracking onset events
    librosa.util.peak_pick : pick peaks from a time series

    Examples
    --------
    Get onset times from a signal

    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> librosa.onset.onset_detect(y=y, sr=sr, units='time')
    array([0.07 , 0.232, 0.395, 0.604, 0.743, 0.929, 1.045, 1.115,
           1.416, 1.672, 1.881, 2.043, 2.206, 2.368, 2.554, 3.019])

    Or use a pre-computed onset envelope

    >>> o_env = librosa.onset.onset_strength(y=y, sr=sr)
    >>> times = librosa.times_like(o_env, sr=sr)
    >>> onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)

    >>> import matplotlib.pyplot as plt
    >>> D = np.abs(librosa.stft(y))
    >>> fig, ax = plt.subplots(nrows=2, sharex=True)
    >>> librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
    ...                          x_axis='time', y_axis='log', ax=ax[0], sr=sr)
    >>> ax[0].set(title='Power spectrogram')
    >>> ax[0].label_outer()
    >>> ax[1].plot(times, o_env, label='Onset strength')
    >>> ax[1].vlines(times[onset_frames], 0, o_env.max(), color='r', alpha=0.9,
    ...            linestyle='--', label='Onsets')
    >>> ax[1].legend()
    """
    # First, get the onset strength profile if we don't already have one
    if onset_envelope is None:
        if y is None:
            raise ParameterError("y or onset_envelope must be provided")

        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Shift onset envelope up to be non-negative
    # (a common normalization step to make the threshold more consistent)
    if normalize:
        # Normalize onset strength function to [0, 1] range
        # Normalization is performed over the trailing axis
        onset_envelope = onset_envelope - np.min(onset_envelope, keepdims=True, axis=-1)

        # Max-scale with safe division.  The division is done out-of-place:
        # an in-place `/=` cannot store the floating-point quotient back into
        # an integer-typed envelope and would raise a casting error.
        onset_envelope = onset_envelope / (
            np.max(onset_envelope, keepdims=True, axis=-1) + util.tiny(onset_envelope)
        )

    # help out mypy
    assert onset_envelope is not None

    # Do we have any onsets to grab?
    # An all-zero envelope, or one containing NaN/inf, yields no detections.
    if not onset_envelope.any() or not np.all(np.isfinite(onset_envelope)):
        if sparse:
            onsets = np.array([], dtype=int)
        else:
            onsets = np.zeros_like(onset_envelope, dtype=bool)

    else:
        # These parameter settings found by large-scale search
        kwargs.setdefault("pre_max", 0.03 * sr // hop_length)  # 30ms
        kwargs.setdefault("post_max", 0.00 * sr // hop_length + 1)  # 0ms
        kwargs.setdefault("pre_avg", 0.10 * sr // hop_length)  # 100ms
        kwargs.setdefault("post_avg", 0.10 * sr // hop_length + 1)  # 100ms
        kwargs.setdefault("wait", 0.03 * sr // hop_length)  # 30ms
        kwargs.setdefault("delta", 0.07)

        # Peak pick the onset envelope
        onsets = util.peak_pick(onset_envelope, sparse=sparse, axis=-1, **kwargs)

        # Optionally backtrack the events
        if backtrack:
            if not sparse:
                raise ParameterError("onset backtracking is only supported if sparse=True")

            if energy is None:
                energy = onset_envelope
            assert energy is not None

            # Nothing to roll back if peak picking found no events; skip the
            # call rather than asking the matcher to align an empty list.
            if np.size(onsets) > 0:
                onsets = onset_backtrack(onsets, energy)

    # Unit conversion only makes sense for index-valued (sparse) output
    if sparse:
        if units == "frames":
            pass
        elif units == "samples":
            onsets = core.frames_to_samples(onsets, hop_length=hop_length)
        elif units == "time":
            onsets = core.frames_to_time(onsets, hop_length=hop_length, sr=sr)
        else:
            raise ParameterError(f"Invalid unit type: {units}")

    return onsets
214
+
215
+
216
def onset_strength(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    S: Optional[np.ndarray] = None,
    lag: int = 1,
    max_size: int = 1,
    ref: Optional[np.ndarray] = None,
    detrend: bool = False,
    center: bool = True,
    feature: Optional[Callable] = None,
    aggregate: Optional[Union[Callable, bool]] = None,
    **kwargs: Any,
) -> np.ndarray:
    """Compute a spectral flux onset strength envelope.

    Onset strength at time ``t`` is determined by::

        mean_f max(0, S[f, t] - ref[f, t - lag])

    where ``ref`` is ``S`` after local max filtering along the frequency
    axis [#]_.

    By default, if a time series ``y`` is provided, S will be the
    log-power Mel spectrogram.

    .. [#] Böck, Sebastian, and Gerhard Widmer.
           "Maximum filter vibrato suppression for onset detection."
           16th International Conference on Digital Audio Effects,
           Maynooth, Ireland. 2013.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)]
        audio time-series. Multi-channel is supported.

    sr : number > 0 [scalar]
        sampling rate of ``y``

    S : np.ndarray [shape=(..., d, m)]
        pre-computed (log-power) spectrogram

    lag : int > 0
        time lag for computing differences

    max_size : int > 0
        size (in frequency bins) of the local max filter.
        set to `1` to disable filtering.

    ref : None or np.ndarray [shape=(..., d, m)]
        An optional pre-computed reference spectrum, of the same shape as ``S``.
        If not provided, it will be computed from ``S``.
        If provided, it will override any local max filtering governed by ``max_size``.

    detrend : bool [scalar]
        Filter the onset strength to remove the DC component

    center : bool [scalar]
        Shift the onset function by ``n_fft // (2 * hop_length)`` frames.
        This corresponds to using a centered frame analysis in the short-time Fourier
        transform.

    feature : function
        Function for computing time-series features, eg, scaled spectrograms.
        By default, uses `librosa.feature.melspectrogram` with ``fmax=sr/2``

    aggregate : function
        Aggregation function to use when combining onsets
        at different frequency bins.

        Default: `np.mean`

        ``False`` is rejected here, since a single envelope requires
        aggregation over frequency.

    **kwargs : additional keyword arguments
        Additional parameters to ``feature()``, if ``S`` is not provided.

        All of these are forwarded to `onset_strength_multi`, so its
        ``n_fft`` and ``hop_length`` parameters may also be supplied here.

    Returns
    -------
    onset_envelope : np.ndarray [shape=(..., m,)]
        vector containing the onset strength envelope.
        If the input contains multiple channels, then onset envelope is computed for each channel.

    Raises
    ------
    ParameterError
        if neither ``(y, sr)`` nor ``S`` are provided

        or if ``lag`` or ``max_size`` are not positive integers

        or if ``aggregate`` is ``False``

    See Also
    --------
    onset_detect
    onset_strength_multi

    Examples
    --------
    First, load some audio and plot the spectrogram

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.ex('trumpet'), duration=3)
    >>> D = np.abs(librosa.stft(y))
    >>> times = librosa.times_like(D, sr=sr)
    >>> fig, ax = plt.subplots(nrows=2, sharex=True)
    >>> librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
    ...                          y_axis='log', x_axis='time', ax=ax[0], sr=sr)
    >>> ax[0].set(title='Power spectrogram')
    >>> ax[0].label_outer()

    Construct a standard onset function

    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    >>> ax[1].plot(times, 2 + onset_env / onset_env.max(), alpha=0.8,
    ...            label='Mean (mel)')

    Median aggregation, and custom mel options

    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr,
    ...                                          aggregate=np.median,
    ...                                          fmax=8000, n_mels=256)
    >>> ax[1].plot(times, 1 + onset_env / onset_env.max(), alpha=0.8,
    ...            label='Median (custom mel)')

    Constant-Q spectrogram instead of Mel

    >>> C = np.abs(librosa.cqt(y=y, sr=sr))
    >>> onset_env = librosa.onset.onset_strength(sr=sr, S=librosa.amplitude_to_db(C, ref=np.max))
    >>> ax[1].plot(times, onset_env / onset_env.max(), alpha=0.8,
    ...            label='Mean (CQT)')
    >>> ax[1].legend()
    >>> ax[1].set(ylabel='Normalized strength', yticks=[])
    """
    # Without aggregation the multi-band routine keeps one row per frequency
    # bin, so there would be no single envelope to extract below.
    if aggregate is False:
        raise ParameterError(
            "aggregate parameter cannot be False when computing full-spectrum onset strength."
        )

    # channels=None requests a single band spanning the whole spectrum
    odf_all = onset_strength_multi(
        y=y,
        sr=sr,
        S=S,
        lag=lag,
        max_size=max_size,
        ref=ref,
        detrend=detrend,
        center=center,
        feature=feature,
        aggregate=aggregate,
        channels=None,
        **kwargs,
    )

    # Drop the (length-1) band axis, keeping any leading channel dimensions
    return odf_all[..., 0, :]
367
+
368
+
369
def onset_backtrack(events: np.ndarray, energy: np.ndarray) -> np.ndarray:
    """Backtrack detected onset events to the nearest preceding local
    minimum of an energy function.

    This function can be used to roll back the timing of detected onsets
    from a detected peak amplitude to the preceding minimum.

    This is most useful when using onsets to determine slice points for
    segmentation, as described by [#]_.

    .. [#] Jehan, Tristan.
           "Creating music by listening"
           Doctoral dissertation
           Massachusetts Institute of Technology, 2005.

    Parameters
    ----------
    events : np.ndarray, dtype=int
        List of onset event frame indices, as computed by `onset_detect`
    energy : np.ndarray, shape=(m,)
        An energy function

    Returns
    -------
    events_backtracked : np.ndarray, shape=events.shape
        The input events matched to nearest preceding minima of ``energy``.

        If ``events`` is empty, an empty integer array of the same shape
        is returned.

    Examples
    --------
    Backtrack the events using the onset envelope

    >>> y, sr = librosa.load(librosa.ex('trumpet'), duration=3)
    >>> oenv = librosa.onset.onset_strength(y=y, sr=sr)
    >>> times = librosa.times_like(oenv, sr=sr)
    >>> # Detect events without backtracking
    >>> onset_raw = librosa.onset.onset_detect(onset_envelope=oenv,
    ...                                        backtrack=False)
    >>> onset_bt = librosa.onset.onset_backtrack(onset_raw, oenv)

    Backtrack the events using the RMS values

    >>> S = np.abs(librosa.stft(y=y))
    >>> rms = librosa.feature.rms(S=S)
    >>> onset_bt_rms = librosa.onset.onset_backtrack(onset_raw, rms[0])

    Plot the results

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(nrows=3, sharex=True)
    >>> librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
    ...                          y_axis='log', x_axis='time', ax=ax[0])
    >>> ax[0].label_outer()
    >>> ax[1].plot(times, oenv, label='Onset strength')
    >>> ax[1].vlines(librosa.frames_to_time(onset_raw), 0, oenv.max(), label='Raw onsets')
    >>> ax[1].vlines(librosa.frames_to_time(onset_bt), 0, oenv.max(), label='Backtracked', color='r')
    >>> ax[1].legend()
    >>> ax[1].label_outer()
    >>> ax[2].plot(times, rms[0], label='RMS')
    >>> ax[2].vlines(librosa.frames_to_time(onset_bt_rms), 0, rms.max(), label='Backtracked (RMS)', color='r')
    >>> ax[2].legend()
    """
    # No events means nothing to match: answer directly instead of handing
    # an empty list to the event matcher.
    if np.size(events) == 0:
        return np.zeros(np.shape(events), dtype=int)

    # Find points where energy is non-increasing
    # all points:  energy[i] <= energy[i-1]
    # tail points: energy[i] < energy[i+1]
    minima = np.flatnonzero((energy[1:-1] <= energy[:-2]) & (energy[1:-1] < energy[2:]))

    # Pad on a 0, just in case we have onsets with no preceding minimum
    # Shift by one to account for slicing in minima detection
    minima = util.fix_frames(1 + minima, x_min=0)

    # Only match going left from the detected events
    results: np.ndarray = minima[util.match_events(events, minima, right=False)]
    return results
442
+
443
+
444
@cache(level=30)
def onset_strength_multi(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    S: Optional[np.ndarray] = None,
    n_fft: int = 2048,
    hop_length: int = 512,
    lag: int = 1,
    max_size: int = 1,
    ref: Optional[np.ndarray] = None,
    detrend: bool = False,
    center: bool = True,
    feature: Optional[Callable] = None,
    aggregate: Optional[Union[Callable, bool]] = None,
    channels: Optional[Union[Sequence[int], Sequence[slice]]] = None,
    **kwargs: Any,
) -> np.ndarray:
    """Compute a spectral flux onset strength envelope across multiple channels.

    Onset strength for channel ``i`` at time ``t`` is determined by::

        mean_{f in channels[i]} max(0, S[f, t] - ref[f, t - lag])

    where ``ref`` is ``S`` after local max filtering along the frequency
    axis (or ``S`` itself when ``max_size=1``), and the mean may be replaced
    by another function through ``aggregate``.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n,)]
        audio time-series. Multi-channel is supported.

    sr : number > 0 [scalar]
        sampling rate of ``y``

    S : np.ndarray [shape=(..., d, m)]
        pre-computed (log-power) spectrogram

    n_fft : int > 0 [scalar]
        FFT window size for use in ``feature()`` if ``S`` is not provided.

    hop_length : int > 0 [scalar]
        hop length for use in ``feature()`` if ``S`` is not provided.

    lag : int > 0
        time lag for computing differences

    max_size : int > 0
        size (in frequency bins) of the local max filter.
        set to `1` to disable filtering.

    ref : None or np.ndarray [shape=(..., d, m)]
        An optional pre-computed reference spectrum, of the same shape as ``S``.
        If not provided, it will be computed from ``S``.
        If provided, it will override any local max filtering governed by ``max_size``.

    detrend : bool [scalar]
        Filter the onset strength to remove the DC component

    center : bool [scalar]
        Shift the onset function by ``n_fft // (2 * hop_length)`` frames.
        This corresponds to using a centered frame analysis in the short-time Fourier
        transform.

    feature : function
        Function for computing time-series features, eg, scaled spectrograms.
        By default, uses `librosa.feature.melspectrogram` with ``fmax=sr/2``

        Must support arguments: ``y, sr, n_fft, hop_length``

    aggregate : function or False
        Aggregation function to use when combining onsets
        at different frequency bins.

        If ``False``, then no aggregation is performed.

        Default: `np.mean`

    channels : list or None
        Array of channel boundaries or slice objects.
        If `None`, then a single channel is generated to span all bands.

    **kwargs : additional keyword arguments
        Additional parameters to ``feature()``, if ``S`` is not provided.

    Returns
    -------
    onset_envelope : np.ndarray [shape=(..., n_channels, m)]
        array containing the onset strength envelope for each specified channel

    Raises
    ------
    ParameterError
        if neither ``(y, sr)`` nor ``S`` are provided

        or if ``lag`` or ``max_size`` are not positive integers

        or if ``ref`` is provided with a shape different from ``S``

    See Also
    --------
    onset_strength

    Notes
    -----
    This function caches at level 30.

    Examples
    --------
    First, load some audio and plot the spectrogram

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.ex('choice'), duration=5)
    >>> D = np.abs(librosa.stft(y))
    >>> fig, ax = plt.subplots(nrows=2, sharex=True)
    >>> img1 = librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
    ...                                 y_axis='log', x_axis='time', ax=ax[0])
    >>> ax[0].set(title='Power spectrogram')
    >>> ax[0].label_outer()
    >>> fig.colorbar(img1, ax=[ax[0]], format="%+2.f dB")

    Construct a standard onset function over four sub-bands

    >>> onset_subbands = librosa.onset.onset_strength_multi(y=y, sr=sr,
    ...                                                     channels=[0, 32, 64, 96, 128])
    >>> img2 = librosa.display.specshow(onset_subbands, x_axis='time', ax=ax[1])
    >>> ax[1].set(ylabel='Sub-bands', title='Sub-band onset strength')
    >>> fig.colorbar(img2, ax=[ax[1]])
    """
    # The fmax default only applies to the built-in mel feature; a
    # user-supplied feature function receives kwargs untouched.
    if feature is None:
        feature = melspectrogram
        kwargs.setdefault("fmax", 0.5 * sr)

    if aggregate is None:
        aggregate = np.mean

    if not util.is_positive_int(lag):
        raise ParameterError(f"lag={lag} must be a positive integer")

    if not util.is_positive_int(max_size):
        raise ParameterError(f"max_size={max_size} must be a positive integer")

    # First, compute the feature spectrogram if one was not supplied
    if S is None:
        S = np.abs(feature(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, **kwargs))

        # Convert to dBs (a caller-provided S is used as given)
        S = core.power_to_db(S)

    # Assertion to make type checking happy
    assert S is not None

    # Ensure that S is at least 2-d
    S = np.atleast_2d(S)

    # Compute the reference spectrogram.
    # Efficiency hack: skip filtering step and pass by reference
    # if max_size will produce a no-op.
    if ref is None:
        if max_size == 1:
            ref = S
        else:
            ref = scipy.ndimage.maximum_filter1d(S, max_size, axis=-2)
    elif ref.shape != S.shape:
        raise ParameterError(
            f"Reference spectrum shape {ref.shape} must match input spectrum {S.shape}"
        )

    # Compute difference to the reference, spaced by lag
    onset_env = S[..., lag:] - ref[..., :-lag]

    # Discard negatives (decreasing amplitude)
    onset_env = np.maximum(0.0, onset_env)

    # Aggregate within channels.
    # Boundary padding is only requested for the implicit full-band slice;
    # explicit channel specifications are passed through as given.
    pad = True
    if channels is None:
        channels = [slice(None)]
    else:
        pad = False

    # aggregate=False is not callable, which leaves every frequency bin intact
    if callable(aggregate):
        onset_env = util.sync(
            onset_env, channels, aggregate=aggregate, pad=pad, axis=-2
        )

    # compensate for lag: the difference above dropped `lag` leading frames
    pad_width = lag
    if center:
        # Counter-act framing effects. Shift the onsets by n_fft // (2 * hop_length)
        pad_width += n_fft // (2 * hop_length)

    # Zero-pad at the start of the time (last) axis only
    padding = [(0, 0) for _ in onset_env.shape]
    padding[-1] = (int(pad_width), 0)
    onset_env = np.pad(onset_env, padding, mode="constant")

    # remove the DC component
    if detrend:
        onset_env = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], onset_env, axis=-1)

    # Trim to match the input duration (the centering shift added extra frames)
    if center:
        onset_env = onset_env[..., : S.shape[-1]]

    return onset_env