Upload edit\Qwen3-TTS-test\.venv\Lib\site-packages\librosa\onset.py with huggingface_hub

Browse files

Files changed (1) hide show

edit//Qwen3-TTS-test//.venv//Lib//site-packages//librosa//onset.py +641 -0

edit//Qwen3-TTS-test//.venv//Lib//site-packages//librosa//onset.py ADDED Viewed

	@@ -0,0 +1,641 @@

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Onset detection
+===============
+.. autosummary::
+    :toctree: generated/
+    onset_detect
+    onset_backtrack
+    onset_strength
+    onset_strength_multi
+"""
+import numpy as np
+import scipy
+from ._cache import cache
+from . import core
+from . import util
+from .util.exceptions import ParameterError
+from .feature.spectral import melspectrogram
+from typing import Any, Callable, Optional, Union, Sequence
+__all__ = ["onset_detect", "onset_strength", "onset_strength_multi", "onset_backtrack"]
+def onset_detect(
+    *,
+    y: Optional[np.ndarray] = None,
+    sr: float = 22050,
+    onset_envelope: Optional[np.ndarray] = None,
+    hop_length: int = 512,
+    backtrack: bool = False,
+    energy: Optional[np.ndarray] = None,
+    units: str = "frames",
+    normalize: bool = True,
+    sparse: bool = True,
+    **kwargs: Any,
+) -> np.ndarray:
+    """Locate note onset events by picking peaks in an onset strength envelope.
+    The `peak_pick` parameters were chosen by large-scale hyper-parameter
+    optimization over the dataset provided by [#]_.
+    .. [#] https://github.com/CPJKU/onset_db
+    Parameters
+    ----------
+    y : np.ndarray [shape=(..., n)]
+        audio time-series. Multi-channel is supported.
+    sr : number > 0 [scalar]
+        sampling rate of ``y``
+    onset_envelope : np.ndarray [shape=(..., m)]
+        (optional) pre-computed onset strength envelope
+    hop_length : int > 0 [scalar]
+        hop length (in samples)
+    units : {'frames', 'samples', 'time'}
+        The units to encode detected onset events in.
+        By default, 'frames' are used.
+    backtrack : bool
+        If ``True``, detected onset events are backtracked to the nearest
+        preceding minimum of ``energy``.
+        This is primarily useful when using onsets as slice points for segmentation.
+        .. note:: backtracking is only supported if ``sparse=True``.
+    energy : np.ndarray [shape=(m,)] (optional)
+        An energy function to use for backtracking detected onset events.
+        If none is provided, then ``onset_envelope`` is used.
+    normalize : bool
+        If ``True`` (default), normalize the onset envelope to have minimum of 0 and
+        maximum of 1 prior to detection.  This is helpful for standardizing the
+        parameters of `librosa.util.peak_pick`.
+        Otherwise, the onset envelope is left unnormalized.
+    sparse : bool
+        If ``True`` (default), detections are returned as an array of frames,
+        samples, or time indices (as specified by ``units=``).
+        If ``False``, detections are encoded as a dense boolean array where
+        ``onsets[n]`` is True if there's an onset at frame index ``n``.
+        .. note:: multi-channel input is only supported if ``sparse=False``.
+    **kwargs : additional keyword arguments
+        Additional parameters for peak picking.
+        See `librosa.util.peak_pick` for details.
+    Returns
+    -------
+    onsets : np.ndarray [shape=(n_onsets,) or onset_envelope.shape]
+        estimated positions of detected onsets, in whichever units
+        are specified.  By default, frame indices.
+        If `sparse=False`, `onsets[..., n]` indicates an onset
+        detection at frame index `n`.
+        .. note::
+            If no onset strength could be detected, onset_detect returns
+            an empty array (sparse=True) or all-False array (sparse=False).
+    Raises
+    ------
+    ParameterError
+        if neither ``y`` nor ``onsets`` are provided
+        or if ``units`` is not one of 'frames', 'samples', or 'time'
+    See Also
+    --------
+    onset_strength : compute onset strength per-frame
+    onset_backtrack : backtracking onset events
+    librosa.util.peak_pick : pick peaks from a time series
+    Examples
+    --------
+    Get onset times from a signal
+    >>> y, sr = librosa.load(librosa.ex('trumpet'))
+    >>> librosa.onset.onset_detect(y=y, sr=sr, units='time')
+    array([0.07 , 0.232, 0.395, 0.604, 0.743, 0.929, 1.045, 1.115,
+           1.416, 1.672, 1.881, 2.043, 2.206, 2.368, 2.554, 3.019])
+    Or use a pre-computed onset envelope
+    >>> o_env = librosa.onset.onset_strength(y=y, sr=sr)
+    >>> times = librosa.times_like(o_env, sr=sr)
+    >>> onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)
+    >>> import matplotlib.pyplot as plt
+    >>> D = np.abs(librosa.stft(y))
+    >>> fig, ax = plt.subplots(nrows=2, sharex=True)
+    >>> librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
+    ...                          x_axis='time', y_axis='log', ax=ax[0], sr=sr)
+    >>> ax[0].set(title='Power spectrogram')
+    >>> ax[0].label_outer()
+    >>> ax[1].plot(times, o_env, label='Onset strength')
+    >>> ax[1].vlines(times[onset_frames], 0, o_env.max(), color='r', alpha=0.9,
+    ...            linestyle='--', label='Onsets')
+    >>> ax[1].legend()
+    """
+    # First, get the frame->beat strength profile if we don't already have one
+    if onset_envelope is None:
+        if y is None:
+            raise ParameterError("y or onset_envelope must be provided")
+        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)
+    # Shift onset envelope up to be non-negative
+    # (a common normalization step to make the threshold more consistent)
+    if normalize:
+        # Normalize onset strength function to [0, 1] range
+        # Normalization is performed over the trailing axis
+        onset_envelope = onset_envelope - np.min(onset_envelope, keepdims=True, axis=-1)
+        # Mypy does not realize that oenv is not None by now
+        # Max-scale with safe division
+        onset_envelope /= np.max(onset_envelope, keepdims=True, axis=-1) + util.tiny(onset_envelope)  # type: ignore
+    # help out mypy
+    assert onset_envelope is not None
+    # Do we have any onsets to grab?
+    if not onset_envelope.any() or not np.all(np.isfinite(onset_envelope)):
+        if sparse:
+            onsets = np.array([], dtype=int)
+        else:
+            onsets = np.zeros_like(onset_envelope, dtype=bool)
+    else:
+        # These parameter settings found by large-scale search
+        kwargs.setdefault("pre_max", 0.03 * sr // hop_length)  # 30ms
+        kwargs.setdefault("post_max", 0.00 * sr // hop_length + 1)  # 0ms
+        kwargs.setdefault("pre_avg", 0.10 * sr // hop_length)  # 100ms
+        kwargs.setdefault("post_avg", 0.10 * sr // hop_length + 1)  # 100ms
+        kwargs.setdefault("wait", 0.03 * sr // hop_length)  # 30ms
+        kwargs.setdefault("delta", 0.07)
+        # Peak pick the onset envelope
+        onsets = util.peak_pick(onset_envelope, sparse=sparse, axis=-1, **kwargs)
+        # Optionally backtrack the events
+        if backtrack:
+            if not sparse:
+                raise ParameterError("onset backtracking is only supported if sparse=True")
+            if energy is None:
+                energy = onset_envelope
+            assert energy is not None
+            onsets = onset_backtrack(onsets, energy)
+    if sparse:
+        if units == "frames":
+            pass
+        elif units == "samples":
+            onsets = core.frames_to_samples(onsets, hop_length=hop_length)
+        elif units == "time":
+            onsets = core.frames_to_time(onsets, hop_length=hop_length, sr=sr)
+        else:
+            raise ParameterError(f"Invalid unit type: {units}")
+    return onsets
+def onset_strength(
+    *,
+    y: Optional[np.ndarray] = None,
+    sr: float = 22050,
+    S: Optional[np.ndarray] = None,
+    lag: int = 1,
+    max_size: int = 1,
+    ref: Optional[np.ndarray] = None,
+    detrend: bool = False,
+    center: bool = True,
+    feature: Optional[Callable] = None,
+    aggregate: Optional[Union[Callable, bool]] = None,
+    **kwargs: Any,
+) -> np.ndarray:
+    """Compute a spectral flux onset strength envelope.
+    Onset strength at time ``t`` is determined by::
+        mean_f max(0, S[f, t] - ref[f, t - lag])
+    where ``ref`` is ``S`` after local max filtering along the frequency
+    axis [#]_.
+    By default, if a time series ``y`` is provided, S will be the
+    log-power Mel spectrogram.
+    .. [#] Böck, Sebastian, and Gerhard Widmer.
+           "Maximum filter vibrato suppression for onset detection."
+           16th International Conference on Digital Audio Effects,
+           Maynooth, Ireland. 2013.
+    Parameters
+    ----------
+    y : np.ndarray [shape=(..., n)]
+        audio time-series. Multi-channel is supported.
+    sr : number > 0 [scalar]
+        sampling rate of ``y``
+    S : np.ndarray [shape=(..., d, m)]
+        pre-computed (log-power) spectrogram
+    lag : int > 0
+        time lag for computing differences
+    max_size : int > 0
+        size (in frequency bins) of the local max filter.
+        set to `1` to disable filtering.
+    ref : None or np.ndarray [shape=(..., d, m)]
+        An optional pre-computed reference spectrum, of the same shape as ``S``.
+        If not provided, it will be computed from ``S``.
+        If provided, it will override any local max filtering governed by ``max_size``.
+    detrend : bool [scalar]
+        Filter the onset strength to remove the DC component
+    center : bool [scalar]
+        Shift the onset function by ``n_fft // (2 * hop_length)`` frames.
+        This corresponds to using a centered frame analysis in the short-time Fourier
+        transform.
+    feature : function
+        Function for computing time-series features, eg, scaled spectrograms.
+        By default, uses `librosa.feature.melspectrogram` with ``fmax=sr/2``
+    aggregate : function
+        Aggregation function to use when combining onsets
+        at different frequency bins.
+        Default: `np.mean`
+    **kwargs : additional keyword arguments
+        Additional parameters to ``feature()``, if ``S`` is not provided.
+    Returns
+    -------
+    onset_envelope : np.ndarray [shape=(..., m,)]
+        vector containing the onset strength envelope.
+        If the input contains multiple channels, then onset envelope is computed for each channel.
+    Raises
+    ------
+    ParameterError
+        if neither ``(y, sr)`` nor ``S`` are provided
+        or if ``lag`` or ``max_size`` are not positive integers
+    See Also
+    --------
+    onset_detect
+    onset_strength_multi
+    Examples
+    --------
+    First, load some audio and plot the spectrogram
+    >>> import matplotlib.pyplot as plt
+    >>> y, sr = librosa.load(librosa.ex('trumpet'), duration=3)
+    >>> D = np.abs(librosa.stft(y))
+    >>> times = librosa.times_like(D, sr=sr)
+    >>> fig, ax = plt.subplots(nrows=2, sharex=True)
+    >>> librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
+    ...                          y_axis='log', x_axis='time', ax=ax[0], sr=sr)
+    >>> ax[0].set(title='Power spectrogram')
+    >>> ax[0].label_outer()
+    Construct a standard onset function
+    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr)
+    >>> ax[1].plot(times, 2 + onset_env / onset_env.max(), alpha=0.8,
+    ...            label='Mean (mel)')
+    Median aggregation, and custom mel options
+    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr,
+    ...                                          aggregate=np.median,
+    ...                                          fmax=8000, n_mels=256)
+    >>> ax[1].plot(times, 1 + onset_env / onset_env.max(), alpha=0.8,
+    ...            label='Median (custom mel)')
+    Constant-Q spectrogram instead of Mel
+    >>> C = np.abs(librosa.cqt(y=y, sr=sr))
+    >>> onset_env = librosa.onset.onset_strength(sr=sr, S=librosa.amplitude_to_db(C, ref=np.max))
+    >>> ax[1].plot(times, onset_env / onset_env.max(), alpha=0.8,
+    ...          label='Mean (CQT)')
+    >>> ax[1].legend()
+    >>> ax[1].set(ylabel='Normalized strength', yticks=[])
+    """
+    if aggregate is False:
+        raise ParameterError(
+            "aggregate parameter cannot be False when computing full-spectrum onset strength."
+        )
+    odf_all = onset_strength_multi(
+        y=y,
+        sr=sr,
+        S=S,
+        lag=lag,
+        max_size=max_size,
+        ref=ref,
+        detrend=detrend,
+        center=center,
+        feature=feature,
+        aggregate=aggregate,
+        channels=None,
+        **kwargs,
+    )
+    return odf_all[..., 0, :]
+def onset_backtrack(events: np.ndarray, energy: np.ndarray) -> np.ndarray:
+    """Backtrack detected onset events to the nearest preceding local
+    minimum of an energy function.
+    This function can be used to roll back the timing of detected onsets
+    from a detected peak amplitude to the preceding minimum.
+    This is most useful when using onsets to determine slice points for
+    segmentation, as described by [#]_.
+    .. [#] Jehan, Tristan.
+           "Creating music by listening"
+           Doctoral dissertation
+           Massachusetts Institute of Technology, 2005.
+    Parameters
+    ----------
+    events : np.ndarray, dtype=int
+        List of onset event frame indices, as computed by `onset_detect`
+    energy : np.ndarray, shape=(m,)
+        An energy function
+    Returns
+    -------
+    events_backtracked : np.ndarray, shape=events.shape
+        The input events matched to nearest preceding minima of ``energy``.
+    Examples
+    --------
+    Backtrack the events using the onset envelope
+    >>> y, sr = librosa.load(librosa.ex('trumpet'), duration=3)
+    >>> oenv = librosa.onset.onset_strength(y=y, sr=sr)
+    >>> times = librosa.times_like(oenv, sr=sr)
+    >>> # Detect events without backtracking
+    >>> onset_raw = librosa.onset.onset_detect(onset_envelope=oenv,
+    ...                                        backtrack=False)
+    >>> onset_bt = librosa.onset.onset_backtrack(onset_raw, oenv)
+    Backtrack the events using the RMS values
+    >>> S = np.abs(librosa.stft(y=y))
+    >>> rms = librosa.feature.rms(S=S)
+    >>> onset_bt_rms = librosa.onset.onset_backtrack(onset_raw, rms[0])
+    Plot the results
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots(nrows=3, sharex=True)
+    >>> librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
+    ...                          y_axis='log', x_axis='time', ax=ax[0])
+    >>> ax[0].label_outer()
+    >>> ax[1].plot(times, oenv, label='Onset strength')
+    >>> ax[1].vlines(librosa.frames_to_time(onset_raw), 0, oenv.max(), label='Raw onsets')
+    >>> ax[1].vlines(librosa.frames_to_time(onset_bt), 0, oenv.max(), label='Backtracked', color='r')
+    >>> ax[1].legend()
+    >>> ax[1].label_outer()
+    >>> ax[2].plot(times, rms[0], label='RMS')
+    >>> ax[2].vlines(librosa.frames_to_time(onset_bt_rms), 0, rms.max(), label='Backtracked (RMS)', color='r')
+    >>> ax[2].legend()
+    """
+    # Find points where energy is non-increasing
+    # all points:  energy[i] <= energy[i-1]
+    # tail points: energy[i] < energy[i+1]
+    minima = np.flatnonzero((energy[1:-1] <= energy[:-2]) & (energy[1:-1] < energy[2:]))
+    # Pad on a 0, just in case we have onsets with no preceding minimum
+    # Shift by one to account for slicing in minima detection
+    minima = util.fix_frames(1 + minima, x_min=0)
+    # Only match going left from the detected events
+    results: np.ndarray = minima[util.match_events(events, minima, right=False)]
+    return results
+@cache(level=30)
+def onset_strength_multi(
+    *,
+    y: Optional[np.ndarray] = None,
+    sr: float = 22050,
+    S: Optional[np.ndarray] = None,
+    n_fft: int = 2048,
+    hop_length: int = 512,
+    lag: int = 1,
+    max_size: int = 1,
+    ref: Optional[np.ndarray] = None,
+    detrend: bool = False,
+    center: bool = True,
+    feature: Optional[Callable] = None,
+    aggregate: Optional[Union[Callable, bool]] = None,
+    channels: Optional[Union[Sequence[int], Sequence[slice]]] = None,
+    **kwargs: Any,
+) -> np.ndarray:
+    """Compute a spectral flux onset strength envelope across multiple channels.
+    Onset strength for channel ``i`` at time ``t`` is determined by::
+        mean_{f in channels[i]} max(0, S[f, t+1] - S[f, t])
+    Parameters
+    ----------
+    y : np.ndarray [shape=(..., n,)]
+        audio time-series. Multi-channel is supported.
+    sr : number > 0 [scalar]
+        sampling rate of ``y``
+    S : np.ndarray [shape=(..., d, m)]
+        pre-computed (log-power) spectrogram
+    n_fft : int > 0 [scalar]
+        FFT window size for use in ``feature()`` if ``S`` is not provided.
+    hop_length : int > 0 [scalar]
+        hop length for use in ``feature()`` if ``S`` is not provided.
+    lag : int > 0
+        time lag for computing differences
+    max_size : int > 0
+        size (in frequency bins) of the local max filter.
+        set to `1` to disable filtering.
+    ref : None or np.ndarray [shape=(d, m)]
+        An optional pre-computed reference spectrum, of the same shape as ``S``.
+        If not provided, it will be computed from ``S``.
+        If provided, it will override any local max filtering governed by ``max_size``.
+    detrend : bool [scalar]
+        Filter the onset strength to remove the DC component
+    center : bool [scalar]
+        Shift the onset function by ``n_fft // (2 * hop_length)`` frames.
+        This corresponds to using a centered frame analysis in the short-time Fourier
+        transform.
+    feature : function
+        Function for computing time-series features, eg, scaled spectrograms.
+        By default, uses `librosa.feature.melspectrogram` with ``fmax=sr/2``
+        Must support arguments: ``y, sr, n_fft, hop_length``
+    aggregate : function or False
+        Aggregation function to use when combining onsets
+        at different frequency bins.
+        If ``False``, then no aggregation is performed.
+        Default: `np.mean`
+    channels : list or None
+        Array of channel boundaries or slice objects.
+        If `None`, then a single channel is generated to span all bands.
+    **kwargs : additional keyword arguments
+        Additional parameters to ``feature()``, if ``S`` is not provided.
+    Returns
+    -------
+    onset_envelope : np.ndarray [shape=(..., n_channels, m)]
+        array containing the onset strength envelope for each specified channel
+    Raises
+    ------
+    ParameterError
+        if neither ``(y, sr)`` nor ``S`` are provided
+    See Also
+    --------
+    onset_strength
+    Notes
+    -----
+    This function caches at level 30.
+    Examples
+    --------
+    First, load some audio and plot the spectrogram
+    >>> import matplotlib.pyplot as plt
+    >>> y, sr = librosa.load(librosa.ex('choice'), duration=5)
+    >>> D = np.abs(librosa.stft(y))
+    >>> fig, ax = plt.subplots(nrows=2, sharex=True)
+    >>> img1 = librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
+    ...                          y_axis='log', x_axis='time', ax=ax[0])
+    >>> ax[0].set(title='Power spectrogram')
+    >>> ax[0].label_outer()
+    >>> fig.colorbar(img1, ax=[ax[0]], format="%+2.f dB")
+    Construct a standard onset function over four sub-bands
+    >>> onset_subbands = librosa.onset.onset_strength_multi(y=y, sr=sr,
+    ...                                                     channels=[0, 32, 64, 96, 128])
+    >>> img2 = librosa.display.specshow(onset_subbands, x_axis='time', ax=ax[1])
+    >>> ax[1].set(ylabel='Sub-bands', title='Sub-band onset strength')
+    >>> fig.colorbar(img2, ax=[ax[1]])
+    """
+    if feature is None:
+        feature = melspectrogram
+        kwargs.setdefault("fmax", 0.5 * sr)
+    if aggregate is None:
+        aggregate = np.mean
+    if not util.is_positive_int(lag):
+        raise ParameterError(f"lag={lag} must be a positive integer")
+    if not util.is_positive_int(max_size):
+        raise ParameterError(f"max_size={max_size} must be a positive integer")
+    # First, compute mel spectrogram
+    if S is None:
+        S = np.abs(feature(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, **kwargs))
+        # Convert to dBs
+        S = core.power_to_db(S)
+    # Assertion to make type checking happy
+    assert S is not None
+    # Ensure that S is at least 2-d
+    S = np.atleast_2d(S)
+    # Compute the reference spectrogram.
+    # Efficiency hack: skip filtering step and pass by reference
+    # if max_size will produce a no-op.
+    if ref is None:
+        if max_size == 1:
+            ref = S
+        else:
+            ref = scipy.ndimage.maximum_filter1d(S, max_size, axis=-2)
+    elif ref.shape != S.shape:
+        raise ParameterError(
+            f"Reference spectrum shape {ref.shape} must match input spectrum {S.shape}"
+        )
+    # Compute difference to the reference, spaced by lag
+    onset_env = S[..., lag:] - ref[..., :-lag]
+    # Discard negatives (decreasing amplitude)
+    onset_env = np.maximum(0.0, onset_env)
+    # Aggregate within channels
+    pad = True
+    if channels is None:
+        channels = [slice(None)]
+    else:
+        pad = False
+    if callable(aggregate):
+        onset_env = util.sync(
+            onset_env, channels, aggregate=aggregate, pad=pad, axis=-2
+        )
+    # compensate for lag
+    pad_width = lag
+    if center:
+        # Counter-act framing effects. Shift the onsets by n_fft / hop_length
+        pad_width += n_fft // (2 * hop_length)
+    padding = [(0, 0) for _ in onset_env.shape]
+    padding[-1] = (int(pad_width), 0)
+    onset_env = np.pad(onset_env, padding, mode="constant")
+    # remove the DC component
+    if detrend:
+        onset_env = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], onset_env, axis=-1)
+    # Trim to match the input duration
+    if center:
+        onset_env = onset_env[..., : S.shape[-1]]
+    return onset_env