Upload edit\Qwen3-TTS-test\.venv\Lib\site-packages\librosa\filters.py with huggingface_hub

Browse files

Files changed (1) hide show

edit//Qwen3-TTS-test//.venv//Lib//site-packages//librosa//filters.py +1669 -0

edit//Qwen3-TTS-test//.venv//Lib//site-packages//librosa//filters.py ADDED Viewed

	@@ -0,0 +1,1669 @@

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Filters
+=======
+Filter bank construction
+------------------------
+.. autosummary::
+    :toctree: generated/
+    mel
+    chroma
+    wavelet
+    semitone_filterbank
+Window functions
+----------------
+.. autosummary::
+    :toctree: generated/
+    window_bandwidth
+    get_window
+Miscellaneous
+-------------
+.. autosummary::
+    :toctree: generated/
+    wavelet_lengths
+    cq_to_chroma
+    mr_frequencies
+    window_sumsquare
+    diagonal_filter
+Deprecated
+----------
+.. autosummary::
+    :toctree: generated/
+    constant_q
+    constant_q_lengths
+"""
+import warnings
+import numpy as np
+import scipy
+import scipy.signal
+import scipy.ndimage
+from numba import jit
+from ._cache import cache
+from . import util
+from .util.exceptions import ParameterError
+from .util.decorators import deprecated
+from .core.convert import note_to_hz, hz_to_midi, midi_to_hz, hz_to_octs
+from .core.convert import fft_frequencies, mel_frequencies
+from numpy.typing import ArrayLike, DTypeLike
+from typing import Any, List, Optional, Tuple, Union
+from typing_extensions import Literal
+from ._typing import _WindowSpec, _FloatLike_co
+__all__ = [
+    "mel",
+    "chroma",
+    "constant_q",
+    "constant_q_lengths",
+    "cq_to_chroma",
+    "window_bandwidth",
+    "get_window",
+    "mr_frequencies",
+    "semitone_filterbank",
+    "window_sumsquare",
+    "diagonal_filter",
+    "wavelet",
+    "wavelet_lengths",
+]
+# Dictionary of window function bandwidths
+WINDOW_BANDWIDTHS = {
+    "bart": 1.3334961334912805,
+    "barthann": 1.4560255965133932,
+    "bartlett": 1.3334961334912805,
+    "bkh": 2.0045975283585014,
+    "black": 1.7269681554262326,
+    "blackharr": 2.0045975283585014,
+    "blackman": 1.7269681554262326,
+    "blackmanharris": 2.0045975283585014,
+    "blk": 1.7269681554262326,
+    "bman": 1.7859588613860062,
+    "bmn": 1.7859588613860062,
+    "bohman": 1.7859588613860062,
+    "box": 1.0,
+    "boxcar": 1.0,
+    "brt": 1.3334961334912805,
+    "brthan": 1.4560255965133932,
+    "bth": 1.4560255965133932,
+    "cosine": 1.2337005350199792,
+    "flat": 2.7762255046484143,
+    "flattop": 2.7762255046484143,
+    "flt": 2.7762255046484143,
+    "halfcosine": 1.2337005350199792,
+    "ham": 1.3629455320350348,
+    "hamm": 1.3629455320350348,
+    "hamming": 1.3629455320350348,
+    "han": 1.50018310546875,
+    "hann": 1.50018310546875,
+    "nut": 1.9763500280946082,
+    "nutl": 1.9763500280946082,
+    "nuttall": 1.9763500280946082,
+    "ones": 1.0,
+    "par": 1.9174603174603191,
+    "parz": 1.9174603174603191,
+    "parzen": 1.9174603174603191,
+    "rect": 1.0,
+    "rectangular": 1.0,
+    "tri": 1.3331706523555851,
+    "triang": 1.3331706523555851,
+    "triangle": 1.3331706523555851,
+}
+@cache(level=10)
+def mel(
+    *,
+    sr: float,
+    n_fft: int,
+    n_mels: int = 128,
+    fmin: float = 0.0,
+    fmax: Optional[float] = None,
+    htk: bool = False,
+    norm: Optional[Union[Literal["slaney"], float]] = "slaney",
+    dtype: DTypeLike = np.float32,
+) -> np.ndarray:
+    """Create a Mel filter-bank.
+    This produces a linear transformation matrix to project
+    FFT bins onto Mel-frequency bins.
+    Parameters
+    ----------
+    sr : number > 0 [scalar]
+        sampling rate of the incoming signal
+    n_fft : int > 0 [scalar]
+        number of FFT components
+    n_mels : int > 0 [scalar]
+        number of Mel bands to generate
+    fmin : float >= 0 [scalar]
+        lowest frequency (in Hz)
+    fmax : float >= 0 [scalar]
+        highest frequency (in Hz).
+        If `None`, use ``fmax = sr / 2.0``
+    htk : bool [scalar]
+        use HTK formula instead of Slaney
+    norm : {None, 'slaney', or number} [scalar]
+        If 'slaney', divide the triangular mel weights by the width of the mel band
+        (area normalization).
+        If numeric, use `librosa.util.normalize` to normalize each filter by to unit l_p norm.
+        See `librosa.util.normalize` for a full description of supported norm values
+        (including `+-np.inf`).
+        Otherwise, leave all the triangles aiming for a peak value of 1.0
+    dtype : np.dtype
+        The data type of the output basis.
+        By default, uses 32-bit (single-precision) floating point.
+    Returns
+    -------
+    M : np.ndarray [shape=(n_mels, 1 + n_fft/2)]
+        Mel transform matrix
+    See Also
+    --------
+    librosa.util.normalize
+    Notes
+    -----
+    This function caches at level 10.
+    Examples
+    --------
+    >>> melfb = librosa.filters.mel(sr=22050, n_fft=2048)
+    >>> melfb
+    array([[ 0.   ,  0.016, ...,  0.   ,  0.   ],
+           [ 0.   ,  0.   , ...,  0.   ,  0.   ],
+           ...,
+           [ 0.   ,  0.   , ...,  0.   ,  0.   ],
+           [ 0.   ,  0.   , ...,  0.   ,  0.   ]])
+    Clip the maximum frequency to 8KHz
+    >>> librosa.filters.mel(sr=22050, n_fft=2048, fmax=8000)
+    array([[ 0.  ,  0.02, ...,  0.  ,  0.  ],
+           [ 0.  ,  0.  , ...,  0.  ,  0.  ],
+           ...,
+           [ 0.  ,  0.  , ...,  0.  ,  0.  ],
+           [ 0.  ,  0.  , ...,  0.  ,  0.  ]])
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots()
+    >>> img = librosa.display.specshow(melfb, x_axis='linear', ax=ax)
+    >>> ax.set(ylabel='Mel filter', title='Mel filter bank')
+    >>> fig.colorbar(img, ax=ax)
+    """
+    if fmax is None:
+        fmax = float(sr) / 2
+    # Initialize the weights
+    n_mels = int(n_mels)
+    weights = np.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype)
+    # Center freqs of each FFT bin
+    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft)
+    # 'Center freqs' of mel bands - uniformly spaced between limits
+    mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)
+    fdiff = np.diff(mel_f)
+    ramps = np.subtract.outer(mel_f, fftfreqs)
+    for i in range(n_mels):
+        # lower and upper slopes for all bins
+        lower = -ramps[i] / fdiff[i]
+        upper = ramps[i + 2] / fdiff[i + 1]
+        # .. then intersect them with each other and zero
+        weights[i] = np.maximum(0, np.minimum(lower, upper))
+    if isinstance(norm, str):
+        if norm == "slaney":
+            # Slaney-style mel is scaled to be approx constant energy per channel
+            enorm = 2.0 / (mel_f[2 : n_mels + 2] - mel_f[:n_mels])
+            weights *= enorm[:, np.newaxis]
+        else:
+            raise ParameterError(f"Unsupported norm={norm}")
+    else:
+        weights = util.normalize(weights, norm=norm, axis=-1)
+    # Only check weights if f_mel[0] is positive
+    if not np.all((mel_f[:-2] == 0) | (weights.max(axis=1) > 0)):
+        # This means we have an empty channel somewhere
+        warnings.warn(
+            "Empty filters detected in mel frequency basis. "
+            "Some channels will produce empty responses. "
+            "Try increasing your sampling rate (and fmax) or "
+            "reducing n_mels.",
+            stacklevel=2,
+        )
+    return weights
+@cache(level=10)
+def chroma(
+    *,
+    sr: float,
+    n_fft: int,
+    n_chroma: int = 12,
+    tuning: float = 0.0,
+    ctroct: float = 5.0,
+    octwidth: Union[float, None] = 2,
+    norm: Optional[float] = 2,
+    base_c: bool = True,
+    dtype: DTypeLike = np.float32,
+) -> np.ndarray:
+    """Create a chroma filter bank.
+    This creates a linear transformation matrix to project
+    FFT bins onto chroma bins (i.e. pitch classes).
+    Parameters
+    ----------
+    sr : number > 0 [scalar]
+        audio sampling rate
+    n_fft : int > 0 [scalar]
+        number of FFT bins
+    n_chroma : int > 0 [scalar]
+        number of chroma bins
+    tuning : float
+        Tuning deviation from A440 in fractions of a chroma bin.
+    ctroct : float > 0 [scalar]
+    octwidth : float > 0 or None [scalar]
+        ``ctroct`` and ``octwidth`` specify a dominance window:
+        a Gaussian weighting centered on ``ctroct`` (in octs, A0 = 27.5Hz)
+        and with a gaussian half-width of ``octwidth``.
+        Set ``octwidth`` to `None` to use a flat weighting.
+    norm : float > 0 or np.inf
+        Normalization factor for each filter
+    base_c : bool
+        If True, the filter bank will start at 'C'.
+        If False, the filter bank will start at 'A'.
+    dtype : np.dtype
+        The data type of the output basis.
+        By default, uses 32-bit (single-precision) floating point.
+    Returns
+    -------
+    wts : ndarray [shape=(n_chroma, 1 + n_fft / 2)]
+        Chroma filter matrix
+    See Also
+    --------
+    librosa.util.normalize
+    librosa.feature.chroma_stft
+    Notes
+    -----
+    This function caches at level 10.
+    Examples
+    --------
+    Build a simple chroma filter bank
+    >>> chromafb = librosa.filters.chroma(sr=22050, n_fft=4096)
+    array([[  1.689e-05,   3.024e-04, ...,   4.639e-17,   5.327e-17],
+           [  1.716e-05,   2.652e-04, ...,   2.674e-25,   3.176e-25],
+    ...,
+           [  1.578e-05,   3.619e-04, ...,   8.577e-06,   9.205e-06],
+           [  1.643e-05,   3.355e-04, ...,   1.474e-10,   1.636e-10]])
+    Use quarter-tones instead of semitones
+    >>> librosa.filters.chroma(sr=22050, n_fft=4096, n_chroma=24)
+    array([[  1.194e-05,   2.138e-04, ...,   6.297e-64,   1.115e-63],
+           [  1.206e-05,   2.009e-04, ...,   1.546e-79,   2.929e-79],
+    ...,
+           [  1.162e-05,   2.372e-04, ...,   6.417e-38,   9.923e-38],
+           [  1.180e-05,   2.260e-04, ...,   4.697e-50,   7.772e-50]])
+    Equally weight all octaves
+    >>> librosa.filters.chroma(sr=22050, n_fft=4096, octwidth=None)
+    array([[  3.036e-01,   2.604e-01, ...,   2.445e-16,   2.809e-16],
+           [  3.084e-01,   2.283e-01, ...,   1.409e-24,   1.675e-24],
+    ...,
+           [  2.836e-01,   3.116e-01, ...,   4.520e-05,   4.854e-05],
+           [  2.953e-01,   2.888e-01, ...,   7.768e-10,   8.629e-10]])
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots()
+    >>> img = librosa.display.specshow(chromafb, x_axis='linear', ax=ax)
+    >>> ax.set(ylabel='Chroma filter', title='Chroma filter bank')
+    >>> fig.colorbar(img, ax=ax)
+    """
+    wts = np.zeros((n_chroma, n_fft))
+    # Get the FFT bins, not counting the DC component
+    frequencies = np.linspace(0, sr, n_fft, endpoint=False)[1:]
+    frqbins = n_chroma * hz_to_octs(
+        frequencies, tuning=tuning, bins_per_octave=n_chroma
+    )
+    # make up a value for the 0 Hz bin = 1.5 octaves below bin 1
+    # (so chroma is 50% rotated from bin 1, and bin width is broad)
+    frqbins = np.concatenate(([frqbins[0] - 1.5 * n_chroma], frqbins))
+    binwidthbins = np.concatenate((np.maximum(frqbins[1:] - frqbins[:-1], 1.0), [1]))
+    D = np.subtract.outer(frqbins, np.arange(0, n_chroma, dtype="d")).T
+    n_chroma2 = np.round(float(n_chroma) / 2)
+    # Project into range -n_chroma/2 .. n_chroma/2
+    # add on fixed offset of 10*n_chroma to ensure all values passed to
+    # rem are positive
+    D = np.remainder(D + n_chroma2 + 10 * n_chroma, n_chroma) - n_chroma2
+    # Gaussian bumps - 2*D to make them narrower
+    wts = np.exp(-0.5 * (2 * D / np.tile(binwidthbins, (n_chroma, 1))) ** 2)
+    # normalize each column
+    wts = util.normalize(wts, norm=norm, axis=0)
+    # Maybe apply scaling for fft bins
+    if octwidth is not None:
+        wts *= np.tile(
+            np.exp(-0.5 * (((frqbins / n_chroma - ctroct) / octwidth) ** 2)),
+            (n_chroma, 1),
+        )
+    if base_c:
+        wts = np.roll(wts, -3 * (n_chroma // 12), axis=0)
+    # remove aliasing columns, copy to ensure row-contiguity
+    return np.ascontiguousarray(wts[:, : int(1 + n_fft / 2)], dtype=dtype)
+def __float_window(window_spec):
+    """Decorate a window function to support fractional input lengths.
+    This function guarantees that for fractional ``x``, the following hold:
+    1. ``__float_window(window_function)(x)`` has length ``np.ceil(x)``
+    2. all values from ``np.floor(x)`` are set to 0.
+    For integer-valued ``x``, there should be no change in behavior.
+    """
+    def _wrap(n, *args, **kwargs):
+        """Wrap the window"""
+        n_min, n_max = int(np.floor(n)), int(np.ceil(n))
+        window = get_window(window_spec, n_min)
+        if len(window) < n_max:
+            window = np.pad(window, [(0, n_max - len(window))], mode="constant")
+        window[n_min:] = 0.0
+        return window
+    return _wrap
+@deprecated(version="0.9.0", version_removed="1.0")
+def constant_q(
+    *,
+    sr: float,
+    fmin: Optional[_FloatLike_co] = None,
+    n_bins: int = 84,
+    bins_per_octave: int = 12,
+    window: _WindowSpec = "hann",
+    filter_scale: float = 1,
+    pad_fft: bool = True,
+    norm: Optional[float] = 1,
+    dtype: DTypeLike = np.complex64,
+    gamma: float = 0,
+    **kwargs: Any,
+) -> Tuple[np.ndarray, np.ndarray]:
+    r"""Construct a constant-Q basis.
+    This function constructs a filter bank similar to Morlet wavelets,
+    where complex exponentials are windowed to different lengths
+    such that the number of cycles remains fixed for all frequencies.
+    By default, a Hann window (rather than the Gaussian window of Morlet wavelets)
+    is used, but this can be controlled by the ``window`` parameter.
+    Frequencies are spaced geometrically, increasing by a factor of
+    ``(2**(1./bins_per_octave))`` at each successive band.
+    .. warning:: This function is deprecated as of v0.9 and will be removed in 1.0.
+        See `librosa.filters.wavelet`.
+    Parameters
+    ----------
+    sr : number > 0 [scalar]
+        Audio sampling rate
+    fmin : float > 0 [scalar]
+        Minimum frequency bin. Defaults to `C1 ~= 32.70`
+    n_bins : int > 0 [scalar]
+        Number of frequencies.  Defaults to 7 octaves (84 bins).
+    bins_per_octave : int > 0 [scalar]
+        Number of bins per octave
+    window : string, tuple, number, or function
+        Windowing function to apply to filters.
+    filter_scale : float > 0 [scalar]
+        Scale of filter windows.
+        Small values (<1) use shorter windows for higher temporal resolution.
+    pad_fft : boolean
+        Center-pad all filters up to the nearest integral power of 2.
+        By default, padding is done with zeros, but this can be overridden
+        by setting the ``mode=`` field in *kwargs*.
+    norm : {inf, -inf, 0, float > 0}
+        Type of norm to use for basis function normalization.
+        See librosa.util.normalize
+    gamma : number >= 0
+        Bandwidth offset for variable-Q transforms.
+        ``gamma=0`` produces a constant-Q filterbank.
+    dtype : np.dtype
+        The data type of the output basis.
+        By default, uses 64-bit (single precision) complex floating point.
+    **kwargs : additional keyword arguments
+        Arguments to `np.pad()` when ``pad==True``.
+    Returns
+    -------
+    filters : np.ndarray, ``len(filters) == n_bins``
+        ``filters[i]`` is ``i``\ th time-domain CQT basis filter
+    lengths : np.ndarray, ``len(lengths) == n_bins``
+        The (fractional) length of each filter
+    Notes
+    -----
+    This function caches at level 10.
+    See Also
+    --------
+    wavelet
+    constant_q_lengths
+    librosa.cqt
+    librosa.vqt
+    librosa.util.normalize
+    Examples
+    --------
+    Use a shorter window for each filter
+    >>> basis, lengths = librosa.filters.constant_q(sr=22050, filter_scale=0.5)
+    Plot one octave of filters in time and frequency
+    >>> import matplotlib.pyplot as plt
+    >>> basis, lengths = librosa.filters.constant_q(sr=22050)
+    >>> fig, ax = plt.subplots(nrows=2, figsize=(10, 6))
+    >>> notes = librosa.midi_to_note(np.arange(24, 24 + len(basis)))
+    >>> for i, (f, n) in enumerate(zip(basis, notes[:12])):
+    ...     f_scale = librosa.util.normalize(f) / 2
+    ...     ax[0].plot(i + f_scale.real)
+    ...     ax[0].plot(i + f_scale.imag, linestyle=':')
+    >>> ax[0].set(yticks=np.arange(len(notes[:12])), yticklabels=notes[:12],
+    ...           ylabel='CQ filters',
+    ...           title='CQ filters (one octave, time domain)',
+    ...           xlabel='Time (samples at 22050 Hz)')
+    >>> ax[0].legend(['Real', 'Imaginary'])
+    >>> F = np.abs(np.fft.fftn(basis, axes=[-1]))
+    >>> # Keep only the positive frequencies
+    >>> F = F[:, :(1 + F.shape[1] // 2)]
+    >>> librosa.display.specshow(F, x_axis='linear', y_axis='cqt_note', ax=ax[1])
+    >>> ax[1].set(ylabel='CQ filters', title='CQ filter magnitudes (frequency domain)')
+    """
+    if fmin is None:
+        fmin = note_to_hz("C1")
+    # Pass-through parameters to get the filter lengths
+    lengths = constant_q_lengths(
+        sr=sr,
+        fmin=fmin,
+        n_bins=n_bins,
+        bins_per_octave=bins_per_octave,
+        window=window,
+        filter_scale=filter_scale,
+        gamma=gamma,
+    )
+    freqs = fmin * (2.0 ** (np.arange(n_bins, dtype=float) / bins_per_octave))
+    # Build the filters
+    filters = []
+    for ilen, freq in zip(lengths, freqs):
+        # Build the filter: note, length will be ceil(ilen)
+        sig = util.phasor(
+            np.arange(-ilen // 2, ilen // 2, dtype=float) * 2 * np.pi * freq / sr
+        )
+        # Apply the windowing function
+        sig = sig * __float_window(window)(len(sig))
+        # Normalize
+        sig = util.normalize(sig, norm=norm)
+        filters.append(sig)
+    # Pad and stack
+    max_len = max(lengths)
+    if pad_fft:
+        max_len = int(2.0 ** (np.ceil(np.log2(max_len))))
+    else:
+        max_len = int(np.ceil(max_len))
+    filters = np.asarray(
+        [util.pad_center(filt, size=max_len, **kwargs) for filt in filters], dtype=dtype
+    )
+    return filters, np.asarray(lengths)
+@deprecated(version="0.9.0", version_removed="1.0")
+@cache(level=10)
+def constant_q_lengths(
+    *,
+    sr: float,
+    fmin: _FloatLike_co,
+    n_bins: int = 84,
+    bins_per_octave: int = 12,
+    window: _WindowSpec = "hann",
+    filter_scale: float = 1,
+    gamma: float = 0,
+) -> np.ndarray:
+    r"""Return length of each filter in a constant-Q basis.
+    .. warning:: This function is deprecated as of v0.9 and will be removed in 1.0.
+        See `librosa.filters.wavelet_lengths`.
+    Parameters
+    ----------
+    sr : number > 0 [scalar]
+        Audio sampling rate
+    fmin : float > 0 [scalar]
+        Minimum frequency bin.
+    n_bins : int > 0 [scalar]
+        Number of frequencies.  Defaults to 7 octaves (84 bins).
+    bins_per_octave : int > 0 [scalar]
+        Number of bins per octave
+    window : str or callable
+        Window function to use on filters
+    filter_scale : float > 0 [scalar]
+        Resolution of filter windows. Larger values use longer windows.
+    gamma : number >= 0
+        Bandwidth offset for variable-Q transforms.
+        ``gamma=0`` produces a constant-Q filterbank.
+    Returns
+    -------
+    lengths : np.ndarray
+        The length of each filter.
+    Notes
+    -----
+    This function caches at level 10.
+    See Also
+    --------
+    wavelet_lengths
+    """
+    if fmin <= 0:
+        raise ParameterError("fmin must be strictly positive")
+    if bins_per_octave <= 0:
+        raise ParameterError("bins_per_octave must be positive")
+    if filter_scale <= 0:
+        raise ParameterError("filter_scale must be positive")
+    if n_bins <= 0 or not isinstance(n_bins, (int, np.integer)):
+        raise ParameterError("n_bins must be a positive integer")
+    # Compute the frequencies
+    freq = fmin * (2.0 ** (np.arange(n_bins, dtype=float) / bins_per_octave))
+    # Q should be capitalized here, so we suppress the name warning
+    # pylint: disable=invalid-name
+    #
+    # Balance filter bandwidths
+    alpha = (2.0 ** (2 / bins_per_octave) - 1) / (2.0 ** (2 / bins_per_octave) + 1)
+    Q = float(filter_scale) / alpha
+    if max(freq * (1 + 0.5 * window_bandwidth(window) / Q)) > sr / 2.0:
+        raise ParameterError(
+            f"Maximum filter frequency={max(freq):.2f} would exceed Nyquist={sr/2}"
+        )
+    # Convert frequencies to filter lengths
+    lengths: np.ndarray = Q * sr / (freq + gamma / alpha)
+    return lengths
+@cache(level=10)
+def wavelet_lengths(
+    *,
+    freqs: ArrayLike,
+    sr: float = 22050,
+    window: _WindowSpec = "hann",
+    filter_scale: float = 1,
+    gamma: Optional[float] = 0,
+    alpha: Optional[Union[float, np.ndarray]] = None,
+) -> Tuple[np.ndarray, float]:
+    """Return length of each filter in a wavelet basis.
+    Parameters
+    ----------
+    freqs : np.ndarray (positive)
+        Center frequencies of the filters (in Hz).
+        Must be in ascending order.
+    sr : number > 0 [scalar]
+        Audio sampling rate
+    window : str or callable
+        Window function to use on filters
+    filter_scale : float > 0 [scalar]
+        Resolution of filter windows. Larger values use longer windows.
+    gamma : number >= 0 [scalar, optional]
+        Bandwidth offset for determining filter lengths, as used in
+        Variable-Q transforms.
+        Bandwidth for the k'th filter is determined by::
+            B[k] = alpha[k] * freqs[k] + gamma
+        ``alpha[k]`` is twice the relative difference between ``freqs[k+1]`` and ``freqs[k-1]``::
+            alpha[k] = (freqs[k+1]-freqs[k-1]) / (freqs[k+1]+freqs[k-1])
+        If ``freqs`` follows a geometric progression (as in CQT and VQT), the vector
+        ``alpha`` is constant and such that::
+            (1 + alpha) * freqs[k-1] = (1 - alpha) * freqs[k+1]
+        Furthermore, if ``gamma=0`` (default), ``alpha`` is such that even-``k`` and
+        odd-``k`` filters are interleaved::
+            freqs[k-1] + B[k-1] = freqs[k+1] - B[k+1]
+        If ``gamma=None`` is specified, then ``gamma`` is computed such
+        that each filter has bandwidth proportional to the equivalent
+        rectangular bandwidth (ERB) at frequency ``freqs[k]``::
+            gamma[k] = 24.7 * alpha[k] / 0.108
+        as derived by [#]_.
+        .. [#] Glasberg, Brian R., and Brian CJ Moore.
+            "Derivation of auditory filter shapes from notched-noise data."
+            Hearing research 47.1-2 (1990): 103-138.
+    alpha : number > 0 [optional]
+        Optional pre-computed relative bandwidth parameter.
+        Note that this must be provided if ``len(freqs)==1`` because bandwidth cannot be
+        inferred from a single frequency.
+        Otherwise, if left unspecified, it will be automatically derived by the rules
+        specified above.
+    Returns
+    -------
+    lengths : np.ndarray
+        The length of each filter.
+    f_cutoff : float
+        The lowest frequency at which all filters' main lobes have decayed by
+        at least 3dB.
+        This second output serves in cqt and vqt to ensure that all wavelet
+        bands remain below the Nyquist frequency.
+    Notes
+    -----
+    This function caches at level 10.
+    Raises
+    ------
+    ParameterError
+        - If ``filter_scale`` is not strictly positive
+        - If ``gamma`` is a negative number
+        - If any frequencies are <= 0
+        - If the frequency array is not sorted in ascending order
+    """
+    freqs = np.asarray(freqs)
+    if filter_scale <= 0:
+        raise ParameterError(f"filter_scale={filter_scale} must be positive")
+    if gamma is not None and gamma < 0:
+        raise ParameterError(f"gamma={gamma} must be non-negative")
+    if np.any(freqs <= 0):
+        raise ParameterError("frequencies must be strictly positive")
+    if len(freqs) > 1 and np.any(freqs[:-1] > freqs[1:]):
+        raise ParameterError(
+            f"Frequency array={freqs} must be in strictly ascending order"
+        )
+    if alpha is None:
+        alpha = _relative_bandwidth(freqs=freqs)
+    else:
+        alpha = np.asarray(alpha)
+    gamma_: Union[_FloatLike_co, np.ndarray]
+    if gamma is None:
+        gamma_ = alpha * 24.7 / 0.108
+    else:
+        gamma_ = gamma
+    # Q should be capitalized here, so we suppress the name warning
+    # pylint: disable=invalid-name
+    Q = float(filter_scale) / alpha
+    # How far up does our highest frequency reach?
+    f_cutoff = max(freqs * (1 + 0.5 * window_bandwidth(window) / Q) + 0.5 * gamma_)
+    # Convert frequencies to filter lengths
+    lengths = Q * sr / (freqs + gamma_ / alpha)
+    return lengths, f_cutoff
+def _relative_bandwidth(*, freqs: np.ndarray) -> np.ndarray:
+    """Compute the relative bandwidth for each of a set of specified frequencies.
+    This function is used as a helper in wavelet basis construction.
+    Parameters
+    ----------
+    freqs : np.ndarray
+        The array of frequencies
+    Returns
+    -------
+    alpha : np.ndarray
+        Relative bandwidth
+    """
+    if len(freqs) <= 1:
+        raise ParameterError(f"2 or more frequencies are required to compute bandwidths. Given freqs={freqs}")
+    # Approximate the local octave resolution around each frequency
+    bpo = np.empty_like(freqs)
+    logf = np.log2(freqs)
+    # Reflect at the lowest and highest frequencies
+    bpo[0] = 1 / (logf[1] - logf[0])
+    bpo[-1] = 1 / (logf[-1] - logf[-2])
+    # For everything else, do a centered difference
+    bpo[1:-1] = 2 / (logf[2:] - logf[:-2])
+    # Compute relative bandwidths
+    alpha = (2.0 ** (2 / bpo) - 1) / (2.0 ** (2 / bpo) + 1)
+    return alpha
+@cache(level=10)
+def wavelet(
+    *,
+    freqs: np.ndarray,
+    sr: float = 22050,
+    window: _WindowSpec = "hann",
+    filter_scale: float = 1,
+    pad_fft: bool = True,
+    norm: Optional[float] = 1,
+    dtype: DTypeLike = np.complex64,
+    gamma: float = 0,
+    alpha: Optional[float] = None,
+    **kwargs: Any,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Construct a wavelet basis using windowed complex sinusoids.
+    This function constructs a wavelet filterbank at a specified set of center
+    frequencies.
+    Parameters
+    ----------
+    freqs : np.ndarray (positive)
+        Center frequencies of the filters (in Hz).
+        Must be in ascending order.
+    sr : number > 0 [scalar]
+        Audio sampling rate
+    window : string, tuple, number, or function
+        Windowing function to apply to filters.
+    filter_scale : float > 0 [scalar]
+        Scale of filter windows.
+        Small values (<1) use shorter windows for higher temporal resolution.
+    pad_fft : boolean
+        Center-pad all filters up to the nearest integral power of 2.
+        By default, padding is done with zeros, but this can be overridden
+        by setting the ``mode=`` field in *kwargs*.
+    norm : {inf, -inf, 0, float > 0}
+        Type of norm to use for basis function normalization.
+        See librosa.util.normalize
+    gamma : number >= 0
+        Bandwidth offset for variable-Q transforms.
+    dtype : np.dtype
+        The data type of the output basis.
+        By default, uses 64-bit (single precision) complex floating point.
+    alpha : number > 0 [optional]
+        Optional pre-computed relative bandwidth parameter.
+        Note that this must be provided if ``len(freqs)==1`` because bandwidth cannot be
+        inferred from a single frequency.
+        Otherwise, if left unspecified, it will be automatically derived by the rules
+        specified above.
+    **kwargs : additional keyword arguments
+        Arguments to `np.pad()` when ``pad==True``.
+    Returns
+    -------
+    filters : np.ndarray, ``len(filters) == n_bins``
+        each ``filters[i]`` is a (complex) time-domain filter
+    lengths : np.ndarray, ``len(lengths) == n_bins``
+        The (fractional) length of each filter in samples
+    Notes
+    -----
+    This function caches at level 10.
+    See Also
+    --------
+    wavelet_lengths
+    librosa.cqt
+    librosa.vqt
+    librosa.util.normalize
+    Examples
+    --------
+    Create a constant-Q basis
+    >>> freqs = librosa.cqt_frequencies(n_bins=84, fmin=librosa.note_to_hz('C1'))
+    >>> basis, lengths = librosa.filters.wavelet(freqs=freqs, sr=22050)
+    Plot one octave of filters in time and frequency
+    >>> import matplotlib.pyplot as plt
+    >>> basis, lengths = librosa.filters.wavelet(freqs=freqs, sr=22050)
+    >>> fig, ax = plt.subplots(nrows=2, figsize=(10, 6))
+    >>> notes = librosa.midi_to_note(np.arange(24, 24 + len(basis)))
+    >>> for i, (f, n) in enumerate(zip(basis, notes[:12])):
+    ...     f_scale = librosa.util.normalize(f) / 2
+    ...     ax[0].plot(i + f_scale.real)
+    ...     ax[0].plot(i + f_scale.imag, linestyle=':')
+    >>> ax[0].set(yticks=np.arange(len(notes[:12])), yticklabels=notes[:12],
+    ...           ylabel='CQ filters',
+    ...           title='CQ filters (one octave, time domain)',
+    ...           xlabel='Time (samples at 22050 Hz)')
+    >>> ax[0].legend(['Real', 'Imaginary'])
+    >>> F = np.abs(np.fft.fftn(basis, axes=[-1]))
+    >>> # Keep only the positive frequencies
+    >>> F = F[:, :(1 + F.shape[1] // 2)]
+    >>> librosa.display.specshow(F, x_axis='linear', y_axis='cqt_note', ax=ax[1])
+    >>> ax[1].set(ylabel='CQ filters', title='CQ filter magnitudes (frequency domain)')
+    """
+    # Pass-through parameters to get the filter lengths
+    lengths, _ = wavelet_lengths(
+        freqs=freqs,
+        sr=sr,
+        window=window,
+        filter_scale=filter_scale,
+        gamma=gamma,
+        alpha=alpha,
+    )
+    # Build the filters
+    filters = []
+    for ilen, freq in zip(lengths, freqs):
+        # Build the filter: note, length will be ceil(ilen)
+        sig = util.phasor(
+            np.arange(-ilen // 2, ilen // 2, dtype=float) * 2 * np.pi * freq / sr
+        )
+        # Apply the windowing function
+        sig *= __float_window(window)(len(sig))
+        # Normalize
+        sig = util.normalize(sig, norm=norm)
+        filters.append(sig)
+    # Pad and stack
+    max_len = max(lengths)
+    if pad_fft:
+        max_len = int(2.0 ** (np.ceil(np.log2(max_len))))
+    else:
+        max_len = int(np.ceil(max_len))
+    filters = np.asarray(
+        [util.pad_center(filt, size=max_len, **kwargs) for filt in filters], dtype=dtype
+    )
+    return filters, lengths
+@cache(level=10)
+def cq_to_chroma(
+    n_input: int,
+    *,
+    bins_per_octave: int = 12,
+    n_chroma: int = 12,
+    fmin: Optional[_FloatLike_co] = None,
+    window: Optional[np.ndarray] = None,
+    base_c: bool = True,
+    dtype: DTypeLike = np.float32,
+) -> np.ndarray:
+    """Construct a linear transformation matrix to map Constant-Q bins
+    onto chroma bins (i.e., pitch classes).
+    Parameters
+    ----------
+    n_input : int > 0 [scalar]
+        Number of input components (CQT bins)
+    bins_per_octave : int > 0 [scalar]
+        How many bins per octave in the CQT
+    n_chroma : int > 0 [scalar]
+        Number of output bins (per octave) in the chroma
+    fmin : None or float > 0
+        Center frequency of the first constant-Q channel.
+        Default: 'C1' ~= 32.7 Hz
+    window : None or np.ndarray
+        If provided, the cq_to_chroma filter bank will be
+        convolved with ``window``.
+    base_c : bool
+        If True, the first chroma bin will start at 'C'
+        If False, the first chroma bin will start at 'A'
+    dtype : np.dtype
+        The data type of the output basis.
+        By default, uses 32-bit (single-precision) floating point.
+    Returns
+    -------
+    cq_to_chroma : np.ndarray [shape=(n_chroma, n_input)]
+        Transformation matrix: ``Chroma = np.dot(cq_to_chroma, CQT)``
+    Raises
+    ------
+    ParameterError
+        If ``n_input`` is not an integer multiple of ``n_chroma``
+    Notes
+    -----
+    This function caches at level 10.
+    Examples
+    --------
+    Get a CQT, and wrap bins to chroma
+    >>> y, sr = librosa.load(librosa.ex('trumpet'))
+    >>> CQT = np.abs(librosa.cqt(y, sr=sr))
+    >>> chroma_map = librosa.filters.cq_to_chroma(CQT.shape[0])
+    >>> chromagram = chroma_map.dot(CQT)
+    >>> # Max-normalize each time step
+    >>> chromagram = librosa.util.normalize(chromagram, axis=0)
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots(nrows=3, sharex=True)
+    >>> imgcq = librosa.display.specshow(librosa.amplitude_to_db(CQT,
+    ...                                                         ref=np.max),
+    ...                                  y_axis='cqt_note', x_axis='time',
+    ...                                  ax=ax[0])
+    >>> ax[0].set(title='CQT Power')
+    >>> ax[0].label_outer()
+    >>> librosa.display.specshow(chromagram, y_axis='chroma', x_axis='time',
+    ...                          ax=ax[1])
+    >>> ax[1].set(title='Chroma (wrapped CQT)')
+    >>> ax[1].label_outer()
+    >>> chroma = librosa.feature.chroma_stft(y=y, sr=sr)
+    >>> imgchroma = librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', ax=ax[2])
+    >>> ax[2].set(title='librosa.feature.chroma_stft')
+    """
+    # How many fractional bins are we merging?
+    n_merge = float(bins_per_octave) / n_chroma
+    fmin_: _FloatLike_co
+    if fmin is None:
+        fmin_ = note_to_hz("C1")
+    else:
+        fmin_ = fmin
+    if np.mod(n_merge, 1) != 0:
+        raise ParameterError(
+            "Incompatible CQ merge: "
+            "input bins must be an "
+            "integer multiple of output bins."
+        )
+    # Tile the identity to merge fractional bins
+    cq_to_ch = np.repeat(np.eye(n_chroma), int(n_merge), axis=1)
+    # Roll it left to center on the target bin
+    cq_to_ch = np.roll(cq_to_ch, -int(n_merge // 2), axis=1)
+    # How many octaves are we repeating?
+    n_octaves = np.ceil(float(n_input) / bins_per_octave)
+    # Repeat and trim
+    cq_to_ch = np.tile(cq_to_ch, int(n_octaves))[:, :n_input]
+    # What's the note number of the first bin in the CQT?
+    # midi uses 12 bins per octave here
+    midi_0 = np.mod(hz_to_midi(fmin_), 12)
+    if base_c:
+        # rotate to C
+        roll = midi_0
+    else:
+        # rotate to A
+        roll = midi_0 - 9
+    # Adjust the roll in terms of how many chroma we want out
+    # We need to be careful with rounding here
+    roll = int(np.round(roll * (n_chroma / 12.0)))
+    # Apply the roll
+    cq_to_ch = np.roll(cq_to_ch, roll, axis=0).astype(dtype)
+    if window is not None:
+        cq_to_ch = scipy.signal.convolve(cq_to_ch, np.atleast_2d(window), mode="same")
+    return cq_to_ch
+@cache(level=10)
+def window_bandwidth(window: _WindowSpec, n: int = 1000) -> float:
+    """Get the equivalent noise bandwidth (ENBW) of a window function.
+    The ENBW of a window is defined by [#]_ (equation 11) as the normalized
+    ratio of the sum of squares to the square of sums::
+        enbw = n * sum(window**2) / sum(window)**2
+    .. [#] Harris, F. J.
+        "On the use of windows for harmonic analysis with the discrete Fourier transform."
+        Proceedings of the IEEE, 66(1), 51-83.  1978.
+    Parameters
+    ----------
+    window : callable or string
+        A window function, or the name of a window function,
+        e.g.: `scipy.signal.hann` or `'boxcar'`
+    n : int > 0
+        The number of coefficients to use in estimating the
+        window bandwidth
+    Returns
+    -------
+    bandwidth : float
+        The equivalent noise bandwidth (in FFT bins) of the
+        given window function
+    Notes
+    -----
+    This function caches at level 10.
+    See Also
+    --------
+    get_window
+    """
+    if hasattr(window, "__name__"):
+        key = window.__name__
+    else:
+        key = window
+    if key not in WINDOW_BANDWIDTHS:
+        win = get_window(window, n)
+        WINDOW_BANDWIDTHS[key] = (
+            n * np.sum(win**2) / (np.sum(win) ** 2 + util.tiny(win))
+        )
+    return WINDOW_BANDWIDTHS[key]
+@cache(level=10)
+def get_window(
+    window: _WindowSpec,
+    Nx: int,
+    *,
+    fftbins: Optional[bool] = True,
+) -> np.ndarray:
+    """Compute a window function.
+    This is a wrapper for `scipy.signal.get_window` that additionally
+    supports callable or pre-computed windows.
+    Parameters
+    ----------
+    window : string, tuple, number, callable, or list-like
+        The window specification:
+        - If string, it's the name of the window function (e.g., `'hann'`)
+        - If tuple, it's the name of the window function and any parameters
+          (e.g., `('kaiser', 4.0)`)
+        - If numeric, it is treated as the beta parameter of the `'kaiser'`
+          window, as in `scipy.signal.get_window`.
+        - If callable, it's a function that accepts one integer argument
+          (the window length)
+        - If list-like, it's a pre-computed window of the correct length `Nx`
+    Nx : int > 0
+        The length of the window
+    fftbins : bool, optional
+        If True (default), create a periodic window for use with FFT
+        If False, create a symmetric window for filter design applications.
+    Returns
+    -------
+    get_window : np.ndarray
+        A window of length `Nx` and type `window`
+    See Also
+    --------
+    scipy.signal.get_window
+    Notes
+    -----
+    This function caches at level 10.
+    Raises
+    ------
+    ParameterError
+        If `window` is supplied as a vector of length != `n_fft`,
+        or is otherwise mis-specified.
+    """
+    if callable(window):
+        return window(Nx)
+    elif isinstance(window, (str, tuple)) or np.isscalar(window):
+        # TODO: if we add custom window functions in librosa, call them here
+        win: np.ndarray = scipy.signal.get_window(window, Nx, fftbins=fftbins)
+        return win
+    elif isinstance(window, (np.ndarray, list)):
+        if len(window) == Nx:
+            return np.asarray(window)
+        raise ParameterError(f"Window size mismatch: {len(window):d} != {Nx:d}")
+    else:
+        raise ParameterError(f"Invalid window specification: {window!r}")
+@cache(level=10)
+def _multirate_fb(
+    center_freqs: Optional[np.ndarray] = None,
+    sample_rates: Optional[np.ndarray] = None,
+    Q: float = 25.0,
+    passband_ripple: float = 1,
+    stopband_attenuation: float = 50,
+    ftype: str = "ellip",
+    flayout: str = "sos",
+) -> Tuple[List[Any], np.ndarray]:
+    r"""Construct a multirate filterbank.
+     A filter bank consists of multiple band-pass filters which divide the input signal
+     into subbands. In the case of a multirate filter bank, the band-pass filters
+     operate with resampled versions of the input signal, e.g. to keep the length
+     of a filter constant while shifting its center frequency.
+     This implementation uses `scipy.signal.iirdesign` to design the filters.
+    Parameters
+    ----------
+    center_freqs : np.ndarray [shape=(n,), dtype=float]
+        Center frequencies of the filter kernels.
+        Also defines the number of filters in the filterbank.
+    sample_rates : np.ndarray [shape=(n,), dtype=float]
+        Samplerate for each filter (used for multirate filterbank).
+    Q : float
+        Q factor (influences the filter bandwidth).
+    passband_ripple : float
+        The maximum loss in the passband (dB)
+        See `scipy.signal.iirdesign` for details.
+    stopband_attenuation : float
+        The minimum attenuation in the stopband (dB)
+        See `scipy.signal.iirdesign` for details.
+    ftype : str
+        The type of IIR filter to design
+        See `scipy.signal.iirdesign` for details.
+    flayout : string
+        Valid `output` argument for `scipy.signal.iirdesign`.
+        - If `ba`, returns numerators/denominators of the transfer functions,
+          used for filtering with `scipy.signal.filtfilt`.
+          Can be unstable for high-order filters.
+        - If `sos`, returns a series of second-order filters,
+          used for filtering with `scipy.signal.sosfiltfilt`.
+          Minimizes numerical precision errors for high-order filters, but is slower.
+        - If `zpk`, returns zeros, poles, and system gains of the transfer functions.
+    Returns
+    -------
+    filterbank : list [shape=(n,), dtype=float]
+        Each list entry comprises the filter coefficients for a single filter.
+    sample_rates : np.ndarray [shape=(n,), dtype=float]
+        Samplerate for each filter.
+    Notes
+    -----
+    This function caches at level 10.
+    See Also
+    --------
+    scipy.signal.iirdesign
+    Raises
+    ------
+    ParameterError
+        If ``center_freqs`` is ``None``.
+        If ``sample_rates`` is ``None``.
+        If ``center_freqs.shape`` does not match ``sample_rates.shape``.
+    """
+    if center_freqs is None:
+        raise ParameterError("center_freqs must be provided.")
+    if sample_rates is None:
+        raise ParameterError("sample_rates must be provided.")
+    if center_freqs.shape != sample_rates.shape:
+        raise ParameterError(
+            "Number of provided center_freqs and sample_rates must be equal."
+        )
+    nyquist = 0.5 * sample_rates
+    filter_bandwidths = center_freqs / float(Q)
+    filterbank = []
+    for cur_center_freq, cur_nyquist, cur_bw in zip(
+        center_freqs, nyquist, filter_bandwidths
+    ):
+        passband_freqs = [
+            cur_center_freq - 0.5 * cur_bw,
+            cur_center_freq + 0.5 * cur_bw,
+        ] / cur_nyquist
+        stopband_freqs = [
+            cur_center_freq - cur_bw,
+            cur_center_freq + cur_bw,
+        ] / cur_nyquist
+        cur_filter = scipy.signal.iirdesign(
+            passband_freqs,
+            stopband_freqs,
+            passband_ripple,
+            stopband_attenuation,
+            analog=False,
+            ftype=ftype,
+            output=flayout,
+        )
+        filterbank.append(cur_filter)
+    return filterbank, sample_rates
+@cache(level=10)
+def mr_frequencies(tuning: float) -> Tuple[np.ndarray, np.ndarray]:
+    r"""Generate center frequencies and sample rate pairs.
+    This function will return center frequency and corresponding sample rates
+    to obtain similar pitch filterbank settings as described in [#]_.
+    Instead of starting with MIDI pitch `A0`, we start with `C0`.
+    .. [#] Müller, Meinard.
+           "Information Retrieval for Music and Motion."
+           Springer Verlag. 2007.
+    Parameters
+    ----------
+    tuning : float [scalar]
+        Tuning deviation from A440, measure as a fraction of the equally
+        tempered semitone (1/12 of an octave).
+    Returns
+    -------
+    center_freqs : np.ndarray [shape=(n,), dtype=float]
+        Center frequencies of the filter kernels.
+        Also defines the number of filters in the filterbank.
+    sample_rates : np.ndarray [shape=(n,), dtype=float]
+        Sample rate for each filter, used for multirate filterbank.
+    Notes
+    -----
+    This function caches at level 10.
+    See Also
+    --------
+    librosa.filters.semitone_filterbank
+    """
+    center_freqs = midi_to_hz(np.arange(24 + tuning, 109 + tuning))
+    sample_rates = np.asarray(
+        len(np.arange(0, 36))
+        * [
+            882.0,
+        ]
+        + len(np.arange(36, 70))
+        * [
+            4410.0,
+        ]
+        + len(np.arange(70, 85))
+        * [
+            22050.0,
+        ]
+    )
+    return center_freqs, sample_rates
+def semitone_filterbank(
+    *,
+    center_freqs: Optional[np.ndarray] = None,
+    tuning: float = 0.0,
+    sample_rates: Optional[np.ndarray] = None,
+    flayout: str = "ba",
+    **kwargs: Any,
+) -> Tuple[List[Any], np.ndarray]:
+    r"""Construct a multi-rate bank of infinite-impulse response (IIR)
+    band-pass filters at user-defined center frequencies and sample rates.
+    By default, these center frequencies are set equal to the 88 fundamental
+    frequencies of the grand piano keyboard, according to a pitch tuning standard
+    of A440, that is, note A above middle C set to 440 Hz. The center frequencies
+    are tuned to the twelve-tone equal temperament, which means that they grow
+    exponentially at a rate of 2**(1/12), that is, twelve notes per octave.
+    The A440 tuning can be changed by the user while keeping twelve-tone equal
+    temperament. While A440 is currently the international standard in the music
+    industry (ISO 16), some orchestras tune to A441-A445, whereas baroque musicians
+    tune to A415.
+    See [#]_ for details.
+    .. [#] Müller, Meinard.
+           "Information Retrieval for Music and Motion."
+           Springer Verlag. 2007.
+    Parameters
+    ----------
+    center_freqs : np.ndarray [shape=(n,), dtype=float]
+        Center frequencies of the filter kernels.
+        Also defines the number of filters in the filterbank.
+    tuning : float [scalar]
+        Tuning deviation from A440 as a fraction of a semitone (1/12 of an octave
+        in equal temperament).
+    sample_rates : np.ndarray [shape=(n,), dtype=float]
+        Sample rates of each filter in the multirate filterbank.
+    flayout : string
+        - If `ba`, the standard difference equation is used for filtering with `scipy.signal.filtfilt`.
+          Can be unstable for high-order filters.
+        - If `sos`, a series of second-order filters is used for filtering with `scipy.signal.sosfiltfilt`.
+          Minimizes numerical precision errors for high-order filters, but is slower.
+    **kwargs : additional keyword arguments
+        Additional arguments to the private function `_multirate_fb()`.
+    Returns
+    -------
+    filterbank : list [shape=(n,), dtype=float]
+        Each list entry contains the filter coefficients for a single filter.
+    fb_sample_rates : np.ndarray [shape=(n,), dtype=float]
+        Sample rate for each filter.
+    See Also
+    --------
+    librosa.cqt
+    librosa.iirt
+    librosa.filters.mr_frequencies
+    scipy.signal.iirdesign
+    Examples
+    --------
+    >>> import matplotlib.pyplot as plt
+    >>> import numpy as np
+    >>> import scipy.signal
+    >>> semitone_filterbank, sample_rates = librosa.filters.semitone_filterbank(
+    ...     center_freqs=librosa.midi_to_hz(np.arange(60, 72)),
+    ...     sample_rates=np.repeat(4410.0, 12),
+    ...     flayout='sos'
+    ...     )
+    >>> magnitudes = []
+    >>> for cur_sr, cur_filter in zip(sample_rates, semitone_filterbank):
+    ...     w, h = scipy.signal.sosfreqz(cur_filter,fs=cur_sr, worN=1025)
+    ...     magnitudes.append(20 * np.log10(np.abs(h)))
+    >>> fig, ax = plt.subplots(figsize=(12,6))
+    >>> img = librosa.display.specshow(
+    ...     np.array(magnitudes),
+    ...     x_axis="hz",
+    ...     sr=4410,
+    ...     y_coords=librosa.midi_to_hz(np.arange(60, 72)),
+    ...     vmin=-60,
+    ...     vmax=3,
+    ...     ax=ax
+    ...     )
+    >>> fig.colorbar(img, ax=ax, format="%+2.f dB", label="Magnitude (dB)")
+    >>> ax.set(
+    ...     xlim=[200, 600],
+    ...     yticks=librosa.midi_to_hz(np.arange(60, 72)),
+    ...     title='Magnitude Responses of the Pitch Filterbank',
+    ...     xlabel='Frequency (Hz)',
+    ...     ylabel='Semitone filter center frequency (Hz)'
+    ... )
+    """
+    if (center_freqs is None) and (sample_rates is None):
+        center_freqs, sample_rates = mr_frequencies(tuning)
+    filterbank, fb_sample_rates = _multirate_fb(
+        center_freqs=center_freqs, sample_rates=sample_rates, flayout=flayout, **kwargs
+    )
+    return filterbank, fb_sample_rates
+@jit(nopython=True, cache=True)
+def __window_ss_fill(x, win_sq, n_frames, hop_length):  # pragma: no cover
+    """Compute the sum-square envelope of a window."""
+    n = len(x)
+    n_fft = len(win_sq)
+    for i in range(n_frames):
+        sample = i * hop_length
+        x[sample : min(n, sample + n_fft)] += win_sq[: max(0, min(n_fft, n - sample))]
+def window_sumsquare(
+    *,
+    window: _WindowSpec,
+    n_frames: int,
+    hop_length: int = 512,
+    win_length: Optional[int] = None,
+    n_fft: int = 2048,
+    dtype: DTypeLike = np.float32,
+    norm: Optional[float] = None,
+) -> np.ndarray:
+    """Compute the sum-square envelope of a window function at a given hop length.
+    This is used to estimate modulation effects induced by windowing observations
+    in short-time Fourier transforms.
+    Parameters
+    ----------
+    window : string, tuple, number, callable, or list-like
+        Window specification, as in `get_window`
+    n_frames : int > 0
+        The number of analysis frames
+    hop_length : int > 0
+        The number of samples to advance between frames
+    win_length : [optional]
+        The length of the window function.  By default, this matches ``n_fft``.
+    n_fft : int > 0
+        The length of each analysis frame.
+    dtype : np.dtype
+        The data type of the output
+    norm : {np.inf, -np.inf, 0, float > 0, None}
+        Normalization mode used in window construction.
+        Note that this does not affect the squaring operation.
+    Returns
+    -------
+    wss : np.ndarray, shape=``(n_fft + hop_length * (n_frames - 1))``
+        The sum-squared envelope of the window function
+    Examples
+    --------
+    For a fixed frame length (2048), compare modulation effects for a Hann window
+    at different hop lengths:
+    >>> n_frames = 50
+    >>> wss_256 = librosa.filters.window_sumsquare(window='hann', n_frames=n_frames, hop_length=256)
+    >>> wss_512 = librosa.filters.window_sumsquare(window='hann', n_frames=n_frames, hop_length=512)
+    >>> wss_1024 = librosa.filters.window_sumsquare(window='hann', n_frames=n_frames, hop_length=1024)
+    >>> import matplotlib.pyplot as plt
+    >>> fig, ax = plt.subplots(nrows=3, sharey=True)
+    >>> ax[0].plot(wss_256)
+    >>> ax[0].set(title='hop_length=256')
+    >>> ax[1].plot(wss_512)
+    >>> ax[1].set(title='hop_length=512')
+    >>> ax[2].plot(wss_1024)
+    >>> ax[2].set(title='hop_length=1024')
+    """
+    if win_length is None:
+        win_length = n_fft
+    n = n_fft + hop_length * (n_frames - 1)
+    x = np.zeros(n, dtype=dtype)
+    # Compute the squared window at the desired length
+    win_sq = get_window(window, win_length)
+    win_sq = util.normalize(win_sq, norm=norm) ** 2
+    win_sq = util.pad_center(win_sq, size=n_fft)
+    # Fill the envelope
+    __window_ss_fill(x, win_sq, n_frames, hop_length)
+    return x
+@cache(level=10)
+def diagonal_filter(
+    window: _WindowSpec,
+    n: int,
+    *,
+    slope: float = 1.0,
+    angle: Optional[float] = None,
+    zero_mean: bool = False,
+) -> np.ndarray:
+    """Build a two-dimensional diagonal filter.
+    This is primarily used for smoothing recurrence or self-similarity matrices.
+    Parameters
+    ----------
+    window : string, tuple, number, callable, or list-like
+        The window function to use for the filter.
+        See `get_window` for details.
+        Note that the window used here should be non-negative.
+    n : int > 0
+        the length of the filter
+    slope : float
+        The slope of the diagonal filter to produce
+    angle : float or None
+        If given, the slope parameter is ignored,
+        and angle directly sets the orientation of the filter (in radians).
+        Otherwise, angle is inferred as `arctan(slope)`.
+    zero_mean : bool
+        If True, a zero-mean filter is used.
+        Otherwise, a non-negative averaging filter is used.
+        This should be enabled if you want to enhance paths and suppress
+        blocks.
+    Returns
+    -------
+    kernel : np.ndarray, shape=[(m, m)]
+        The 2-dimensional filter kernel
+    Notes
+    -----
+    This function caches at level 10.
+    """
+    if angle is None:
+        angle = np.arctan(slope)
+    win = np.diag(get_window(window, n, fftbins=False))
+    if not np.isclose(angle, np.pi / 4):
+        win = scipy.ndimage.rotate(
+            win, 45 - angle * 180 / np.pi, order=5, prefilter=False
+        )
+    np.clip(win, 0, None, out=win)
+    win /= win.sum()
+    if zero_mean:
+        win -= win.mean()
+    return win