Spaces:
Configuration error
Configuration error
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| """Core IO, DSP and utility functions.""" | |
| from __future__ import annotations | |
| import os | |
| import pathlib | |
| import warnings | |
| import soundfile as sf | |
| import audioread | |
| import numpy as np | |
| import scipy.signal | |
| import soxr | |
| import lazy_loader as lazy | |
| from numba import jit, stencil, guvectorize | |
| from .fft import get_fftlib | |
| from .convert import frames_to_samples, time_to_samples | |
| from .._cache import cache | |
| from .. import util | |
| from ..util.exceptions import ParameterError | |
| from ..util.decorators import deprecated | |
| from ..util.deprecation import Deprecated, rename_kw | |
| from .._typing import _FloatLike_co, _IntLike_co, _SequenceLike | |
| from typing import Any, BinaryIO, Callable, Generator, Optional, Tuple, Union, List | |
| from numpy.typing import DTypeLike, ArrayLike | |
| # Lazy-load optional dependencies | |
| samplerate = lazy.load("samplerate") | |
| resampy = lazy.load("resampy") | |
# Public API of this module; anything not listed here is a private helper.
__all__ = [
    "load",
    "stream",
    "to_mono",
    "resample",
    "get_duration",
    "get_samplerate",
    "autocorrelate",
    "lpc",
    "zero_crossings",
    "clicks",
    "tone",
    "chirp",
    "mu_compress",
    "mu_expand",
]
| # -- CORE ROUTINES --# | |
| # Load should never be cached, since we cannot verify that the contents of | |
| # 'path' are unchanged across calls. | |
def load(
    path: Union[
        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
    ],
    *,
    sr: Optional[float] = 22050,
    mono: bool = True,
    offset: float = 0.0,
    duration: Optional[float] = None,
    dtype: DTypeLike = np.float32,
    res_type: str = "soxr_hq",
) -> Tuple[np.ndarray, float]:
    """Load an audio file as a floating point time series.

    Audio will be automatically resampled to the given rate
    (default ``sr=22050``).

    To preserve the native sampling rate of the file, use ``sr=None``.

    Parameters
    ----------
    path : string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object
        path to the input file.

        Any codec supported by `soundfile` or `audioread` will work.

        Any string file paths, or any object implementing Python's
        file interface (e.g. `pathlib.Path`) are supported as `path`.

        If the codec is supported by `soundfile`, then `path` can also be
        an open file descriptor (int) or an existing `soundfile.SoundFile` object.

        Pre-constructed audioread decoders are also supported here, see the example
        below.  This can be used, for example, to force a specific decoder rather
        than relying upon audioread to select one for you.

        .. warning:: audioread support is deprecated as of version 0.10.0.
            audioread support will be removed in version 1.0.

    sr : number > 0 [scalar]
        target sampling rate

        'None' uses the native sampling rate

    mono : bool
        convert signal to mono

    offset : float
        start reading after this time (in seconds)

    duration : float
        only load up to this much audio (in seconds)

    dtype : numeric type
        data type of ``y``

    res_type : str
        resample type (see note)

        .. note::
            By default, this uses `soxr`'s high-quality mode ('HQ').

            For alternative resampling modes, see `resample`

        .. note::
            `audioread` may truncate the precision of the audio data to 16 bits.

            See :ref:`ioformats` for alternate loading methods.

    Returns
    -------
    y : np.ndarray [shape=(n,) or (..., n)]
        audio time series. Multi-channel is supported.
    sr : number > 0 [scalar]
        sampling rate of ``y``

    Examples
    --------
    >>> # Load an ogg vorbis file
    >>> filename = librosa.ex('trumpet')
    >>> y, sr = librosa.load(filename)
    >>> y
    array([-1.407e-03, -4.461e-04, ..., -3.042e-05,  1.277e-05],
          dtype=float32)
    >>> sr
    22050

    >>> # Load a file and resample to 11 KHz
    >>> filename = librosa.ex('trumpet')
    >>> y, sr = librosa.load(filename, sr=11025)
    >>> y
    array([-8.746e-04, -3.363e-04, ..., -1.301e-05,  0.000e+00],
          dtype=float32)
    >>> sr
    11025

    >>> # Load 5 seconds of a file, starting 15 seconds in
    >>> filename = librosa.ex('brahms')
    >>> y, sr = librosa.load(filename, offset=15.0, duration=5.0)
    >>> y
    array([0.146, 0.144, ..., 0.128, 0.015], dtype=float32)
    >>> sr
    22050

    >>> # Load using an already open SoundFile object
    >>> import soundfile
    >>> sfo = soundfile.SoundFile(librosa.ex('brahms'))
    >>> y, sr = librosa.load(sfo)

    >>> # Load using an already open audioread object
    >>> import audioread.ffdec  # Use ffmpeg decoder
    >>> aro = audioread.ffdec.FFmpegAudioFile(librosa.ex('brahms'))
    >>> y, sr = librosa.load(aro)
    """
    if isinstance(path, tuple(audioread.available_backends())):
        # Force the audioread loader if we have a reader object already
        y, sr_native = __audioread_load(path, offset, duration, dtype)
    else:
        # Otherwise try soundfile first, and then fall back if necessary
        try:
            y, sr_native = __soundfile_load(path, offset, duration, dtype)
        except sf.SoundFileRuntimeError:
            # Only fall back to audioread for path-like inputs: a file
            # handle may have been partially consumed by soundfile and
            # cannot be safely re-read from the start.
            if not isinstance(path, (str, pathlib.PurePath)):
                # Bare raise preserves the original traceback without
                # adding an extra re-raise frame.
                raise
            # Match the deprecation messaging used by get_duration and
            # get_samplerate when falling back to audioread.
            warnings.warn(
                "PySoundFile failed. Trying audioread instead."
                "\n\tAudioread support is deprecated in librosa 0.10.0"
                " and will be removed in version 1.0.",
                stacklevel=2,
                category=FutureWarning,
            )
            y, sr_native = __audioread_load(path, offset, duration, dtype)
    # Final cleanup for dtype and contiguity
    if mono:
        y = to_mono(y)
    if sr is not None:
        # Convert from the file's native rate to the requested rate
        y = resample(y, orig_sr=sr_native, target_sr=sr, res_type=res_type)
    else:
        # sr=None: report the native sampling rate back to the caller
        sr = sr_native
    return y, sr
def __soundfile_load(path, offset, duration, dtype):
    """Read an audio buffer from ``path`` via the soundfile library.

    Returns the decoded samples (channels-first for multi-channel input)
    together with the file's native sampling rate.
    """
    # Reuse a SoundFile handed to us by the caller; otherwise open one here.
    # Either way, the ``with`` block below closes it when we are done.
    ctx = path if isinstance(path, sf.SoundFile) else sf.SoundFile(path)
    with ctx as handle:
        sr_native = handle.samplerate
        if offset:
            # Skip ahead to the requested start position (seconds -> frames)
            handle.seek(int(offset * sr_native))
        # frames=-1 tells soundfile to read through to the end of the file
        n_frames = -1 if duration is None else int(duration * sr_native)
        # Transpose so that channels lead, matching librosa's (..., n) layout
        y = handle.read(frames=n_frames, dtype=dtype, always_2d=False).T
    return y, sr_native
def __audioread_load(path, offset, duration, dtype: DTypeLike):
    """Load an audio buffer using audioread.

    This loads one block at a time, and then concatenates the results.

    Parameters mirror `__soundfile_load`: ``offset`` and ``duration`` are in
    seconds, ``dtype`` is the output sample type.  Returns ``(y, sr_native)``
    where ``y`` is channels-first for multi-channel input.
    """
    buf = []
    if isinstance(path, tuple(audioread.available_backends())):
        # If we have an audioread object already, don't bother opening
        reader = path
    else:
        # If the input was not an audioread object, try to open it
        reader = audioread.audio_open(path)
    with reader as input_file:
        sr_native = input_file.samplerate
        n_channels = input_file.channels
        # s_start / s_end are measured in interleaved samples
        # (frames * channels), matching the layout of the raw buffers
        # that audioread yields below.
        s_start = int(np.round(sr_native * offset)) * n_channels
        if duration is None:
            # No duration limit: read to the end of the file
            s_end = np.inf
        else:
            s_end = s_start + (int(np.round(sr_native * duration)) * n_channels)
        n = 0
        for frame in input_file:
            # Convert the raw PCM buffer to floats in [-1, 1]
            frame = util.buf_to_float(frame, dtype=dtype)
            n_prev = n
            n = n + len(frame)
            if n < s_start:
                # offset is after the current frame
                # keep reading
                continue
            if s_end < n_prev:
                # we're off the end. stop reading
                break
            if s_end < n:
                # the end is in this frame. crop.
                frame = frame[: int(s_end - n_prev)]  # pragma: no cover
            if n_prev <= s_start <= n:
                # beginning is in this frame
                frame = frame[(s_start - n_prev) :]
            # tack on the current frame
            buf.append(frame)
    if buf:
        y = np.concatenate(buf)
        if n_channels > 1:
            # De-interleave into (channels, samples)
            y = y.reshape((-1, n_channels)).T
    else:
        # Nothing read (e.g. offset past end of file): return an empty buffer
        y = np.empty(0, dtype=dtype)
    return y, sr_native
def stream(
    path: Union[str, int, sf.SoundFile, BinaryIO],
    *,
    block_length: int,
    frame_length: int,
    hop_length: int,
    mono: bool = True,
    offset: float = 0.0,
    duration: Optional[float] = None,
    fill_value: Optional[float] = None,
    dtype: DTypeLike = np.float32,
) -> Generator[np.ndarray, None, None]:
    """Stream audio in fixed-length buffers.

    This is primarily useful for processing large files that won't
    fit entirely in memory at once.

    Instead of loading the entire audio signal into memory (as
    in `load`), this function produces *blocks* of audio spanning
    a fixed number of frames at a specified frame length and hop
    length.

    While this function strives for similar behavior to `load`,
    there are a few caveats that users should be aware of:

    1. This function does not return audio buffers directly.
       It returns a generator, which you can iterate over
       to produce blocks of audio.  A *block*, in this context,
       refers to a buffer of audio which spans a given number of
       (potentially overlapping) frames.

    2. Automatic sample-rate conversion is not supported.
       Audio will be streamed in its native sample rate,
       so no default values are provided for ``frame_length``
       and ``hop_length``.  It is recommended that you first
       get the sampling rate for the file in question, using
       `get_samplerate`, and set these parameters accordingly.

    3. Many analyses require access to the entire signal
       to behave correctly, such as `resample`, `cqt`, or
       `beat_track`, so these methods will not be appropriate
       for streamed data.

    4. The ``block_length`` parameter specifies how many frames
       of audio will be produced per block.  Larger values will
       consume more memory, but will be more efficient to process
       down-stream.  The best value will ultimately depend on your
       application and other system constraints.

    5. By default, most librosa analyses (e.g., short-time Fourier
       transform) assume centered frames, which requires padding the
       signal at the beginning and end.  This will not work correctly
       when the signal is carved into blocks, because it would introduce
       padding in the middle of the signal.  To disable this feature,
       use ``center=False`` in all frame-based analyses.

    See the examples below for proper usage of this function.

    Parameters
    ----------
    path : string, int, sf.SoundFile, or file-like object
        path to the input file to stream.

        Any codec supported by `soundfile` is permitted here.

        An existing `soundfile.SoundFile` object may also be provided.

    block_length : int > 0
        The number of frames to include in each block.

        Note that at the end of the file, there may not be enough
        data to fill an entire block, resulting in a shorter block
        by default.  To pad the signal out so that blocks are always
        full length, set ``fill_value`` (see below).

    frame_length : int > 0
        The number of samples per frame.

    hop_length : int > 0
        The number of samples to advance between frames.

        Note that when ``hop_length < frame_length``, neighboring frames
        will overlap.  Similarly, the last frame of one *block* will overlap
        with the first frame of the next *block*.

    mono : bool
        Convert the signal to mono during streaming

    offset : float
        Start reading after this time (in seconds)

    duration : float
        Only load up to this much audio (in seconds)

    fill_value : float [optional]
        If padding the signal to produce constant-length blocks,
        this value will be used at the end of the signal.

        In most cases, ``fill_value=0`` (silence) is expected, but
        you may specify any value here.

    dtype : numeric type
        data type of audio buffers to be produced

    Yields
    ------
    y : np.ndarray
        An audio buffer of (at most)
        ``(block_length-1) * hop_length + frame_length`` samples.

    See Also
    --------
    load
    get_samplerate
    soundfile.blocks

    Examples
    --------
    Apply a short-term Fourier transform to blocks of 256 frames
    at a time.  Note that streaming operation requires left-aligned
    frames, so we must set ``center=False`` to avoid padding artifacts.

    >>> filename = librosa.ex('brahms')
    >>> sr = librosa.get_samplerate(filename)
    >>> stream = librosa.stream(filename,
    ...                         block_length=256,
    ...                         frame_length=4096,
    ...                         hop_length=1024)
    >>> for y_block in stream:
    ...     D_block = librosa.stft(y_block, center=False)

    Or compute a mel spectrogram over a stream, using a shorter frame
    and non-overlapping windows

    >>> filename = librosa.ex('brahms')
    >>> sr = librosa.get_samplerate(filename)
    >>> stream = librosa.stream(filename,
    ...                         block_length=256,
    ...                         frame_length=2048,
    ...                         hop_length=2048)
    >>> for y_block in stream:
    ...     m_block = librosa.feature.melspectrogram(y=y_block, sr=sr,
    ...                                              n_fft=2048,
    ...                                              hop_length=2048,
    ...                                              center=False)
    """
    # Validate all framing parameters up front, before touching the file
    if not util.is_positive_int(block_length):
        raise ParameterError(f"block_length={block_length} must be a positive integer")
    if not util.is_positive_int(frame_length):
        raise ParameterError(f"frame_length={frame_length} must be a positive integer")
    if not util.is_positive_int(hop_length):
        raise ParameterError(f"hop_length={hop_length} must be a positive integer")
    if isinstance(path, sf.SoundFile):
        # Reuse a caller-provided SoundFile directly
        sfo = path
    else:
        sfo = sf.SoundFile(path)
    # Get the sample rate from the file info
    sr = sfo.samplerate
    # Construct the stream
    if offset:
        # Convert the seconds-based offset to a frame index
        start = int(offset * sr)
    else:
        start = 0
    if duration:
        frames = int(duration * sr)
    else:
        # -1 tells soundfile to read through to the end of the file
        frames = -1
    # Seek the soundfile object to the starting frame
    sfo.seek(start)
    # Each block spans block_length frames; consecutive blocks share
    # (frame_length - hop_length) samples so frames can straddle blocks.
    blocks = sfo.blocks(
        blocksize=frame_length + (block_length - 1) * hop_length,
        overlap=frame_length - hop_length,
        frames=frames,
        dtype=dtype,
        always_2d=False,
        fill_value=fill_value,
    )
    for block in blocks:
        # Transpose to channels-first, matching librosa's (..., n) layout
        if mono:
            yield to_mono(block.T)
        else:
            yield block.T
def to_mono(y: np.ndarray) -> np.ndarray:
    """Convert an audio signal to mono by averaging samples across channels.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)]
        audio time series. Multi-channel is supported.

    Returns
    -------
    y_mono : np.ndarray [shape=(n,)]
        ``y`` as a monophonic time-series

    Notes
    -----
    This function caches at level 20.

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('trumpet', hq=True), mono=False)
    >>> y.shape
    (2, 117601)
    >>> y_mono = librosa.to_mono(y)
    >>> y_mono.shape
    (117601,)
    """
    # Validate the buffer. Stereo is ok here.
    util.valid_audio(y, mono=False)
    # A 1-d signal is already mono; pass it through untouched
    if y.ndim == 1:
        return y
    # Average over every leading (channel) axis, keeping the sample axis
    channel_axes = tuple(range(y.ndim - 1))
    return np.mean(y, axis=channel_axes)
def resample(
    y: np.ndarray,
    *,
    orig_sr: float,
    target_sr: float,
    res_type: str = "soxr_hq",
    fix: bool = True,
    scale: bool = False,
    axis: int = -1,
    **kwargs: Any,
) -> np.ndarray:
    """Resample a time series from orig_sr to target_sr

    By default, this uses a high-quality method (`soxr_hq`) for band-limited sinc
    interpolation. The alternate ``res_type`` values listed below offer different
    trade-offs of speed and quality.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n, ...)]
        audio time series, with `n` samples along the specified axis.

    orig_sr : number > 0 [scalar]
        original sampling rate of ``y``

    target_sr : number > 0 [scalar]
        target sampling rate

    res_type : str (default: `soxr_hq`)
        resample type

        'soxr_vhq', 'soxr_hq', 'soxr_mq' or 'soxr_lq'
            `soxr` Very high-, High-, Medium-, Low-quality FFT-based bandlimited interpolation.
            ``'soxr_hq'`` is the default setting of `soxr`.
        'soxr_qq'
            `soxr` Quick cubic interpolation (very fast, but not bandlimited)
        'kaiser_best'
            `resampy` high-quality mode
        'kaiser_fast'
            `resampy` faster method
        'fft' or 'scipy'
            `scipy.signal.resample` Fourier method.
        'polyphase'
            `scipy.signal.resample_poly` polyphase filtering. (fast)
        'linear'
            `samplerate` linear interpolation. (very fast, but not bandlimited)
        'zero_order_hold'
            `samplerate` repeat the last value between samples. (very fast, but not bandlimited)
        'sinc_best', 'sinc_medium' or 'sinc_fastest'
            `samplerate` high-, medium-, and low-quality bandlimited sinc interpolation.

        .. note::
            Not all options yield a bandlimited interpolator. If you use `soxr_qq`, `polyphase`,
            `linear`, or `zero_order_hold`, you need to be aware of possible aliasing effects.

        .. note::
            `samplerate` and `resampy` are not installed with `librosa`.
            To use `samplerate` or `resampy`, they should be installed manually::

                $ pip install samplerate
                $ pip install resampy

        .. note::
            When using ``res_type='polyphase'``, only integer sampling rates are
            supported.

    fix : bool
        adjust the length of the resampled signal to be of size exactly
        ``ceil(target_sr * len(y) / orig_sr)``

    scale : bool
        Scale the resampled signal so that ``y`` and ``y_hat`` have approximately
        equal total energy.

    axis : int
        The target axis along which to resample.  Defaults to the trailing axis.

    **kwargs : additional keyword arguments
        If ``fix==True``, additional keyword arguments to pass to
        `librosa.util.fix_length`.

    Returns
    -------
    y_hat : np.ndarray [shape=(..., n * target_sr / orig_sr, ...)]
        ``y`` resampled from ``orig_sr`` to ``target_sr`` along the target axis

    Raises
    ------
    ParameterError
        If ``res_type='polyphase'`` and ``orig_sr`` or ``target_sr`` are not both
        integer-valued.

    See Also
    --------
    librosa.util.fix_length
    scipy.signal.resample
    resampy
    samplerate.converters.resample
    soxr.resample

    Notes
    -----
    This function caches at level 20.

    Examples
    --------
    Downsample from 22 KHz to 8 KHz

    >>> y, sr = librosa.load(librosa.ex('trumpet'), sr=22050)
    >>> y_8k = librosa.resample(y, orig_sr=sr, target_sr=8000)
    >>> y.shape, y_8k.shape
    ((117601,), (42668,))
    """
    # First, validate the audio buffer
    util.valid_audio(y, mono=False)
    # Fast path: identical rates mean no work is needed
    if orig_sr == target_sr:
        return y
    ratio = float(target_sr) / orig_sr
    # Expected output length along the target axis (rounded up)
    n_samples = int(np.ceil(y.shape[axis] * ratio))
    if res_type in ("scipy", "fft"):
        y_hat = scipy.signal.resample(y, n_samples, axis=axis)
    elif res_type == "polyphase":
        if int(orig_sr) != orig_sr or int(target_sr) != target_sr:
            raise ParameterError(
                "polyphase resampling is only supported for integer-valued sampling rates."
            )
        # For polyphase resampling, we need up- and down-sampling ratios
        # We can get those from the greatest common divisor of the rates
        # as long as the rates are integrable
        orig_sr = int(orig_sr)
        target_sr = int(target_sr)
        gcd = np.gcd(orig_sr, target_sr)
        y_hat = scipy.signal.resample_poly(
            y, target_sr // gcd, orig_sr // gcd, axis=axis
        )
    elif res_type in (
        "linear",
        "zero_order_hold",
        "sinc_best",
        "sinc_fastest",
        "sinc_medium",
    ):
        # Use numpy to vectorize the resampler along the target axis
        # This is because samplerate does not support ndim>2 generally.
        y_hat = np.apply_along_axis(
            samplerate.resample, axis=axis, arr=y, ratio=ratio, converter_type=res_type
        )
    elif res_type.startswith("soxr"):
        # Use numpy to vectorize the resampler along the target axis
        # This is because soxr does not support ndim>2 generally.
        y_hat = np.apply_along_axis(
            soxr.resample,
            axis=axis,
            arr=y,
            in_rate=orig_sr,
            out_rate=target_sr,
            quality=res_type,
        )
    else:
        # Any other res_type string is handed off to resampy as a filter name
        y_hat = resampy.resample(y, orig_sr, target_sr, filter=res_type, axis=axis)
    if fix:
        # Trim or pad so the output length is exactly ceil(n * ratio)
        y_hat = util.fix_length(y_hat, size=n_samples, axis=axis, **kwargs)
    if scale:
        # Normalize so input and output have approximately equal energy
        y_hat /= np.sqrt(ratio)
    # Match dtypes
    return np.asarray(y_hat, dtype=y.dtype)
def get_duration(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    S: Optional[np.ndarray] = None,
    n_fft: int = 2048,
    hop_length: int = 512,
    center: bool = True,
    path: Optional[Union[str, os.PathLike[Any]]] = None,
    filename: Optional[Union[str, os.PathLike[Any], Deprecated]] = Deprecated(),
) -> float:
    """Compute the duration (in seconds) of an audio time series,
    feature matrix, or filename.

    Examples
    --------
    >>> # Load an example audio file
    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> librosa.get_duration(y=y, sr=sr)
    5.333378684807256

    >>> # Or directly from an audio file
    >>> librosa.get_duration(filename=librosa.ex('trumpet'))
    5.333378684807256

    >>> # Or compute duration from an STFT matrix
    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> S = librosa.stft(y)
    >>> librosa.get_duration(S=S, sr=sr)
    5.317369614512471

    >>> # Or a non-centered STFT matrix
    >>> S_left = librosa.stft(y, center=False)
    >>> librosa.get_duration(S=S_left, sr=sr)
    5.224489795918367

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        audio time series. Multi-channel is supported.

    sr : number > 0 [scalar]
        audio sampling rate of ``y``

    S : np.ndarray [shape=(..., d, t)] or None
        STFT matrix, or any STFT-derived matrix (e.g., chromagram
        or mel spectrogram).
        Durations calculated from spectrogram inputs are only accurate
        up to the frame resolution. If high precision is required,
        it is better to use the audio time series directly.

    n_fft : int > 0 [scalar]
        FFT window size for ``S``

    hop_length : int > 0 [scalar]
        number of audio samples between columns of ``S``

    center : boolean
        - If ``True``, ``S[:, t]`` is centered at ``y[t * hop_length]``
        - If ``False``, then ``S[:, t]`` begins at ``y[t * hop_length]``

    path : str, path, or file-like
        If provided, all other parameters are ignored, and the
        duration is calculated directly from the audio file.
        Note that this avoids loading the contents into memory,
        and is therefore useful for querying the duration of
        long files.

        As in ``load``, this can also be an integer or open file-handle
        that can be processed by ``soundfile``.

    filename : Deprecated
        Equivalent to ``path``

        .. warning:: This parameter has been renamed to ``path`` in 0.10.
            Support for ``filename=`` will be removed in 1.0.

    Returns
    -------
    d : float >= 0
        Duration (in seconds) of the input time series or spectrogram.

    Raises
    ------
    ParameterError
        if none of ``y``, ``S``, or ``path`` are provided.

    Notes
    -----
    `get_duration` can be applied to a file (``path``), a spectrogram (``S``),
    or audio buffer (``y, sr``).  Only one of these three options should be
    provided.  If you do provide multiple options (e.g., ``path`` and ``S``),
    then ``path`` takes precedence over ``S``, and ``S`` takes precedence over
    ``(y, sr)``.
    """
    # Resolve the deprecated ``filename=`` keyword into ``path``,
    # emitting a deprecation warning if the old name was used.
    path = rename_kw(
        old_name="filename",
        old_value=filename,
        new_name="path",
        new_value=path,
        version_deprecated="0.10.0",
        version_removed="1.0",
    )
    if path is not None:
        # File input takes precedence: query the header without
        # loading the audio contents into memory.
        try:
            return sf.info(path).duration  # type: ignore
        except sf.SoundFileRuntimeError:
            warnings.warn(
                "PySoundFile failed. Trying audioread instead."
                "\n\tAudioread support is deprecated in librosa 0.10.0"
                " and will be removed in version 1.0.",
                stacklevel=2,
                category=FutureWarning,
            )
            with audioread.audio_open(path) as fdesc:
                return fdesc.duration  # type: ignore
    if y is None:
        if S is None:
            raise ParameterError("At least one of (y, sr), S, or path must be provided")
        # Infer the sample count covered by the spectrogram frames
        n_frames = S.shape[-1]
        n_samples = n_fft + hop_length * (n_frames - 1)
        # If centered, we lose half a window from each end of S
        if center:
            n_samples = n_samples - 2 * int(n_fft // 2)
    else:
        # Audio buffer input: duration is just samples / rate
        n_samples = y.shape[-1]
    return float(n_samples) / sr
def get_samplerate(path: Union[str, int, sf.SoundFile, BinaryIO]) -> float:
    """Get the sampling rate for a given file.

    Parameters
    ----------
    path : string, int, soundfile.SoundFile, or file-like
        The path to the file to be loaded
        As in ``load``, this can also be an integer or open file-handle
        that can be processed by `soundfile`.
        An existing `soundfile.SoundFile` object can also be supplied.

    Returns
    -------
    sr : number > 0
        The sampling rate of the given audio file

    Examples
    --------
    Get the sampling rate for the included audio file

    >>> path = librosa.ex('trumpet')
    >>> librosa.get_samplerate(path)
    22050
    """
    try:
        # An open SoundFile already knows its rate; otherwise query
        # the file header without decoding any audio.
        if isinstance(path, sf.SoundFile):
            rate = path.samplerate
        else:
            rate = sf.info(path).samplerate
        return rate  # type: ignore
    except sf.SoundFileRuntimeError:
        warnings.warn(
            "PySoundFile failed. Trying audioread instead."
            "\n\tAudioread support is deprecated in librosa 0.10.0"
            " and will be removed in version 1.0.",
            stacklevel=2,
            category=FutureWarning,
        )
        # Fall back to audioread for formats soundfile cannot open
        with audioread.audio_open(path) as fh:
            return fh.samplerate  # type: ignore
def autocorrelate(
    y: np.ndarray, *, max_size: Optional[int] = None, axis: int = -1
) -> np.ndarray:
    """Bounded-lag auto-correlation

    Parameters
    ----------
    y : np.ndarray
        array to autocorrelate
    max_size : int > 0 or None
        maximum correlation lag.
        If unspecified, defaults to ``y.shape[axis]`` (unbounded)
    axis : int
        The axis along which to autocorrelate.
        By default, the last axis (-1) is taken.

    Returns
    -------
    z : np.ndarray
        truncated autocorrelation ``y*y`` along the specified axis.
        If ``max_size`` is specified, then ``z.shape[axis]`` is bounded
        to ``max_size``.

    Notes
    -----
    This function caches at level 20.

    Examples
    --------
    Compute full autocorrelation of ``y``

    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> librosa.autocorrelate(y)
    array([ 6.899e+02,  6.236e+02, ...,  3.710e-08, -1.796e-08])

    Compute onset strength auto-correlation up to 4 seconds

    >>> import matplotlib.pyplot as plt
    >>> odf = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
    >>> ac = librosa.autocorrelate(odf, max_size=4 * sr // 512)
    >>> fig, ax = plt.subplots()
    >>> ax.plot(ac)
    >>> ax.set(title='Auto-correlation', xlabel='Lag (frames)')
    """
    n = y.shape[axis]
    # Clamp the requested lag bound to the signal length
    if max_size is None:
        bound = n
    else:
        bound = int(min(max_size, n))
    fft = get_fftlib()
    # Zero-pad to 2n-1 so the circular convolution equals the linear one
    n_pad = 2 * n - 1
    if np.iscomplexobj(y):
        # Complex input: full FFT, power spectrum, inverse FFT
        spectrum = fft.fft(y, n=n_pad, axis=axis)
        ac_full = fft.ifft(util.abs2(spectrum), n=n_pad, axis=axis)
    else:
        # Real input: the rfft/irfft pair is cheaper and equivalent
        spectrum = fft.rfft(y, n=n_pad, axis=axis)
        ac_full = fft.irfft(util.abs2(spectrum), n=n_pad, axis=axis)
    # Keep only the first ``bound`` lags along the target axis
    indexer = [slice(None)] * ac_full.ndim
    indexer[axis] = slice(bound)
    result: np.ndarray = ac_full[tuple(indexer)]
    return result
def lpc(y: np.ndarray, *, order: int, axis: int = -1) -> np.ndarray:
    """Linear Prediction Coefficients via Burg's method

    This function applies Burg's method to estimate coefficients of a linear
    filter on ``y`` of order ``order``.  Burg's method is an extension to the
    Yule-Walker approach, which are both sometimes referred to as LPC parameter
    estimation by autocorrelation.

    It follows the description and implementation approach described in the
    introduction by Marple. [#]_  N.B. This paper describes a different method, which
    is not implemented here, but has been chosen for its clear explanation of
    Burg's technique in its introduction.

    .. [#] Larry Marple.
           A New Autoregressive Spectrum Analysis Algorithm.
           IEEE Transactions on Acoustics, Speech, and Signal Processing
           vol 28, no. 4, 1980.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)]
        Time series to fit. Multi-channel is supported.
    order : int > 0
        Order of the linear filter
    axis : int
        Axis along which to compute the coefficients

    Returns
    -------
    a : np.ndarray [shape=(..., order + 1)]
        LP prediction error coefficients, i.e. filter denominator polynomial.
        Note that the length along the specified ``axis`` will be ``order+1``.

    Raises
    ------
    ParameterError
        - If ``y`` is not valid audio as per `librosa.util.valid_audio`
        - If ``order < 1`` or not integer
    FloatingPointError
        - If ``y`` is ill-conditioned

    See Also
    --------
    scipy.signal.lfilter

    Examples
    --------
    Compute LP coefficients of y at order 16 on entire series

    >>> y, sr = librosa.load(librosa.ex('libri1'))
    >>> librosa.lpc(y, order=16)

    Compute LP coefficients, and plot LP estimate of original series

    >>> import matplotlib.pyplot as plt
    >>> import scipy
    >>> y, sr = librosa.load(librosa.ex('libri1'), duration=0.020)
    >>> a = librosa.lpc(y, order=2)
    >>> b = np.hstack([[0], -1 * a[1:]])
    >>> y_hat = scipy.signal.lfilter(b, [1], y)
    >>> fig, ax = plt.subplots()
    >>> ax.plot(y)
    >>> ax.plot(y_hat, linestyle='--')
    >>> ax.legend(['y', 'y_hat'])
    >>> ax.set_title('LP Model Forward Prediction')
    """
    if not util.is_positive_int(order):
        raise ParameterError(f"order={order} must be an integer > 0")
    util.valid_audio(y, mono=False)
    # numba-friendly layout: bring the prediction axis to the front
    y_front = y.swapaxes(axis, 0)
    work_shape = list(y_front.shape)
    work_shape[0] = order + 1
    # Coefficient buffers for the current and previous model orders;
    # a_0 is always 1 by convention.
    coeffs = np.zeros(tuple(work_shape), dtype=y_front.dtype)
    coeffs[0] = 1
    coeffs_prev = coeffs.copy()
    # Scratch buffers (leading axis of length 1) for the reflection
    # coefficient and its denominator in the recursion.
    work_shape[0] = 1
    kr = np.zeros(work_shape, dtype=y_front.dtype)
    denom = kr.copy()
    # Small bias keeps the denominator away from exact zero
    eps = util.tiny(denom)
    fitted = __lpc(y_front, order, coeffs, coeffs_prev, kr, denom, eps)
    # Restore the caller's axis ordering
    return np.swapaxes(fitted, 0, axis)
| # type: ignore | |
def __lpc(
    y: np.ndarray,
    order: int,
    ar_coeffs: np.ndarray,
    ar_coeffs_prev: np.ndarray,
    reflect_coeff: np.ndarray,
    den: np.ndarray,
    epsilon: float,
) -> np.ndarray:
    """Numerical kernel for LPC estimation via Burg's method.

    The caller (``lpc``) has already moved the analysis axis to position 0,
    so all indexing here is along axis 0; trailing axes (e.g. channels)
    broadcast through untouched.

    Parameters
    ----------
    y : np.ndarray
        Input signal with time along axis 0.
    order : int > 0
        Order of the linear filter to fit.
    ar_coeffs, ar_coeffs_prev : np.ndarray [shape=(order+1, ...)]
        Pre-allocated coefficient buffers; ``ar_coeffs[0]`` is initialized
        to 1 by the caller. Both are updated in place and swapped each
        iteration.
    reflect_coeff, den : np.ndarray [shape=(1, ...)]
        Pre-allocated scratch buffers for the reflection coefficient and
        the denominator recursion; updated in place.
    epsilon : float
        Small bias added to the denominator to avoid division by zero.

    Returns
    -------
    ar_coeffs : np.ndarray [shape=(order+1, ...)]
        The fitted prediction-error filter coefficients (one of the two
        buffers passed in, after the final swap).
    """
    # This implementation follows the description of Burg's algorithm given in
    # section III of Marple's paper referenced in the docstring.
    #
    # We use the Levinson-Durbin recursion to compute AR coefficients for each
    # increasing model order by using those from the last. We maintain two
    # arrays and then flip them each time we increase the model order so that
    # we may use all the coefficients from the previous order while we compute
    # those for the new one. These two arrays hold ar_coeffs for order M and
    # order M-1. (Corresponding to a_{M,k} and a_{M-1,k} in eqn 5)
    # These two arrays hold the forward and backward prediction error. They
    # correspond to f_{M-1,k} and b_{M-1,k} in eqns 10, 11, 13 and 14 of
    # Marple. First they are used to compute the reflection coefficient at
    # order M from M-1 then are re-used as f_{M,k} and b_{M,k} for each
    # iteration of the below loop
    fwd_pred_error = y[1:]
    bwd_pred_error = y[:-1]
    # DEN_{M} from eqn 16 of Marple.
    den[0] = np.sum(fwd_pred_error**2 + bwd_pred_error**2, axis=0)
    for i in range(order):
        # can be removed if we keep the epsilon bias
        # if np.any(den <= 0):
        #     raise FloatingPointError("numerical error, input ill-conditioned?")
        # Eqn 15 of Marple, with fwd_pred_error and bwd_pred_error
        # corresponding to f_{M-1,k+1} and b{M-1,k} and the result as a_{M,M}
        reflect_coeff[0] = np.sum(bwd_pred_error * fwd_pred_error, axis=0)
        reflect_coeff[0] *= -2
        reflect_coeff[0] /= den[0] + epsilon
        # Now we use the reflection coefficient and the AR coefficients from
        # the last model order to compute all of the AR coefficients for the
        # current one. This is the Levinson-Durbin recursion described in
        # eqn 5.
        # Note 1: We don't have to care about complex conjugates as our signals
        # are all real-valued
        # Note 2: j counts 1..order+1, i-j+1 counts order..0
        # Note 3: The first element of ar_coeffs* is always 1, which copies in
        # the reflection coefficient at the end of the new AR coefficient array
        # after the preceding coefficients
        ar_coeffs_prev, ar_coeffs = ar_coeffs, ar_coeffs_prev
        for j in range(1, i + 2):
            # reflection multiply should be broadcast
            ar_coeffs[j] = (
                ar_coeffs_prev[j] + reflect_coeff[0] * ar_coeffs_prev[i - j + 1]
            )
        # Update the forward and backward prediction errors corresponding to
        # eqns 13 and 14. We start with f_{M-1,k+1} and b_{M-1,k} and use them
        # to compute f_{M,k} and b_{M,k}
        fwd_pred_error_tmp = fwd_pred_error
        fwd_pred_error = fwd_pred_error + reflect_coeff * bwd_pred_error
        bwd_pred_error = bwd_pred_error + reflect_coeff * fwd_pred_error_tmp
        # SNIP - we are now done with order M and advance. M-1 <- M
        # Compute DEN_{M} using the recursion from eqn 17.
        #
        # reflect_coeff = a_{M-1,M-1}      (we have advanced M)
        # den =  DEN_{M-1}                 (rhs)
        # bwd_pred_error = b_{M-1,N-M+1}   (we have advanced M)
        # fwd_pred_error = f_{M-1,k}       (we have advanced M)
        # den <- DEN_{M}                   (lhs)
        #
        q = 1.0 - reflect_coeff[0] ** 2
        den[0] = q * den[0] - bwd_pred_error[-1] ** 2 - fwd_pred_error[0] ** 2
        # Shift up forward error.
        #
        # fwd_pred_error <- f_{M-1,k+1}
        # bwd_pred_error <- b_{M-1,k}
        #
        # N.B. We do this after computing the denominator using eqn 17 but
        # before using it in the numerator in eqn 15.
        fwd_pred_error = fwd_pred_error[1:]
        bwd_pred_error = bwd_pred_error[:-1]
    return ar_coeffs
| # type: ignore | |
def _zc_stencil(x: np.ndarray, threshold: float, zero_pos: bool) -> np.ndarray:
    """Stencil to compute zero crossings.

    NOTE(review): this file imports ``stencil`` from numba, and the body
    indexes ``x[0]``/``x[-1]`` as if they were relative window offsets
    (current sample vs. previous sample) — presumably this function is
    compiled with ``@numba.stencil``, whose decorator is not visible in
    this view; confirm before modifying.

    Values within ``threshold`` of zero are treated as exactly zero before
    comparing signs. With ``zero_pos=True``, sign is taken from the IEEE
    sign bit (so +0 is positive); otherwise ``np.sign`` is used, giving 0
    its own distinct "sign".
    """
    # Current sample, clipped to 0 if within the threshold band
    x0 = x[0]
    if -threshold <= x0 <= threshold:
        x0 = 0
    # Previous sample, clipped the same way
    x1 = x[-1]
    if -threshold <= x1 <= threshold:
        x1 = 0
    if zero_pos:
        return np.signbit(x0) != np.signbit(x1)  # type: ignore
    else:
        return np.sign(x0) != np.sign(x1)  # type: ignore
| # type: ignore | |
def _zc_wrapper(
    x: np.ndarray,
    threshold: float,
    zero_pos: bool,
    y: np.ndarray,
) -> None:  # pragma: no cover
    """Vectorized wrapper for zero crossing stencil.

    Writes the boolean crossing indicators into ``y`` in place (``y`` is
    an out-parameter, as in a numba ``guvectorize`` signature — the file
    imports ``guvectorize``, though the decorator is not visible here;
    confirm before modifying).
    """
    y[:] = _zc_stencil(x, threshold, zero_pos)
def zero_crossings(
    y: np.ndarray,
    *,
    threshold: float = 1e-10,
    ref_magnitude: Optional[Union[float, Callable]] = None,
    pad: bool = True,
    zero_pos: bool = True,
    axis: int = -1,
) -> np.ndarray:
    """Find the zero-crossings of a signal ``y``: positions ``i`` where
    the sign of ``y`` changes between consecutive samples along ``axis``.

    Parameters
    ----------
    y : np.ndarray
        The input array.  Multi-dimensional input is processed along the
        specified ``axis``.
    threshold : float >= 0
        Values with ``-threshold <= y <= threshold`` are treated as zero
        before comparing signs.
    ref_magnitude : float > 0 or callable
        If numeric, ``threshold`` is scaled by ``ref_magnitude``.
        If callable, ``threshold`` is scaled by ``ref_magnitude(np.abs(y))``.
    pad : boolean
        If ``True``, the first position along ``axis`` is marked as a
        crossing; if ``False``, it is marked as a non-crossing.
    zero_pos : boolean
        If ``True``, the value 0 is treated as having positive sign.
        If ``False``, 0, -1, and +1 all have distinct signs.
    axis : int
        Axis along which to compute zero-crossings.

    Returns
    -------
    zero_crossings : np.ndarray [shape=y.shape, dtype=boolean]
        Boolean indicator array of zero-crossings in ``y`` along ``axis``.

    Notes
    -----
    This function caches at level 20.

    Examples
    --------
    >>> y = np.sin(np.linspace(0, 4 * 2 * np.pi, 20))
    >>> z = librosa.zero_crossings(y)
    >>> np.nonzero(z)
    (array([ 0,  3,  5,  8, 10, 12, 15, 17, 19]),)
    """
    # Rescale the clipping threshold if a reference magnitude was given
    if ref_magnitude is not None:
        scale = ref_magnitude(np.abs(y)) if callable(ref_magnitude) else ref_magnitude
        threshold = threshold * scale

    # View the data with the target axis in the trailing position,
    # and prepare an output buffer viewed the same way
    y_view = y.swapaxes(-1, axis)
    crossings = np.empty_like(y, dtype=bool)
    out_view = crossings.swapaxes(-1, axis)

    # Populate the output in place via the vectorized stencil
    _zc_wrapper(y_view, threshold, zero_pos, out_view)

    # The first sample has no predecessor: mark it according to ``pad``
    out_view[..., 0] = pad
    return crossings
def clicks(
    *,
    times: Optional[_SequenceLike[_FloatLike_co]] = None,
    frames: Optional[_SequenceLike[_IntLike_co]] = None,
    sr: float = 22050,
    hop_length: int = 512,
    click_freq: float = 1000.0,
    click_duration: float = 0.1,
    click: Optional[np.ndarray] = None,
    length: Optional[int] = None,
) -> np.ndarray:
    """Construct a "click track": a signal with a ``click`` sound placed
    at each specified time or frame position.

    Parameters
    ----------
    times : np.ndarray or None
        times at which to place clicks, in seconds
    frames : np.ndarray or None
        frame indices at which to place clicks
    sr : number > 0
        sampling rate of the output signal
    hop_length : int > 0
        number of samples between frames, used when positions are given
        by ``frames``
    click_freq : float > 0
        frequency (in Hz) of the default click signal. Default is 1KHz.
    click_duration : float > 0
        duration (in seconds) of the default click signal. Default is 100ms.
    click : np.ndarray or None
        (optional) click sample to use instead of the default click.
        Multi-channel is supported.
    length : int > 0
        desired number of samples in the output signal

    Returns
    -------
    click_signal : np.ndarray
        Synthesized click signal.  Monophonic by default, or matching the
        channel layout of a provided ``click`` signal.

    Raises
    ------
    ParameterError
        - If neither ``times`` nor ``frames`` are provided.
        - If any of ``click_freq``, ``click_duration``, or ``length``
          are out of range.

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('choice'), duration=10)
    >>> tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    >>> y_beats = librosa.clicks(frames=beats, sr=sr, length=len(y))
    """
    # Resolve event locations to sample indices
    positions: np.ndarray
    if times is not None:
        positions = time_to_samples(times, sr=sr)
    elif frames is not None:
        positions = frames_to_samples(frames, hop_length=hop_length)
    else:
        raise ParameterError('either "times" or "frames" must be provided')

    if click is None:
        # Synthesize the default click: exponentially decaying sinusoid
        if click_duration <= 0:
            raise ParameterError("click_duration must be strictly positive")
        if click_freq <= 0:
            raise ParameterError("click_freq must be strictly positive")
        n_click = int(np.round(sr * click_duration))
        angular_freq = 2 * np.pi * click_freq / float(sr)
        click = np.logspace(0, -10, num=n_click, base=2.0)
        click *= np.sin(angular_freq * np.arange(len(click)))
    else:
        # Validate the user-supplied click buffer
        util.valid_audio(click, mono=False)

    if length is None:
        # Long enough to fit the last click entirely
        length = positions.max() + click.shape[-1]
    elif length < 1:
        raise ParameterError("length must be a positive integer")
    else:
        # Drop any events beyond the output boundary
        positions = positions[positions < length]

    # Allocate the output, matching the click's channel layout
    out_shape = list(click.shape)
    out_shape[-1] = length
    click_signal = np.zeros(out_shape, dtype=np.float32)

    # Overlay a (possibly truncated) click at each event position
    for onset in positions:
        n_keep = min(click.shape[-1], length - onset)
        click_signal[..., onset : onset + n_keep] += click[..., :n_keep]

    return click_signal
| def tone( | |
| frequency: _FloatLike_co, | |
| *, | |
| sr: float = 22050, | |
| length: Optional[int] = None, | |
| duration: Optional[float] = None, | |
| phi: Optional[float] = None, | |
| ) -> np.ndarray: | |
| """Construct a pure tone (cosine) signal at a given frequency. | |
| Parameters | |
| ---------- | |
| frequency : float > 0 | |
| frequency | |
| sr : number > 0 | |
| desired sampling rate of the output signal | |
| length : int > 0 | |
| desired number of samples in the output signal. | |
| When both ``duration`` and ``length`` are defined, | |
| ``length`` takes priority. | |
| duration : float > 0 | |
| desired duration in seconds. | |
| When both ``duration`` and ``length`` are defined, | |
| ``length`` takes priority. | |
| phi : float or None | |
| phase offset, in radians. If unspecified, defaults to ``-np.pi * 0.5``. | |
| Returns | |
| ------- | |
| tone_signal : np.ndarray [shape=(length,), dtype=float64] | |
| Synthesized pure sine tone signal | |
| Raises | |
| ------ | |
| ParameterError | |
| - If ``frequency`` is not provided. | |
| - If neither ``length`` nor ``duration`` are provided. | |
| Examples | |
| -------- | |
| Generate a pure sine tone A4 | |
| >>> tone = librosa.tone(440, duration=1) | |
| Or generate the same signal using `length` | |
| >>> tone = librosa.tone(440, sr=22050, length=22050) | |
| Display spectrogram | |
| >>> import matplotlib.pyplot as plt | |
| >>> fig, ax = plt.subplots() | |
| >>> S = librosa.feature.melspectrogram(y=tone) | |
| >>> librosa.display.specshow(librosa.power_to_db(S, ref=np.max), | |
| ... x_axis='time', y_axis='mel', ax=ax) | |
| """ | |
| if frequency is None: | |
| raise ParameterError('"frequency" must be provided') | |
| # Compute signal length | |
| if length is None: | |
| if duration is None: | |
| raise ParameterError('either "length" or "duration" must be provided') | |
| length = int(np.ceil(duration * sr)) | |
| if phi is None: | |
| phi = -np.pi * 0.5 | |
| y: np.ndarray = np.cos(2 * np.pi * frequency * np.arange(length) / sr + phi) | |
| return y | |
| def chirp( | |
| *, | |
| fmin: _FloatLike_co, | |
| fmax: _FloatLike_co, | |
| sr: float = 22050, | |
| length: Optional[int] = None, | |
| duration: Optional[float] = None, | |
| linear: bool = False, | |
| phi: Optional[float] = None, | |
| ) -> np.ndarray: | |
| """Construct a "chirp" or "sine-sweep" signal. | |
| The chirp sweeps from frequency ``fmin`` to ``fmax`` (in Hz). | |
| Parameters | |
| ---------- | |
| fmin : float > 0 | |
| initial frequency | |
| fmax : float > 0 | |
| final frequency | |
| sr : number > 0 | |
| desired sampling rate of the output signal | |
| length : int > 0 | |
| desired number of samples in the output signal. | |
| When both ``duration`` and ``length`` are defined, | |
| ``length`` takes priority. | |
| duration : float > 0 | |
| desired duration in seconds. | |
| When both ``duration`` and ``length`` are defined, | |
| ``length`` takes priority. | |
| linear : boolean | |
| - If ``True``, use a linear sweep, i.e., frequency changes linearly with time | |
| - If ``False``, use a exponential sweep. | |
| Default is ``False``. | |
| phi : float or None | |
| phase offset, in radians. | |
| If unspecified, defaults to ``-np.pi * 0.5``. | |
| Returns | |
| ------- | |
| chirp_signal : np.ndarray [shape=(length,), dtype=float64] | |
| Synthesized chirp signal | |
| Raises | |
| ------ | |
| ParameterError | |
| - If either ``fmin`` or ``fmax`` are not provided. | |
| - If neither ``length`` nor ``duration`` are provided. | |
| See Also | |
| -------- | |
| scipy.signal.chirp | |
| Examples | |
| -------- | |
| Generate a exponential chirp from A2 to A8 | |
| >>> exponential_chirp = librosa.chirp(fmin=110, fmax=110*64, duration=1) | |
| Or generate the same signal using ``length`` | |
| >>> exponential_chirp = librosa.chirp(fmin=110, fmax=110*64, sr=22050, length=22050) | |
| Or generate a linear chirp instead | |
| >>> linear_chirp = librosa.chirp(fmin=110, fmax=110*64, duration=1, linear=True) | |
| Display spectrogram for both exponential and linear chirps. | |
| >>> import matplotlib.pyplot as plt | |
| >>> fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True) | |
| >>> S_exponential = np.abs(librosa.stft(y=exponential_chirp)) | |
| >>> librosa.display.specshow(librosa.amplitude_to_db(S_exponential, ref=np.max), | |
| ... x_axis='time', y_axis='linear', ax=ax[0]) | |
| >>> ax[0].set(title='Exponential chirp', xlabel=None) | |
| >>> ax[0].label_outer() | |
| >>> S_linear = np.abs(librosa.stft(y=linear_chirp)) | |
| >>> librosa.display.specshow(librosa.amplitude_to_db(S_linear, ref=np.max), | |
| ... x_axis='time', y_axis='linear', ax=ax[1]) | |
| >>> ax[1].set(title='Linear chirp') | |
| """ | |
| if fmin is None or fmax is None: | |
| raise ParameterError('both "fmin" and "fmax" must be provided') | |
| # Compute signal duration | |
| period = 1.0 / sr | |
| if length is None: | |
| if duration is None: | |
| raise ParameterError('either "length" or "duration" must be provided') | |
| else: | |
| duration = period * length | |
| if phi is None: | |
| phi = -np.pi * 0.5 | |
| method = "linear" if linear else "logarithmic" | |
| y: np.ndarray = scipy.signal.chirp( | |
| np.arange(int(np.ceil(duration * sr))) / sr, | |
| fmin, | |
| duration, | |
| fmax, | |
| method=method, | |
| phi=phi / np.pi * 180, # scipy.signal.chirp uses degrees for phase offset | |
| ) | |
| return y | |
| def mu_compress( | |
| x: Union[np.ndarray, _FloatLike_co], *, mu: float = 255, quantize: bool = True | |
| ) -> np.ndarray: | |
| """mu-law compression | |
| Given an input signal ``-1 <= x <= 1``, the mu-law compression | |
| is calculated by:: | |
| sign(x) * ln(1 + mu * abs(x)) / ln(1 + mu) | |
| Parameters | |
| ---------- | |
| x : np.ndarray with values in [-1, +1] | |
| The input signal to compress | |
| mu : positive number | |
| The compression parameter. Values of the form ``2**n - 1`` | |
| (e.g., 15, 31, 63, etc.) are most common. | |
| quantize : bool | |
| If ``True``, quantize the compressed values into ``1 + mu`` | |
| distinct integer values. | |
| If ``False``, mu-law compression is applied without quantization. | |
| Returns | |
| ------- | |
| x_compressed : np.ndarray | |
| The compressed signal. | |
| Raises | |
| ------ | |
| ParameterError | |
| If ``x`` has values outside the range [-1, +1] | |
| If ``mu <= 0`` | |
| See Also | |
| -------- | |
| mu_expand | |
| Examples | |
| -------- | |
| Compression without quantization | |
| >>> x = np.linspace(-1, 1, num=16) | |
| >>> x | |
| array([-1. , -0.86666667, -0.73333333, -0.6 , -0.46666667, | |
| -0.33333333, -0.2 , -0.06666667, 0.06666667, 0.2 , | |
| 0.33333333, 0.46666667, 0.6 , 0.73333333, 0.86666667, | |
| 1. ]) | |
| >>> y = librosa.mu_compress(x, quantize=False) | |
| >>> y | |
| array([-1. , -0.97430198, -0.94432361, -0.90834832, -0.86336132, | |
| -0.80328309, -0.71255496, -0.52124063, 0.52124063, 0.71255496, | |
| 0.80328309, 0.86336132, 0.90834832, 0.94432361, 0.97430198, | |
| 1. ]) | |
| Compression with quantization | |
| >>> y = librosa.mu_compress(x, quantize=True) | |
| >>> y | |
| array([-128, -124, -120, -116, -110, -102, -91, -66, 66, 91, 102, | |
| 110, 116, 120, 124, 127]) | |
| Compression with quantization and a smaller range | |
| >>> y = librosa.mu_compress(x, mu=15, quantize=True) | |
| >>> y | |
| array([-8, -7, -7, -6, -6, -5, -4, -2, 2, 4, 5, 6, 6, 7, 7, 7]) | |
| """ | |
| if mu <= 0: | |
| raise ParameterError( | |
| f"mu-law compression parameter mu={mu} must be strictly positive." | |
| ) | |
| if np.any(x < -1) or np.any(x > 1): | |
| raise ParameterError(f"mu-law input x={x} must be in the range [-1, +1].") | |
| x_comp: np.ndarray = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu) | |
| if quantize: | |
| y: np.ndarray = ( | |
| np.digitize( | |
| x_comp, np.linspace(-1, 1, num=int(1 + mu), endpoint=True), right=True | |
| ) | |
| - int(mu + 1) // 2 | |
| ) | |
| return y | |
| return x_comp | |
| def mu_expand( | |
| x: Union[np.ndarray, _FloatLike_co], *, mu: float = 255.0, quantize: bool = True | |
| ) -> np.ndarray: | |
| """mu-law expansion | |
| This function is the inverse of ``mu_compress``. Given a mu-law compressed | |
| signal ``-1 <= x <= 1``, the mu-law expansion is calculated by:: | |
| sign(x) * (1 / mu) * ((1 + mu)**abs(x) - 1) | |
| Parameters | |
| ---------- | |
| x : np.ndarray | |
| The compressed signal. | |
| If ``quantize=True``, values must be in the range [-1, +1]. | |
| mu : positive number | |
| The compression parameter. Values of the form ``2**n - 1`` | |
| (e.g., 15, 31, 63, etc.) are most common. | |
| quantize : boolean | |
| If ``True``, the input is assumed to be quantized to | |
| ``1 + mu`` distinct integer values. | |
| Returns | |
| ------- | |
| x_expanded : np.ndarray with values in the range [-1, +1] | |
| The mu-law expanded signal. | |
| Raises | |
| ------ | |
| ParameterError | |
| If ``x`` has values outside the range [-1, +1] and ``quantize=False`` | |
| If ``mu <= 0`` | |
| See Also | |
| -------- | |
| mu_compress | |
| Examples | |
| -------- | |
| Compress and expand without quantization | |
| >>> x = np.linspace(-1, 1, num=16) | |
| >>> x | |
| array([-1. , -0.86666667, -0.73333333, -0.6 , -0.46666667, | |
| -0.33333333, -0.2 , -0.06666667, 0.06666667, 0.2 , | |
| 0.33333333, 0.46666667, 0.6 , 0.73333333, 0.86666667, | |
| 1. ]) | |
| >>> y = librosa.mu_compress(x, quantize=False) | |
| >>> y | |
| array([-1. , -0.97430198, -0.94432361, -0.90834832, -0.86336132, | |
| -0.80328309, -0.71255496, -0.52124063, 0.52124063, 0.71255496, | |
| 0.80328309, 0.86336132, 0.90834832, 0.94432361, 0.97430198, | |
| 1. ]) | |
| >>> z = librosa.mu_expand(y, quantize=False) | |
| >>> z | |
| array([-1. , -0.86666667, -0.73333333, -0.6 , -0.46666667, | |
| -0.33333333, -0.2 , -0.06666667, 0.06666667, 0.2 , | |
| 0.33333333, 0.46666667, 0.6 , 0.73333333, 0.86666667, | |
| 1. ]) | |
| Compress and expand with quantization. Note that this necessarily | |
| incurs quantization error, particularly for values near +-1. | |
| >>> y = librosa.mu_compress(x, quantize=True) | |
| >>> y | |
| array([-128, -124, -120, -116, -110, -102, -91, -66, 66, 91, 102, | |
| 110, 116, 120, 124, 127]) | |
| >>> z = librosa.mu_expand(y, quantize=True) | |
| array([-1. , -0.84027248, -0.70595818, -0.59301377, -0.4563785 , | |
| -0.32155973, -0.19817918, -0.06450245, 0.06450245, 0.19817918, | |
| 0.32155973, 0.4563785 , 0.59301377, 0.70595818, 0.84027248, | |
| 0.95743702]) | |
| """ | |
| if mu <= 0: | |
| raise ParameterError( | |
| f"Inverse mu-law compression parameter mu={mu} must be strictly positive." | |
| ) | |
| if quantize: | |
| x = x * 2.0 / (1 + mu) | |
| if np.any(x < -1) or np.any(x > 1): | |
| raise ParameterError( | |
| f"Inverse mu-law input x={x} must be in the range [-1, +1]." | |
| ) | |
| return np.sign(x) / mu * (np.power(1 + mu, np.abs(x)) - 1) | |