Spaces:

MusoraProductDepartment
/

jam-tracks

Sleeping

File size: 2,833 Bytes

a0fcd39

"""Audio utility functions for format conversion and processing."""

import io
import numpy as np
import soundfile as sf


def to_mono(audio: np.ndarray) -> np.ndarray:
    """
    Collapse multi-channel audio to a single channel.

    Two-dimensional input is averaged across its second axis; input of any
    other dimensionality is handed back untouched.

    Args:
        audio: Audio array, shape (samples,) for mono or (samples, channels) for stereo

    Returns:
        Mono audio array, shape (samples,)
    """
    # Anything that is not 2-D is passed through as-is
    if audio.ndim != 2:
        return audio

    # Average the channels of every sample frame
    return audio.mean(axis=1)


def to_float32(audio: np.ndarray) -> np.ndarray:
    """
    Cast audio to float32 and scale it down if it exceeds the [-1, 1] range.

    The array is first cast to float32. If its largest absolute sample is
    greater than 1.0, the whole signal is divided by that peak; otherwise
    the cast values are returned unchanged. Empty input yields an empty
    float32 array instead of raising.

    Args:
        audio: Audio array in any numeric format

    Returns:
        Audio array as float32, scaled to fit [-1, 1] when its peak exceeds 1.0
    """
    audio = audio.astype(np.float32)

    # An empty array has no peak; np.max would raise ValueError on it
    if audio.size == 0:
        return audio

    # Only rescale when the signal is not already within range
    max_val = np.max(np.abs(audio))
    if max_val > 1.0:
        audio = audio / max_val

    return audio


def normalize(audio: np.ndarray, peak: float = 0.95) -> np.ndarray:
    """
    Normalize audio so peak amplitude equals given value.

    Empty or all-zero input is returned unchanged, since there is no peak
    to scale against.

    Args:
        audio: Audio array
        peak: Target peak amplitude (default 0.95 to avoid clipping)

    Returns:
        Normalized audio array
    """
    samples = np.asarray(audio)

    # np.max raises ValueError on a zero-size array
    if samples.size == 0:
        return audio

    # abs() of the most negative fixed-width signed integer wraps around
    # (e.g. int16 -32768 stays -32768), so measure the peak in float64
    if np.issubdtype(samples.dtype, np.signedinteger):
        samples = samples.astype(np.float64)

    max_val = np.max(np.abs(samples))
    if max_val > 0:
        audio = audio / max_val * peak
    return audio


def pad_or_trim(audio: np.ndarray, target_length: int) -> np.ndarray:
    """
    Pad with zeros or trim audio to target length.

    Length is measured along the first axis. Any trailing dimensions
    (e.g. the channel axis of a (samples, channels) array) are preserved,
    so both mono and multi-channel audio can be padded.

    Args:
        audio: Audio array
        target_length: Desired length in samples (non-negative)

    Returns:
        Audio array with exactly target_length samples
    """
    current_length = len(audio)

    if current_length == target_length:
        return audio

    if current_length > target_length:
        return audio[:target_length]

    # Zero padding must match the trailing dimensions, otherwise
    # np.concatenate rejects multi-channel input
    pad_shape = (target_length - current_length,) + audio.shape[1:]
    padding = np.zeros(pad_shape, dtype=audio.dtype)
    return np.concatenate([audio, padding])


def encode_wav_to_bytes(audio: np.ndarray, sr: int) -> bytes:
    """
    Serialize an audio array into an in-memory WAV file.

    Args:
        audio: Audio array
        sr: Sample rate

    Returns:
        WAV file as bytes
    """
    # Write into a memory buffer rather than a file on disk
    with io.BytesIO() as stream:
        sf.write(stream, audio, sr, format='WAV')
        return stream.getvalue()


def encode_flac_to_bytes(audio: np.ndarray, sr: int) -> bytes:
    """
    Encode numpy array to FLAC bytes.

    Floating-point input is clipped to [-1, 1] and converted to 16-bit
    integers before encoding, so float audio is quantized to 16 bits.
    Non-float input is passed to the encoder unchanged.

    Args:
        audio: Audio array
        sr: Sample rate

    Returns:
        FLAC file as bytes
    """
    if np.issubdtype(audio.dtype, np.floating):
        # Clamp first: samples outside [-1, 1] would overflow int16 and
        # wrap around to the opposite sign
        audio_int = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
    else:
        audio_int = audio

    buf = io.BytesIO()
    sf.write(buf, audio_int, sr, format='FLAC')
    return buf.getvalue()