Spaces:
Sleeping
Sleeping
| import librosa | |
| import numpy as np | |
| # Function to resample the audio array | |
def resample_audio(array, orig_sr, target_sr):
    """Return the audio samples as a NumPy array at the target sampling rate.

    Args:
        array: Audio samples; any input accepted by ``np.array``.
        orig_sr: Sampling rate of ``array``.
        target_sr: Desired sampling rate of the result.

    Returns:
        A new NumPy array. When the two rates are equal it is simply a copy of
        the input; otherwise it is the output of ``librosa.resample``.
    """
    # np.array always copies, so the caller's data is never returned as-is.
    samples = np.array(array)

    # Nothing to do when the audio is already at the requested rate.
    if orig_sr == target_sr:
        return samples

    return librosa.resample(samples, orig_sr=orig_sr, target_sr=target_sr)
def create_mel_spectrogram(waveform, sr, n_mels=128, n_fft=2048, hop_length=512):
    """Compute a decibel-scaled Mel spectrogram of an audio waveform.

    Args:
        waveform (np.ndarray): 1D NumPy array of the audio waveform.
        sr (int): Sampling rate of the waveform.
        n_mels (int): Number of Mel bands to generate.
        n_fft (int): Length of the FFT window.
        hop_length (int): Number of samples between successive frames.

    Returns:
        np.ndarray: 2D NumPy array of the Mel spectrogram in decibels
        (shape: [n_mels, time]).

    Note:
        The time axis is returned as computed; no padding or truncation to a
        fixed number of frames is applied here.
    """
    # Framing/filter-bank settings forwarded unchanged to librosa.
    mel_params = {"n_fft": n_fft, "hop_length": hop_length, "n_mels": n_mels}

    # Mel-scaled power spectrogram (amplitude squared).
    power_spec = librosa.feature.melspectrogram(y=waveform, sr=sr, **mel_params)

    # Log-scale the power values, using the spectrogram's own maximum
    # (ref=np.max) as the decibel reference.
    return librosa.power_to_db(power_spec, ref=np.max)