piroplasmata's picture
Upload folder using huggingface_hub
924a4d4 verified
import random
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
import soundfile as sf
from pathlib import Path
# Print the current working directory
print("Current working directory:", os.getcwd())
#y, sr = librosa.load(path="Data_Management/Laughter/_540cGuw2H0_70.0-80.0.wav")
#S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512, n_mels= 128)
#
#S_dB = librosa.power_to_db(S=S, ref=np.max)
#plt.figure(figsize=(2.24, 2.24))
#librosa.display.specshow(S_dB, sr=sr, hop_length=512, x_axis='time', y_axis='mel')
#plt.axis('off')
#plt.savefig(os.path.join("Spectrogram_Images/Laughs", "_540cGuw2H0_70.0-80.0.png"), bbox_inches='tight', pad_inches=0)
def audio_augmentation():
def load_audio(file_path):
y, sr = librosa.load(file_path, sr=None) # Load the audio file with its original sampling rate
return y, sr
def pitch_shift(y, sr, n_steps):
return librosa.effects.pitch_shift(y=y, sr=sr, n_steps=n_steps)
def time_stretch(y, rate):
return librosa.effects.time_stretch(y=y, rate=rate)
def add_noise(y, noise_factor=0.005):
noise = np.random.randn(len(y))
y_noisy = y + noise_factor * noise
y_noisy = np.clip(y_noisy, -1.0, 1.0) # Ensure the signal remains in the valid range
return y_noisy
def augment_and_save(input_dir, output_dir, augmentations):
for file_name in os.listdir(input_dir):
if file_name.endswith('.wav'): # Assuming your audio files are .wav
file_path = input_dir / file_name
y, sr = load_audio(file_path)
# Apply augmentations
for aug in augmentations:
y_augmented = aug(y, sr)
# Create a new filename for the augmented file
new_file_name = f"{file_name}_{aug.__name__}.wav"
sf.write(output_dir / new_file_name, y_augmented, sr)
augmentations = [
lambda y, sr: pitch_shift(y, sr, n_steps=random.uniform(-2, 2)), # Random pitch shift between -2 and 2 semitones
lambda y, sr: time_stretch(y, rate=random.uniform(0.8, 1.2)), # Random time stretch between 80% and 120%
lambda y, sr: add_noise(y, noise_factor=random.uniform(0.002, 0.01)) # Random noise between 0.002 and 0.01
]
non_laugh_dir=Path("Data_Management/Speech")
augmented_non_laugh_dir = Path("Data_Management/Augmented_Non_Laugh")
augment_and_save(non_laugh_dir, augmented_non_laugh_dir, augmentations)
def create_mel_spectrogram(audio_path, output_folder, filename):
y, sr = librosa.load(audio_path)
S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512, n_mels= 128)
S_dB = librosa.power_to_db(S=S, ref=np.max)
plt.figure(figsize=(2.24, 2.24))
librosa.display.specshow(S_dB, sr=sr, hop_length=512, x_axis='time', y_axis='mel')
plt.axis('off')
plt.savefig(os.path.join(output_folder, filename), bbox_inches='tight', pad_inches=0)
plt.close()
def laugh_spectrogram():
input_folder = 'Data_Management/Laughter'
output_folder = 'Spectrogram_Images/Laughs'
os.makedirs(output_folder, exist_ok=True)
for filename in os.listdir(input_folder):
if filename.endswith('.wav'):
create_mel_spectrogram(os.path.join(input_folder, filename), output_folder, filename.replace('.wav', '.png'))
def non_laugh_spectrogram():
input_folder = 'Data_Management/Speech'
output_folder = 'Spectrogram_Images/non_Laughs'
os.makedirs(output_folder, exist_ok=True)
for filename in os.listdir(input_folder):
if filename.endswith('.wav'):
create_mel_spectrogram(os.path.join(input_folder, filename), output_folder, filename.replace('.wav', '.png'))
def test_spectrogram(max_files=1000):
input_folder = 'Data_Management/Test_Speech/Speech'
output_folder = 'Spectrogram_Images/Test/Speech'
os.makedirs(output_folder, exist_ok=True)
file_count = 0
for filename in os.listdir(input_folder):
if filename.endswith('.wav'):
create_mel_spectrogram(os.path.join(input_folder, filename), output_folder, filename.replace('.wav', '.png'))
file_count += 1
if file_count >= max_files:
break
def augmented_spectrogram():
input_folder = 'Data_Management/Augmented_Non_Laugh'
output_folder = 'Spectrogram_Images/Non_Laughs'
os.makedirs(output_folder, exist_ok=True)
for filename in os.listdir(input_folder):
if filename.endswith('.wav'):
create_mel_spectrogram(os.path.join(input_folder, filename), output_folder, filename.replace('.wav', '.png'))
test_spectrogram(31)