# S2S / convert_dataset.py
# Vikrantyadav11234's picture
# Add files using upload-large-folder tool
# b54655d verified
import torchaudio
import torch
import os
from datasets import load_dataset
import numpy as np
# Function to convert a single audio sample to WAV file
def convert_audio_sample_to_wav(audio_data, sample_rate, output_file):
    """
    Peak-normalise a single audio sample and write it to a WAV file.

    The samples are divided by their peak absolute value, scaled to the
    int16 range, cast to int16 and handed to ``torchaudio.save``. A 1-D
    input gets a leading channel dimension (channels x samples) first.

    This function is best-effort: any failure is printed and swallowed so
    that a caller converting many samples can keep going.

    Parameters:
    - audio_data (torch.Tensor): The audio data as a tensor.
    - sample_rate (int): The sample rate of the audio data.
    - output_file (str): The path to save the output WAV file.
    """
    try:
        # An empty tensor has no peak; fail with a clear message instead of
        # the opaque reduction error torch.max would raise.
        if audio_data.numel() == 0:
            raise ValueError("audio data is empty")

        peak = torch.max(torch.abs(audio_data))
        # A silent clip has a peak of 0. Dividing by it would yield NaNs,
        # and casting NaN to int16 is undefined, so leave silence as zeros.
        if peak > 0:
            audio_tensor = audio_data / peak
        else:
            audio_tensor = audio_data

        # Scale the [-1, 1] signal to the int16 range.
        audio_tensor = (audio_tensor * 32767).short()

        # Ensure the audio_tensor is 2D (channels x samples)
        if audio_tensor.ndim == 1:
            audio_tensor = audio_tensor.unsqueeze(0)  # Add channel dimension

        # Save the audio tensor to a WAV file
        torchaudio.save(output_file, audio_tensor, sample_rate)
        print(f"Saved {output_file}")
    except Exception as e:
        # Deliberate catch-all: report the failure and let the caller go on.
        print(f"Error saving {output_file}: {e}")
# Main script
if __name__ == "__main__":
    # Export every example of one dataset split as an individual WAV file.

    # Specify the dataset path and split
    dataset_path = "/home/vikrant/Conversational-AI-Model/embedding_vocoder/English_Accent_DataSet_Local/westbrook___english_accent_data_set/default/0.0.0"
    split = "train"  # Or "validation" or "test"

    # Load the dataset split using Hugging Face Datasets
    try:
        dataset = load_dataset("arrow", data_dir=os.path.join(dataset_path, split), split=split)
    except Exception as e:
        print(f"Error loading dataset: {e}")
        # Exit with a non-zero status so shells and schedulers see the
        # failure. The bare exit() helper comes from the site module (not
        # always present) and reported success (status 0).
        raise SystemExit(1) from e

    # Define the output directory for saving WAV files
    output_wav_dir = "output_wavs"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_wav_dir, exist_ok=True)

    # Iterate over each example in the dataset
    for index, example in enumerate(dataset):
        try:
            # Extract relevant information from the example
            audio_id = example['audio_id']
            # Samples are read from example['audio']['array'] and converted
            # to a float32 tensor.
            audio_data = torch.tensor(example['audio']['array']).float()
            sample_rate = example['audio']['sampling_rate']

            # One WAV per example, named after its audio_id.
            # NOTE(review): assumes audio_id is unique and contains no path
            # separators; confirm against the dataset.
            output_file = os.path.join(output_wav_dir, f"{audio_id}.wav")

            # Convert and save the audio sample to a WAV file
            convert_audio_sample_to_wav(audio_data, sample_rate, output_file)
        except Exception as e:
            # Best-effort: a bad example is reported and skipped so the rest
            # of the split still gets converted.
            print(f"Error processing example {index}: {e}")

    print("Conversion complete.")