Vikrantyadav11234
/

S2S

Model card Files Files and versions

S2S / convert_dataset.py

Vikrantyadav11234's picture

Vikrantyadav11234

Add files using upload-large-folder tool

b54655d verified 10 months ago

history blame contribute delete

2.66 kB

	import torchaudio
	import torch
	import os
	from datasets import load_dataset
	import numpy as np

	# Function to convert a single audio sample to WAV file
	def convert_audio_sample_to_wav(audio_data, sample_rate, output_file):
	    """
	    Peak-normalize a single audio sample (tensor) and save it as a WAV file.

	    The waveform is scaled so that its largest absolute sample maps to
	    32767, converted to int16, given a leading channel dimension if it is
	    1-D, and written with ``torchaudio.save``. Any failure is reported on
	    stdout instead of being raised, so a batch conversion can continue with
	    the next sample.

	    Parameters:
	    - audio_data (torch.Tensor): The audio data as a tensor.
	    - sample_rate (int): The sample rate of the audio data.
	    - output_file (str): The path to save the output WAV file.
	    """
	    try:
	        # torch.max() on an empty tensor fails with an opaque reduction
	        # error; report the real problem instead.
	        if audio_data.numel() == 0:
	            raise ValueError("audio data is empty")

	        # Scale audio data to the range of int16.
	        # A silent clip has a peak of 0; dividing by it would produce NaNs
	        # (0/0) and undefined int16 samples, so leave it unscaled.
	        peak = torch.max(torch.abs(audio_data))
	        if peak > 0:
	            audio_tensor = audio_data / peak
	        else:
	            audio_tensor = audio_data
	        audio_tensor = (audio_tensor * 32767).short()

	        # Ensure the audio_tensor is 2D (channels x samples)
	        if audio_tensor.ndim == 1:
	            audio_tensor = audio_tensor.unsqueeze(0)  # Add channel dimension

	        # Save the audio tensor to a WAV file
	        torchaudio.save(output_file, audio_tensor, sample_rate)

	        print(f"Saved {output_file}")

	    except Exception as e:
	        # Deliberately best-effort: one bad sample must not abort the batch.
	        print(f"Error saving {output_file}: {e}")

	# Main script: load one split of a local Arrow dataset and write every
	# example's audio to "<output_wav_dir>/<audio_id>.wav".
	if __name__ == "__main__":
	    # Specify the dataset path and split
	    dataset_path = "/home/vikrant/Conversational-AI-Model/embedding_vocoder/English_Accent_DataSet_Local/westbrook___english_accent_data_set/default/0.0.0"
	    split = "train"  # Or "validation" or "test"

	    # Load the dataset split using Hugging Face Datasets
	    try:
	        dataset = load_dataset("arrow", data_dir=os.path.join(dataset_path, split), split=split)
	    except Exception as e:
	        print(f"Error loading dataset: {e}")
	        # Exit with a non-zero status so a calling shell or pipeline can
	        # detect the failure (the bare exit() helper reports success).
	        raise SystemExit(1)

	    # Define the output directory for saving WAV files
	    output_wav_dir = "output_wavs"

	    # Create the output directory if it doesn't exist; exist_ok avoids the
	    # race between checking for the directory and creating it.
	    os.makedirs(output_wav_dir, exist_ok=True)

	    # Iterate over each example in the dataset
	    for index, example in enumerate(dataset):
	        try:
	            # Extract relevant information from the example
	            audio_id = example['audio_id']
	            audio_data = torch.tensor(example['audio']['array']).float()  # Get audio data as a tensor
	            sample_rate = example['audio']['sampling_rate']  # Get sample rate

	            # Construct the output file path
	            output_file = os.path.join(output_wav_dir, f"{audio_id}.wav")

	            # Convert and save the audio sample to a WAV file
	            convert_audio_sample_to_wav(audio_data, sample_rate, output_file)

	        except Exception as e:
	            # Best-effort per example: report the failure and keep going.
	            print(f"Error processing example {index}: {e}")

	    print("Conversion complete.")