ASR

File size: 8,246 Bytes

f2688f7

import os
import torchaudio
import torch
import numpy as np
import soundfile
class AudioLoader:
    def __init__(self, sample_rate=16000):
        self.sample_rate = sample_rate

    def load_audio(self, file_path):
        audio, sample_rate = torchaudio.load(file_path, backend='soundfile')
        if sample_rate != self.sample_rate:
            audio = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=self.sample_rate)(audio)
        return audio.squeeze(0)

class STFT:
    def __init__(self, n_fft=1024, hop_length=512, win_length=1024):
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length

    def compute_stft(self, signal):
        return torch.stft(signal, n_fft=self.n_fft, hop_length=self.hop_length, win_length=self.win_length, window=torch.hamming_window(self.win_length), return_complex=True)

class SpectrogramSaver:
    @staticmethod
    def save_spectrogram(spectrogram, save_path):
        torch.save(spectrogram, save_path)

# class Preprocessing:
#     def __init__(self, sample_rate=16000, n_fft=1024, hop_length=512, win_length=1024):
#         self.loader = AudioLoader(sample_rate)
#         self.stft = STFT(n_fft, hop_length, win_length)
#         self.saver = SpectrogramSaver()
#         self.fixed_length = None

#     def preprocess(self, signal):
#         spectrogram = self.stft.compute_stft(signal)
#         real = spectrogram.real
#         imag = spectrogram.imag
#         combined = torch.stack((real, imag), dim=-1)  # Shape: (num_frames, num_frequency_bins, 2)
#         return combined

#     def determine_fixed_length(self, noisy_dir, clean_dir):
#         lengths = []
#         noisy_files = [os.path.join(noisy_dir, f) for f in os.listdir(noisy_dir) if f.endswith('.wav')]
#         clean_files = [os.path.join(clean_dir, f) for f in os.listdir(clean_dir) if f.endswith('.wav')]

#         for noisy_file, clean_file in zip(noisy_files, clean_files):
#             noisy_audio = self.loader.load_audio(noisy_file)
#             clean_audio = self.loader.load_audio(clean_file)

#             noisy_spectrogram = self.preprocess(noisy_audio)
#             clean_spectrogram = self.preprocess(clean_audio)

#             lengths.append(noisy_spectrogram.shape[1])
#             lengths.append(clean_spectrogram.shape[1])

#         self.fixed_length = int(np.median(lengths))
#         print(f"Determined fixed length: {self.fixed_length}")

#     def create_dataset(self, noisy_dir, clean_dir, save_dir):
#         if self.fixed_length is None:
#             self.determine_fixed_length(noisy_dir, clean_dir)

#         noisy_save_dir = os.path.join(save_dir, 'noisy')
#         clean_save_dir = os.path.join(save_dir, 'clean')
        
#         if not os.path.exists(noisy_save_dir):
#             os.makedirs(noisy_save_dir)
#         if not os.path.exists(clean_save_dir):
#             os.makedirs(clean_save_dir)

#         noisy_files = [os.path.join(noisy_dir, f) for f in os.listdir(noisy_dir) if f.endswith('.wav')]
#         clean_files = [os.path.join(clean_dir, f) for f in os.listdir(clean_dir) if f.endswith('.wav')]

#         for noisy_file, clean_file in zip(noisy_files, clean_files):
#             noisy_audio = self.loader.load_audio(noisy_file)
#             clean_audio = self.loader.load_audio(clean_file)

#             noisy_spectrogram = self.preprocess(noisy_audio)
#             clean_spectrogram = self.preprocess(clean_audio)

#             noisy_spectrogram = self.pad_spectrogram(noisy_spectrogram)
#             clean_spectrogram = self.pad_spectrogram(clean_spectrogram)

#             noisy_save_path = os.path.join(noisy_save_dir, f"noisy_{os.path.basename(noisy_file).split('.')[0]}.pt")
#             clean_save_path = os.path.join(clean_save_dir, f"clean_{os.path.basename(clean_file).split('.')[0]}.pt")

#             self.saver.save_spectrogram(noisy_spectrogram, noisy_save_path)
#             self.saver.save_spectrogram(clean_spectrogram, clean_save_path)

#     def pad_spectrogram(self, spectrogram):
#         pad_length = self.fixed_length - spectrogram.shape[1]
#         if pad_length > 0:
#             pad = torch.zeros((spectrogram.shape[0], pad_length, spectrogram.shape[2]))
#             spectrogram = torch.cat((spectrogram, pad), dim=1)
#         elif pad_length < 0:
#             spectrogram = spectrogram[:, :self.fixed_length, :]
#         return spectrogram
class Preprocessing:
    def __init__(self, sample_rate, n_fft, hop_length, win_length):
        self.sample_rate = sample_rate
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        self.fixed_length = 86
        self.stft = STFT(n_fft, hop_length, win_length)
        self.loader = AudioLoader(sample_rate)

    def preprocess(self, signal):
        # print(f"Signal shape before STFT: {signal.shape}")  # Debug statement
        if signal.shape[-1] == 0:
            print("Encountered zero-length signal, skipping...")
            return None  # Skip this signal
        spectrogram = self.stft.compute_stft(signal)
        real = spectrogram.real
        imag = spectrogram.imag
        return torch.stack((real, imag), dim=-1)

    def determine_fixed_length(self, noisy_dir, clean_dir):
        lengths = []
        for noisy_file, clean_file in zip(sorted(os.listdir(noisy_dir)), sorted(os.listdir(clean_dir))):
            noisy_audio = self.loader.load_audio(os.path.join(noisy_dir, noisy_file))
            clean_audio = self.loader.load_audio(os.path.join(clean_dir, clean_file))

            # print(f"Noisy audio shape: {noisy_audio.shape}, Clean audio shape: {clean_audio.shape}")  # Debug statement

            noisy_spectrogram = self.preprocess(noisy_audio)
            clean_spectrogram = self.preprocess(clean_audio)

            if noisy_spectrogram is None or clean_spectrogram is None:
                continue  # Skip any zero-length signals

            lengths.append(noisy_spectrogram.shape[1])
            lengths.append(clean_spectrogram.shape[1])
        
        if lengths:
            self.fixed_length = min(lengths)
            print(f"Determined fixed length: {self.fixed_length}")  # Debug statement
        else:
            print("No valid spectrograms found.")  # If no valid data is found

    def create_dataset(self, noisy_dir, clean_dir, save_dir):
        if self.fixed_length is None:
            self.determine_fixed_length(noisy_dir, clean_dir)

        noisy_save_dir = os.path.join(save_dir, 'noisy')
        clean_save_dir = os.path.join(save_dir, 'clean')
        os.makedirs(noisy_save_dir, exist_ok=True)
        os.makedirs(clean_save_dir, exist_ok=True)

        for noisy_file, clean_file in zip(sorted(os.listdir(noisy_dir)), sorted(os.listdir(clean_dir))):
            noisy_audio = self.loader.load_audio(os.path.join(noisy_dir, noisy_file))
            clean_audio = self.loader.load_audio(os.path.join(clean_dir, clean_file))

            noisy_spectrogram = self.preprocess(noisy_audio)
            clean_spectrogram = self.preprocess(clean_audio)

            if noisy_spectrogram is None or clean_spectrogram is None:
                continue  # Skip any zero-length signals

            noisy_spectrogram = noisy_spectrogram[:, :self.fixed_length, :]
            clean_spectrogram = clean_spectrogram[:, :self.fixed_length, :]

            torch.save(noisy_spectrogram, os.path.join(noisy_save_dir, os.path.basename(noisy_file).replace('.wav', '.pt')))
            torch.save(clean_spectrogram, os.path.join(clean_save_dir, os.path.basename(clean_file).replace('.wav', '.pt')))

            # print(f"Processed and saved {noisy_file} and {clean_file}")  # Debug statement


# # Example usage
# if __name__ == "__main__":
#     noisy_dir = "/home/siddharth/Myprojects/ASR_project/Hybrid_CRN_SFANC-FxNLMS/Babble_noise_speech_train"
#     clean_dir = "/home/siddharth/Myprojects/ASR_project/Hybrid_CRN_SFANC-FxNLMS/clean_train"
#     save_dir = "/home/siddharth/Myprojects/ASR_project/Hybrid_CRN_SFANC-FxNLMS/preprocessed_data"

#     preprocessor = Preprocessing(sample_rate=16000, n_fft=1024, hop_length=512, win_length=1024)
#     preprocessor.create_dataset(noisy_dir, clean_dir, save_dir)