"""Convert WAV files to 16-bit signed PCM at a 16 kHz sample rate."""

import os
from glob import glob

import numpy as np
from scipy.io import wavfile

TARGET_SAMPLE_RATE = 16000


def _to_int16_scale(data):
    """Return *data* as float64 expressed on the int16 amplitude scale.

    Floating-point samples are treated as nominally within [-1.0, 1.0] and
    multiplied by 32767. Signed integers are rescaled so that their full
    range maps onto the int16 range (int16 itself is unchanged). Unsigned
    integers are first re-centred around zero.
    """
    if np.issubdtype(data.dtype, np.floating):
        return data.astype(np.float64) * 32767.0
    info = np.iinfo(data.dtype)
    if np.issubdtype(data.dtype, np.unsignedinteger):
        midpoint = (float(info.max) + 1.0) / 2.0  # 128 for uint8
        return (data.astype(np.float64) - midpoint) * (32768.0 / midpoint)
    return data.astype(np.float64) * (32768.0 / (float(info.max) + 1.0))


def _resample_linear(samples, source_rate, target_rate):
    """Linearly interpolate *samples* (frames on axis 0) to *target_rate*."""
    num_frames = samples.shape[0]
    num_samples = round(num_frames * target_rate / source_rate)
    if num_frames == 0 or num_samples == 0:
        # Nothing to interpolate; np.interp rejects empty inputs.
        return samples[:0]
    source_index = np.arange(num_frames)
    # Output frame k lies at time k / target_rate, i.e. at fractional source
    # index k * source_rate / target_rate. Positions past the last source
    # frame are clamped to the final sample by np.interp.
    positions = np.arange(num_samples) * (source_rate / target_rate)
    if samples.ndim == 1:
        return np.interp(positions, source_index, samples)
    # np.interp only handles 1-D data, so interpolate each channel separately.
    channels = [
        np.interp(positions, source_index, samples[:, ch])
        for ch in range(samples.shape[1])
    ]
    return np.column_stack(channels)


def convert_wav_to_s16_16khz(input_wav_path, output_wav_path):
    """Read a WAV file and write it back as 16-bit signed PCM at 16 kHz.

    Args:
        input_wav_path: Path of the WAV file to read.
        output_wav_path: Path the converted WAV file is written to.

    The channel layout of the input is preserved. Resampling uses plain
    linear interpolation with no anti-aliasing filter, so content above
    8 kHz in the source may alias when downsampling.
    """
    sample_rate, data = wavfile.read(input_wav_path)

    # Work in float64 on the int16 scale so resampling and quantisation are
    # independent of the source sample format.
    samples = _to_int16_scale(data)

    # Resample if needed
    if sample_rate != TARGET_SAMPLE_RATE:
        samples = _resample_linear(samples, sample_rate, TARGET_SAMPLE_RATE)

    # Convert to 16-bit signed integer; clip first so out-of-range values
    # saturate instead of wrapping around.
    pcm = np.clip(np.rint(samples), -32768, 32767).astype(np.int16)

    wavfile.write(output_wav_path, TARGET_SAMPLE_RATE, pcm)


def main():
    """Convert every WAV in the hard-coded source directory."""
    wavs = glob('/mnt/data-2t/jeff/codes/llm/cpp/sample_data_old/*.wav')
    for wav in wavs:
        convert_wav_to_s16_16khz(
            wav,
            os.path.join(
                '/mnt/data-2t/jeff/codes/llm/cpp/sample_data/',
                os.path.basename(wav),
            ),
        )


if __name__ == "__main__":
    main()