|
|
import numpy as np |
|
|
from scipy.io import wavfile |
|
|
|
|
|
def convert_wav_to_s16_16khz(input_wav_path, output_wav_path):
    """Convert a WAV file to 16 kHz, signed 16-bit PCM.

    The input is read with ``scipy.io.wavfile.read``, brought onto the
    int16 amplitude scale, linearly resampled to 16 kHz when its sample
    rate differs, then rounded, clipped to the int16 range and written with
    ``scipy.io.wavfile.write``. The channel layout (1-D mono or 2-D
    ``(frames, channels)``) is preserved.

    Args:
        input_wav_path: Path of the WAV file to read.
        output_wav_path: Path of the WAV file to write.

    Note:
        Resampling is plain linear interpolation with no anti-aliasing
        filter, so downsampling content above 8 kHz will alias.
    """
    target_rate = 16000
    sample_rate, data = wavfile.read(input_wav_path)

    # Bring every supported sample format onto the int16 amplitude scale
    # *before* resampling, so the final cast never depends on what dtype
    # the interpolation step happens to return.
    if data.dtype == np.uint8:
        # 8-bit WAV data is unsigned with its midpoint at 128.
        samples = (data.astype(np.float64) - 128.0) * 256.0
    elif np.issubdtype(data.dtype, np.integer):
        # Signed PCM: rescale by the ratio of full-scale ranges
        # (factor 1.0 for int16, 1/65536 for int32).
        full_scale = float(np.iinfo(data.dtype).max) + 1.0
        samples = data.astype(np.float64) * (32768.0 / full_scale)
    else:
        # Floating-point data is treated as nominally within [-1.0, 1.0].
        samples = data.astype(np.float64) * 32767.0

    # Empty input has nothing to interpolate (np.interp rejects empty xp).
    if sample_rate != target_rate and len(samples) > 0:
        num_samples = round(len(samples) * target_rate / sample_rate)
        source_index = np.arange(len(samples))
        # Output sample k lies at time k / target_rate, i.e. at source index
        # k * sample_rate / target_rate. Positions past the last source
        # sample are held at the final value by np.interp.
        positions = np.arange(num_samples) * (sample_rate / target_rate)
        if samples.ndim == 1:
            samples = np.interp(positions, source_index, samples)
        else:
            # np.interp is 1-D only; resample each channel separately.
            samples = np.stack(
                [np.interp(positions, source_index, channel)
                 for channel in samples.T],
                axis=1,
            )

    # Round to nearest and clip so out-of-range values saturate instead of
    # wrapping around on the integer cast.
    pcm = np.clip(np.rint(samples), -32768, 32767).astype(np.int16)

    wavfile.write(output_wav_path, target_rate, pcm)
|
|
|
|
|
from glob import glob

# Batch-convert every WAV found in the old sample directory, writing each
# result under the same file name in the new sample directory.
wavs = glob('/mnt/data-2t/jeff/codes/llm/cpp/sample_data_old/*.wav')

for wav in wavs:
    # Keep only the final path component (the file name) of the source path.
    file_name = wav.rsplit('/', 1)[-1]
    output_path = '/mnt/data-2t/jeff/codes/llm/cpp/sample_data/' + file_name
    convert_wav_to_s16_16khz(wav, output_path)