File size: 764 Bytes
a2dca42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import numpy as np
from scipy.io import wavfile

def convert_wav_to_s16_16khz(input_wav_path, output_wav_path):
    """Convert a WAV file to 16 kHz, 16-bit signed PCM.

    The input is read with ``scipy.io.wavfile.read``, rescaled onto the
    int16 value range, resampled to 16 kHz by linear interpolation when its
    sample rate differs, and written to ``output_wav_path``. Mono and
    multi-channel files are both handled; the channel layout is kept as-is.

    Args:
        input_wav_path: Path of the WAV file to read.
        output_wav_path: Path the converted WAV file is written to.

    Note:
        Resampling is plain linear interpolation without an anti-aliasing
        filter, so down-sampling material with content above 8 kHz can alias.
    """
    target_rate = 16000
    sample_rate, data = wavfile.read(input_wav_path)

    # Bring every sample format onto the int16 scale *before* resampling, so
    # the final cast never re-scales values that are already integer-ranged.
    if data.dtype == np.uint8:
        # 8-bit WAV data is unsigned with its midpoint at 128.
        samples = (data.astype(np.float64) - 128.0) * 256.0
    elif np.issubdtype(data.dtype, np.signedinteger):
        full_scale = -float(np.iinfo(data.dtype).min)
        samples = data.astype(np.float64) * (32768.0 / full_scale)
    else:
        # Floating-point WAV data is nominally in [-1.0, 1.0].
        samples = data.astype(np.float64) * 32767.0

    # Resample if needed
    num_frames = len(samples)
    if sample_rate != target_rate and num_frames > 0:
        num_samples = round(num_frames * target_rate / sample_rate)
        source_index = np.arange(num_frames)
        # Output sample k lies at time k / target_rate, i.e. at input index
        # k * sample_rate / target_rate; np.interp clamps past the last frame.
        positions = np.arange(num_samples) * (sample_rate / target_rate)
        if samples.ndim == 1:
            samples = np.interp(positions, source_index, samples)
        else:
            # np.interp is 1-D only, so interpolate each channel separately.
            samples = np.stack(
                [
                    np.interp(positions, source_index, samples[:, channel])
                    for channel in range(samples.shape[1])
                ],
                axis=1,
            )

    # Convert to 16-bit signed integer, rounding and clipping so that
    # out-of-range values saturate instead of wrapping around.
    pcm = np.clip(np.rint(samples), -32768, 32767).astype(np.int16)

    wavfile.write(output_wav_path, target_rate, pcm)

from glob import glob
# Collect every WAV file from the old sample directory.
wavs = glob('/mnt/data-2t/jeff/codes/llm/cpp/sample_data_old/*.wav')

# Convert each file, writing it under the same file name in the new directory.
for wav in wavs:
    file_name = wav.split('/')[-1]
    output_path = '/mnt/data-2t/jeff/codes/llm/cpp/sample_data/' + file_name
    convert_wav_to_s16_16khz(wav, output_path)