TestTranslator / scripts /recorder.py
yujuanqin's picture
refactor file structure
778443c
import sounddevice as sd
import soundfile as sf
import sys
import re
# Text file holding the numbered prompts, one per line as "<index>. <text>".
TEXT_FILE = '/Users/jeqin/work/code/TestTranslator/test_data/text/test_asr_zh_with_index.txt'
# Directory the recorded audio files are written to.
AUDIO_FOLDER= '/Users/jeqin/work/code/TestTranslator/test_data/recordings'
# Sample rate (Hz) used for both the capture stream and the saved file.
SAMPLE_RATE = 16000
# Number of input channels opened on the capture stream.
CHANNELS = 1
def get_lines_with_index(filepath):
    """Yield ``(index, text)`` pairs for every numbered line in *filepath*.

    A numbered line has the form ``"<digits>. <text>"``: one or more digits,
    a literal dot, optional whitespace, then the text. Surrounding whitespace
    is stripped before matching, and lines that do not match are skipped.

    Args:
        filepath: Path of the UTF-8 encoded text file to read.

    Yields:
        ``(index, text)`` tuples of strings; ``index`` is the digit run
        exactly as written in the file.
    """
    # 'utf-8-sig' decodes plain UTF-8 identically but drops a leading BOM.
    # str.strip() does not remove U+FEFF, so with plain 'utf-8' a BOM would
    # keep the very first numbered line from matching.
    with open(filepath, encoding='utf-8-sig') as f:
        for line in f:
            m = re.match(r'^(\d+)\.\s*(.*)', line.strip())
            if m:
                yield m.group(1), m.group(2)
def record_audio(filename):
    """Interactively record one take from the microphone and save it.

    The user presses Enter to start and Enter again to stop. Typing ``q``
    before the second Enter discards the take and starts over. A kept take
    is peak-normalised so its largest absolute sample becomes 0.99, then
    written to ``AUDIO_FOLDER/filename`` at ``SAMPLE_RATE``.

    Args:
        filename: Name of the output file inside ``AUDIO_FOLDER``.
    """
    import os

    import numpy as np

    chunks = []

    def callback(indata, frames, time, status):
        # Surface stream problems (e.g. input overflow) instead of dropping
        # them silently.
        if status:
            print(status, file=sys.stderr)
        # The stream reuses its buffer, so keep a private copy of each block.
        chunks.append(indata.copy())

    # Make sure the destination exists before the user spends time recording.
    os.makedirs(AUDIO_FOLDER, exist_ok=True)

    while True:
        print("按回车开始录音...")
        input()
        print("正在录音,按回车结束录音,或输入 q 回车重新录音。")
        chunks.clear()
        # The stream only captures while this block is open; leaving it
        # stops and closes the device before the take is processed.
        with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS, dtype='float32', callback=callback):
            user_input = input()

        if user_input.strip().lower() == 'q':
            print("重新录音...")
            continue

        if not chunks:
            # Stopped before the first block arrived: np.concatenate would
            # raise on an empty list, so ask for another take instead.
            print("未录到任何音频,请重新录音。")
            continue

        audio_np = np.concatenate(chunks, axis=0)
        max_val = np.max(np.abs(audio_np))
        if max_val > 0:
            # Skip scaling for pure silence to avoid dividing by zero.
            audio_np = audio_np * (0.99 / max_val)
        sf.write(os.path.join(AUDIO_FOLDER, filename), audio_np, SAMPLE_RATE)
        print(f"已保存: {filename}")
        break
def main(only_index=52):
    """Print every numbered prompt and record audio for the selected one(s).

    Args:
        only_index: Index of the single prompt to record. Defaults to 52,
            the value that used to be hard-coded here. Pass ``None`` to
            record every prompt in file order.
    """
    for idx, text in get_lines_with_index(TEXT_FILE):
        print(f"{idx}. {text}")
        if only_index is None or int(idx) == only_index:
            record_audio(f"{idx}.wav")
# Run the recording session only when executed as a script, not on import.
if __name__ == '__main__':
    main()