yujuanqin commited on
Commit
8a3bc32
·
1 Parent(s): e4406a3

add recordings and tests

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. lib/utils.py +1 -1
  2. scripts/asr_utils.py +81 -0
  3. scripts/infer_finetuned_whisper.py +29 -5
  4. scripts/recorder.py +56 -0
  5. scripts/run_funasr.py +1 -1
  6. scripts/{run_quant.py → run_funasr_quant.py} +49 -26
  7. scripts/run_whisper.py +31 -9
  8. temp.py +14 -4
  9. tests/test_data/recordings/1.wav +3 -0
  10. tests/test_data/recordings/10.wav +3 -0
  11. tests/test_data/recordings/11.wav +3 -0
  12. tests/test_data/recordings/12.wav +3 -0
  13. tests/test_data/recordings/13.wav +3 -0
  14. tests/test_data/recordings/14.wav +3 -0
  15. tests/test_data/recordings/15.wav +3 -0
  16. tests/test_data/recordings/16.wav +3 -0
  17. tests/test_data/recordings/17.wav +3 -0
  18. tests/test_data/recordings/18.wav +3 -0
  19. tests/test_data/recordings/19.wav +3 -0
  20. tests/test_data/recordings/2.wav +3 -0
  21. tests/test_data/recordings/20.wav +3 -0
  22. tests/test_data/recordings/21.wav +3 -0
  23. tests/test_data/recordings/22.wav +3 -0
  24. tests/test_data/recordings/23.wav +3 -0
  25. tests/test_data/recordings/24.wav +3 -0
  26. tests/test_data/recordings/25.wav +3 -0
  27. tests/test_data/recordings/26.wav +3 -0
  28. tests/test_data/recordings/27.wav +3 -0
  29. tests/test_data/recordings/28.wav +3 -0
  30. tests/test_data/recordings/29.wav +3 -0
  31. tests/test_data/recordings/3.wav +3 -0
  32. tests/test_data/recordings/30.wav +3 -0
  33. tests/test_data/recordings/31.wav +3 -0
  34. tests/test_data/recordings/32.wav +3 -0
  35. tests/test_data/recordings/33.wav +3 -0
  36. tests/test_data/recordings/34.wav +3 -0
  37. tests/test_data/recordings/35.wav +3 -0
  38. tests/test_data/recordings/36.wav +3 -0
  39. tests/test_data/recordings/37.wav +3 -0
  40. tests/test_data/recordings/38.wav +3 -0
  41. tests/test_data/recordings/39.wav +3 -0
  42. tests/test_data/recordings/4.wav +3 -0
  43. tests/test_data/recordings/40.wav +3 -0
  44. tests/test_data/recordings/41.wav +3 -0
  45. tests/test_data/recordings/42.wav +3 -0
  46. tests/test_data/recordings/43.wav +3 -0
  47. tests/test_data/recordings/44.wav +3 -0
  48. tests/test_data/recordings/45.wav +3 -0
  49. tests/test_data/recordings/46.wav +3 -0
  50. tests/test_data/recordings/47.wav +3 -0
lib/utils.py CHANGED
@@ -38,7 +38,7 @@ def cmd(command: str, check=True, capture_output=False) -> CompletedProcess:
38
  return ret
39
 
40
  def clean_text_for_comparison_zh(text):
41
- symbol_pattern = "[ ,。、!?\n]"
42
  to = ""
43
  return re.sub(symbol_pattern, to, text).lower()
44
 
 
38
  return ret
39
 
40
def clean_text_for_comparison_zh(text):
    """Normalize Chinese text for fuzzy comparison.

    Strips the listed ASCII/full-width punctuation characters and newlines,
    then lower-cases the result so ASR transcripts can be diffed fairly.
    """
    # BUG FIX: the hyphen now sits last in the class so it is literal.
    # Previously the unescaped '-' between '’' and '《' formed a Unicode
    # *range* (U+2019..U+300A) that silently stripped many characters
    # (e.g. '…', '“', '”') that were never meant to be removed.
    symbol_pattern = "[ ,。、!?::‘’《》!?;,\n-]"
    return re.sub(symbol_pattern, "", text).lower()
44
 
scripts/asr_utils.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import csv
3
+ import wave
4
+ import re
5
+
6
def add_text_index(text_file='/Users/jeqin/work/code/TestTranslator/tests/test_data/text/test_asr_zh.txt'):
    """Print every non-empty, non-comment line of *text_file* with a 1-based index prefix.

    Lines that are blank or start with '#' are skipped and do not consume
    an index. The path is now a parameter (with the historical default) so
    the helper works on any transcript file.
    """
    index = 1
    with open(text_file, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                # '#' lines are section headers in the transcript file.
                continue
            print(f"{index}. {line}")
            index += 1
21
+
22
def get_lines_with_index(filepath):
    """Yield (index, text) string pairs for lines shaped like "12. some text"."""
    pattern = re.compile(r'^(\d+)\.\s*(.*)')
    with open(filepath, encoding='utf-8') as fh:
        for raw in fh:
            match = pattern.match(raw.strip())
            if match is not None:
                yield match.group(1), match.group(2)
29
+
30
def get_wav_length(wav_path):
    """Return the duration of a WAV file in seconds, or 0 if it cannot be read.

    Failures are reported on stdout and mapped to 0 so batch callers can
    keep going (matches the original best-effort contract).
    """
    try:
        with wave.open(wav_path, 'rb') as wf:
            frames = wf.getnframes()
            rate = wf.getframerate()
            if rate == 0:
                # Guard the division below; a 0 Hz header is a broken file.
                raise wave.Error("sample rate is 0")
            return frames / float(rate)
    except (wave.Error, EOFError, OSError) as e:
        # Narrowed from `except Exception`: only I/O and WAV-format errors
        # are expected here; anything else should surface as a real bug.
        print(f"Error reading {wav_path}: {e}")
        return 0
40
+
41
def write_csv(rows, output_csv):
    """Write *rows* to *output_csv* beneath a fixed Chinese header row."""
    header = ['序号', '文本', '音频长度(秒)']
    with open(output_csv, 'w', newline='', encoding='utf-8') as out:
        csv.writer(out).writerows([header, *rows])
46
+
47
def print_text_and_audio_length():
    """Pair each indexed transcript line with its recording and dump a CSV.

    For every "(idx, text)" entry in the reference transcript, looks up
    "<audio_folder>/<idx>.wav", measures its duration, and writes
    [idx, text, duration] rows via write_csv.
    """
    text_file = '/Users/jeqin/work/code/TestTranslator/tests/test_data/text/test_asr_zh_with_index.txt'
    audio_folder = '/Users/jeqin/work/code/TestTranslator/tests/test_data/recordings'
    output_csv = '/Users/jeqin/work/code/TestTranslator/scripts/csv/text_audio_length.csv'
    rows = []
    for idx, text in get_lines_with_index(text_file):
        audio_path = os.path.join(audio_folder, f"{idx}.wav")
        # BUG FIX: get_wav_length returns 0 (never None) on failure, so the
        # old `if ... is not None` branch was dead and the value was rounded
        # twice. Round exactly once here.
        rows.append([idx, text, round(get_wav_length(audio_path), 2)])
    write_csv(rows, output_csv)
61
+
62
def get_text_distance(text1, text2):
    """Compare two strings after Chinese-aware punctuation cleanup.

    Returns (distance, normalized_distance, per-character diff markup).
    """
    # Imported lazily so this module stays importable without lib.utils.
    from lib.utils import run_textdistance, clean_text_for_comparison_zh, highlight_diff
    left = clean_text_for_comparison_zh(text1)
    right = clean_text_for_comparison_zh(text2)
    distance, norm_distance = run_textdistance(left, right)
    markup = highlight_diff(left, right, spliter="")
    return distance, norm_distance, markup
69
+
70
def get_origin_text_dict():
    """Return a mapping of recording index (str) -> reference transcript text."""
    text_file = '/Users/jeqin/work/code/TestTranslator/tests/test_data/text/test_asr_zh_with_index.txt'
    # get_lines_with_index already yields (idx, text) pairs.
    return dict(get_lines_with_index(text_file))
76
+
77
+
78
# Manual entry point: uncomment one of the helpers below to run it ad hoc.
if __name__ == '__main__':
    # add_text_index()
    # print_text_and_audio_length()
    pass
scripts/infer_finetuned_whisper.py CHANGED
@@ -112,9 +112,10 @@ def main():
112
  print(f"{p.name} -> {text}; time cost: {t1-t0}")
113
  except Exception as e:
114
  print(f"{p.name} -> 失败: {e}")
115
- def run():
 
116
  model_path = "/Users/jeqin/Downloads/whisper-large-v3-turbo-finetune-0901"
117
- lang = "en"
118
  t0 = time.time()
119
  processor = WhisperProcessor.from_pretrained(
120
  model_path,
@@ -133,9 +134,11 @@ def run():
133
  model.generation_config.language = lang.lower()
134
  model.generation_config.forced_decoder_ids = None
135
  model.eval()
136
-
137
-
138
  print("load model time: ", time.time() - t0)
 
 
 
 
139
  audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
140
  rows = [["file_name", "inference_time", "inference_result"]]
141
  for audio in sorted(audios.glob("*en-ac1-16k/*.wav")): # *s/randomforest*.wav"
@@ -152,6 +155,27 @@ def run():
152
  print(f"{audio.name} -> 失败: {e}")
153
  save_csv("csv/fine-tune_whisper-0901.csv", rows)
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  if __name__ == "__main__":
156
  # main()
157
- run()
 
112
  print(f"{p.name} -> {text}; time cost: {t1-t0}")
113
  except Exception as e:
114
  print(f"{p.name} -> 失败: {e}")
115
+
116
+ def load_model():
117
  model_path = "/Users/jeqin/Downloads/whisper-large-v3-turbo-finetune-0901"
118
+ lang = "zh"
119
  t0 = time.time()
120
  processor = WhisperProcessor.from_pretrained(
121
  model_path,
 
134
  model.generation_config.language = lang.lower()
135
  model.generation_config.forced_decoder_ids = None
136
  model.eval()
 
 
137
  print("load model time: ", time.time() - t0)
138
+ return model, processor
139
+
140
+ def run_test_audios():
141
+ model, processor = load_model()
142
  audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
143
  rows = [["file_name", "inference_time", "inference_result"]]
144
  for audio in sorted(audios.glob("*en-ac1-16k/*.wav")): # *s/randomforest*.wav"
 
155
  print(f"{audio.name} -> 失败: {e}")
156
  save_csv("csv/fine-tune_whisper-0901.csv", rows)
157
 
158
def run_recordings():
    """Transcribe every numbered recording with the fine-tuned Whisper model
    and log accuracy metrics to CSV.

    For each tests/test_data/recordings/<n>.wav the row contains: file name,
    inference time, hypothesis text, edit distance to the reference
    transcript, normalized distance, and a highlighted diff.
    """
    from scripts.asr_utils import get_origin_text_dict, get_text_distance
    model, processor = load_model()
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/recordings/")
    # BUG FIX: the header now names all 6 columns each data row contains
    # (it previously declared only 3).
    rows = [["file_name", "time", "inference_result", "distance", "norm_distance", "diff"]]
    original = get_origin_text_dict()
    # Numeric sort by stem so 2.wav comes before 10.wav.
    for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
        print(audio)
        try:
            t0 = time.time()
            text = transcribe_file(str(audio), model, processor)
            t = time.time() - t0
            print(text)
            print("inference time:", t)
            d, nd, diff = get_text_distance(original[audio.stem], text)
            rows.append([audio.name, round(t, 3), text, d, round(nd, 3), diff])
        except Exception as e:
            # Best-effort batch run: report and continue with the next file.
            print(f"{audio.name} -> 失败: {e}")
    save_csv("csv/fine-tune_whisper.csv", rows)


if __name__ == "__main__":
    # main()
    run_recordings()
scripts/recorder.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sounddevice as sd
2
+ import soundfile as sf
3
+ import sys
4
+ import re
5
+
6
+ TEXT_FILE = '/Users/jeqin/work/code/TestTranslator/tests/test_data/text/test_asr_zh_with_index.txt'
7
+ AUDIO_FOLDER= '/Users/jeqin/work/code/TestTranslator/tests/test_data/recordings'
8
+ SAMPLE_RATE = 16000
9
+ CHANNELS = 1
10
+
11
def get_lines_with_index(filepath):
    """Yield (index, text) for lines of the form "<number>. <text>"."""
    indexed = re.compile(r'^(\d+)\.\s*(.*)')
    with open(filepath, encoding='utf-8') as src:
        for row in src:
            hit = indexed.match(row.strip())
            if hit:
                yield hit.group(1), hit.group(2)
18
+
19
def record_audio(filename):
    """Interactively record one utterance from the microphone and save it.

    Press Enter to start recording, Enter again to stop and save to
    AUDIO_FOLDER/<filename>, or type 'q' + Enter to discard the take and
    start over.
    """
    import numpy as np

    def callback(indata, frames, time, status):
        # The input stream delivers float32 chunks; keep a copy of each.
        recording.append(indata.copy())

    while True:
        print("按回车开始录音...")
        input()
        print("正在录音,按回车结束录音,或输入 q 回车重新录音。")
        recording = []

        with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS,
                            dtype='float32', callback=callback):
            user_input = input()
        # Leaving the `with` block stops the stream in both branches, same
        # as the original control flow; the redundant `stop` flag is gone.

        if user_input.strip().lower() == 'q':
            print("重新录音...")
            continue  # discard this take and loop for a fresh one

        audio_np = np.concatenate(recording, axis=0)
        # Peak-normalize to 0.99 full scale; skip all-silence takes.
        max_val = np.max(np.abs(audio_np))
        if max_val > 0:
            audio_np = audio_np * (0.99 / max_val)
        # BUG FIX: the save path and the confirmation message previously
        # ignored the `filename` argument.
        sf.write(f"{AUDIO_FOLDER}/{filename}", audio_np, SAMPLE_RATE)
        print(f"已保存: {filename}")
        break
48
+
49
def main(target_index=52):
    """Walk the indexed transcript and record the sentence at *target_index*.

    Prints every line for context but only records the matching index.
    The index is a parameter (default 52 preserves the historical,
    previously hard-coded behavior) so resuming at another sentence no
    longer requires editing the code.
    """
    for idx, text in get_lines_with_index(TEXT_FILE):
        print(f"{idx}. {text}")
        if int(idx) == target_index:
            record_audio(f"{idx}.wav")


if __name__ == '__main__':
    main()
scripts/run_funasr.py CHANGED
@@ -27,7 +27,7 @@ def main():
27
  print("load model: ", t1 - t0)
28
  audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
29
  rows = [["file_name", "inference_time", "inference_result"]]
30
- for audio in sorted(audios.glob("*ac1-16k/Chinese*")):
31
  print(audio)
32
  t1 = time.time()
33
  try:
 
27
  print("load model: ", t1 - t0)
28
  audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
29
  rows = [["file_name", "inference_time", "inference_result"]]
30
+ for audio in sorted(audios.glob("*mix/*")):
31
  print(audio)
32
  t1 = time.time()
33
  try:
scripts/{run_quant.py → run_funasr_quant.py} RENAMED
@@ -2,13 +2,15 @@ from pathlib import Path
2
  import time
3
  import csv
4
  from funasr_onnx import SeacoParaformer, CT_Transformer, Fsmn_vad
 
5
 
6
  def save_csv(file_path, rows):
7
  with open(file_path, "w", encoding="utf-8") as f:
8
  writer = csv.writer(f)
9
  writer.writerows(rows)
10
  print(f"write csv to {file_path}")
11
- def main():
 
12
  model_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
13
 
14
  asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
@@ -21,31 +23,52 @@ def main():
21
  punc_model = CT_Transformer(punc_model_path, quantize=quantize)
22
  t1 = time.time()
23
  print("load model time:", t1 - t0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
25
- rows = [["file_name", "inference_time", "inference_result"]]
26
- for audio in sorted(audios.glob("*s/randomforest*.wav")):
27
- t1 = time.time()
28
- vad_res = vad_model(str(audio))
29
- t2 = time.time()
30
- print("vad time:", t2-t1)
31
- asr_res = asr_model(str(audio), hotwords="")
32
- asr_text = asr_res[0]["preds"]
33
- t3 = time.time()
34
- print("asr time:", t3-t2)
35
- print("asr text:", asr_text)
36
- result = punc_model(asr_text)
37
- text = result[0]
38
- t4 = time.time()
39
- print("punc time:", t4-t3)
40
- print("punc text:", text)
41
- # print(text)
42
- # vad_res = vad_model(str(audio))
43
- # t5 = time.time()
44
- # print("vad time:", t5 - t4)
45
- t = t4-t1
46
- print("inference:", t)
47
- rows.append([f"{audio.parent.name}/{audio.name}", t, text])
48
- file_name = "csv/quant.csv" if quantize else "run_onnx.csv"
49
  save_csv(file_name, rows)
 
50
  if __name__ == '__main__':
51
- main()
 
2
  import time
3
  import csv
4
  from funasr_onnx import SeacoParaformer, CT_Transformer, Fsmn_vad
5
+ from scripts.asr_utils import get_origin_text_dict, get_text_distance
6
 
7
def save_csv(file_path, rows):
    """Write *rows* (a list of lists) to *file_path* as UTF-8 CSV.

    newline='' is required by the csv module; without it every record is
    followed by a blank line on Windows.
    """
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(rows)
    print(f"write csv to {file_path}")
12
+
13
+ def load_model(quantize=True):
14
  model_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
15
 
16
  asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
 
23
  punc_model = CT_Transformer(punc_model_path, quantize=quantize)
24
  t1 = time.time()
25
  print("load model time:", t1 - t0)
26
+ return vad_model, asr_model, punc_model
27
+
28
def inference(vad_model, asr_model, punc_model, audio: Path):
    """Run the VAD -> ASR -> punctuation pipeline on one audio file.

    Returns (punctuated_text, total_seconds). The VAD pass output is not
    consumed downstream but its runtime is included in the total, matching
    the original measurement.
    """
    print(audio.name)
    wav = str(audio)
    started = time.time()
    vad_model(wav)  # result unused; kept for its contribution to timing
    raw_text = asr_model(wav, hotwords="")[0]["preds"]
    text = punc_model(raw_text)[0]
    elapsed = time.time() - started
    print(text)
    t = elapsed
    print("inference:", t)
    return text, t
48
+
49
def run_recordings():
    """Evaluate the (quantized) FunASR pipeline on every numbered recording.

    Writes one CSV row per file: name, inference time, hypothesis text,
    edit distance to the reference transcript, normalized distance, and a
    highlighted diff.
    """
    quantize = True
    vad_model, asr_model, punc_model = load_model(quantize)
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/recordings/")
    # BUG FIX: the header now matches the 6 columns appended per data row
    # (it previously declared only 3).
    rows = [["file_name", "time", "inference_result", "distance", "norm_distance", "diff"]]
    original = get_origin_text_dict()
    # Numeric sort by stem so 2.wav comes before 10.wav.
    for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
        text, t = inference(vad_model, asr_model, punc_model, audio)
        d, nd, diff = get_text_distance(original[audio.stem], text)
        rows.append([audio.name, round(t, 3), text, d, round(nd, 3), diff])
    file_name = "csv/funasr_quant.csv" if quantize else "funasr_onnx.csv"
    save_csv(file_name, rows)
61
+
62
def run_test_audios():
    """Run the (quantized) FunASR pipeline over the zh test-audio set and save a CSV."""
    quantize = True
    vad_model, asr_model, punc_model = load_model(quantize)
    root = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "time", "inference_result"]]
    for wav in sorted(root.glob("*s/zh*.wav")):
        text, elapsed = inference(vad_model, asr_model, punc_model, wav)
        rows.append([f"{wav.parent.name}/{wav.name}", round(elapsed, 3), text])
    target = "csv/funasr_quant.csv" if quantize else "funasr_onnx.csv"
    save_csv(target, rows)


if __name__ == '__main__':
    run_recordings()
scripts/run_whisper.py CHANGED
@@ -4,7 +4,7 @@ import time
4
  import csv
5
 
6
  from silero_vad.utils_vad import languages
7
-
8
 
9
  def save_csv(file_path, rows):
10
  with open(file_path, "w", encoding="utf-8") as f:
@@ -12,7 +12,7 @@ def save_csv(file_path, rows):
12
  writer.writerows(rows)
13
  print(f"write csv to {file_path}")
14
 
15
- def main():
16
  models_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
17
  whisper_model = 'large-v3-turbo-q5_0'
18
  t0 = time.time()
@@ -28,18 +28,40 @@ def main():
28
  no_context=True
29
  )
30
  print("load model time: ", time.time()-t0)
31
- audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
32
- rows = [["file_name", "inference_time", "inference_result"]]
33
- for audio in sorted(audios.glob("*-mix/randomforest*.wav")):
 
 
 
 
 
34
  print(audio)
35
  t1 = time.time()
36
- output = model.transcribe(str(audio), language="zh")#, language="zh", initial_prompt="这是一段中文的会议内容。")# initial_prompt="这是一段中文的会议内容。"
37
  t = time.time() - t1
38
  print("inference time:", t)
39
  text = " ".join([a.text for a in output])
40
  print(text)
41
- rows.append([f"{audio.parent.name}/{audio.name}", t, text])
42
- # save_csv("csv/whisper.csv", rows)
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  if __name__ == '__main__':
45
- main()
 
4
  import csv
5
 
6
  from silero_vad.utils_vad import languages
7
+ from scripts.asr_utils import get_origin_text_dict, get_text_distance
8
 
9
  def save_csv(file_path, rows):
10
  with open(file_path, "w", encoding="utf-8") as f:
 
12
  writer.writerows(rows)
13
  print(f"write csv to {file_path}")
14
 
15
+ def load_model():
16
  models_dir = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models")
17
  whisper_model = 'large-v3-turbo-q5_0'
18
  t0 = time.time()
 
28
  no_context=True
29
  )
30
  print("load model time: ", time.time()-t0)
31
+ return model
32
+
33
def run_recordings():
    """Transcribe every numbered recording with the whisper model and log
    accuracy metrics to CSV.

    Each row contains: file name, inference time, hypothesis text, edit
    distance to the reference transcript, normalized distance, and a
    highlighted diff.
    """
    model = load_model()
    audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/recordings/")
    # BUG FIX: the header now names all 6 columns each data row contains
    # (it previously declared only 3).
    rows = [["file_name", "time", "inference_result", "distance", "norm_distance", "diff"]]
    original = get_origin_text_dict()
    # Numeric sort by stem so 2.wav comes before 10.wav.
    for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
        print(audio)
        t1 = time.time()
        output = model.transcribe(str(audio), language="zh", initial_prompt="以下是普通话句子,这是一段会议内容。")
        t = time.time() - t1
        print("inference time:", t)
        text = " ".join([a.text for a in output])
        print(text)
        d, nd, diff = get_text_distance(original[audio.stem], text)
        rows.append([audio.name, round(t, 3), text, d, round(nd, 3), diff])
    save_csv("csv/whisper.csv", rows)
49
 
50
+
51
def run_test_audios():
    """Transcribe the language-matched test-audio set with whisper and save a CSV."""
    model = load_model()
    lang = "zh"
    root = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
    rows = [["file_name", "time", "inference_result"]]
    for wav in sorted(root.glob(f"*{lang}*/*.wav")):
        print(wav)
        started = time.time()
        output = model.transcribe(str(wav), language=lang, initial_prompt="以下是普通话句子,这是一段会议内容。")
        elapsed = time.time() - started
        print("inference time:", elapsed)
        text = " ".join(segment.text for segment in output)
        print(text)
        rows.append([f"{wav.parent.name}/{wav.name}", round(elapsed, 3), text])
    save_csv("csv/whisper.csv", rows)


if __name__ == '__main__':
    run_recordings()
temp.py CHANGED
@@ -1,4 +1,14 @@
1
- text ="""
2
- {%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') 
%}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}
3
- """
4
- print(text)
 
 
 
 
 
 
 
 
 
 
 
1
# Scratch script: print each non-empty, non-comment transcript line with a
# running 1-based index prefix ("1. ...", "2. ...").
TEXT_FILE = '/Users/jeqin/work/code/TestTranslator/tests/test_data/text/test_asr_zh.txt'
counter = 1
with open(TEXT_FILE, encoding='utf-8') as source:
    for raw in source:
        stripped = raw.strip()
        # Blank lines and '#' headers are skipped without consuming an index.
        if not stripped or stripped.startswith('#'):
            continue
        print(f"{counter}. {stripped}")
        counter += 1
tests/test_data/recordings/1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f6f1006e789f69f30f9047d5d93f2cbc58012f1a41a21ebfc12b93c2de7d89
3
+ size 141854
tests/test_data/recordings/10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ce3be6f2ae27a19ee56eafb95ac3ca62b7946c4733e8aa542e74f36d06b036b
3
+ size 184664
tests/test_data/recordings/11.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6720957bd8bcba4515cc3e416d9898c38f2dbdb377697ef69a528e621e11d0a1
3
+ size 158234
tests/test_data/recordings/12.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c686cd5322384e36747647f228cd023a77bc323cc26fab9f7fcd351f93019dc3
3
+ size 201614
tests/test_data/recordings/13.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baf0c19d4729ecba8057a2292cdb117ec9c4836aaa5aabbb9def63ef1d7ffbb3
3
+ size 201674
tests/test_data/recordings/14.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44baf4bdd176224377b4318400d5ac8a4947517fe09b0e16619d63d77b18a631
3
+ size 321194
tests/test_data/recordings/15.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5abf82f0fa3ce6737f7cdae83931b78295bf2adc48f394d1fa6c7193c76e879
3
+ size 252284
tests/test_data/recordings/16.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3323eeefe1f1219cf2db94a41e0370f699f66e495c390b5d0a8576748955a220
3
+ size 278594
tests/test_data/recordings/17.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8654234ed4a01ef2b0b32779757da30c5c6929325977c0d7640d40216d12abd5
3
+ size 381464
tests/test_data/recordings/18.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ece527a8f191c88bda2800715898ae5e3c2ec13b86d43467ec9e37a53b8e77c
3
+ size 284024
tests/test_data/recordings/19.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5d57a7b312033cc19cc7f7a6db06b01b6d37e191a58588b9b32a7ab98032aa3
3
+ size 367964
tests/test_data/recordings/2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31be8ce54c599118787494d88e481e511dab05d45d1a7383710ceecf3db7569d
3
+ size 149924
tests/test_data/recordings/20.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76acbe509263e26fd076bab325cde6b45c52d792a562180ddb9b5a4da62beec7
3
+ size 274964
tests/test_data/recordings/21.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e143f1d398c8f144324767474805d69b9a1f285734f354d1ff10824f4f869b70
3
+ size 313754
tests/test_data/recordings/22.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf10a0bd8f9b202a5fc32f467d7a55c4029675a28f428970a2149f50a315f112
3
+ size 272714
tests/test_data/recordings/23.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c17c5316b39779bcb84d0aa1593c05fc22907697f6d6c7236c7a7260166e406b
3
+ size 266204
tests/test_data/recordings/24.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb1dba9272cd68902b20e705ce687b6cce5e6ef79a5427b7a1bcbb0532137227
3
+ size 297314
tests/test_data/recordings/25.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b89d3440b113f0db28ec55776d3ca97cdc430ea9d1a13023bfec0d5aba5cabe9
3
+ size 270434
tests/test_data/recordings/26.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a08d7996a8485369483e7e6f2e19e7c21dbdec91df3bae74a4cba360eb80d9e4
3
+ size 150704
tests/test_data/recordings/27.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae80a1a961185e18c9b93216b3ce1a5baee9e3aee0fbcf4114a0319c9fce556a
3
+ size 174524
tests/test_data/recordings/28.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9293d2d95d78ce9392fd49e220798eb2ef79a7b361e5c9aaa00451125d79c721
3
+ size 183014
tests/test_data/recordings/29.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a9f941054691b17c3d6320b1bd4ae0468ab4f335d667d87ae25ef2b4ba251f1
3
+ size 196994
tests/test_data/recordings/3.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78c500e9787d1882bde1e3d7fe5ab48bffebc69a503e99f0f6ba7d011941e687
3
+ size 242084
tests/test_data/recordings/30.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32fbcac1e31b4fbec5a758a40ac01c477b3d79b79c78195134d17f51ebaf5023
3
+ size 162194
tests/test_data/recordings/31.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4bb89d406d30a1cc77aea7f7d2162fc9db33b33c6f4d6258b09c0018c2b3e37
3
+ size 190634
tests/test_data/recordings/32.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3632d894647cdae67fecb4703fcf7ce1030cd905580e20fbeca5cc51d2b0e40c
3
+ size 268694
tests/test_data/recordings/33.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e52805deabef8843e1f75ac8ae651f766dd5e0225981a8b843109e73b0f522db
3
+ size 247094
tests/test_data/recordings/34.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9008cf27e698797c14dc9f5594952a8a84007c8a6f2f674dbf69bed276d392a
3
+ size 248234
tests/test_data/recordings/35.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e15a82a43aa78ab86983c85883ed49ffbb99fc87cccf33022eb6bcb8d9eea88c
3
+ size 257864
tests/test_data/recordings/36.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:020ed5cb9eb3a00e4fdb9408783d36cc758534e0c3263a0896995faebdcf5535
3
+ size 274394
tests/test_data/recordings/37.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f4d2d0f66885e6bee03bf4806cd2cf760ebec7b120bdbcb49ac46c849e0c1a4
3
+ size 295964
tests/test_data/recordings/38.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a54649f65aabfdd20fe2df2c4b1f6e294e773540cb63b65503c0d1732c990319
3
+ size 277334
tests/test_data/recordings/39.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae68f060247ea11ee422bffa01d1e444fa323b7e2105a63079ca49cf2d9e0df7
3
+ size 347624
tests/test_data/recordings/4.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:113d7173b16bd2d921d0e63b8a1f39fae1450da68b4b6e835d8f6bea405bf25e
3
+ size 227264
tests/test_data/recordings/40.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb33cf5324157fcc1a81398f467c75ca0c31a697b0eb378cfef37c237af3e875
3
+ size 277994
tests/test_data/recordings/41.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc3ef23125d4c5246ad4bd3e41b4f842356ecfd186ff6112b1f12a2474e912ec
3
+ size 258284
tests/test_data/recordings/42.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9efd63be189ba2eb143ad8cabb03c1088f113e3b25bd586f5998669bca2acb10
3
+ size 159404
tests/test_data/recordings/43.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:343c18a93f85637f7872953f98d1fc5a03d37c2271b147b53a4f68f0126d2fad
3
+ size 556814
tests/test_data/recordings/44.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f52275b9d21e17e527a9dff2e7f102210764d1f305e269a1f336c2ddb80fcf9
3
+ size 246134
tests/test_data/recordings/45.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe73673d6d8ed05031ffdaac3015419ee3a495022d515e0161867fe49da139b
3
+ size 296894
tests/test_data/recordings/46.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:716f798077ff25b2f8bc7620ba70b45f6a51152b70079688eaa6dae18b90f464
3
+ size 279404
tests/test_data/recordings/47.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66866edef043eedf895fdeac5ea025291be2e68e64819355b56c6588a51bb136
3
+ size 285074