yujuanqin committed on
Commit
8a27f7c
·
1 Parent(s): 8a3bc32

Update hard-coded absolute paths to relative paths

Browse files
scripts/asr_utils.py CHANGED
@@ -4,7 +4,7 @@ import wave
4
  import re
5
 
6
  def add_text_index():
7
- text_file = '/Users/jeqin/work/code/TestTranslator/tests/test_data/text/test_asr_zh.txt'
8
  index = 1
9
  with open(text_file, encoding='utf-8') as f:
10
  for line in f:
@@ -45,9 +45,9 @@ def write_csv(rows, output_csv):
45
  writer.writerows(rows)
46
 
47
  def print_text_and_audio_length():
48
- text_file = '/Users/jeqin/work/code/TestTranslator/tests/test_data/text/test_asr_zh_with_index.txt'
49
- audio_folder = '/Users/jeqin/work/code/TestTranslator/tests/test_data/recordings'
50
- output_csv = '/Users/jeqin/work/code/TestTranslator/scripts/csv/text_audio_length.csv'
51
  rows = []
52
  for idx, text in get_lines_with_index(text_file):
53
  # print(idx)
@@ -68,7 +68,7 @@ def get_text_distance(text1, text2):
68
  return d, nd, diff
69
 
70
  def get_origin_text_dict():
71
- text_file = '/Users/jeqin/work/code/TestTranslator/tests/test_data/text/test_asr_zh_with_index.txt'
72
  text_dict = {}
73
  for idx, text in get_lines_with_index(text_file):
74
  text_dict[idx] = text
@@ -77,5 +77,5 @@ def get_origin_text_dict():
77
 
78
  if __name__ == '__main__':
79
  # add_text_index()
80
- # print_text_and_audio_length()
81
- pass
 
4
  import re
5
 
6
  def add_text_index():
7
+ text_file = '../tests/test_data/text/test_asr_zh.txt'
8
  index = 1
9
  with open(text_file, encoding='utf-8') as f:
10
  for line in f:
 
45
  writer.writerows(rows)
46
 
47
  def print_text_and_audio_length():
48
+ text_file = '../tests/test_data/text/test_asr_zh_with_index.txt'
49
+ audio_folder = '../tests/test_data/recordings'
50
+ output_csv = 'csv/text_audio_length.csv'
51
  rows = []
52
  for idx, text in get_lines_with_index(text_file):
53
  # print(idx)
 
68
  return d, nd, diff
69
 
70
  def get_origin_text_dict():
71
+ text_file = '../tests/test_data/text/test_asr_zh_with_index.txt'
72
  text_dict = {}
73
  for idx, text in get_lines_with_index(text_file):
74
  text_dict[idx] = text
 
77
 
78
  if __name__ == '__main__':
79
  # add_text_index()
80
+ print_text_and_audio_length()
81
+ # pass
scripts/run_funasr_quant.py CHANGED
@@ -49,7 +49,7 @@ def inference(vad_model, asr_model, punc_model, audio:Path):
49
  def run_recordings():
50
  quantize = True
51
  vad_model, asr_model, punc_model = load_model(quantize)
52
- audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/recordings/")
53
  rows = [["file_name", "time", "inference_result"]]
54
  original = get_origin_text_dict()
55
  for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
@@ -62,7 +62,7 @@ def run_recordings():
62
  def run_test_audios():
63
  quantize = True
64
  vad_model, asr_model, punc_model = load_model(quantize)
65
- audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
66
  rows = [["file_name", "time", "inference_result"]]
67
  for audio in sorted(audios.glob("*s/zh*.wav")):
68
  text, t = inference(vad_model, asr_model, punc_model, audio)
 
49
  def run_recordings():
50
  quantize = True
51
  vad_model, asr_model, punc_model = load_model(quantize)
52
+ audios = Path("../tests/test_data/recordings/")
53
  rows = [["file_name", "time", "inference_result"]]
54
  original = get_origin_text_dict()
55
  for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
 
62
  def run_test_audios():
63
  quantize = True
64
  vad_model, asr_model, punc_model = load_model(quantize)
65
+ audios = Path("../tests/test_data/test_audios/")
66
  rows = [["file_name", "time", "inference_result"]]
67
  for audio in sorted(audios.glob("*s/zh*.wav")):
68
  text, t = inference(vad_model, asr_model, punc_model, audio)
scripts/run_whisper.py CHANGED
@@ -32,7 +32,7 @@ def load_model():
32
 
33
  def run_recordings():
34
  model = load_model()
35
- audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/recordings/")
36
  rows = [["file_name", "time", "inference_result"]]
37
  original = get_origin_text_dict()
38
  for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
@@ -51,7 +51,7 @@ def run_recordings():
51
  def run_test_audios():
52
  model = load_model()
53
  lang = "zh"
54
- audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
55
  rows = [["file_name", "time", "inference_result"]]
56
  for audio in sorted(audios.glob(f"*{lang}*/*.wav")):
57
  print(audio)
 
32
 
33
  def run_recordings():
34
  model = load_model()
35
+ audios = Path("../tests/test_data/recordings/")
36
  rows = [["file_name", "time", "inference_result"]]
37
  original = get_origin_text_dict()
38
  for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
 
51
  def run_test_audios():
52
  model = load_model()
53
  lang = "zh"
54
+ audios = Path("../tests/test_data/test_audios/")
55
  rows = [["file_name", "time", "inference_result"]]
56
  for audio in sorted(audios.glob(f"*{lang}*/*.wav")):
57
  print(audio)
scripts/{infer_finetuned_whisper.py → run_whisper_finetuned.py} RENAMED
@@ -139,7 +139,7 @@ def load_model():
139
 
140
  def run_test_audios():
141
  model, processor = load_model()
142
- audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/test_audios/")
143
  rows = [["file_name", "inference_time", "inference_result"]]
144
  for audio in sorted(audios.glob("*en-ac1-16k/*.wav")): # *s/randomforest*.wav"
145
  try:
@@ -158,7 +158,7 @@ def run_test_audios():
158
  def run_recordings():
159
  from scripts.asr_utils import get_origin_text_dict, get_text_distance
160
  model, processor = load_model()
161
- audios = Path("/Users/jeqin/work/code/TestTranslator/tests/test_data/recordings/")
162
  rows = [["file_name", "time", "inference_result"]]
163
  original = get_origin_text_dict()
164
  for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
 
139
 
140
  def run_test_audios():
141
  model, processor = load_model()
142
+ audios = Path("../tests/test_data/test_audios/")
143
  rows = [["file_name", "inference_time", "inference_result"]]
144
  for audio in sorted(audios.glob("*en-ac1-16k/*.wav")): # *s/randomforest*.wav"
145
  try:
 
158
  def run_recordings():
159
  from scripts.asr_utils import get_origin_text_dict, get_text_distance
160
  model, processor = load_model()
161
+ audios = Path("../tests/test_data/recordings/")
162
  rows = [["file_name", "time", "inference_result"]]
163
  original = get_origin_text_dict()
164
  for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):