yujuanqin committed on
Commit
fe108d8
·
1 Parent(s): 1e495f3

update script for ov

Browse files
scripts/run_whisper_finetuned_with_punc_ov.py CHANGED
@@ -43,7 +43,7 @@ def transcribe_file(
43
  def load_model(device):
44
  device = "GPU" # GPU can be used as well
45
  # model_path = r"D:\yujuan\yoyo-translator-win\models\whisper-large-v3-turbo-int8"
46
- model_path = r"D:\yujuan\models\whisper-turbo-25000-int8p\whisper-turbo-25000-int8p"
47
  punc_model = r"D:\yujuan\models\funasr_ct\ct-punc"
48
 
49
  t0 = time.time()
@@ -94,6 +94,75 @@ def run_recordings():
94
  rows.append([audio.name, round(t, 3), text, d, round(nd,3), diff])
95
  save_csv("csv/finetune_whisper_with_punc.csv", rows)
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  if __name__ == "__main__":
98
  # main()
99
- run_recordings()
 
43
  def load_model(device):
44
  device = "GPU" # GPU can be used as well
45
  # model_path = r"D:\yujuan\yoyo-translator-win\models\whisper-large-v3-turbo-int8"
46
+ model_path = r"D:\yujuan\models\whisper-turbo-39000-int8p\whisper-turbo-39000-int8p"
47
  punc_model = r"D:\yujuan\models\funasr_ct\ct-punc"
48
 
49
  t0 = time.time()
 
94
  rows.append([audio.name, round(t, 3), text, d, round(nd,3), diff])
95
  save_csv("csv/finetune_whisper_with_punc.csv", rows)
96
 
97
def run_test_dataset():
    """Transcribe every utterance in the local test dataset and save results.

    Reads (audio_path, sentence, duration) triples from
    ``../tests/test_data/dataset.txt`` via ``read_dataset``, runs ASR +
    punctuation inference on each clip, and dumps the collected records to
    ``csv/whisper_finetune_ov_results.json``. Whatever has been processed so
    far is still written out if an error occurs or the run is interrupted
    with Ctrl-C (KeyboardInterrupt is not an ``Exception`` subclass, so it
    needs its own handler).
    """
    # Hoisted from the bottom of the function so the dependency is visible
    # up front; local import kept to match the file's script style.
    import json
    from scripts.asr_utils import read_dataset

    device = "GPU"
    lang = "<|zh|>"
    asr, punc = load_model(device)

    test_data = Path("../tests/test_data/dataset.txt")
    audio_parent = Path("../tests/test_data/")
    result_list = []  # NOTE: removed dead `rows` header list — it was never written out
    count = 0
    try:
        for audio_path, sentence, duration in read_dataset(test_data):
            count += 1
            print(f"processing {count}: {audio_path}")

            text, t = inference(audio_parent / audio_path, asr, punc, lang)
            print("inference time:", t)
            print(text)
            result_list.append({
                "index": count,
                "audio_path": audio_path,
                "reference": sentence,
                "duration": duration,
                "inference_time": round(t, 3),
                "inference_result": text,
            })
    except Exception as e:
        # Best-effort batch run: report the failure and fall through so the
        # partial results gathered so far are still saved.
        print(e)
    except KeyboardInterrupt as e:
        print(e)
    with open("csv/whisper_finetune_ov_results.json", "w", encoding="utf-8") as f:
        json.dump(result_list, f, ensure_ascii=False, indent=2)
131
+
132
+
133
def run_test_emilia():
    """Run ASR + punctuation inference over the Emilia ZH-B000000 subset.

    Transcribes up to 5000 utterances yielded by ``read_emilia`` and writes
    one record per utterance to
    ``csv/whisper_finetune_emilia_ov_results.json``. Records collected so
    far are still flushed to disk when the loop fails or is interrupted.
    """
    from scripts.asr_utils import read_emilia

    lang = "<|zh|>"
    asr, punc = load_model("GPU")
    parent = Path("../tests/test_data/ZH-B000000")

    records = []
    idx = 0
    try:
        for wav, reference, dur in read_emilia(parent, count_limit=5000):
            idx += 1
            print(f"processing {idx}: {wav.name}")

            transcript, elapsed = inference(wav, asr, punc, lang)
            print("inference time:", elapsed)
            print(transcript)
            records.append({
                "index": idx,
                "audio_path": wav.name,
                "reference": reference,
                "duration": dur,
                "inference_time": round(elapsed, 3),
                "inference_result": transcript,
            })
    except Exception as err:
        # Report and continue to the JSON dump so partial output survives.
        print(err)
    except KeyboardInterrupt as err:
        print(err)
    import json
    with open("csv/whisper_finetune_emilia_ov_results.json", "w", encoding="utf-8") as f:
        json.dump(records, f, ensure_ascii=False, indent=2)
164
+
165
+
166
if __name__ == "__main__":
    # main()  # original entry point left disabled; Emilia evaluation runs instead
    run_test_emilia()