Update the OpenVINO (OV) inference script: fix the finetuned Whisper model path and add Emilia / test-dataset benchmark runners.
Browse files
scripts/run_whisper_finetuned_with_punc_ov.py
CHANGED
|
@@ -43,7 +43,7 @@ def transcribe_file(
|
|
| 43 |
def load_model(device):
|
| 44 |
device = "GPU" # GPU can be used as well
|
| 45 |
# model_path = r"D:\yujuan\yoyo-translator-win\models\whisper-large-v3-turbo-int8"
|
| 46 |
-
model_path = r"D:\yujuan\models\whisper-turbo-
|
| 47 |
punc_model = r"D:\yujuan\models\funasr_ct\ct-punc"
|
| 48 |
|
| 49 |
t0 = time.time()
|
|
@@ -94,6 +94,75 @@ def run_recordings():
|
|
| 94 |
rows.append([audio.name, round(t, 3), text, d, round(nd,3), diff])
|
| 95 |
save_csv("csv/finetune_whisper_with_punc.csv", rows)
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
if __name__ == "__main__":
|
| 98 |
# main()
|
| 99 |
-
|
|
|
|
| 43 |
def load_model(device):
|
| 44 |
device = "GPU" # GPU can be used as well
|
| 45 |
# model_path = r"D:\yujuan\yoyo-translator-win\models\whisper-large-v3-turbo-int8"
|
| 46 |
+
model_path = r"D:\yujuan\models\whisper-turbo-39000-int8p\whisper-turbo-39000-int8p"
|
| 47 |
punc_model = r"D:\yujuan\models\funasr_ct\ct-punc"
|
| 48 |
|
| 49 |
t0 = time.time()
|
|
|
|
| 94 |
rows.append([audio.name, round(t, 3), text, d, round(nd,3), diff])
|
| 95 |
save_csv("csv/finetune_whisper_with_punc.csv", rows)
|
| 96 |
|
| 97 |
+
def run_test_dataset():
    """Benchmark the finetuned OV Whisper + punctuation pipeline on the local test set.

    Reads (audio_path, sentence, duration) triples from
    ``../tests/test_data/dataset.txt``, transcribes each clip, and writes the
    collected results to ``csv/whisper_finetune_ov_results.json``.  Errors and
    Ctrl-C are reported but do not prevent saving the partial results gathered
    so far.
    """
    from scripts.asr_utils import read_dataset
    import json  # hoisted from the bottom of the function for readability

    device = "GPU"
    lang = "<|zh|>"
    asr, punc = load_model(device)

    test_data = Path("../tests/test_data/dataset.txt")
    audio_parent = Path("../tests/test_data/")
    result_list = []
    count = 0
    try:
        for audio_path, sentence, duration in read_dataset(test_data):
            count += 1
            print(f"processing {count}: {audio_path}")

            text, t = inference(audio_parent/audio_path, asr, punc, lang)
            print("inference time:", t)
            print(text)
            result_list.append({
                "index": count,
                "audio_path": audio_path,
                "reference": sentence,
                "duration": duration,
                "inference_time": round(t, 3),
                "inference_result": text
            })
    except KeyboardInterrupt as e:
        # Ctrl-C stops the loop; partial results are still saved below.
        print(e)
    except Exception as e:
        # Best-effort batch run: report the failure and keep partial results.
        print(e)

    with open("csv/whisper_finetune_ov_results.json", "w", encoding="utf-8") as f:
        json.dump(result_list, f, ensure_ascii=False, indent=2)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def run_test_emilia():
    """Transcribe up to 5000 clips from the Emilia ZH-B000000 subset.

    Runs the finetuned OV Whisper + punctuation pipeline over each clip and
    dumps the per-clip results (reference text, duration, inference time,
    hypothesis) to ``csv/whisper_finetune_emilia_ov_results.json``.  A crash
    or Ctrl-C is printed, and whatever was collected is still written out.
    """
    from scripts.asr_utils import read_emilia
    import json

    device = "GPU"
    lang = "<|zh|>"
    asr, punc = load_model(device)
    parent = Path("../tests/test_data/ZH-B000000")

    result_list = []
    try:
        samples = read_emilia(parent, count_limit=5000)
        for count, (audio_path, sentence, duration) in enumerate(samples, start=1):
            print(f"processing {count}: {audio_path.name}")

            text, t = inference(audio_path, asr, punc, lang)
            print("inference time:", t)
            print(text)
            result_list.append({
                "index": count,
                "audio_path": audio_path.name,
                "reference": sentence,
                "duration": duration,
                "inference_time": round(t, 3),
                "inference_result": text,
            })
    except Exception as e:
        print(e)
    except KeyboardInterrupt as e:
        print(e)

    with open("csv/whisper_finetune_emilia_ov_results.json", "w", encoding="utf-8") as f:
        json.dump(result_list, f, ensure_ascii=False, indent=2)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
if __name__ == "__main__":
    # Entry point: run the Emilia benchmark by default.
    # (run_test_dataset() is the alternative local-dataset run.)
    run_test_emilia()
|