refactor file structure
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- environment.py +1 -1
- lib/utils.py +46 -12
- main.py +1 -1
- scripts/asr_utils.py +4 -4
- scripts/audios.txt +9 -9
- scripts/caculate_cer.py +14 -12
- scripts/csv/fine-tune_whisper.csv +84 -84
- scripts/funasr_utils.py +1 -1
- scripts/recorder.py +2 -2
- scripts/run_funasr.py +1 -1
- scripts/run_funasr_c.py +1 -1
- scripts/run_funasr_quant.py +37 -7
- scripts/run_whisper.py +5 -5
- scripts/run_whisper_finetuned.py +4 -4
- scripts/split_audio.py +1 -1
- temp.py +47 -14
- {tests/test_data → test_data}/.gitattributes +0 -0
- {tests/test_data → test_data}/__init__.py +0 -0
- {tests/test_data → test_data}/dataset.txt +0 -0
- {tests/test_data → test_data}/test_audios.zip +0 -0
- {tests/test_data → test_data}/text/test_asr_zh.txt +0 -0
- {tests/test_data → test_data}/text/test_asr_zh_with_index.txt +0 -0
- {tests/test_data → test_data}/text/test_translation_en.txt +0 -0
- {tests/test_data → test_data}/text/test_translation_zh.txt +0 -0
- tests/test_app/__init__.py +0 -0
- tests/{conftest.py → test_app/conftest.py} +0 -0
- tests/{test_accuracy.py → test_app/test_accuracy.py} +0 -0
- tests/{test_accuracy_and_delay.py → test_app/test_accuracy_and_delay.py} +0 -0
- tests/{test_delay.py → test_app/test_delay.py} +0 -0
- tests/{test_logfile.py → test_app/test_logfile.py} +0 -0
- tests/test_data/dataset_bkp.txt +0 -0
- tests/test_data/recordings/1.wav +0 -3
- tests/test_data/recordings/10.wav +0 -3
- tests/test_data/recordings/11.wav +0 -3
- tests/test_data/recordings/12.wav +0 -3
- tests/test_data/recordings/13.wav +0 -3
- tests/test_data/recordings/14.wav +0 -3
- tests/test_data/recordings/15.wav +0 -3
- tests/test_data/recordings/16.wav +0 -3
- tests/test_data/recordings/17.wav +0 -3
- tests/test_data/recordings/18.wav +0 -3
- tests/test_data/recordings/19.wav +0 -3
- tests/test_data/recordings/2.wav +0 -3
- tests/test_data/recordings/20.wav +0 -3
- tests/test_data/recordings/21.wav +0 -3
- tests/test_data/recordings/22.wav +0 -3
- tests/test_data/recordings/23.wav +0 -3
- tests/test_data/recordings/24.wav +0 -3
- tests/test_data/recordings/25.wav +0 -3
- tests/test_data/recordings/26.wav +0 -3
environment.py
CHANGED
|
@@ -16,7 +16,7 @@ DEV_PATH = DEV_DIR / "main.py"
|
|
| 16 |
DEV_LOG = APP_LOG
|
| 17 |
|
| 18 |
DEBUG_PORT = 9222
|
| 19 |
-
TEST_DATA = PROJECT_DIR / "
|
| 20 |
TEST_AUDIOS_DIR = TEST_DATA / "test_audios"
|
| 21 |
|
| 22 |
REPORTS_DIR = PROJECT_DIR / "reports"
|
|
|
|
| 16 |
DEV_LOG = APP_LOG
|
| 17 |
|
| 18 |
DEBUG_PORT = 9222
|
| 19 |
+
TEST_DATA = PROJECT_DIR / "test_data"
|
| 20 |
TEST_AUDIOS_DIR = TEST_DATA / "test_audios"
|
| 21 |
|
| 22 |
REPORTS_DIR = PROJECT_DIR / "reports"
|
lib/utils.py
CHANGED
|
@@ -4,10 +4,38 @@ import subprocess
|
|
| 4 |
from subprocess import CompletedProcess
|
| 5 |
from typing import Literal
|
| 6 |
import re
|
|
|
|
| 7 |
import difflib
|
|
|
|
|
|
|
| 8 |
|
| 9 |
import textdistance
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def get_time_str(level:Literal["d","s","ms"]="d"):
|
| 13 |
time = datetime.now()
|
|
@@ -82,16 +110,22 @@ def time_to_float(s: str):
|
|
| 82 |
return float(d)
|
| 83 |
return 0.0
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
if __name__ == '__main__':
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
print(
|
| 95 |
-
print(highlight_diff(text_1, text_2, " "))
|
| 96 |
-
print(run_textdistance(text_3, text_4))
|
| 97 |
-
print(highlight_diff(text_3, text_4))
|
|
|
|
| 4 |
from subprocess import CompletedProcess
|
| 5 |
from typing import Literal
|
| 6 |
import re
|
| 7 |
+
import time
|
| 8 |
import difflib
|
| 9 |
+
from functools import wraps
|
| 10 |
+
from pathlib import Path
|
| 11 |
|
| 12 |
import textdistance
|
| 13 |
+
import numpy as np
|
| 14 |
+
import soundfile as sf
|
| 15 |
+
|
| 16 |
+
def timer(func):
|
| 17 |
+
@wraps(func)
|
| 18 |
+
def wrapper(*args, **kwargs):
|
| 19 |
+
start_time = time.perf_counter()
|
| 20 |
+
result = func(*args, **kwargs) # 执行原函数
|
| 21 |
+
end_time = time.perf_counter()
|
| 22 |
+
run_time = end_time - start_time
|
| 23 |
+
print(f"函数 {func.__name__!r} 执行耗时: {run_time:.4f} 秒")
|
| 24 |
+
return result
|
| 25 |
+
return wrapper
|
| 26 |
+
|
| 27 |
+
class Timer:
|
| 28 |
+
def __init__(self, log=""):
|
| 29 |
+
self.log = log
|
| 30 |
+
|
| 31 |
+
def __enter__(self):
|
| 32 |
+
self.start = time.perf_counter()
|
| 33 |
+
return self
|
| 34 |
+
|
| 35 |
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
| 36 |
+
end = time.perf_counter()
|
| 37 |
+
self.duration = end - self.start
|
| 38 |
+
print(f"{self.log} cost: {self.duration:.4f} 秒")
|
| 39 |
|
| 40 |
def get_time_str(level:Literal["d","s","ms"]="d"):
|
| 41 |
time = datetime.now()
|
|
|
|
| 110 |
return float(d)
|
| 111 |
return 0.0
|
| 112 |
|
| 113 |
+
def read_audio(file:Path)->np.ndarray:
|
| 114 |
+
audio, sr = sf.read(file)
|
| 115 |
+
if sr != 16000:
|
| 116 |
+
raise ValueError(f"只支持 16k 采样率的音频,当前采样率为 {sr}")
|
| 117 |
+
return audio.astype(np.float32)
|
| 118 |
+
|
| 119 |
+
def write_audio(file:Path, audio:np.ndarray, sr=16000):
|
| 120 |
+
sf.write(file, audio, sr)
|
| 121 |
+
print(f"写入音频文件 {file}")
|
| 122 |
|
| 123 |
if __name__ == '__main__':
|
| 124 |
+
with Timer() as duration_b:
|
| 125 |
+
print("开始操作 B...")
|
| 126 |
+
time.sleep(0.4)
|
| 127 |
+
print(duration_b.duration)
|
| 128 |
+
with Timer("C") as duration_b:
|
| 129 |
+
print("开始操作 C...")
|
| 130 |
+
time.sleep(0.5)
|
| 131 |
+
print(duration_b.duration)
|
|
|
|
|
|
|
|
|
main.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import time
|
| 2 |
from lib.audio import play_audio_until_end
|
| 3 |
-
from
|
| 4 |
|
| 5 |
if __name__ == '__main__':
|
| 6 |
# report = Report()
|
|
|
|
| 1 |
import time
|
| 2 |
from lib.audio import play_audio_until_end
|
| 3 |
+
from test_data import test_audios
|
| 4 |
|
| 5 |
if __name__ == '__main__':
|
| 6 |
# report = Report()
|
scripts/asr_utils.py
CHANGED
|
@@ -19,7 +19,7 @@ def cmd(command: str, check=True, capture_output=False) -> CompletedProcess:
|
|
| 19 |
return ret
|
| 20 |
|
| 21 |
def add_text_index():
|
| 22 |
-
text_file = '../
|
| 23 |
index = 1
|
| 24 |
with open(text_file, encoding='utf-8') as f:
|
| 25 |
for line in f:
|
|
@@ -60,8 +60,8 @@ def write_csv(rows, output_csv):
|
|
| 60 |
writer.writerows(rows)
|
| 61 |
|
| 62 |
def print_text_and_audio_length():
|
| 63 |
-
text_file = '../
|
| 64 |
-
audio_folder = '../
|
| 65 |
output_csv = 'csv/text_audio_length.csv'
|
| 66 |
rows = []
|
| 67 |
for idx, text in get_lines_with_index(text_file):
|
|
@@ -83,7 +83,7 @@ def get_text_distance(text1, text2):
|
|
| 83 |
return d, nd, diff
|
| 84 |
|
| 85 |
def get_origin_text_dict():
|
| 86 |
-
text_file = '../
|
| 87 |
text_dict = {}
|
| 88 |
for idx, text in get_lines_with_index(text_file):
|
| 89 |
text_dict[idx] = text
|
|
|
|
| 19 |
return ret
|
| 20 |
|
| 21 |
def add_text_index():
|
| 22 |
+
text_file = '../test_data/text/test_asr_zh.txt'
|
| 23 |
index = 1
|
| 24 |
with open(text_file, encoding='utf-8') as f:
|
| 25 |
for line in f:
|
|
|
|
| 60 |
writer.writerows(rows)
|
| 61 |
|
| 62 |
def print_text_and_audio_length():
|
| 63 |
+
text_file = '../test_data/text/test_asr_zh_with_index.txt'
|
| 64 |
+
audio_folder = '../test_data/recordings'
|
| 65 |
output_csv = 'csv/text_audio_length.csv'
|
| 66 |
rows = []
|
| 67 |
for idx, text in get_lines_with_index(text_file):
|
|
|
|
| 83 |
return d, nd, diff
|
| 84 |
|
| 85 |
def get_origin_text_dict():
|
| 86 |
+
text_file = '../test_data/text/test_asr_zh_with_index.txt'
|
| 87 |
text_dict = {}
|
| 88 |
for idx, text in get_lines_with_index(text_file):
|
| 89 |
text_dict[idx] = text
|
scripts/audios.txt
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
-
/Users/jeqin/work/code/TestTranslator/
|
| 2 |
-
/Users/jeqin/work/code/TestTranslator/
|
| 3 |
-
/Users/jeqin/work/code/TestTranslator/
|
| 4 |
-
/Users/jeqin/work/code/TestTranslator/
|
| 5 |
-
/Users/jeqin/work/code/TestTranslator/
|
| 6 |
-
/Users/jeqin/work/code/TestTranslator/
|
| 7 |
-
/Users/jeqin/work/code/TestTranslator/
|
| 8 |
-
/Users/jeqin/work/code/TestTranslator/
|
| 9 |
-
/Users/jeqin/work/code/TestTranslator/
|
| 10 |
|
| 11 |
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-0.wav
|
| 12 |
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-5.wav
|
|
|
|
| 1 |
+
/Users/jeqin/work/code/TestTranslator/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-0.wav
|
| 2 |
+
/Users/jeqin/work/code/TestTranslator/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-5.wav
|
| 3 |
+
/Users/jeqin/work/code/TestTranslator/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-10.wav
|
| 4 |
+
/Users/jeqin/work/code/TestTranslator/test_data/test_audios/5s-en-ac1-16k/English-chaos-part2-15.wav
|
| 5 |
+
/Users/jeqin/work/code/TestTranslator/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-0.wav
|
| 6 |
+
/Users/jeqin/work/code/TestTranslator/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-10.wav
|
| 7 |
+
/Users/jeqin/work/code/TestTranslator/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-20.wav
|
| 8 |
+
/Users/jeqin/work/code/TestTranslator/test_data/test_audios/10s-en-ac1-16k/English-chaos-part2-30.wav
|
| 9 |
+
/Users/jeqin/work/code/TestTranslator/test_data/test_audios/English-chaos-part2.wav
|
| 10 |
|
| 11 |
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-0.wav
|
| 12 |
/Users/jeqin/work/test/test_yoyotranslator/test_videos/audios/5s/es-1-5.wav
|
scripts/caculate_cer.py
CHANGED
|
@@ -26,7 +26,8 @@ def calculate_distance(reference: str, hypothesis: str):
|
|
| 26 |
return d, diff
|
| 27 |
|
| 28 |
if __name__ == '__main__':
|
| 29 |
-
|
|
|
|
| 30 |
count = 0
|
| 31 |
distance_sum = 0
|
| 32 |
reference_sum = 0
|
|
@@ -34,16 +35,17 @@ if __name__ == '__main__':
|
|
| 34 |
count += 1
|
| 35 |
reference = item["reference"]
|
| 36 |
hypothesis = item["inference_result"]
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
| 48 |
cer = distance_sum / reference_sum if reference_sum > 0 else 0
|
| 49 |
print(f"Total Distance: {distance_sum}, Total Reference Length: {reference_sum}, CER: {cer:.4f}")
|
|
|
|
| 26 |
return d, diff
|
| 27 |
|
| 28 |
if __name__ == '__main__':
|
| 29 |
+
import cn2an
|
| 30 |
+
results_list = json.load(open("csv/whisper_emilia_results.json", "r", encoding="utf-8"))
|
| 31 |
count = 0
|
| 32 |
distance_sum = 0
|
| 33 |
reference_sum = 0
|
|
|
|
| 35 |
count += 1
|
| 36 |
reference = item["reference"]
|
| 37 |
hypothesis = item["inference_result"]
|
| 38 |
+
# # 如果是 whisper,使用 cn2an替换数字为中文
|
| 39 |
+
# if re.search(r"\d", hypothesis):
|
| 40 |
+
# hypothesis = cn2an.transform(hypothesis, "an2cn")
|
| 41 |
+
distance, diff = calculate_distance(reference, hypothesis)
|
| 42 |
+
print(f"{count}. distance: {distance}")
|
| 43 |
+
if distance > 0:
|
| 44 |
+
print(f"Audio Path: {item['audio_path']}")
|
| 45 |
+
print(f"Reference: {reference}")
|
| 46 |
+
print(f"Hypothesis: {hypothesis}")
|
| 47 |
+
print(f"Diff: {diff}")
|
| 48 |
+
distance_sum += distance
|
| 49 |
+
reference_sum += len(reference)
|
| 50 |
cer = distance_sum / reference_sum if reference_sum > 0 else 0
|
| 51 |
print(f"Total Distance: {distance_sum}, Total Reference Length: {reference_sum}, CER: {cer:.4f}")
|
scripts/csv/fine-tune_whisper.csv
CHANGED
|
@@ -1,86 +1,86 @@
|
|
| 1 |
-
file_name,
|
| 2 |
-
1.wav,
|
| 3 |
-
2.wav,1.
|
| 4 |
-
3.wav,1.
|
| 5 |
-
4.wav,1.
|
| 6 |
-
5.wav,1.
|
| 7 |
-
6.wav,1.
|
| 8 |
-
7.wav,1.
|
| 9 |
8.wav,1.147,下一位演讲者是来自斯坦福大学计算机科学系的张明教授,0,0.0,下一位演讲者是来自斯坦福大学计算机科学系的张明教授
|
| 10 |
-
9.wav,1.
|
| 11 |
-
10.wav,1.
|
| 12 |
-
11.wav,1.
|
| 13 |
-
12.wav,1.
|
| 14 |
-
13.wav,1.
|
| 15 |
-
14.wav,1.
|
| 16 |
-
15.wav,1.
|
| 17 |
-
16.wav,1.
|
| 18 |
-
17.wav,1.
|
| 19 |
-
18.wav,1.
|
| 20 |
-
19.wav,1.
|
| 21 |
-
20.wav,1.
|
| 22 |
-
21.wav,1.
|
| 23 |
-
22.wav,1.
|
| 24 |
-
23.wav,1.
|
| 25 |
-
24.wav,1.
|
| 26 |
-
25.wav,1.
|
| 27 |
26.wav,1.087,请检查代码库中的依赖向冲突问题,1,0.067,请检查代码库中的依赖[-项-]{+向+}冲突问题
|
| 28 |
-
27.wav,1.
|
| 29 |
-
28.wav,1.
|
| 30 |
-
29.wav,1.
|
| 31 |
-
30.wav,1.
|
| 32 |
-
31.wav,1.
|
| 33 |
-
32.wav,1.
|
| 34 |
-
33.wav,1.
|
| 35 |
-
34.wav,1.
|
| 36 |
-
35.wav,1.
|
| 37 |
-
36.wav,1.
|
| 38 |
-
37.wav,1.
|
| 39 |
-
38.wav,1.
|
| 40 |
-
39.wav,1.
|
| 41 |
-
40.wav,1.
|
| 42 |
-
41.wav,1.198
|
| 43 |
-
42.wav,1.
|
| 44 |
-
43.wav,1.
|
| 45 |
-
44.wav,1.
|
| 46 |
-
45.wav,1.
|
| 47 |
-
46.wav,1.
|
| 48 |
-
47.wav,1.
|
| 49 |
-
48.wav,1.
|
| 50 |
-
49.wav,1.
|
| 51 |
-
50.wav,1.
|
| 52 |
-
51.wav,1.
|
| 53 |
-
52.wav,1.
|
| 54 |
-
53.wav,1.
|
| 55 |
-
54.wav,1.
|
| 56 |
-
55.wav,1.
|
| 57 |
-
56.wav,1.
|
| 58 |
-
57.wav,1.
|
| 59 |
-
58.wav,1.
|
| 60 |
-
59.wav,1.
|
| 61 |
-
60.wav,1.
|
| 62 |
-
61.wav,1.
|
| 63 |
-
62.wav,1.
|
| 64 |
-
63.wav,1.
|
| 65 |
-
64.wav,1.
|
| 66 |
-
65.wav,1.
|
| 67 |
-
66.wav,1.
|
| 68 |
-
67.wav,1.
|
| 69 |
-
68.wav,1.
|
| 70 |
-
69.wav,1.
|
| 71 |
-
70.wav,1.
|
| 72 |
-
71.wav,1.
|
| 73 |
-
72.wav,1.
|
| 74 |
-
73.wav,1.
|
| 75 |
-
74.wav,1.
|
| 76 |
-
75.wav,1.
|
| 77 |
-
76.wav,1.
|
| 78 |
-
77.wav,0.
|
| 79 |
-
78.wav,0.
|
| 80 |
-
79.wav,1.
|
| 81 |
-
80.wav,1.
|
| 82 |
-
81.wav,1.
|
| 83 |
-
82.wav,1.
|
| 84 |
-
83.wav,1.
|
| 85 |
-
84.wav,1.
|
| 86 |
-
85.wav,1.
|
|
|
|
| 1 |
+
file_name,time,inference_result
|
| 2 |
+
1.wav,8.261,您这车开的真够稳的蜗牛都超车了,1,0.067,您这车开[-得-]{+的+}真够稳的蜗牛都超车了
|
| 3 |
+
2.wav,1.097,你可真是个天才把这么简单的事情都搞砸了,0,0.0,你可真是个天才把这么简单的事情都搞砸了
|
| 4 |
+
3.wav,1.235,因网络问题远程讲着音频中断我们切到备用方案共享幻灯片,1,0.038,因网络问题远程讲[-者-]{+着+}音频中断我们切到备用方案共享幻灯片
|
| 5 |
+
4.wav,1.273,这就好比用大炮打蚊子资源分配严重不均衡需要细腻度调度,1,0.038,这就好比用大炮打蚊子资源分配严重不均衡需要细[-粒-]{+腻+}度调度
|
| 6 |
+
5.wav,1.074,言归正传我们来讨论一下核心问题,0,0.0,言归正传我们来讨论一下核心问题
|
| 7 |
+
6.wav,1.081,这有点超出了我们今天讨论的范围,0,0.0,这有点超出了我们今天讨论的范围
|
| 8 |
+
7.wav,1.043,他的年收入约为一百万人民币,0,0.0,他的年收入约为一百万人民币
|
| 9 |
8.wav,1.147,下一位演讲者是来自斯坦福大学计算机科学系的张明教授,0,0.0,下一位演讲者是来自斯坦福大学计算机科学系的张明教授
|
| 10 |
+
9.wav,1.182,请各位将手机调至静音模式演讲结束后有十五分钟提问时间,0,0.0,请各位将手机调至静音模式演讲结束后有十五分钟提问时间
|
| 11 |
+
10.wav,1.115,茶歇将在十点三十分开始地联在二楼休息区,1,0.053,茶歇将在十点三十分开始地[-点-]{+联+}在二楼休息区
|
| 12 |
+
11.wav,1.07,本合同自双方签字盖章之日起生效,0,0.0,本合同自双方签字盖章之日起生效
|
| 13 |
+
12.wav,1.196,政府正在采取措施刺激经济增长和创造就业机会,0,0.0,政府正在采取措施刺激经济增长和创造就业机会
|
| 14 |
+
13.wav,1.154,今天中午我们点外卖吧我想吃宫保鸡丁和麻婆豆腐,0,0.0,今天中午我们点外卖吧我想吃宫保鸡丁和麻婆豆腐
|
| 15 |
+
14.wav,1.405,医生建议我多吃粗粮比如燕麦和紫薯少吃高油高糖的油炸食品像炸鸡和甜甜圈,0,0.0,医生建议我多吃粗粮比如燕麦和紫薯少吃高油高糖的油炸食品像炸鸡和甜甜圈
|
| 16 |
+
15.wav,1.235,本方案采用液构计算架构结合GPU与FPA加速漳量运算,3,0.111,本方案采用[-异-]{+液+}构计算架构结合gpu与fp[-g-]a加速[-张-]{+漳+}量运算
|
| 17 |
+
16.wav,1.28,推力延迟稳定在五毫秒以内吞吐量提升百分之四十功耗降低一点八瓦,10,0.357,推[-理-]{+力+}延迟稳定在[-5-]{+五+}毫秒以内吞吐量提升[-40%-]{+百分之四十+}功耗降低[-1.8-]{+一点八+}瓦
|
| 18 |
+
17.wav,1.325,相比sota模型我们的方法在小样本场景下召回率高出十二个百分点且参数量仅为其三分之一,2,0.048,相比sota模型我们的方法在小样本场景下召回率高出[-12-]{+十二+}个百分点且参数量仅为其三分之一
|
| 19 |
+
18.wav,1.301,当解脸并发数超过一万时内存带宽会呈平静导致伪延齿急剧上升,12,0.387,当[-节点-]{+解脸+}并发数超过[-10000-]{+一万+}时内存带宽会[-成瓶颈-]{+呈平静+}导致[-尾-]{+伪+}延[-迟-]{+齿+}急剧上升
|
| 20 |
+
19.wav,1.477,请看图三蓝色柱是基线模型红色线是我们的优化结果交叉点表明在第十五轮迭代后优势显著,3,0.075,请看图[-3-]{+三+}蓝色柱是基线模型红色线是我们的优化结果交叉点表明在第[-15-]{+十五+}轮迭代后优势显著
|
| 21 |
+
20.wav,1.261,您提到模型泛化性提升是否在跨模态数据上验证过量化指标是多少,0,0.0,您提到模型泛化性提升是否在跨模态数据上验证过量化指标是多少
|
| 22 |
+
21.wav,1.338,容我追问一点如果输入数据存在对抗样本您方案的鲁邦性如何保证失效概率有测试吗,1,0.027,容我追问一点如果输入数据存在对抗样本您方案的鲁[-棒-]{+邦+}性如何保证失效概率有测试吗
|
| 23 |
+
22.wav,1.254,抱歉打断您说的动态简直是指训练中还是推理中预值是自适应的吗,3,0.103,抱歉打断您说的动态[-剪枝-]{+简直+}是指训练中还是推理中[-阈-]{+预+}值是自适应的吗
|
| 24 |
+
23.wav,1.242,成本效益是否可行不属这种定制芯片需要牛片费用中小客户怎么承担,3,0.1,成本效益是否可行[-部署-]{+不属+}这种定制芯片需要[-流-]{+牛+}片费用中小客户怎么承担
|
| 25 |
+
24.wav,1.263,我补充一个角度隐私计算领域也在用类似思路是否可能跨领域合作,0,0.0,我补充一个角度隐私计算领域也在用类似思路是否可能跨领域合作
|
| 26 |
+
25.wav,1.259,端到端优化流水线涵盖数据清洗特征工程模型压缩部署监控,0,0.0,端到端优化流水线涵盖数据清洗特征工程模型压缩部署监控
|
| 27 |
26.wav,1.087,请检查代码库中的依赖向冲突问题,1,0.067,请检查代码库中的依赖[-项-]{+向+}冲突问题
|
| 28 |
+
27.wav,1.125,这个电路板集成了微控制器传感器和无线通信模块,0,0.0,这个电路板集成了微控制器传感器和无线通信模块
|
| 29 |
+
28.wav,1.132,区块链技术的核心在于去中心化和加密安全性,0,0.0,区块链技术的核心在于去中心化和加密安全性
|
| 30 |
+
29.wav,1.11,需符合等保三级要求数据出境要走安全评估,0,0.0,需符合等保三级要求数据出境要走安全评估
|
| 31 |
+
30.wav,1.081,最新数据显示消费者信心指数有所回升,0,0.0,最新数据显示消费者信心指数有所回升
|
| 32 |
+
31.wav,1.142,专家预测新能源汽车市场将迎来爆发式增长,0,0.0,专家预测新能源汽车市场将迎来爆发式增长
|
| 33 |
+
32.wav,1.271,多模态大模型能够同时处理文本图像和音频信号实现真正的跨模态理解,0,0.0,多模态大模型能够同时处理文本图像和音频信号实现真正的跨模态理解
|
| 34 |
+
33.wav,1.252,联邦学习可以在保护用户隐私的前提下实现分布式模型的协同训练,0,0.0,联邦学习可以在保护用户隐私的前提下实现分布式模型的协同训练
|
| 35 |
+
34.wav,1.195,我们研发的五十量子比特处理器在特定算法上展现出了量子优越性,0,0.0,我们研发的五十量子比特处理器在特定算法上展现出了量子优越性
|
| 36 |
+
35.wav,1.17,变分量子本真值求解器在化学模拟中显示出巨大潜力,1,0.043,变分量子本[-征-]{+真+}值求解器在化学模拟中显示出巨大潜力
|
| 37 |
+
36.wav,1.195,边缘AI推理的延迟已经可以控制在十毫秒以内满足实时应用需求,0,0.0,边缘ai推理的延迟已经可以控制在十毫秒以内满足实时应用需求
|
| 38 |
+
37.wav,1.304,我们设计的新型神经网诺压缩算法在保持精度的同时将模型大小减少了百分之七十,1,0.028,我们设计的新型神经网[-络-]{+诺+}压缩算法在保持精度的同时将模型大小减少了百分之七十
|
| 39 |
+
38.wav,1.31,损失函数结合了交叉伤损失和对比损失权重系数设置为零点三和零点七,1,0.032,损失函数结合了交叉[-熵-]{+伤+}损失和对比损失权重系数设置为零点三和零点七
|
| 40 |
+
39.wav,1.372,在五个标准数据级上的实验结果表明我们的方法平均比现有最佳方法提升了二点三个百分点,1,0.025,在五个标准数据[-集-]{+级+}上的实验结果表明我们的方法平均比现有最佳方法提升了二点三个百分点
|
| 41 |
+
40.wav,1.294,消融食盐证实了每个模块的有效性移除注意力机制会导致性能下降百分之四点六,2,0.057,消融[-实验-]{+食盐+}证实了每个模块的有效性移除注意力机制会导致性能下降百分之四点六
|
| 42 |
+
41.wav,1.198,模型的参数量为一点二亿在八张一百显卡上训练了七十二小时,4,0.138,模型的参数量为一点二亿在八张[-a100-]{+一百+}显卡上训练了七十二小时
|
| 43 |
+
42.wav,1.052,总之搞定了数据倾斜性能就上去了,0,0.0,总之搞定了数据倾斜性能就上去了
|
| 44 |
+
43.wav,1.541,OpenAI Whisper需要FFMPEG的环境FFMPEG是一个开源的跨平台因视频处理工具和框架可以用来录制转换和流式传输音视频内容,2,0.029,openai[---]whisper需要ffmpeg的环境ffmpeg是一个开源的跨平台[-音-]{+因+}视频处理工具和框架可以用来录制转换和流式传输音视频内容
|
| 45 |
+
44.wav,1.242,采用Transformer训练到训练模型可以实现针对不同的语言处理任务,4,0.114,采用transformer[-序列-]{+训练+}到[-序列-]{+训练+}模型可以实现针对不同的语言处理任务
|
| 46 |
+
45.wav,1.303,Transformer架构在自然语言处理中的成功应用已经彻底改变了预训练模型的范式,0,0.0,transformer架构在自然语言处理中的成功应用已经彻底改变了预训练模型的范式
|
| 47 |
+
46.wav,1.238,请大家注意workshop材料已经上传至会议系统代码仓库链接在附入页,1,0.029,请大家注意workshop材料已经上传至会议系统代码仓库链接在附[-录-]{+入+}页
|
| 48 |
+
47.wav,1.285,别造轮子了直接调用优劳威武开元库的预训练权重快速迭代才是王道,7,0.219,别造轮子了直接调用[-yolov5-]{+优劳威武+}开[-源-]{+元+}库的预训练权重快速迭代才是王道
|
| 49 |
+
48.wav,1.225,请确保您的设备已连接到五GHz wifi频段亦获得最佳性能,3,0.103,请确保您的设备已连接到[-5-]{+五+}ghzwi[---]fi频段[-以-]{+亦+}获得最佳性能
|
| 50 |
+
49.wav,1.434,我们提出了一种基于对比学习的自监督方法在ImageNet数据集上达到了百分之九十二点五的准确率,0,0.0,我们提出了一种基于对比学习的自监督方法在imagenet数据集上达到了百分之九十二点五的准确率
|
| 51 |
+
50.wav,1.355,据比利弗尔在代码生成和多部推理任务上展现出令人印象深刻的能力,6,0.2,[-gpt-4-]{+据比利弗尔+}在代码生成和多[-步-]{+部+}推理任务上展现出令人印象深刻的能力
|
| 52 |
+
51.wav,1.487,若在高病发场景下为启用我们的动态缓存机制即使用RDMA网络延迟也可能因CPU调度增强而恶化,4,0.089,若在高[-并-]{+病+}发场景下[-未-]{+为+}启用我们的动态缓存机制即使用rdma网络延迟也可能因cpu调度[-争抢-]{+增强+}而恶化
|
| 53 |
+
52.wav,1.144,准确说峰值钻力是两百托斯不是刚才说的一百五,11,0.458,准确说峰值[-算-]{+钻+}力是[-200tops-]{+两百托斯+}不是刚才说的[-150-]{+一百五+}
|
| 54 |
+
53.wav,1.233,秦始皇营镇书童文车童轨奠定了中国大一统的基础,4,0.182,秦始皇[-嬴政-]{+营镇+}书[-同-]{+童+}文车[-同-]{+童+}轨奠定了中国大一统的基础
|
| 55 |
+
54.wav,1.278,诸葛亮在出师表中写道鞠躬尽瘁死而后已成为后世臣子的楷模,0,0.0,诸葛亮在出师表中写道鞠躬尽瘁死而后已成为后世臣子的楷模
|
| 56 |
+
55.wav,1.227,李白的举头望明月低头四故乡是连三岁孩童都能背诵的诗句,1,0.038,李白的举头望明月低头[-思-]{+四+}故乡是连三岁孩童都能背诵的诗句
|
| 57 |
+
56.wav,1.268,孔子曾说己所不欲勿施于人这简单的八个字构成了儒家伦理的基石,0,0.0,孔子曾说己所不欲勿施于人这简单的八个字构成了儒家伦理的基石
|
| 58 |
+
57.wav,1.233,王羲之背后人尊为殊胜其代表作南庭集序被誉为天下第一行书,5,0.185,王羲之[-被-]{+背+}后人尊为[-书圣-]{+殊胜+}其代表作[-兰亭-]{+南庭+}集序被誉为天下第一行书
|
| 59 |
+
58.wav,1.298,我们要学习跑丁解牛的精神掌握事物的客观规律才能游刃有馀,2,0.074,我们要学习[-庖-]{+跑+}丁解牛的精神掌握事物的客观规律才能游刃有[-余-]{+馀+}
|
| 60 |
+
59.wav,1.377,项羽在鸿门宴上优柔寡断放走了刘邦最终兵败乌江自吻,1,0.042,项羽在鸿门宴上优柔寡断放走了刘邦最终兵败乌江自[-刎-]{+吻+}
|
| 61 |
+
60.wav,1.326,在杭州西湖畔人们总会想起苏氏治理西湖修筑苏堤的往事,1,0.04,在杭州西湖畔人们总会想起苏[-轼-]{+氏+}治理西湖修筑苏堤的往事
|
| 62 |
+
61.wav,1.191,我计划去爱菲尔铁塔和卢浮宫参观,1,0.067,我计划去[-埃-]{+爱+}菲尔铁塔和卢浮宫参观
|
| 63 |
+
62.wav,1.219,莎士比亚的戏剧深刻地探讨了人性的复杂性,0,0.0,莎士比亚的戏剧深刻地探讨了人性的复杂性
|
| 64 |
+
63.wav,1.111,这个消息让他丈二和尚摸不着头脑,0,0.0,这个消息让他丈二和尚摸不着头脑
|
| 65 |
+
64.wav,1.137,画龙点睛之笔让整个设计焕然一新,0,0.0,画龙点睛之笔让整个设计焕然一新
|
| 66 |
+
65.wav,1.063,这是卡泊子技术必须自主研发,1,0.077,这是卡[-脖-]{+泊+}子技术必须自主研发
|
| 67 |
+
66.wav,1.175,这幅画作以其独特的色彩运用和构图技巧而闻名,0,0.0,这幅画作以其独特的色彩运用和构图技巧而闻名
|
| 68 |
+
67.wav,1.091,患者主塑间歇性胸痛放射至左臂,1,0.071,患者主[-诉-]{+塑+}间歇性胸痛放射至左臂
|
| 69 |
+
68.wav,1.305,患者有冠状动脉周样硬化性心脏病病史十年慢性阻塞性肺疾病病史五年,1,0.032,患者有冠状动脉[-粥-]{+周+}样硬化性心脏病病史十年慢性阻塞性肺疾病病史五年
|
| 70 |
+
69.wav,1.144,建议行冠状动脉造影检查必要时植入支架,0,0.0,建议行冠状动脉造影检查必要时植入支架
|
| 71 |
+
70.wav,1.182,患者需低盐低脂糖尿病饮食监测血压血糖变化,0,0.0,患者需低盐低脂糖尿病饮食监测血压血糖变化
|
| 72 |
+
71.wav,1.332,胸部细梯频道显示双肺散在膜玻璃样密度以胸膜下分布为主,6,0.222,胸部[-ct平扫-]{+细梯频道+}显示双肺散在[-磨-]{+膜+}玻璃样密度[-影-]以胸膜下分布为主
|
| 73 |
+
72.wav,1.423,知识产权归属约定义方在履行本合同过程中所产生的全部智力成果其知识产权归甲方所有,2,0.05,知识产权归属约定[-乙-]{+义+}方在履行本合同过程中所产生的全部智力成果其知识产权[-均-]归甲方所有
|
| 74 |
+
73.wav,1.507,双方应履行本合同法生争议的应首先通过友好协商解决协商不成的任何一方均有权向有管辖权的人民法院提起诉讼,2,0.04,双方[-因-]{+应+}履行本合同[-发-]{+法+}生争议的应首先通过友好协商解决协商不成的任何一方均有权向有管辖权的人民法院提起诉讼
|
| 75 |
+
74.wav,1.289,被告在法定期限内未提交答编状亦未到庭参加诉讼本院依法缺席审理,1,0.033,被告在法定期限内未提交答[-辩-]{+编+}状亦未到庭参加诉讼本院依法缺席审理
|
| 76 |
+
75.wav,1.58,原告向本院提出诉讼请求一判令被告支付货款人民币五十万八千元及逾期付款利息二判令被告承担本案的诉讼费用,0,0.0,原告向本院提出诉讼请求一判令被告支付货款人民币五十万八千元及逾期付款利息二判令被告承担本案的诉讼费用
|
| 77 |
+
76.wav,1.545,被执行人未按执行通知履行法律文书确定的义务人民法院有权查封扣押冻结拍卖被执行人的财产,0,0.0,被执行人未按执行通知履行法律文书确定的义务人民法院有权查封扣押冻结拍卖被执行人的财产
|
| 78 |
+
77.wav,0.981,这个视频太上头了,0,0.0,这个视频太上头了
|
| 79 |
+
78.wav,0.942,他真是个社恐,0,0.0,他真是个社恐
|
| 80 |
+
79.wav,1.064,简直了这躺平的状态也太佛系了吧,0,0.0,简直了这躺平的状态也太佛系了吧
|
| 81 |
+
80.wav,1.044,这个瓜有点大我得去吃瓜了,0,0.0,这个瓜有点大我得去吃瓜了
|
| 82 |
+
81.wav,1.074,别内卷了咱们还是多交流交流吧,0,0.0,别内卷了咱们还是多交流交流吧
|
| 83 |
+
82.wav,1.347,第二个公司我们成立了中国黄烨在中国黄烨的创业经验中有很多的经验也是可以在这儿跟大家进行分享的,2,0.043,第二个公司我们成立了中国黄[-页-]{+烨+}在中国黄[-页-]{+烨+}的创业经验中有很多的经验也是可以在这儿跟大家进行分享的
|
| 84 |
+
83.wav,1.314,对于大部分在接触微积分之前主要的学习经验就是刷题甚至是连题也不刷的同学们来说,0,0.0,对于大部分在接触微积分之前主要的学习经验就是刷题甚至是连题也不刷的同学们来说
|
| 85 |
+
84.wav,1.472,说了两个小时没人听懂我在说什么最后二十三个人反对一个人同意这一个人就说马云你这样做你就试试看不行的话赶紧逃回来还来得及,0,0.0,说了两个小时没人听懂我在说什么最后二十三个人反对一个人同意这一个人就说马云你这样做你就试试看不行的话赶紧逃回来还来得及
|
| 86 |
+
85.wav,1.2,晚上想想是热血沸腾真好第二天早上骑个自行车又上班去了对吧,0,0.0,晚上想想是热血沸腾真好第二天早上骑个自行车又上班去了对吧
|
scripts/funasr_utils.py
CHANGED
|
@@ -44,7 +44,7 @@ def run_funasr():
|
|
| 44 |
)
|
| 45 |
t1 = time.time()
|
| 46 |
print("load model: ", t1 - t0)
|
| 47 |
-
audios = Path("/Users/jeqin/work/code/TestTranslator/
|
| 48 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 49 |
for audio in sorted(audios.glob("Chinese-mayun-part2.mp3")):
|
| 50 |
print(audio)
|
|
|
|
| 44 |
)
|
| 45 |
t1 = time.time()
|
| 46 |
print("load model: ", t1 - t0)
|
| 47 |
+
audios = Path("/Users/jeqin/work/code/TestTranslator/test_data/test_audios")
|
| 48 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 49 |
for audio in sorted(audios.glob("Chinese-mayun-part2.mp3")):
|
| 50 |
print(audio)
|
scripts/recorder.py
CHANGED
|
@@ -3,8 +3,8 @@ import soundfile as sf
|
|
| 3 |
import sys
|
| 4 |
import re
|
| 5 |
|
| 6 |
-
TEXT_FILE = '/Users/jeqin/work/code/TestTranslator/
|
| 7 |
-
AUDIO_FOLDER= '/Users/jeqin/work/code/TestTranslator/
|
| 8 |
SAMPLE_RATE = 16000
|
| 9 |
CHANNELS = 1
|
| 10 |
|
|
|
|
| 3 |
import sys
|
| 4 |
import re
|
| 5 |
|
| 6 |
+
TEXT_FILE = '/Users/jeqin/work/code/TestTranslator/test_data/text/test_asr_zh_with_index.txt'
|
| 7 |
+
AUDIO_FOLDER= '/Users/jeqin/work/code/TestTranslator/test_data/recordings'
|
| 8 |
SAMPLE_RATE = 16000
|
| 9 |
CHANNELS = 1
|
| 10 |
|
scripts/run_funasr.py
CHANGED
|
@@ -25,7 +25,7 @@ def main():
|
|
| 25 |
)
|
| 26 |
t1 = time.time()
|
| 27 |
print("load model: ", t1 - t0)
|
| 28 |
-
audios = Path("/Users/jeqin/work/code/TestTranslator/
|
| 29 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 30 |
for audio in sorted(audios.glob("*mix/*")):
|
| 31 |
print(audio)
|
|
|
|
| 25 |
)
|
| 26 |
t1 = time.time()
|
| 27 |
print("load model: ", t1 - t0)
|
| 28 |
+
audios = Path("/Users/jeqin/work/code/TestTranslator/test_data/test_audios/")
|
| 29 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 30 |
for audio in sorted(audios.glob("*mix/*")):
|
| 31 |
print(audio)
|
scripts/run_funasr_c.py
CHANGED
|
@@ -22,7 +22,7 @@ def main():
|
|
| 22 |
asr.init()
|
| 23 |
t1 = time.time()
|
| 24 |
print("Initializing model: ", t1-t0)
|
| 25 |
-
audios = Path("/Users/jeqin/work/code/TestTranslator/
|
| 26 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 27 |
for audio in sorted(audios.glob("*s-ac1/Chinese*")):
|
| 28 |
print(audio)
|
|
|
|
| 22 |
asr.init()
|
| 23 |
t1 = time.time()
|
| 24 |
print("Initializing model: ", t1-t0)
|
| 25 |
+
audios = Path("/Users/jeqin/work/code/TestTranslator/test_data/test_audios/")
|
| 26 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 27 |
for audio in sorted(audios.glob("*s-ac1/Chinese*")):
|
| 28 |
print(audio)
|
scripts/run_funasr_quant.py
CHANGED
|
@@ -41,15 +41,15 @@ def inference(vad_model, asr_model, punc_model, audio:Path):
|
|
| 41 |
t4 = time.time()
|
| 42 |
# print("punc time:", t4-t3)
|
| 43 |
# print("punc text:", text)
|
| 44 |
-
print(text)
|
| 45 |
t = t4-t1
|
| 46 |
-
print("inference:", t)
|
| 47 |
return text, t
|
| 48 |
|
| 49 |
def run_recordings():
|
| 50 |
quantize = True
|
| 51 |
vad_model, asr_model, punc_model = load_model(quantize)
|
| 52 |
-
audios = Path("../
|
| 53 |
rows = [["file_name", "time", "inference_result"]]
|
| 54 |
original = get_origin_text_dict()
|
| 55 |
for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
|
|
@@ -62,7 +62,7 @@ def run_recordings():
|
|
| 62 |
def run_test_audios():
|
| 63 |
quantize = True
|
| 64 |
vad_model, asr_model, punc_model = load_model(quantize)
|
| 65 |
-
audios = Path("../
|
| 66 |
rows = [["file_name", "time", "inference_result"]]
|
| 67 |
for audio in sorted(audios.glob("*s/zh*.wav")):
|
| 68 |
text, t = inference(vad_model, asr_model, punc_model, audio)
|
|
@@ -74,8 +74,8 @@ def run_test_dataset():
|
|
| 74 |
from scripts.asr_utils import read_dataset
|
| 75 |
quantize = True
|
| 76 |
vad_model, asr_model, punc_model = load_model(quantize)
|
| 77 |
-
test_data = Path("../
|
| 78 |
-
audio_parent = Path("../
|
| 79 |
rows = [["file_name", "time", "inference_result"]]
|
| 80 |
result_list = []
|
| 81 |
count = 0
|
|
@@ -105,5 +105,35 @@ def run_test_dataset():
|
|
| 105 |
with open("csv/funasr_dataset_results.json", "w", encoding="utf-8") as f:
|
| 106 |
json.dump(result_list, f, ensure_ascii=False, indent=2)
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
if __name__ == '__main__':
|
| 109 |
-
|
|
|
|
| 41 |
t4 = time.time()
|
| 42 |
# print("punc time:", t4-t3)
|
| 43 |
# print("punc text:", text)
|
| 44 |
+
# print(text)
|
| 45 |
t = t4-t1
|
| 46 |
+
# print("inference:", t)
|
| 47 |
return text, t
|
| 48 |
|
| 49 |
def run_recordings():
|
| 50 |
quantize = True
|
| 51 |
vad_model, asr_model, punc_model = load_model(quantize)
|
| 52 |
+
audios = Path("../test_data/recordings/")
|
| 53 |
rows = [["file_name", "time", "inference_result"]]
|
| 54 |
original = get_origin_text_dict()
|
| 55 |
for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
|
|
|
|
| 62 |
def run_test_audios():
|
| 63 |
quantize = True
|
| 64 |
vad_model, asr_model, punc_model = load_model(quantize)
|
| 65 |
+
audios = Path("../test_data/test_audios/")
|
| 66 |
rows = [["file_name", "time", "inference_result"]]
|
| 67 |
for audio in sorted(audios.glob("*s/zh*.wav")):
|
| 68 |
text, t = inference(vad_model, asr_model, punc_model, audio)
|
|
|
|
| 74 |
from scripts.asr_utils import read_dataset
|
| 75 |
quantize = True
|
| 76 |
vad_model, asr_model, punc_model = load_model(quantize)
|
| 77 |
+
test_data = Path("../test_data/dataset.txt")
|
| 78 |
+
audio_parent = Path("../test_data/")
|
| 79 |
rows = [["file_name", "time", "inference_result"]]
|
| 80 |
result_list = []
|
| 81 |
count = 0
|
|
|
|
| 105 |
with open("csv/funasr_dataset_results.json", "w", encoding="utf-8") as f:
|
| 106 |
json.dump(result_list, f, ensure_ascii=False, indent=2)
|
| 107 |
|
| 108 |
+
def run_test_emilia():
|
| 109 |
+
from scripts.asr_utils import read_emilia
|
| 110 |
+
quantize = True
|
| 111 |
+
vad_model, asr_model, punc_model = load_model(quantize)
|
| 112 |
+
parent = Path("../test_data/ZH-B000000")
|
| 113 |
+
result_list = []
|
| 114 |
+
count = 0
|
| 115 |
+
try:
|
| 116 |
+
for audio_path, sentence, duration in read_emilia(parent, count_limit=5000):
|
| 117 |
+
count += 1
|
| 118 |
+
print(f"processing {count}: {audio_path.name}")
|
| 119 |
+
text, t = inference(vad_model, asr_model, punc_model, audio_path)
|
| 120 |
+
print("inference time:", t)
|
| 121 |
+
print(text)
|
| 122 |
+
result_list.append({
|
| 123 |
+
"index": count,
|
| 124 |
+
"audio_path": audio_path.name,
|
| 125 |
+
"reference": sentence,
|
| 126 |
+
"duration": duration,
|
| 127 |
+
"inference_time": round(t, 3),
|
| 128 |
+
"inference_result": text
|
| 129 |
+
})
|
| 130 |
+
except Exception as e:
|
| 131 |
+
print(e)
|
| 132 |
+
except KeyboardInterrupt as e:
|
| 133 |
+
print(e)
|
| 134 |
+
import json
|
| 135 |
+
with open("csv/funasr_emilia_results.json", "w", encoding="utf-8") as f:
|
| 136 |
+
json.dump(result_list, f, ensure_ascii=False, indent=2)
|
| 137 |
+
|
| 138 |
if __name__ == '__main__':
|
| 139 |
+
run_test_emilia()
|
scripts/run_whisper.py
CHANGED
|
@@ -32,7 +32,7 @@ def load_model():
|
|
| 32 |
|
| 33 |
def run_recordings():
|
| 34 |
model = load_model()
|
| 35 |
-
audios = Path("../
|
| 36 |
rows = [["file_name", "time", "inference_result"]]
|
| 37 |
original = get_origin_text_dict()
|
| 38 |
for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
|
|
@@ -53,7 +53,7 @@ def run_recordings():
|
|
| 53 |
def run_test_audios():
|
| 54 |
model = load_model()
|
| 55 |
lang = "zh"
|
| 56 |
-
audios = Path("../
|
| 57 |
rows = [["file_name", "time", "inference_result"]]
|
| 58 |
for audio in sorted(audios.glob(f"*{lang}*/*.wav")):
|
| 59 |
print(audio)
|
|
@@ -69,8 +69,8 @@ def run_test_audios():
|
|
| 69 |
def run_test_dataset():
|
| 70 |
from scripts.asr_utils import read_dataset
|
| 71 |
model = load_model()
|
| 72 |
-
test_data = Path("../
|
| 73 |
-
audio_parent = Path("../
|
| 74 |
rows = [["file_name", "time", "inference_result"]]
|
| 75 |
result_list = []
|
| 76 |
count = 0
|
|
@@ -104,7 +104,7 @@ def run_test_dataset():
|
|
| 104 |
def run_test_emilia():
|
| 105 |
from scripts.asr_utils import read_emilia
|
| 106 |
model = load_model()
|
| 107 |
-
parent = Path("../
|
| 108 |
result_list = []
|
| 109 |
count = 0
|
| 110 |
try:
|
|
|
|
| 32 |
|
| 33 |
def run_recordings():
|
| 34 |
model = load_model()
|
| 35 |
+
audios = Path("../test_data/recordings/")
|
| 36 |
rows = [["file_name", "time", "inference_result"]]
|
| 37 |
original = get_origin_text_dict()
|
| 38 |
for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
|
|
|
|
| 53 |
def run_test_audios():
|
| 54 |
model = load_model()
|
| 55 |
lang = "zh"
|
| 56 |
+
audios = Path("../test_data/test_audios/")
|
| 57 |
rows = [["file_name", "time", "inference_result"]]
|
| 58 |
for audio in sorted(audios.glob(f"*{lang}*/*.wav")):
|
| 59 |
print(audio)
|
|
|
|
| 69 |
def run_test_dataset():
|
| 70 |
from scripts.asr_utils import read_dataset
|
| 71 |
model = load_model()
|
| 72 |
+
test_data = Path("../test_data/dataset.txt")
|
| 73 |
+
audio_parent = Path("../test_data/")
|
| 74 |
rows = [["file_name", "time", "inference_result"]]
|
| 75 |
result_list = []
|
| 76 |
count = 0
|
|
|
|
| 104 |
def run_test_emilia():
|
| 105 |
from scripts.asr_utils import read_emilia
|
| 106 |
model = load_model()
|
| 107 |
+
parent = Path("../test_data/ZH-B000000")
|
| 108 |
result_list = []
|
| 109 |
count = 0
|
| 110 |
try:
|
scripts/run_whisper_finetuned.py
CHANGED
|
@@ -139,7 +139,7 @@ def load_model():
|
|
| 139 |
|
| 140 |
def run_test_audios():
|
| 141 |
model, processor = load_model()
|
| 142 |
-
audios = Path("../
|
| 143 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 144 |
for audio in sorted(audios.glob("*en-ac1-16k/*.wav")): # *s/randomforest*.wav"
|
| 145 |
try:
|
|
@@ -158,7 +158,7 @@ def run_test_audios():
|
|
| 158 |
def run_recordings():
|
| 159 |
from scripts.asr_utils import get_origin_text_dict, get_text_distance
|
| 160 |
model, processor = load_model()
|
| 161 |
-
audios = Path("../
|
| 162 |
rows = [["file_name", "time", "inference_result"]]
|
| 163 |
original = get_origin_text_dict()
|
| 164 |
for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
|
|
@@ -181,8 +181,8 @@ def run_recordings():
|
|
| 181 |
def run_test_dataset():
|
| 182 |
from scripts.asr_utils import read_dataset
|
| 183 |
model, processor = load_model()
|
| 184 |
-
test_data = Path("../
|
| 185 |
-
audio_parent = Path("../
|
| 186 |
rows = [["file_name", "time", "inference_result"]]
|
| 187 |
result_list = []
|
| 188 |
count = 0
|
|
|
|
| 139 |
|
| 140 |
def run_test_audios():
|
| 141 |
model, processor = load_model()
|
| 142 |
+
audios = Path("../test_data/test_audios/")
|
| 143 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 144 |
for audio in sorted(audios.glob("*en-ac1-16k/*.wav")): # *s/randomforest*.wav"
|
| 145 |
try:
|
|
|
|
| 158 |
def run_recordings():
|
| 159 |
from scripts.asr_utils import get_origin_text_dict, get_text_distance
|
| 160 |
model, processor = load_model()
|
| 161 |
+
audios = Path("../test_data/recordings/")
|
| 162 |
rows = [["file_name", "time", "inference_result"]]
|
| 163 |
original = get_origin_text_dict()
|
| 164 |
for audio in sorted(audios.glob("*.wav"), key=lambda x: int(x.stem)):
|
|
|
|
| 181 |
def run_test_dataset():
|
| 182 |
from scripts.asr_utils import read_dataset
|
| 183 |
model, processor = load_model()
|
| 184 |
+
test_data = Path("../test_data/dataset.txt")
|
| 185 |
+
audio_parent = Path("../test_data/")
|
| 186 |
rows = [["file_name", "time", "inference_result"]]
|
| 187 |
result_list = []
|
| 188 |
count = 0
|
scripts/split_audio.py
CHANGED
|
@@ -14,7 +14,7 @@ def cmd(command: str, check=True, capture_output=False) -> CompletedProcess:
|
|
| 14 |
return ret
|
| 15 |
|
| 16 |
|
| 17 |
-
current = Path("/Users/jeqin/work/code/TestTranslator/
|
| 18 |
audios_5s = current/"5s"
|
| 19 |
audios_10s = current/"10s"
|
| 20 |
if not audios_5s.exists():
|
|
|
|
| 14 |
return ret
|
| 15 |
|
| 16 |
|
| 17 |
+
current = Path("/Users/jeqin/work/code/TestTranslator/test_data/test_audios")
|
| 18 |
audios_5s = current/"5s"
|
| 19 |
audios_10s = current/"10s"
|
| 20 |
if not audios_5s.exists():
|
temp.py
CHANGED
|
@@ -1,14 +1,47 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
from huggingface_hub import snapshot_download
|
| 6 |
+
|
| 7 |
+
# HF_ENDPOINT=https://hf-mirror.com python download_llm_copy.py
|
| 8 |
+
# local_dir = "/Users/test/yujuan/llm/models"
|
| 9 |
+
|
| 10 |
+
local_dir = "/Users/jeqin/work/code/"
|
| 11 |
+
repos = {
|
| 12 |
+
"TheBloke/Llama-2-7B-GGUF":["llama-2-7b.Q4_K_M.gguf"],
|
| 13 |
+
# "Qwen/Qwen3-235B-A22B-GGUF": ["Q8_0/Qwen3-235B-A22B-Q8_0-00002-of-00009.gguf","Q8_0/Qwen3-235B-A22B-Q8_0-00004-of-00009.gguf"]
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
def download(repo, files, max_retries=5):
    """Download a Hugging Face repo (or selected files from it) with retries.

    Args:
        repo: Repository id such as ``"TheBloke/Llama-2-7B-GGUF"``. The part
            after the last ``/`` names the sub-folder created under the
            module-level ``local_dir``.
        files: Patterns forwarded to ``snapshot_download`` as
            ``allow_patterns``. An empty or ``None`` value downloads the
            whole repository.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        ``True`` if an attempt succeeded, ``False`` if every attempt failed.
    """
    folder = repo.split('/')[-1]
    # Joining via Path avoids a doubled separator when local_dir ends in "/".
    target_dir = str(Path(local_dir) / folder)
    print(f"start download {repo}")
    for attempt in range(1, max_retries + 1):
        try:
            snapshot_download(
                repo_id=repo,
                local_dir=target_dir,
                # An empty file list means "no filter": fetch the entire repo.
                allow_patterns=files or None,
                local_dir_use_symlinks=False,
            )
        except Exception as e:  # best-effort: any failure triggers a retry
            print(e)
            print(f"download failed, retry: {attempt}")
        else:
            print(f"download {repo} finished")
            return True
    # Every attempt failed; tell the caller instead of returning silently.
    print(f"download {repo} failed after {max_retries} attempts")
    return False
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def main():
    """Download every repository listed in the module-level ``repos`` mapping."""
    for repo_id in repos:
        # Each value is the list of files wanted from that repository.
        download(repo_id, repos[repo_id])
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
if __name__ == '__main__':
|
| 46 |
+
main()
|
| 47 |
+
|
{tests/test_data → test_data}/.gitattributes
RENAMED
|
File without changes
|
{tests/test_data → test_data}/__init__.py
RENAMED
|
File without changes
|
{tests/test_data → test_data}/dataset.txt
RENAMED
|
File without changes
|
{tests/test_data → test_data}/test_audios.zip
RENAMED
|
File without changes
|
{tests/test_data → test_data}/text/test_asr_zh.txt
RENAMED
|
File without changes
|
{tests/test_data → test_data}/text/test_asr_zh_with_index.txt
RENAMED
|
File without changes
|
{tests/test_data → test_data}/text/test_translation_en.txt
RENAMED
|
File without changes
|
{tests/test_data → test_data}/text/test_translation_zh.txt
RENAMED
|
File without changes
|
tests/test_app/__init__.py
ADDED
|
File without changes
|
tests/{conftest.py → test_app/conftest.py}
RENAMED
|
File without changes
|
tests/{test_accuracy.py → test_app/test_accuracy.py}
RENAMED
|
File without changes
|
tests/{test_accuracy_and_delay.py → test_app/test_accuracy_and_delay.py}
RENAMED
|
File without changes
|
tests/{test_delay.py → test_app/test_delay.py}
RENAMED
|
File without changes
|
tests/{test_logfile.py → test_app/test_logfile.py}
RENAMED
|
File without changes
|
tests/test_data/dataset_bkp.txt
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tests/test_data/recordings/1.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:77f6f1006e789f69f30f9047d5d93f2cbc58012f1a41a21ebfc12b93c2de7d89
|
| 3 |
-
size 141854
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/10.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:5ce3be6f2ae27a19ee56eafb95ac3ca62b7946c4733e8aa542e74f36d06b036b
|
| 3 |
-
size 184664
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/11.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:6720957bd8bcba4515cc3e416d9898c38f2dbdb377697ef69a528e621e11d0a1
|
| 3 |
-
size 158234
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/12.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:c686cd5322384e36747647f228cd023a77bc323cc26fab9f7fcd351f93019dc3
|
| 3 |
-
size 201614
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/13.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:baf0c19d4729ecba8057a2292cdb117ec9c4836aaa5aabbb9def63ef1d7ffbb3
|
| 3 |
-
size 201674
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/14.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:44baf4bdd176224377b4318400d5ac8a4947517fe09b0e16619d63d77b18a631
|
| 3 |
-
size 321194
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/15.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:b5abf82f0fa3ce6737f7cdae83931b78295bf2adc48f394d1fa6c7193c76e879
|
| 3 |
-
size 252284
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/16.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:3323eeefe1f1219cf2db94a41e0370f699f66e495c390b5d0a8576748955a220
|
| 3 |
-
size 278594
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/17.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:8654234ed4a01ef2b0b32779757da30c5c6929325977c0d7640d40216d12abd5
|
| 3 |
-
size 381464
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/18.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:7ece527a8f191c88bda2800715898ae5e3c2ec13b86d43467ec9e37a53b8e77c
|
| 3 |
-
size 284024
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/19.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:d5d57a7b312033cc19cc7f7a6db06b01b6d37e191a58588b9b32a7ab98032aa3
|
| 3 |
-
size 367964
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/2.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:31be8ce54c599118787494d88e481e511dab05d45d1a7383710ceecf3db7569d
|
| 3 |
-
size 149924
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/20.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:76acbe509263e26fd076bab325cde6b45c52d792a562180ddb9b5a4da62beec7
|
| 3 |
-
size 274964
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/21.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:e143f1d398c8f144324767474805d69b9a1f285734f354d1ff10824f4f869b70
|
| 3 |
-
size 313754
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/22.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:bf10a0bd8f9b202a5fc32f467d7a55c4029675a28f428970a2149f50a315f112
|
| 3 |
-
size 272714
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/23.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:c17c5316b39779bcb84d0aa1593c05fc22907697f6d6c7236c7a7260166e406b
|
| 3 |
-
size 266204
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/24.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:cb1dba9272cd68902b20e705ce687b6cce5e6ef79a5427b7a1bcbb0532137227
|
| 3 |
-
size 297314
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/25.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:b89d3440b113f0db28ec55776d3ca97cdc430ea9d1a13023bfec0d5aba5cabe9
|
| 3 |
-
size 270434
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_data/recordings/26.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:a08d7996a8485369483e7e6f2e19e7c21dbdec91df3bae74a4cba360eb80d9e4
|
| 3 |
-
size 150704
|
|
|
|
|
|
|
|
|
|
|
|