refactor file structure
Browse files- main.py +2 -2
- scripts/funasr_utils.py +1 -1
- scripts/run_funasr.py +1 -1
- scripts/run_funasr_c.py +1 -1
- scripts/run_funasr_quant.py +2 -2
- scripts/run_whisper.py +2 -2
- scripts/run_whisper_finetuned.py +2 -2
- scripts/split_audio.py +1 -1
- test_data/__init__.py +0 -57
- test_data/{dataset.txt → dataset_aishell/dataset.txt} +0 -0
- test_data/{text → recordings/text}/test_asr_zh.txt +0 -0
- test_data/{text → recordings/text}/test_asr_zh_with_index.txt +0 -0
- test_data/{text → recordings/text}/test_translation_en.txt +0 -0
- test_data/{text → recordings/text}/test_translation_zh.txt +0 -0
- tests/test_app/test_accuracy.py +1 -1
- tests/test_app/test_accuracy_and_delay.py +1 -1
- tests/test_app/test_delay.py +1 -1
main.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import time
|
| 2 |
from lib.audio import play_audio_until_end
|
| 3 |
-
from test_data import
|
| 4 |
|
| 5 |
if __name__ == '__main__':
|
| 6 |
# report = Report()
|
|
@@ -41,7 +41,7 @@ if __name__ == '__main__':
|
|
| 41 |
# print(stderr)
|
| 42 |
# p = PageRunner("electron").start()
|
| 43 |
# p.start_en2zh()
|
| 44 |
-
audios =
|
| 45 |
for a in audios:
|
| 46 |
play_audio_until_end(a)
|
| 47 |
time.sleep(5)
|
|
|
|
| 1 |
import time
|
| 2 |
from lib.audio import play_audio_until_end
|
| 3 |
+
from test_data import audio_clips
|
| 4 |
|
| 5 |
if __name__ == '__main__':
|
| 6 |
# report = Report()
|
|
|
|
| 41 |
# print(stderr)
|
| 42 |
# p = PageRunner("electron").start()
|
| 43 |
# p.start_en2zh()
|
| 44 |
+
audios = audio_clips.get("zh")
|
| 45 |
for a in audios:
|
| 46 |
play_audio_until_end(a)
|
| 47 |
time.sleep(5)
|
scripts/funasr_utils.py
CHANGED
|
@@ -44,7 +44,7 @@ def run_funasr():
|
|
| 44 |
)
|
| 45 |
t1 = time.time()
|
| 46 |
print("load model: ", t1 - t0)
|
| 47 |
-
audios = Path("/
|
| 48 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 49 |
for audio in sorted(audios.glob("Chinese-mayun-part2.mp3")):
|
| 50 |
print(audio)
|
|
|
|
| 44 |
)
|
| 45 |
t1 = time.time()
|
| 46 |
print("load model: ", t1 - t0)
|
| 47 |
+
audios = Path("/test_data/audio_clips")
|
| 48 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 49 |
for audio in sorted(audios.glob("Chinese-mayun-part2.mp3")):
|
| 50 |
print(audio)
|
scripts/run_funasr.py
CHANGED
|
@@ -25,7 +25,7 @@ def main():
|
|
| 25 |
)
|
| 26 |
t1 = time.time()
|
| 27 |
print("load model: ", t1 - t0)
|
| 28 |
-
audios = Path("/
|
| 29 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 30 |
for audio in sorted(audios.glob("*mix/*")):
|
| 31 |
print(audio)
|
|
|
|
| 25 |
)
|
| 26 |
t1 = time.time()
|
| 27 |
print("load model: ", t1 - t0)
|
| 28 |
+
audios = Path("/test_data/audio_clips/")
|
| 29 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 30 |
for audio in sorted(audios.glob("*mix/*")):
|
| 31 |
print(audio)
|
scripts/run_funasr_c.py
CHANGED
|
@@ -22,7 +22,7 @@ def main():
|
|
| 22 |
asr.init()
|
| 23 |
t1 = time.time()
|
| 24 |
print("Initializing model: ", t1-t0)
|
| 25 |
-
audios = Path("/
|
| 26 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 27 |
for audio in sorted(audios.glob("*s-ac1/Chinese*")):
|
| 28 |
print(audio)
|
|
|
|
| 22 |
asr.init()
|
| 23 |
t1 = time.time()
|
| 24 |
print("Initializing model: ", t1-t0)
|
| 25 |
+
audios = Path("/test_data/audio_clips/")
|
| 26 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 27 |
for audio in sorted(audios.glob("*s-ac1/Chinese*")):
|
| 28 |
print(audio)
|
scripts/run_funasr_quant.py
CHANGED
|
@@ -62,7 +62,7 @@ def run_recordings():
|
|
| 62 |
def run_test_audios():
|
| 63 |
quantize = True
|
| 64 |
vad_model, asr_model, punc_model = load_model(quantize)
|
| 65 |
-
audios = Path("../test_data/
|
| 66 |
rows = [["file_name", "time", "inference_result"]]
|
| 67 |
for audio in sorted(audios.glob("*s/zh*.wav")):
|
| 68 |
text, t = inference(vad_model, asr_model, punc_model, audio)
|
|
@@ -74,7 +74,7 @@ def run_test_dataset():
|
|
| 74 |
from scripts.asr_utils import read_dataset
|
| 75 |
quantize = True
|
| 76 |
vad_model, asr_model, punc_model = load_model(quantize)
|
| 77 |
-
test_data = Path("../test_data/dataset.txt")
|
| 78 |
audio_parent = Path("../test_data/")
|
| 79 |
rows = [["file_name", "time", "inference_result"]]
|
| 80 |
result_list = []
|
|
|
|
| 62 |
def run_test_audios():
|
| 63 |
quantize = True
|
| 64 |
vad_model, asr_model, punc_model = load_model(quantize)
|
| 65 |
+
audios = Path("../test_data/audio_clips/")
|
| 66 |
rows = [["file_name", "time", "inference_result"]]
|
| 67 |
for audio in sorted(audios.glob("*s/zh*.wav")):
|
| 68 |
text, t = inference(vad_model, asr_model, punc_model, audio)
|
|
|
|
| 74 |
from scripts.asr_utils import read_dataset
|
| 75 |
quantize = True
|
| 76 |
vad_model, asr_model, punc_model = load_model(quantize)
|
| 77 |
+
test_data = Path("../test_data/dataset_aishell/dataset.txt")
|
| 78 |
audio_parent = Path("../test_data/")
|
| 79 |
rows = [["file_name", "time", "inference_result"]]
|
| 80 |
result_list = []
|
scripts/run_whisper.py
CHANGED
|
@@ -53,7 +53,7 @@ def run_recordings():
|
|
| 53 |
def run_test_audios():
|
| 54 |
model = load_model()
|
| 55 |
lang = "zh"
|
| 56 |
-
audios = Path("../test_data/
|
| 57 |
rows = [["file_name", "time", "inference_result"]]
|
| 58 |
for audio in sorted(audios.glob(f"*{lang}*/*.wav")):
|
| 59 |
print(audio)
|
|
@@ -69,7 +69,7 @@ def run_test_audios():
|
|
| 69 |
def run_test_dataset():
|
| 70 |
from scripts.asr_utils import read_dataset
|
| 71 |
model = load_model()
|
| 72 |
-
test_data = Path("../test_data/dataset.txt")
|
| 73 |
audio_parent = Path("../test_data/")
|
| 74 |
rows = [["file_name", "time", "inference_result"]]
|
| 75 |
result_list = []
|
|
|
|
| 53 |
def run_test_audios():
|
| 54 |
model = load_model()
|
| 55 |
lang = "zh"
|
| 56 |
+
audios = Path("../test_data/audio_clips/")
|
| 57 |
rows = [["file_name", "time", "inference_result"]]
|
| 58 |
for audio in sorted(audios.glob(f"*{lang}*/*.wav")):
|
| 59 |
print(audio)
|
|
|
|
| 69 |
def run_test_dataset():
|
| 70 |
from scripts.asr_utils import read_dataset
|
| 71 |
model = load_model()
|
| 72 |
+
test_data = Path("../test_data/dataset_aishell/dataset.txt")
|
| 73 |
audio_parent = Path("../test_data/")
|
| 74 |
rows = [["file_name", "time", "inference_result"]]
|
| 75 |
result_list = []
|
scripts/run_whisper_finetuned.py
CHANGED
|
@@ -139,7 +139,7 @@ def load_model():
|
|
| 139 |
|
| 140 |
def run_test_audios():
|
| 141 |
model, processor = load_model()
|
| 142 |
-
audios = Path("../test_data/
|
| 143 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 144 |
for audio in sorted(audios.glob("*en-ac1-16k/*.wav")): # *s/randomforest*.wav"
|
| 145 |
try:
|
|
@@ -181,7 +181,7 @@ def run_recordings():
|
|
| 181 |
def run_test_dataset():
|
| 182 |
from scripts.asr_utils import read_dataset
|
| 183 |
model, processor = load_model()
|
| 184 |
-
test_data = Path("../test_data/dataset.txt")
|
| 185 |
audio_parent = Path("../test_data/")
|
| 186 |
rows = [["file_name", "time", "inference_result"]]
|
| 187 |
result_list = []
|
|
|
|
| 139 |
|
| 140 |
def run_test_audios():
|
| 141 |
model, processor = load_model()
|
| 142 |
+
audios = Path("../test_data/audio_clips/")
|
| 143 |
rows = [["file_name", "inference_time", "inference_result"]]
|
| 144 |
for audio in sorted(audios.glob("*en-ac1-16k/*.wav")): # *s/randomforest*.wav"
|
| 145 |
try:
|
|
|
|
| 181 |
def run_test_dataset():
|
| 182 |
from scripts.asr_utils import read_dataset
|
| 183 |
model, processor = load_model()
|
| 184 |
+
test_data = Path("../test_data/dataset_aishell/dataset.txt")
|
| 185 |
audio_parent = Path("../test_data/")
|
| 186 |
rows = [["file_name", "time", "inference_result"]]
|
| 187 |
result_list = []
|
scripts/split_audio.py
CHANGED
|
@@ -14,7 +14,7 @@ def cmd(command: str, check=True, capture_output=False) -> CompletedProcess:
|
|
| 14 |
return ret
|
| 15 |
|
| 16 |
|
| 17 |
-
current = Path("/
|
| 18 |
audios_5s = current/"5s"
|
| 19 |
audios_10s = current/"10s"
|
| 20 |
if not audios_5s.exists():
|
|
|
|
| 14 |
return ret
|
| 15 |
|
| 16 |
|
| 17 |
+
current = Path("/test_data/audio_clips")
|
| 18 |
audios_5s = current/"5s"
|
| 19 |
audios_10s = current/"10s"
|
| 20 |
if not audios_5s.exists():
|
test_data/__init__.py
CHANGED
|
@@ -1,57 +0,0 @@
|
|
| 1 |
-
from environment import TEST_AUDIOS_DIR
|
| 2 |
-
|
| 3 |
-
test_audios = {
|
| 4 |
-
"zh": [
|
| 5 |
-
TEST_AUDIOS_DIR/"Chinese-calculus-part1.mp3",
|
| 6 |
-
TEST_AUDIOS_DIR/"Chinese-economics-part1.mp3",
|
| 7 |
-
TEST_AUDIOS_DIR/"Chinese-food-part1.mp3",
|
| 8 |
-
TEST_AUDIOS_DIR/"Chinese-healthandweather-part1.wav",
|
| 9 |
-
TEST_AUDIOS_DIR/"Chinese-laws-part1.mp3",
|
| 10 |
-
TEST_AUDIOS_DIR/"Chinese-liyongle-part1.mp3",
|
| 11 |
-
TEST_AUDIOS_DIR / "Chinese-mayun-part2.mp3",
|
| 12 |
-
TEST_AUDIOS_DIR / "Chinese-news-part1.mp3",
|
| 13 |
-
TEST_AUDIOS_DIR / "Chinese-railway-part1.mp3",
|
| 14 |
-
TEST_AUDIOS_DIR / "Chinese-ssd-part1.mp3",
|
| 15 |
-
|
| 16 |
-
],
|
| 17 |
-
"en": [
|
| 18 |
-
TEST_AUDIOS_DIR/"English-chaos-part2.wav",
|
| 19 |
-
TEST_AUDIOS_DIR/"English-computer_sicence-part1.mp3",
|
| 20 |
-
TEST_AUDIOS_DIR/"English-generative_ai-part1.mp3",
|
| 21 |
-
TEST_AUDIOS_DIR/"English-internet-part20.mp3",
|
| 22 |
-
TEST_AUDIOS_DIR/"English-legalsystem-part1.mp3",
|
| 23 |
-
TEST_AUDIOS_DIR/"English-literarytheory-part1.mp3",
|
| 24 |
-
TEST_AUDIOS_DIR/"English-news-part1.mp3",
|
| 25 |
-
TEST_AUDIOS_DIR/"English-proberbs-part1.mp3",
|
| 26 |
-
TEST_AUDIOS_DIR/"English-stevejobs-part2.wav",
|
| 27 |
-
TEST_AUDIOS_DIR/"English-stock-part1.mp3",
|
| 28 |
-
TEST_AUDIOS_DIR/"English-whyJapan-part1.mp3",
|
| 29 |
-
TEST_AUDIOS_DIR/"English-zuckerberg-part1.wav",
|
| 30 |
-
]
|
| 31 |
-
}
|
| 32 |
-
|
| 33 |
-
audio_texts = {
|
| 34 |
-
'Chinese-calculus-part1': '后来我自己总结啊,微积分这么难,入门主要有几个原因。首先呢大部分的教材为了追求严谨,从一开始就使用了现代数学的这个所谓极限的概念。在它的基础之上向你介绍微积分。呃,问题是它是一个非常抽象的概念。对于大部分在接触微积分之前啊,主要的学习经验就是刷题啊,甚至是连题也不刷的。同学们来说呢,这种抽象语言会很陌生。而且如果你去了解微积分的历史的时候,你会发现极限这个概念啊是微积分创立之后大概一两百年才出现的这么个东西。你等于说我们现在公认的这些微积分的创始人,这些大佬们、牛顿啊、莱布尼茨、欧拉啊,连他们都不知道极限是什么。但是人家就是凭着直觉创建了微积分,当然作为教科书嘛,追求严谨无可厚非啊。虽然对于我来讲,过早的追求这种严谨,导致学习的人入门困难甚至入不了门。',
|
| 35 |
-
'Chinese-economics-part1': '经济就像一部简单的机器那样运行,但很多人不懂得这一点,或是对经济的运行方式持有不同观点,于是导致很多不必要的经济损失。我深感有责任与大家分享我的简单,但是实用的经济分析模式。这个模式虽然不符合常规传统经济学,但是已经帮助我预测和躲避了全球金融危机。三十多年来对我一直很有用。我们开始吧,经济虽然可能看起来复杂,但是其实是以简单和机械的方式运行。经济由几个简单的零部件和无数次重复的简单交易组成。这些交易首先是由人的天性所驱动,因而形成三股主要的经济动力,一、生产率的提高。二短期债务周期。三、长期债务周期。',
|
| 36 |
-
'Chinese-food-part1': '鸡胸肉先切成条,再切成丁,也可以用鸡腿肉来做,加入几勺墨西哥风味调料可以买现成的,也可以根据公屏上的配方自己调制,抓拌均匀腌制。一会儿现在开始各种蔬菜,切切切半个洋葱,切西丁,半个红甜椒,半个黄甜椒切成细丁,绿色的菜椒也可以。番茄的瓤比较酸,水分也比较多,所以要去瓤后切成丁,现在就可以开始炒馅料了。锅里放适量橄榄油,先把鸡肉丁炒到上色,加入洋葱和彩椒,这里用盐调一下味,盐会帮助蔬菜出水。翻炒一会儿后,放入番茄丁,继续小火翻炒炖煮一会儿最后撒入一把香菜碎。喜欢吃辣的也可以放一些墨西哥辣椒。Hello, panel馅料就炒好了。',
|
| 37 |
-
'Chinese-healthandweather-part1': '我们印象特别深的是二零二二年春节前后那段时间。当时的一月份呢是天气较往年呢,是要炎热的。我们那个时候穿着一件短袖就可以的。但是到了春节二月份的期间呢,它突然的降温天气急剧的变化,夹杂着阴雨的天气啊,那段时间呢我们急诊的出车量就明显的增多了啊,那个时候呢我们接了好多的病人啊,有心脏病急性发作的啊,有脑中风的啊,还有老慢之咳喘。急性发作的病人也有,因为天气太冷,湿温那个司机他反应迟钝了之后,出车祸的患者。那在广东地区呢,其实人们感知最多,最明显的还是这种高温热浪的天气增多。',
|
| 38 |
-
'Chinese-laws-part1': '今天就违约方合同解除权与大家做交流探讨。违约方合同解除权是指当出现合同僵局的情形下,赋予违约方以提起诉讼的方式行使的一种权利,在性质上属于形成诉权。至于合同是否被解除,取决于法院对案件事实的把握、判断和认定。一、违约方合同解除权的发展历程。违约方合同解除权的发展,从否定到肯定,大致经历了以下三个阶段。',
|
| 39 |
-
'Chinese-liyongle-part1': '各位同学大家好,我是李永乐老师。最近呢有个朋友跟我说,他遇到了两件烦心事儿。第一件呢是家里的两个小朋友,总是因为抢玩具和食物而打架。第二件事呢就就是马上就要到年底呢,他需要制作一个方案来给部门的下属分配各种奖励。有的奖励呢是现金,也有晋升的机会以及各种荣誉。每年到了这个时候啊,部门里面就会鸡飞狗跳,所有人都觉得自己拿少了,甚至发生过啊,有人去上级主管部门举报自己同事的事儿,所以他就想问我有没有一种方法可以让大家都满意呢?其实啊这个问题在数学上是有解的,称之为切蛋糕问题。我们今天就来讲一讲如何能够让大家都满意的分配一块蛋糕。首先呢我们先来讲一讲公平的切蛋糕方法是什么?那什么叫公平呢?我们在数学上可以这样定义公平,如果有n个人去分一块蛋糕,那么n个人的话,每一个人都可以拿到一块蛋糕,拿到多少呢?都拿到不少于不少于n分之一的蛋糕。',
|
| 40 |
-
'Chinese-mayun-part2': '第二个公司我们成立了中国黄页。在中国黄业的创业经验中有很多的经验,也是可以要跟他这儿跟大家进行分享。九五年做互联网是最艰难的时候,就中国那时候还没联通互联网,我就到美国去了一趟。回来以后我们要做互联网,我请了二十四个朋友在我们家开会,说了两个小时,没人听懂。我在说什么。最后二十三个人反对一个人同意这一个人就说马云你这样做,你就试试看,不行的话,赶紧逃回来,还来得及。那我自己想了一个晚上,第二天早上我决定还是做下去。中国人很多创业是晚上想想千条路,早上起来走原路,晚上想想是热血沸腾,真好,第二天早上骑个自行车又上班去了,对吧?这是我们很多创业者所碰上的问题。说那一天我觉得因为我看见过互联网,我觉得互联网会将来会好,但这些人没看见过,但是没看见过的机会,就是你怎么把它变成现实。',
|
| 41 |
-
'Chinese-news-part1': '二零二二年二月二十四日,俄罗斯总统普京宣布对乌克兰发起特别军事行动,俄乌冲突就此爆发。三年后的今天,美国总统特朗普重返白宫,是要在六个月内为俄乌带来和平。没想到上台后的第一剑,先砍向了曾经的盟友乌克兰。就目前来看,特朗普的计划是这样的,先由美国和俄罗斯接触,尽可能的修复美俄关系,并和俄罗斯谈的差不多了,把乌克兰和欧洲叫过来签个字,并由欧洲负责乌克兰的战后安全保障。美国只要把当初给乌克兰的援助拿回来,拍拍屁股走人就行了。事实证明,除了第一步,后面的计划都不太顺利。',
|
| 42 |
-
'Chinese-railway-part1': '很多观众可能会说,这不是应该是一个工程师研究的问题吗?为什么我作为一个政治学者要去研究高铁?的确,在高铁的建设和规划过程中有很多技术层面的问题需要解决。比如,高铁经过沿线的地质和地形,高铁的转弯半径、铁轨的间距、建成后预期的客流等等。这些问题不仅会影响高铁的安全运行,和它未来的经济收益,同样也会影响高铁的走势。但是呢大家不要忘记,在中国高铁是一个由政府投资的公共项目。政治学就是研究那些稀缺的公共资源是如何被分配的。一九三零年,代知名的政治学者拉斯维尔曾经对什么是政治给出过一个定义。他说,政治就是一个什么样的人,在何时以何种方式获得什么的问题。',
|
| 43 |
-
'Chinese-ssd-part1': '在选购固态硬盘时,我们有时候会看到某些评测文章或者视频提及某款固态硬盘是有缓存方案还是无缓存方案。在关于性能测试的部分,经常会听到固态硬盘的缓内速度和缓外速度这一概念。这里边这个缓存指的究竟是什么?对固态硬盘的性能又有什么影响?固态硬盘的缓存主要分为外置d ram和SLC、 cash,这两个缓存的概念和用途截然不同。为了理解它们的区别和作用,我们首先要从固态硬盘最基础的工作原理开始了解起。这是一块三星的九七零EVO plus两TB固态硬盘四开表面带有型号和参数的贴纸,以后,就能看到构成固态硬盘的几个基础结构。其中主控控制着固态硬盘数据的读取和写入写入数据时,数据会经过主控处理,随后被存放在的闪存颗粒中。',
|
| 44 |
-
'English-chaos-part2': 'This sequence of events is an example of what is known as the Butterfly Effect, a manifestation of Chaos Theory. For many centuries, the world was explained through the laws of Isaac Newton in classical physics. According to these laws, if the current state of an object is known, its future behavior can be predicted with relative ease. Chaos Theory questions this deterministic vision. Not everything is predictable anymore, nor does it work like clockwork. Since the 1800s, mathematicians have raised the idea that not all phenomena could be predicted by Newtonian laws. But a meteorologist named Edward Lawrence made Chaos Theory a visible phenomenon. It all started in 1961, when he was working on a mathematical model to forecast the weather. Lawrence entered data such as temperature, humidity, pressure, and wind direction into his computer. His computer would draw a graph modeling what the weather would be like. Not always accurate, but very close to reality.',
|
| 45 |
-
'English-computer_sicence-part1': "We built computers to expand our brains. Originally scientists built computers to solve arithmetic, but they turned out to be incredibly useful for many other things as well. Running the entire internet, lifelike graphics, artificial brains, or simulating the universe. But amazingly all of it boils down to just flipping zeros and ones. Computers have become smaller and more powerful at an incredible rate. There was more computing power in your cell phone than there was in the entire world in the mid-1960s. And the entire Apollo moon landing could have been run on a couple of Nintendos. Computer science is a subject that studies what computers can do. It's a diverse and overlapping field, but I'm going to split it into three parts. The fundamental theory of computer science, computer engineering, and applications.",
|
| 46 |
-
'English-generative_ai-part1': "So in 2023, OpenAI, which is a company in California, in fact in San Francisco, if you go to San Francisco, you can even see the lights at night of their building, it announced GPT-4, and it claimed that it can beat 90% of humans on the SAT. For those of you who don't know, SAT is a standardized test that American schoolchildren have to take to enter university. It's an admissions test. And it's multiple choice, and it's considered not so easy. So GPT-4 can do it. They also claimed that it can get top marks in law, medical exams, and other exams. They have a whole suite of things that they claim, well, not they claim, they show that GPT-4 can do it. Okay. Aside from that, it can pass exams. We can ask it to do other things. So you can ask it to write text for you",
|
| 47 |
-
'English-internet-part20': "Many, many years ago in the early 1970s, my partner Bob Kahn and I began working on the design of what we now call the Internet. Bob and I had the responsibility and the opportunity to design the Internet's protocols and its architecture. So we persisted in participating in the Internet's growth and evolution for all of this time up to and including the present. The way information gets transferred from one computer to another is pretty interesting. It need not follow a fixed path. In fact, your path may change in the midst of a computer-to-computer conversation. Information on the Internet goes from one computer to another in what we call a packet of information. And a packet travels from one place to another on the Internet a lot like how you might get from one place to another in a car. Depending on traffic congestion or road conditions, you might choose or be forced to take a different route to get to the same place each time you travel.",
|
| 48 |
-
'English-legalsystem-part1': "In general, courts have three basic functions, only one of which you probably learned about in your history class. The first thing that courts do is settle disputes. In pre-modern history, which is way easier to understand than post-modern history, kings performed this function. But as states got bigger and more powerful, it became much easier to have specialized officials decide important issues, like who owned the fox you caught on someone else's land, or what does the fox say, which was disputed a lot back then. The second thing that courts do is probably the one you heard about in school, or on television, or perhaps while studying for the standardized test, and that's interpret the laws. This becomes increasingly important when you actually try to read laws, or when you realize that legislators are often not as careful as they might be when writing laws in the first place. Take a look at the Affordable Care Act. There are a few famous careless errors in that. Finally, courts create expectations for future actions. This is very important if you want to do business with someone. If you know that you'll be punished for cheating a potential business client, you're less likely to do it.",
|
| 49 |
-
'English-literarytheory-part1': "Literary theory is often likened to different lenses through which we read a text. So if you think of literal lenses, a microscope, you're going to see the wing of a butterfly extremely close up. But if you use the magnifying glass, you get a different image altogether. You're getting different details brought out. Another way to think of literary theory, though, more modern, is filters on your phone or on your computer. Now, I'm going to put filters across my face as I'm talking now. And you'll notice how filters will select particular hues and deselect other hues. They'll enhance certain contrasts or remove brightness or saturation levels, whatever it may be. The point is, it's giving you a different way to feel and interpret the image that you're watching. Now, literary theories do the same with a book.",
|
| 50 |
-
'English-news-part1': "It is a stunning turn of events on the third anniversary of Russia's full-scale invasion of its smaller neighbor, Ukraine. At the UN General Assembly, America sided with Russia and North Korea in voting against the resolution condemning Russia's war. It came as Emmanuel Macron visited the White House for talks with the French president saying Donald Trump believed European peacekeepers would be accepted by Russia as a security guarantee underpinning any future deal. The French leader said he'd already spoken to the UK about a deployment, but added that the solidarity and support of America was crucial. Meanwhile, world leaders traveled to Kiev to pledge further support for Ukraine, with President Zelensky saying we hope we can finish this war this year.",
|
| 51 |
-
'English-proberbs-part1': "There are three books in the Bible that have come to be called the wisdom literature, Proverbs, Ecclesiastes, and Job. And all of these books are addressing the same set of questions. What kind of world are we living in? And what does it look like to live well in this world? So how to be good at life. Yeah. So each of these books tackles these questions from a unique perspective. And it's important to understand all of them to get a fully biblical perspective on the good life. So as a thought experiment, you could actually imagine each of these books as a person. So Proverbs would be like this brilliant young teacher. And Ecclesiastes, the sharp middle-aged critic. And Job would be this weathered old man who's seen a lot in his day. We're going to start by meeting the book of Proverbs, the brilliant young teacher. And she's not just smart. She's smart about everything, work, relationships, sex, spirituality. She has incredible insights, things you wouldn't see on your own. Yeah, she would be the perfect friend to have around when you need really specific advice.",
|
| 52 |
-
'English-stevejobs-part2': "And 17 years later, I did go to college. But I naively chose a college that was almost as expensive as Stanford. And all of my working class parent savings were being spent on my college tuition. After six months, I couldn't see the value in it. I had no idea what I wanted to do with my life, and no idea how college was going to help me figure it out. And here I was, spending all the money my parents had saved their entire life. So I decided to drop out, and trust that it would all work out okay. It was pretty scary at the time, but looking back, it was one of the best decisions I ever made. The minute I dropped out, I could stop taking the required classes that didn't interest me, and begin dropping in on the ones that looked far more interesting. It wasn't all romantic. I didn't have a job. I didn't have a dorm room, so I slept on the floor in friends' rooms. I returned Coke bottles for the five-cent deposits to buy food with. And I would walk the seven miles across town every Sunday night to get one good meal a week at the Hare Krishna Temple.",
|
| 53 |
-
'English-stock-part1': "In the 1600s, the Dutch East India Company employed hundreds of ships to trade gold, porcelain, spices, and silks around the globe. But running this massive operation wasn't cheap. In order to fund their expensive voyages, the company turned to private citizens, individuals who could invest money to support the trip in exchange for a share of the ship's profits. This practice allowed the company to afford even grander voyages, increasing profits for both themselves and their savvy investors. Selling these shares in coffeehouses and shipping ports across the continent, the Dutch East India Company unknowingly invented the world's first stock market.",
|
| 54 |
-
'English-whyJapan-part1': 'Working long hours is a way of life in Japan, so much so that some occasionally die from it. "Kiroshi" is a term that means death by overwork. So why are people working so hard and can it be fixed? Japan has some of the longest working hours in the world. Nearly a quarter of Japanese companies have their employees work more than 80 hours of overtime per month. Those extra hours are often unpaid. Today, Japan\'s corporate culture is driven by the so-called salaryman. The salaryman is defined by his loyalty to his company. And expected to spend his entire career working for the same firm. Not only does he work long hours in the office, but it\'s also assumed he\'ll participate in after-work activities too, like drinking with colleagues. And they\'re not taking enough time off. In 2017, one survey found that despite being entitled to 20 days off per year, Japanese workers left 10 of those days unused, topping every other country in the survey.',
|
| 55 |
-
'English-zuckerberg-part1': "Back when I was in college, our country had just gone to war in Iraq. And the mood on our campus was disbelief. A lot of people felt like we were acting without hearing a lot of important perspectives. And the toll on soldiers and their families and our national psyche was severe. Yet most of us felt like we were powerless to do anything about it. And I remember feeling that if more people had a voice to share their experiences, then maybe it could have gone differently. And those early years shaped my belief that giving more people a voice gives power to the powerless and pushes society to get better over time. Now, back then I was just building an early version of Facebook for my community. But I got a chance to see my values. And I think that's the best way to do it. When students got to express who they were and what mattered to them, they started more student groups, they organized more businesses, and they even challenged some established ways of doing things around campus."
|
| 56 |
-
|
| 57 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test_data/{dataset.txt → dataset_aishell/dataset.txt}
RENAMED
|
File without changes
|
test_data/{text → recordings/text}/test_asr_zh.txt
RENAMED
|
File without changes
|
test_data/{text → recordings/text}/test_asr_zh_with_index.txt
RENAMED
|
File without changes
|
test_data/{text → recordings/text}/test_translation_en.txt
RENAMED
|
File without changes
|
test_data/{text → recordings/text}/test_translation_zh.txt
RENAMED
|
File without changes
|
tests/test_app/test_accuracy.py
CHANGED
|
@@ -4,7 +4,7 @@ import pytest
|
|
| 4 |
|
| 5 |
from lib.audio import play_audio_until_end, get_length
|
| 6 |
from lib.pages import TranslatorPage
|
| 7 |
-
from
|
| 8 |
from lib.report import AccuracyReport, AccuracyItem
|
| 9 |
|
| 10 |
@pytest.mark.parametrize("audio", test_audios.get("zh"))
|
|
|
|
| 4 |
|
| 5 |
from lib.audio import play_audio_until_end, get_length
|
| 6 |
from lib.pages import TranslatorPage
|
| 7 |
+
from test_data.audio_clips.audio_list import test_audios, audio_texts
|
| 8 |
from lib.report import AccuracyReport, AccuracyItem
|
| 9 |
|
| 10 |
@pytest.mark.parametrize("audio", test_audios.get("zh"))
|
tests/test_app/test_accuracy_and_delay.py
CHANGED
|
@@ -4,7 +4,7 @@ import pytest
|
|
| 4 |
|
| 5 |
from lib.audio import play_audio, get_length
|
| 6 |
from lib.pages import TranslatorPage
|
| 7 |
-
from
|
| 8 |
from lib.report import AccuracyReport, AccuracyItem
|
| 9 |
from lib.report import DelayReport, DelayItem, LogReport
|
| 10 |
|
|
|
|
| 4 |
|
| 5 |
from lib.audio import play_audio, get_length
|
| 6 |
from lib.pages import TranslatorPage
|
| 7 |
+
from test_data.audio_clips.audio_list import test_audios, audio_texts
|
| 8 |
from lib.report import AccuracyReport, AccuracyItem
|
| 9 |
from lib.report import DelayReport, DelayItem, LogReport
|
| 10 |
|
tests/test_app/test_delay.py
CHANGED
|
@@ -4,7 +4,7 @@ import pytest
|
|
| 4 |
from lib.audio import get_length, play_audio
|
| 5 |
from lib.pages import TranslatorPage
|
| 6 |
from lib.report import DelayReport, DelayItem, LogReport
|
| 7 |
-
from
|
| 8 |
|
| 9 |
@pytest.mark.parametrize("audio", test_audios.get("zh"))
|
| 10 |
def test_delay_zh2en(log_file, app, delay_report: DelayReport,page: TranslatorPage, audio:Path):
|
|
|
|
| 4 |
from lib.audio import get_length, play_audio
|
| 5 |
from lib.pages import TranslatorPage
|
| 6 |
from lib.report import DelayReport, DelayItem, LogReport
|
| 7 |
+
from test_data.audio_clips.audio_list import test_audios
|
| 8 |
|
| 9 |
@pytest.mark.parametrize("audio", test_audios.get("zh"))
|
| 10 |
def test_delay_zh2en(log_file, app, delay_report: DelayReport,page: TranslatorPage, audio:Path):
|