Spaces:
Build error
Build error
| from copy import deepcopy | |
| from itertools import chain | |
| from unittest import TestCase | |
| from voicevox_engine.full_context_label import ( | |
| AccentPhrase, | |
| BreathGroup, | |
| Mora, | |
| Phoneme, | |
| Utterance, | |
| ) | |
class TestBasePhonemes(TestCase):
    """Shared fixture: full-context labels and the phonemes parsed from them."""

    def setUp(self) -> None:
        super().setUp()
        # Output of pyopenjtalk.extract_fullcontext("こんにちは、ヒホです。")
        # ("Konnichiwa, Hiho desu.").
        # The labels are written out literally so that the tests depend on
        # other libraries as little as possible and stay transparent.
        self.test_case_hello_hiho = [
            # sil (silence)
            (
                "xx^xx-sil+k=o/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
                "/F:xx_xx#xx_xx@xx_xx|xx_xx/G:5_5%0_xx_xx/H:xx_xx/I:xx-xx"
                "@xx+xx&xx-xx|xx+xx/J:1_5/K:2+2-9"
            ),
            # k
            (
                "xx^sil-k+o=N/A:-4+1+5/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
                "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
                "@1+2&1-2|1+9/J:1_4/K:2+2-9"
            ),
            # o
            (
                "sil^k-o+N=n/A:-4+1+5/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
                "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
                "@1+2&1-2|1+9/J:1_4/K:2+2-9"
            ),
            # N (moraic nasal)
            (
                "k^o-N+n=i/A:-3+2+4/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
                "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
                "@1+2&1-2|1+9/J:1_4/K:2+2-9"
            ),
            # n
            (
                "o^N-n+i=ch/A:-2+3+3/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
                "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
                "@1+2&1-2|1+9/J:1_4/K:2+2-9"
            ),
            # i
            (
                "N^n-i+ch=i/A:-2+3+3/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
                "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
                "@1+2&1-2|1+9/J:1_4/K:2+2-9"
            ),
            # ch
            (
                "n^i-ch+i=w/A:-1+4+2/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
                "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
                "@1+2&1-2|1+9/J:1_4/K:2+2-9"
            ),
            # i
            (
                "i^ch-i+w=a/A:-1+4+2/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
                "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
                "@1+2&1-2|1+9/J:1_4/K:2+2-9"
            ),
            # w
            (
                "ch^i-w+a=pau/A:0+5+1/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
                "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
                "@1+2&1-2|1+9/J:1_4/K:2+2-9"
            ),
            # a
            (
                "i^w-a+pau=h/A:0+5+1/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
                "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
                "@1+2&1-2|1+9/J:1_4/K:2+2-9"
            ),
            # pau (pause at the comma)
            (
                "w^a-pau+h=i/A:xx+xx+xx/B:09-xx_xx/C:xx_xx+xx/D:09+xx_xx/E:5_5!0_xx-xx"
                "/F:xx_xx#xx_xx@xx_xx|xx_xx/G:4_1%0_xx_xx/H:1_5/I:xx-xx"
                "@xx+xx&xx-xx|xx+xx/J:1_4/K:2+2-9"
            ),
            # h
            (
                "a^pau-h+i=h/A:0+1+4/B:09-xx_xx/C:09_xx+xx/D:22+xx_xx/E:5_5!0_xx-0"
                "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
                "@2+1&2-1|6+4/J:xx_xx/K:2+2-9"
            ),
            # i
            (
                "pau^h-i+h=o/A:0+1+4/B:09-xx_xx/C:09_xx+xx/D:22+xx_xx/E:5_5!0_xx-0"
                "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
                "@2+1&2-1|6+4/J:xx_xx/K:2+2-9"
            ),
            # h
            (
                "h^i-h+o=d/A:1+2+3/B:09-xx_xx/C:22_xx+xx/D:10+7_2/E:5_5!0_xx-0"
                "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
                "@2+1&2-1|6+4/J:xx_xx/K:2+2-9"
            ),
            # o
            (
                "i^h-o+d=e/A:1+2+3/B:09-xx_xx/C:22_xx+xx/D:10+7_2/E:5_5!0_xx-0"
                "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
                "@2+1&2-1|6+4/J:xx_xx/K:2+2-9"
            ),
            # d
            (
                "h^o-d+e=s/A:2+3+2/B:22-xx_xx/C:10_7+2/D:xx+xx_xx/E:5_5!0_xx-0"
                "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
                "@2+1&2-1|6+4/J:xx_xx/K:2+2-9"
            ),
            # e
            (
                "o^d-e+s=U/A:2+3+2/B:22-xx_xx/C:10_7+2/D:xx+xx_xx/E:5_5!0_xx-0"
                "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
                "@2+1&2-1|6+4/J:xx_xx/K:2+2-9"
            ),
            # s
            (
                "d^e-s+U=sil/A:3+4+1/B:22-xx_xx/C:10_7+2/D:xx+xx_xx/E:5_5!0_xx-0"
                "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
                "@2+1&2-1|6+4/J:xx_xx/K:2+2-9"
            ),
            # U (devoiced vowel)
            (
                "e^s-U+sil=xx/A:3+4+1/B:22-xx_xx/C:10_7+2/D:xx+xx_xx/E:5_5!0_xx-0"
                "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
                "@2+1&2-1|6+4/J:xx_xx/K:2+2-9"
            ),
            # sil (silence)
            (
                "s^U-sil+xx=xx/A:xx+xx+xx/B:10-7_2/C:xx_xx+xx/D:xx+xx_xx/E:4_1!0_xx-xx"
                "/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:1_4/I:xx-xx"
                "@xx+xx&xx-xx|xx+xx/J:xx_xx/K:2+2-9"
            ),
        ]
        # One Phoneme per label, in the same order as the labels above.
        self.phonemes_hello_hiho = list(
            map(Phoneme.from_label, self.test_case_hello_hiho)
        )
class TestPhoneme(TestBasePhonemes):
    """Checks the per-phoneme accessors of ``Phoneme`` against the fixture."""

    def test_phoneme(self) -> None:
        # The phoneme symbols must come back in label order.
        symbols = [item.phoneme for item in self.phonemes_hello_hiho]
        self.assertEqual(
            " ".join(symbols),
            "sil k o N n i ch i w a pau h i h o d e s U sil",
        )

    def test_is_pause(self) -> None:
        # Only the leading "sil" (index 0), the "pau" at the comma (index 10)
        # and the trailing "sil" (index 19) are expected to be pauses; the
        # other 17 phonemes (k o N n i ch i w a / h i h o d e s U) are not.
        pause_positions = (0, 10, 19)
        expected_flags = [position in pause_positions for position in range(20)]
        actual_flags = [item.is_pause() for item in self.phonemes_hello_hiho]
        self.assertEqual(actual_flags, expected_flags)

    def test_label(self) -> None:
        # Each phoneme must hand back exactly the label it was parsed from.
        round_tripped = [item.label for item in self.phonemes_hello_hiho]
        self.assertEqual(round_tripped, self.test_case_hello_hiho)
class TestMora(TestBasePhonemes):
    """Checks ``Mora`` objects assembled by hand from the fixture phonemes."""

    def setUp(self) -> None:
        super().setUp()
        phonemes = self.phonemes_hello_hiho

        def consonant_vowel(consonant_at: int, vowel_at: int) -> Mora:
            # Build a mora from the fixture phonemes at the two given indices.
            return Mora(consonant=phonemes[consonant_at], vowel=phonemes[vowel_at])

        # The trailing notes give each mora's spelling and the contexts["a2"]
        # value carried by its labels (the middle number of the A: field).
        self.mora_hello_1 = consonant_vowel(1, 2)  # ko, a2 == "1"
        # "N" has no consonant, so it is built without the helper.
        self.mora_hello_2 = Mora(consonant=None, vowel=phonemes[3])  # N, a2 == "2"
        self.mora_hello_3 = consonant_vowel(4, 5)  # ni, a2 == "3"
        self.mora_hello_4 = consonant_vowel(6, 7)  # chi, a2 == "4"
        self.mora_hello_5 = consonant_vowel(8, 9)  # wa, a2 == "5"
        self.mora_hiho_1 = consonant_vowel(11, 12)  # hi, a2 == "1"
        self.mora_hiho_2 = consonant_vowel(13, 14)  # ho, a2 == "2"
        self.mora_hiho_3 = consonant_vowel(15, 16)  # de, a2 == "3"
        self.mora_hiho_4 = consonant_vowel(17, 18)  # sU, a2 == "4"

    def assert_phonemes(self, mora: Mora, mora_str: str) -> None:
        # Compare the concatenated phoneme symbols of `mora` with `mora_str`.
        spelled = "".join(item.phoneme for item in mora.phonemes)
        self.assertEqual(spelled, mora_str)

    def assert_labels(self, mora: Mora, label_start: int, label_end: int) -> None:
        # The mora's labels must equal the fixture slice [label_start:label_end].
        expected_labels = self.test_case_hello_hiho[label_start:label_end]
        self.assertEqual(mora.labels, expected_labels)

    def test_phonemes(self) -> None:
        expectations = (
            (self.mora_hello_1, "ko"),
            (self.mora_hello_2, "N"),
            (self.mora_hello_3, "ni"),
            (self.mora_hello_4, "chi"),
            (self.mora_hello_5, "wa"),
            (self.mora_hiho_1, "hi"),
            (self.mora_hiho_2, "ho"),
            (self.mora_hiho_3, "de"),
            (self.mora_hiho_4, "sU"),
        )
        for mora, spelling in expectations:
            self.assert_phonemes(mora, spelling)

    def test_labels(self) -> None:
        expectations = (
            (self.mora_hello_1, 1, 3),
            (self.mora_hello_2, 3, 4),
            (self.mora_hello_3, 4, 6),
            (self.mora_hello_4, 6, 8),
            (self.mora_hello_5, 8, 10),
            (self.mora_hiho_1, 11, 13),
            (self.mora_hiho_2, 13, 15),
            (self.mora_hiho_3, 15, 17),
            (self.mora_hiho_4, 17, 19),
        )
        for mora, first, stop in expectations:
            self.assert_labels(mora, first, stop)

    def test_set_context(self) -> None:
        # The value is overwritten, so work on a deep copy to keep the
        # original mora untouched.
        rewritten = deepcopy(self.mora_hello_1)
        # Overwrite "p3", the context that holds the phoneme itself.
        rewritten.set_context("p3", "a")
        self.assert_phonemes(rewritten, "aa")
class TestAccentPhrase(TestBasePhonemes):
    """Checks ``AccentPhrase`` built from the two phrases of the fixture."""

    def setUp(self) -> None:
        super().setUp()
        # TODO: look for a natural, non-contrived input that raises ValueError.
        # If no such input exists this can be left as it is.
        hello_part = self.phonemes_hello_hiho[1:10]
        hiho_part = self.phonemes_hello_hiho[11:19]
        self.accent_phrase_hello = AccentPhrase.from_phonemes(hello_part)
        self.accent_phrase_hiho = AccentPhrase.from_phonemes(hiho_part)

    def test_accent(self) -> None:
        hello_accent = self.accent_phrase_hello.accent
        self.assertEqual(hello_accent, 5)
        hiho_accent = self.accent_phrase_hiho.accent
        self.assertEqual(hiho_accent, 1)

    def test_set_context(self) -> None:
        # Work on a deep copy because the contexts are overwritten.
        rewritten = deepcopy(self.accent_phrase_hello)
        # Overwrite "p3", the context that holds the phoneme itself.
        rewritten.set_context("p3", "a")
        spelled = "".join(item.phoneme for item in rewritten.phonemes)
        self.assertEqual(spelled, "aaaaaaaaa")

    def test_phonemes(self) -> None:
        hello_spelled = " ".join(
            item.phoneme for item in self.accent_phrase_hello.phonemes
        )
        self.assertEqual(hello_spelled, "k o N n i ch i w a")
        hiho_spelled = " ".join(
            item.phoneme for item in self.accent_phrase_hiho.phonemes
        )
        self.assertEqual(hiho_spelled, "h i h o d e s U")

    def test_labels(self) -> None:
        hello_labels = self.test_case_hello_hiho[1:10]
        self.assertEqual(self.accent_phrase_hello.labels, hello_labels)
        hiho_labels = self.test_case_hello_hiho[11:19]
        self.assertEqual(self.accent_phrase_hiho.labels, hiho_labels)

    def test_merge(self) -> None:
        # Merging gives "こんにちはヒホです" ("Konnichiwa Hiho desu"), i.e. the
        # same sentence with the comma removed.
        merged = self.accent_phrase_hello.merge(self.accent_phrase_hiho)
        self.assertEqual(merged.accent, 5)
        merged_spelled = " ".join(item.phoneme for item in merged.phonemes)
        self.assertEqual(merged_spelled, "k o N n i ch i w a h i h o d e s U")
        # The labels are those of both phrases, without the "pau" in between.
        expected_labels = (
            self.test_case_hello_hiho[1:10] + self.test_case_hello_hiho[11:19]
        )
        self.assertEqual(merged.labels, expected_labels)
class TestBreathGroup(TestBasePhonemes):
    """Checks ``BreathGroup`` built from the two phrases of the fixture."""

    def setUp(self) -> None:
        super().setUp()
        hello_part = self.phonemes_hello_hiho[1:10]
        hiho_part = self.phonemes_hello_hiho[11:19]
        self.breath_group_hello = BreathGroup.from_phonemes(hello_part)
        self.breath_group_hiho = BreathGroup.from_phonemes(hiho_part)

    def test_set_context(self) -> None:
        # The value is overwritten, so work on a deep copy to keep the
        # original breath group untouched.
        rewritten = deepcopy(self.breath_group_hello)
        # Overwrite "p3", the context that holds the phoneme itself.
        rewritten.set_context("p3", "a")
        spelled = "".join(item.phoneme for item in rewritten.phonemes)
        self.assertEqual(spelled, "aaaaaaaaa")

    def test_phonemes(self) -> None:
        hello_spelled = " ".join(
            item.phoneme for item in self.breath_group_hello.phonemes
        )
        self.assertEqual(hello_spelled, "k o N n i ch i w a")
        hiho_spelled = " ".join(
            item.phoneme for item in self.breath_group_hiho.phonemes
        )
        self.assertEqual(hiho_spelled, "h i h o d e s U")

    def test_labels(self) -> None:
        hello_labels = self.test_case_hello_hiho[1:10]
        self.assertEqual(self.breath_group_hello.labels, hello_labels)
        hiho_labels = self.test_case_hello_hiho[11:19]
        self.assertEqual(self.breath_group_hiho.labels, hiho_labels)
class TestUtterance(TestBasePhonemes):
    """Checks ``Utterance`` built from every phoneme of the fixture."""

    def setUp(self) -> None:
        super().setUp()
        self.utterance_hello_hiho = Utterance.from_phonemes(self.phonemes_hello_hiho)

    def test_phonemes(self) -> None:
        self.assertEqual(
            " ".join(
                [phoneme.phoneme for phoneme in self.utterance_hello_hiho.phonemes]
            ),
            "sil k o N n i ch i w a pau h i h o d e s U sil",
        )

        # Rebuild an utterance from the phonemes of the first one and verify
        # that the contexts of neighbouring units agree with each other.
        changed_utterance = Utterance.from_phonemes(self.utterance_hello_hiho.phonemes)
        breath_groups = changed_utterance.breath_groups
        self.assertEqual(len(breath_groups), 2)

        # Every accent phrase of the utterance, in order, across breath groups.
        accent_phrases = list(
            chain.from_iterable(
                breath_group.accent_phrases for breath_group in breath_groups
            )
        )
        self._assert_accent_phrase_contexts(accent_phrases, breath_groups)
        self._assert_breath_group_contexts(breath_groups, accent_phrases)

    def _assert_accent_phrase_contexts(self, accent_phrases, breath_groups) -> None:
        """Check the accent-phrase contexts g1/g2, e1/e2 and k2.

        For each accent phrase, its mora count and accent must be stored as
        g1/g2 on the phonemes of the previous phrase and as e1/e2 on the
        phonemes of the next phrase. Its own phonemes must carry, as k2, the
        number of accent phrases summed over all breath groups.
        """
        # Same for every phoneme, so it is computed once instead of per phoneme.
        total_accent_phrases = str(
            sum(len(breath_group.accent_phrases) for breath_group in breath_groups)
        )
        for prev, cent, post in zip(
            [None] + accent_phrases[:-1],
            accent_phrases,
            accent_phrases[1:] + [None],
        ):
            mora_num = str(len(cent.moras))
            accent = str(cent.accent)

            if prev is not None:
                for phoneme in prev.phonemes:
                    self.assertEqual(phoneme.contexts["g1"], mora_num)
                    self.assertEqual(phoneme.contexts["g2"], accent)

            if post is not None:
                for phoneme in post.phonemes:
                    self.assertEqual(phoneme.contexts["e1"], mora_num)
                    self.assertEqual(phoneme.contexts["e2"], accent)

            for phoneme in cent.phonemes:
                self.assertEqual(phoneme.contexts["k2"], total_accent_phrases)

    def _assert_breath_group_contexts(self, breath_groups, accent_phrases) -> None:
        """Check the breath-group contexts j1, h1, i1, i5 and i6.

        For each breath group, its accent-phrase count must be stored as j1 on
        the phonemes of the previous group, as h1 on the phonemes of the next
        group and as i1 on its own phonemes. i5 and i6 on its own phonemes
        must give the position of the group's first accent phrase within
        ``accent_phrases``, counted from the front (1-based) and from the back.
        """
        for prev, cent, post in zip(
            [None] + breath_groups[:-1],
            breath_groups,
            breath_groups[1:] + [None],
        ):
            accent_phrase_num = str(len(cent.accent_phrases))

            if prev is not None:
                for phoneme in prev.phonemes:
                    self.assertEqual(phoneme.contexts["j1"], accent_phrase_num)

            if post is not None:
                for phoneme in post.phonemes:
                    self.assertEqual(phoneme.contexts["h1"], accent_phrase_num)

            # The position of the group's first accent phrase does not depend
            # on the phoneme, so it is looked up once per breath group.
            first_index = accent_phrases.index(cent.accent_phrases[0])
            forward_position = str(first_index + 1)
            backward_position = str(len(accent_phrases) - first_index)

            for phoneme in cent.phonemes:
                self.assertEqual(phoneme.contexts["i1"], accent_phrase_num)
                self.assertEqual(phoneme.contexts["i5"], forward_position)
                self.assertEqual(phoneme.contexts["i6"], backward_position)

    def test_labels(self) -> None:
        # The utterance must reproduce the labels it was built from.
        self.assertEqual(self.utterance_hello_hiho.labels, self.test_case_hello_hiho)