Spaces:
Build error
Build error
| from typing import List | |
| from unittest import TestCase | |
| from voicevox_engine import kana_parser | |
| from voicevox_engine.kana_parser import create_kana | |
| from voicevox_engine.model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode | |
def parse_kana(text: str) -> List[AccentPhrase]:
    """Forward ``text`` to ``kana_parser.parse_kana`` and return its result.

    The tests below call this module-level wrapper instead of the parser
    module directly.
    """
    return kana_parser.parse_kana(text)
class TestParseKana(TestCase):
    """Tests of ``parse_kana`` output for kana strings that are expected to parse."""

    # ------------------------------------------------------------------
    # Factories for the expected values used by the *_accent_phrase_marks
    # tests. Every call returns a fresh object.
    # ------------------------------------------------------------------
    @staticmethod
    def _mora(text, consonant, vowel):
        """Build a Mora with zero vowel length and zero pitch.

        ``consonant_length`` is 0.0 when a consonant is given and None
        otherwise.
        """
        return Mora(
            text=text,
            consonant=consonant,
            consonant_length=None if consonant is None else 0.0,
            vowel=vowel,
            vowel_length=0.0,
            pitch=0.0,
        )

    @classmethod
    def _a(cls):
        """Expected mora for "ア"."""
        return cls._mora("ア", None, "a")

    @classmethod
    def _gye(cls):
        """Expected mora for "ギェ"."""
        return cls._mora("ギェ", "gy", "e")

    @classmethod
    def _pause(cls):
        """Expected pause mora for "、"."""
        return cls._mora("、", None, "pau")

    @staticmethod
    def _phrase(mora, pause_mora=None, **extra):
        """Build a single-mora AccentPhrase with accent 1.

        ``extra`` is forwarded untouched so ``is_interrogative`` is only
        passed to the model when a test asks for it explicitly.
        """
        return AccentPhrase(moras=[mora], accent=1, pause_mora=pause_mora, **extra)

    # ------------------------------------------------------------------
    # Attribute-level checks
    # ------------------------------------------------------------------
    def test_phrase_length(self):
        """Each input yields the listed number of accent phrases."""
        expected_counts = [
            ("ア'/ア'", 2),
            ("ア'、ア'", 2),
            ("ア'/ア'/ア'/ア'/ア'", 5),
            ("ス'", 1),
            ("_ス'", 1),
            ("ギェ'", 1),
            ("ギェ'、ギェ'/ギェ'", 3),
        ]
        for kana, count in expected_counts:
            self.assertEqual(len(parse_kana(kana)), count)

    def test_accent(self):
        """The first phrase reports the listed accent value."""
        expected_accents = [
            ("シャ'シシュシェショ", 1),
            ("シャ'_シシュシェショ", 1),
            ("シャシ'シュシェショ", 2),
            ("シャ_シ'シュシェショ", 2),
            ("シャシシュ'シェショ", 3),
            ("シャ_シシュ'シェショ", 3),
            ("シャシシュシェショ'", 5),
            ("シャ_シシュシェショ'", 5),
        ]
        for kana, accent in expected_accents:
            self.assertEqual(parse_kana(kana)[0].accent, accent)

    def test_mora_length(self):
        """Every listed input produces five moras in its first phrase."""
        five_mora_inputs = [
            "シャ'シシュシェショ",
            "シャ'_シシュシェショ",
            "シャシ'シュシェショ",
            "シャ_シ'シュシェショ",
            "シャシシュシェショ'",
            "シャ_シシュシェショ'",
        ]
        for kana in five_mora_inputs:
            self.assertEqual(len(parse_kana(kana)[0].moras), 5)

    def test_pause(self):
        """Only the phrase followed by "、" carries a pause mora."""
        for index in (0, 1):
            self.assertIsNone(parse_kana("ア'/ア'")[index].pause_mora)
        self.assertIsNotNone(parse_kana("ア'、ア'")[0].pause_mora)
        self.assertIsNone(parse_kana("ア'、ア'")[1].pause_mora)

    def test_unvoice(self):
        """A leading underscore changes the expected vowel from "u" to "U"."""
        for kana, vowel in (("ス'", "u"), ("_ス'", "U")):
            self.assertEqual(parse_kana(kana)[0].moras[0].vowel, vowel)

    def test_roundtrip(self):
        """``create_kana(parse_kana(text))`` reproduces ``text`` exactly."""
        samples = [
            "コンニチワ'",
            "ワタシワ'/シャチョオデ'_ス",
            "トテモ'、エラ'インデス",
            # single-character inputs
            "ヲ'",
            "ェ'",
        ]
        for kana in samples:
            self.assertEqual(create_kana(parse_kana(kana)), kana)

    # ------------------------------------------------------------------
    # Whole-structure checks
    # ------------------------------------------------------------------
    def _accent_phrase_marks_base(
        self, text: str, expected_accent_phrases: List[AccentPhrase]
    ) -> None:
        """Parse ``text`` with ``kana_parser.parse_kana`` and compare the full result."""
        self.assertEqual(expected_accent_phrases, kana_parser.parse_kana(text))

    def test_accent_phrase_marks(self):
        """Full expected structures for inputs using "/", "、" and "_"."""
        a, gye, pause, phrase = self._a, self._gye, self._pause, self._phrase

        self._accent_phrase_marks_base(
            text="ア'/ア'",
            expected_accent_phrases=[phrase(a()), phrase(a())],
        )
        self._accent_phrase_marks_base(
            text="ア'、ア'",
            expected_accent_phrases=[phrase(a(), pause()), phrase(a())],
        )
        self._accent_phrase_marks_base(
            text="ア'/ア'/ア'/ア'/ア'",
            expected_accent_phrases=[
                phrase(a()),
                phrase(a()),
                phrase(a()),
                phrase(a()),
                phrase(a()),
            ],
        )
        self._accent_phrase_marks_base(
            text="ス'",
            expected_accent_phrases=[phrase(self._mora("ス", "s", "u"))],
        )
        self._accent_phrase_marks_base(
            text="_ス'",
            expected_accent_phrases=[phrase(self._mora("ス", "s", "U"))],
        )
        self._accent_phrase_marks_base(
            text="ギェ'",
            expected_accent_phrases=[phrase(gye())],
        )
        self._accent_phrase_marks_base(
            text="ギェ'、ギェ'/ギェ'",
            expected_accent_phrases=[
                phrase(gye(), pause()),
                phrase(gye()),
                phrase(gye()),
            ],
        )

    def test_interrogative_accent_phrase_marks(self):
        """Full expected structures for inputs that contain "?" marks."""
        a, gye, pause, phrase = self._a, self._gye, self._pause, self._phrase

        self._accent_phrase_marks_base(
            text="ア'?",
            expected_accent_phrases=[phrase(a(), is_interrogative=True)],
        )
        self._accent_phrase_marks_base(
            text="ギェ'、ギェ'/ギェ'?",
            expected_accent_phrases=[
                phrase(gye(), pause()),
                phrase(gye()),
                phrase(gye(), is_interrogative=True),
            ],
        )
        self._accent_phrase_marks_base(
            text="ア'、ア'?、ア'?/ア'?",
            expected_accent_phrases=[
                phrase(a(), pause()),
                phrase(a(), pause(), is_interrogative=True),
                phrase(a(), is_interrogative=True),
                phrase(a(), is_interrogative=True),
            ],
        )
class TestParseKanaException(TestCase):
    """Tests that malformed kana strings raise ``ParseKanaError`` with the expected code."""

    def _assert_error_code(self, kana: str, code: ParseKanaErrorCode):
        """Require ``parse_kana(kana)`` to raise ParseKanaError carrying ``code``."""
        with self.assertRaises(ParseKanaError) as raised:
            parse_kana(kana)
        # Checked after the ``with`` block: nothing following the raising
        # call inside the block would execute.
        self.assertEqual(raised.exception.errcode, code)

    def test_exceptions(self):
        """Check error codes for a table of bad inputs, then error details."""
        expected_codes = [
            ("アクセント", ParseKanaErrorCode.ACCENT_NOTFOUND),
            ("'アクセント", ParseKanaErrorCode.ACCENT_TOP),
            ("ア'ク'セント", ParseKanaErrorCode.ACCENT_TWICE),
            ("ひ'らがな", ParseKanaErrorCode.UNKNOWN_TEXT),
            ("__ス'", ParseKanaErrorCode.UNKNOWN_TEXT),
            ("ア'/", ParseKanaErrorCode.EMPTY_PHRASE),
            ("/ア'", ParseKanaErrorCode.EMPTY_PHRASE),
            ("", ParseKanaErrorCode.EMPTY_PHRASE),
        ]
        for kana, code in expected_codes:
            self._assert_error_code(kana, code)

        def failure_of(parse, kana):
            """Return the ParseKanaError that ``parse(kana)`` must raise."""
            with self.assertRaises(ParseKanaError) as raised:
                parse(kana)
            return raised.exception

        # The error also carries keyword details about what went wrong.
        failure = failure_of(parse_kana, "ヒト'ツメ/フタツメ")
        self.assertEqual(failure.errcode, ParseKanaErrorCode.ACCENT_NOTFOUND)
        self.assertEqual(failure.kwargs, {"text": "フタツメ"})

        failure = failure_of(parse_kana, "ア'/")
        self.assertEqual(failure.errcode, ParseKanaErrorCode.EMPTY_PHRASE)
        self.assertEqual(failure.kwargs, {"position": "2"})

        # This check goes to the parser module directly, not the wrapper.
        failure = failure_of(kana_parser.parse_kana, "ア?ア'")
        self.assertEqual(
            failure.errcode, ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END
        )
class TestCreateKana(TestCase):
    """Tests of ``create_kana`` on hand-built accent phrases."""

    def test_create_kana_interrogative(self):
        """Setting ``is_interrogative`` on the last phrase appends "?" to the output."""

        def mora(text, consonant, vowel):
            """Build a Mora whose lengths and pitch are all 2.5.

            ``consonant_length`` is None when no consonant is given.
            """
            return Mora(
                text=text,
                consonant=consonant,
                consonant_length=None if consonant is None else 2.5,
                vowel=vowel,
                vowel_length=2.5,
                pitch=2.5,
            )

        def build_koreha_arimasuka():
            """Return two fresh phrases: コレワ and アリマスカ, both with accent 3."""
            first = AccentPhrase(
                moras=[
                    mora("コ", "k", "o"),
                    mora("レ", "r", "e"),
                    mora("ワ", "w", "a"),
                ],
                accent=3,
                pause_mora=None,
                is_interrogative=False,
            )
            second = AccentPhrase(
                moras=[
                    mora("ア", None, "a"),
                    mora("リ", "r", "i"),
                    mora("マ", "m", "a"),
                    mora("ス", "s", "U"),
                    mora("カ", "k", "a"),
                ],
                accent=3,
                pause_mora=None,
                is_interrogative=False,
            )
            return [first, second]

        def build_kya():
            """Return one fresh phrase: キャ followed by a "cl" mora."""
            closing = Mora(
                text="ッ",
                consonant=None,
                consonant_length=None,
                vowel="cl",
                vowel_length=0.1,
                pitch=0,
            )
            return [
                AccentPhrase(
                    moras=[mora("キャ", "ky", "a"), closing],
                    accent=1,
                    pause_mora=None,
                    is_interrogative=False,
                ),
            ]

        # (factory, expected kana as built, expected kana once the last
        # phrase is flagged interrogative)
        cases = [
            (build_koreha_arimasuka, "コレワ'/アリマ'_スカ", "コレワ'/アリマ'_スカ?"),
            (build_kya, "キャ'ッ", "キャ'ッ?"),
        ]
        for build, plain_kana, question_kana in cases:
            self.assertEqual(create_kana(build()), plain_kana)

            # Rebuild before flagging the last phrase.
            questioned = build()
            questioned[-1].is_interrogative = True
            self.assertEqual(create_kana(questioned), question_kana)