Spaces:

2ndelement
/

voicevox

Build error

App Files Files Community

voicevox / voicevox_engine /kana_parser.py

2ndelement

init

f1f433f almost 3 years ago

raw

history blame contribute delete

5.21 kB

	from typing import List, Optional

	from .model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
	from .mora_list import openjtalk_text2mora

	LOOP_LIMIT = 300
	UNVOICE_SYMBOL = "_"
	ACCENT_SYMBOL = "'"
	NOPAUSE_DELIMITER = "/"
	PAUSE_DELIMITER = "、"
	WIDE_INTERROGATION_MARK = "？"

	text2mora_with_unvoice = {}
	for text, (consonant, vowel) in openjtalk_text2mora.items():
	text2mora_with_unvoice[text] = Mora(
	text=text,
	consonant=consonant if len(consonant) > 0 else None,
	consonant_length=0 if len(consonant) > 0 else None,
	vowel=vowel,
	vowel_length=0,
	pitch=0,
	is_interrogative=False,
	)
	if vowel in ["a", "i", "u", "e", "o"]:
	text2mora_with_unvoice[UNVOICE_SYMBOL + text] = Mora(
	text=text,
	consonant=consonant if len(consonant) > 0 else None,
	consonant_length=0 if len(consonant) > 0 else None,
	vowel=vowel.upper(),
	vowel_length=0,
	pitch=0,
	is_interrogative=False,
	)


	def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
	"""
	longest matchにより読み仮名からAccentPhraseを生成
	入力長Nに対し計算量O(N^2)
	"""
	accent_index: Optional[int] = None
	moras: List[Mora] = []

	base_index = 0 # パース開始位置。ここから右の文字列をstackに詰めていく。
	stack = "" # 保留中の文字列
	matched_text: Optional[str] = None # 保留中の文字列内で最後にマッチした仮名

	outer_loop = 0
	while base_index < len(phrase):
	outer_loop += 1
	if phrase[base_index] == ACCENT_SYMBOL:
	if len(moras) == 0:
	raise ParseKanaError(ParseKanaErrorCode.ACCENT_TOP, text=phrase)
	if accent_index is not None:
	raise ParseKanaError(ParseKanaErrorCode.ACCENT_TWICE, text=phrase)
	accent_index = len(moras)
	base_index += 1
	continue
	for watch_index in range(base_index, len(phrase)):
	if phrase[watch_index] == ACCENT_SYMBOL:
	break
	# 普通の文字の場合
	stack += phrase[watch_index]
	if stack in text2mora_with_unvoice:
	matched_text = stack
	# push mora
	if matched_text is None:
	raise ParseKanaError(ParseKanaErrorCode.UNKNOWN_TEXT, text=stack)
	else:
	moras.append(text2mora_with_unvoice[matched_text].copy(deep=True))
	base_index += len(matched_text)
	stack = ""
	matched_text = None
	if outer_loop > LOOP_LIMIT:
	raise ParseKanaError(ParseKanaErrorCode.INFINITE_LOOP)
	if accent_index is None:
	raise ParseKanaError(ParseKanaErrorCode.ACCENT_NOTFOUND, text=phrase)
	else:
	return AccentPhrase(moras=moras, accent=accent_index, pause_mora=None)


	def parse_kana(text: str) -> List[AccentPhrase]:
	"""
	AquesTalkライクな読み仮名をパースして音長・音高未指定のaccent phraseに変換
	"""

	parsed_results: List[AccentPhrase] = []
	phrase_base = 0
	if len(text) == 0:
	raise ParseKanaError(ParseKanaErrorCode.EMPTY_PHRASE, position=1)

	for i in range(len(text) + 1):
	if i == len(text) or text[i] in [PAUSE_DELIMITER, NOPAUSE_DELIMITER]:
	phrase = text[phrase_base:i]
	if len(phrase) == 0:
	raise ParseKanaError(
	ParseKanaErrorCode.EMPTY_PHRASE,
	position=str(len(parsed_results) + 1),
	)
	phrase_base = i + 1

	is_interrogative = WIDE_INTERROGATION_MARK in phrase
	if is_interrogative:
	if WIDE_INTERROGATION_MARK in phrase[:-1]:
	raise ParseKanaError(
	ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END, text=phrase
	)
	phrase = phrase.replace(WIDE_INTERROGATION_MARK, "")

	accent_phrase: AccentPhrase = _text_to_accent_phrase(phrase)
	if i < len(text) and text[i] == PAUSE_DELIMITER:
	accent_phrase.pause_mora = Mora(
	text="、",
	consonant=None,
	consonant_length=None,
	vowel="pau",
	vowel_length=0,
	pitch=0,
	)
	accent_phrase.is_interrogative = is_interrogative

	parsed_results.append(accent_phrase)

	return parsed_results


	def create_kana(accent_phrases: List[AccentPhrase]) -> str:
	text = ""
	for i, phrase in enumerate(accent_phrases):
	for j, mora in enumerate(phrase.moras):
	if mora.vowel in ["A", "I", "U", "E", "O"]:
	text += UNVOICE_SYMBOL

	text += mora.text
	if j + 1 == phrase.accent:
	text += ACCENT_SYMBOL

	if phrase.is_interrogative:
	text += WIDE_INTERROGATION_MARK

	if i < len(accent_phrases) - 1:
	if phrase.pause_mora is None:
	text += NOPAUSE_DELIMITER
	else:
	text += PAUSE_DELIMITER
	return text