Spaces:
Build error
Build error
| from abc import abstractmethod | |
| from enum import Enum | |
| from pathlib import Path | |
| from typing import List, Sequence | |
| import numpy | |
class BasePhoneme(object):
    """
    Abstract base class for the concrete phoneme classes.

    Subclasses are expected to provide the class attributes below and to
    override :meth:`convert`.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        All phoneme symbols accepted by the class.
    num_phoneme : int
        Number of entries in ``phoneme_list``.
    space_phoneme : str
        Phoneme symbol that stands for a pause (the equivalent of a comma).
    """

    phoneme_list: Sequence[str]
    num_phoneme: int
    space_phoneme: str

    def __init__(
        self,
        phoneme: str,
        start: float,
        end: float,
    ):
        self.phoneme = phoneme
        # Timestamps are stored rounded to two decimal places.
        self.start = numpy.round(start, decimals=2)
        self.end = numpy.round(end, decimals=2)

    def __repr__(self):
        return f"Phoneme(phoneme='{self.phoneme}', start={self.start}, end={self.end})"

    def __eq__(self, o: object):
        return isinstance(o, BasePhoneme) and (
            self.phoneme == o.phoneme and self.start == o.start and self.end == o.end
        )

    def verify(self):
        """
        Assert that this instance holds a phoneme known to the class.

        Raises
        ------
        AssertionError
            If ``self.phoneme`` is not contained in ``phoneme_list``.
        """
        assert self.phoneme in self.phoneme_list, f"{self.phoneme} is not defined."

    @property
    def phoneme_id(self):
        """
        Index of this phoneme inside ``phoneme_list``.

        Returns
        -------
        id : int
            Position of ``self.phoneme`` in ``phoneme_list``.

        Raises
        ------
        ValueError
            If ``self.phoneme`` is not contained in ``phoneme_list``.
        """
        return self.phoneme_list.index(self.phoneme)

    @property
    def duration(self):
        """
        Length of the phoneme.

        Returns
        -------
        duration : float
            ``end - start``, computed from the rounded timestamps.
        """
        return self.end - self.start

    @property
    def onehot(self):
        """
        One-hot encoding of this phoneme.

        Returns
        -------
        onehot : numpy.ndarray
            Boolean array of length ``num_phoneme`` that is ``True`` only at
            index ``phoneme_id``.
        """
        array = numpy.zeros(self.num_phoneme, dtype=bool)
        array[self.phoneme_id] = True
        return array

    @classmethod
    def parse(cls, s: str):
        """
        Build a phoneme instance from one whitespace-separated text line.

        Parameters
        ----------
        s : str
            Line of the form ``"<start> <end> <phoneme>"``.

        Returns
        -------
        phoneme : BasePhoneme
            Instance of ``cls`` created from the parsed fields.

        Raises
        ------
        IndexError
            If the line has fewer than three fields.
        ValueError
            If the first two fields cannot be converted to ``float``.

        Examples
        --------
        >>> BasePhoneme.parse('1.7425000 1.9125000 o:')
        Phoneme(phoneme='o:', start=1.74, end=1.91)
        """
        words = s.split()
        return cls(
            start=float(words[0]),
            end=float(words[1]),
            phoneme=words[2],
        )

    @classmethod
    @abstractmethod
    def convert(cls, phonemes: List["BasePhoneme"]) -> List["BasePhoneme"]:
        """
        Normalise a freshly parsed phoneme list; subclasses must override.

        Raises
        ------
        NotImplementedError
            Always, in this base implementation.
        """
        raise NotImplementedError

    @classmethod
    def load_lab_list(cls, path: Path):
        """
        Read a lab file and return its phonemes.

        Each non-blank line is parsed with :meth:`parse`, the resulting list
        is passed through :meth:`convert`, and every element is checked with
        :meth:`verify`.

        Parameters
        ----------
        path : Path
            Path of the lab file to read.

        Returns
        -------
        phonemes : List[BasePhoneme]
            The converted and verified phonemes.
        """
        # Skip blank and whitespace-only lines; parse() would otherwise fail
        # with an IndexError on them.
        phonemes = [cls.parse(s) for s in path.read_text().split("\n") if s.strip()]
        phonemes = cls.convert(phonemes)
        for phoneme in phonemes:
            phoneme.verify()
        return phonemes

    @classmethod
    def save_lab_list(cls, phonemes: List["BasePhoneme"], path: Path):
        """
        Write a list of phonemes to ``path`` in lab file format.

        One line is written per phoneme: start and end formatted with two
        decimals, then the phoneme symbol, separated by tab characters.

        Parameters
        ----------
        phonemes : List[BasePhoneme]
            Phonemes to write.
        path : Path
            Destination path of the lab file.
        """
        text = "\n".join(
            [
                f"{numpy.round(p.start, decimals=2):.2f}\t"
                f"{numpy.round(p.end, decimals=2):.2f}\t"
                f"{p.phoneme}"
                for p in phonemes
            ]
        )
        path.write_text(text)
class JvsPhoneme(BasePhoneme):
    """
    Phoneme set of the JVS (Japanese versatile speech) corpus.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        All phoneme symbols accepted by the class.
    num_phoneme : int
        Number of entries in ``phoneme_list``.
    space_phoneme : str
        Phoneme symbol that stands for a pause (the equivalent of a comma).
    """

    phoneme_list = (
        "pau",
        "I",
        "N",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["JvsPhoneme"]) -> List["JvsPhoneme"]:
        """
        Replace a leading and a trailing ``sil`` (silence) with ``space_phoneme``.

        The list elements are modified in place and the same list is returned.
        An empty list is returned unchanged.

        Parameters
        ----------
        phonemes : List[JvsPhoneme]
            Phonemes to convert.

        Returns
        -------
        phonemes : List[JvsPhoneme]
            The converted phonemes.
        """
        # An empty lab file yields an empty list; there is no edge to inspect.
        if not phonemes:
            return phonemes
        # Only the first and the last entry are checked, first one first.
        for edge in (phonemes[0], phonemes[-1]):
            if "sil" in edge.phoneme:
                edge.phoneme = cls.space_phoneme
        return phonemes
class OjtPhoneme(BasePhoneme):
    """
    Phoneme set used by OpenJTalk.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        All phoneme symbols accepted by the class.
    num_phoneme : int
        Number of entries in ``phoneme_list``.
    space_phoneme : str
        Phoneme symbol that stands for a pause (the equivalent of a comma).
    """

    phoneme_list = (
        "pau",
        "A",
        "E",
        "I",
        "N",
        "O",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gw",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "kw",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "ty",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["OjtPhoneme"]) -> List["OjtPhoneme"]:
        """
        Replace a leading and a trailing ``sil`` (silence) with ``space_phoneme``.

        The list elements are modified in place and the same list is returned.
        An empty list is returned unchanged.

        Parameters
        ----------
        phonemes : List[OjtPhoneme]
            Phonemes to convert.

        Returns
        -------
        phonemes : List[OjtPhoneme]
            The converted phonemes.
        """
        # An empty lab file yields an empty list; there is no edge to inspect.
        if not phonemes:
            return phonemes
        # Only the first and the last entry are checked, first one first.
        for edge in (phonemes[0], phonemes[-1]):
            if "sil" in edge.phoneme:
                edge.phoneme = cls.space_phoneme
        return phonemes
class PhonemeType(str, Enum):
    """String-valued identifiers of the supported phoneme sets."""

    # Each member's value equals its name, so members compare equal to the
    # plain strings "jvs" and "openjtalk".
    jvs = "jvs"
    openjtalk = "openjtalk"
# Lookup table from a PhonemeType member to the phoneme class implementing it.
phoneme_type_to_class = {
    PhonemeType.jvs: JvsPhoneme,
    PhonemeType.openjtalk: OjtPhoneme,
}