edit cleaners
Browse files
- saved_model/config.json +2 -2
- text/cleaners.py +12 -35
saved_model/config.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:284f7d38e892008e195482b8490359d503f634fbc8b4b92ffa333b56848e6678
|
| 3 |
+
size 1781
|
text/cleaners.py
CHANGED
|
@@ -1,40 +1,17 @@
|
|
| 1 |
import re
|
| 2 |
-
import pyopenjtalk
|
| 3 |
-
from unidecode import unidecode
|
| 4 |
-
from text.japanese import _japanese_marks
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
def japanese_triphone_cleaners(text):
    """Turn Japanese text into context-dependent (triphone-style) labels.

    The input is split on ``_japanese_marks``; every resulting segment is
    converted to phonemes with ``pyopenjtalk.g2p(..., kana=False)``. Spaces
    are removed from the phoneme string, the capital vowels ``A I U E O``
    are lowered, and ``ch``/``sh``/``cl`` are rewritten to the
    single symbols ``ʧ``/``ʃ``/``Q``.

    Each *character* of the resulting string is then emitted as one label
    of the form ``left-phone+right``; the ``left-`` part is omitted for the
    first character and the ``+right`` part for the last one. Labels within
    a segment are joined by single spaces. The mark that followed the
    segment (if any) is appended after being passed through ``unidecode``
    with spaces stripped; no separator is inserted around the mark.

    Returns the concatenated string.
    """
    segments = re.split(_japanese_marks, text)
    marks = re.findall(_japanese_marks, text)

    vowel_caps = 'AIUEO'
    symbol_map = (('ch', 'ʧ'), ('sh', 'ʃ'), ('cl', 'Q'))

    pieces = []
    for idx, segment in enumerate(segments):
        phones = pyopenjtalk.g2p(segment, kana=False).replace(' ', '')
        for cap in vowel_caps:
            phones = phones.replace(cap, cap.lower())
        for digraph, symbol in symbol_map:
            phones = phones.replace(digraph, symbol)

        final = len(phones) - 1
        labels = []
        for pos, phone in enumerate(phones):
            # Context is added only on the sides where a neighbour exists,
            # so a single-character string yields the bare phone.
            left = f'{phones[pos - 1]}-' if pos > 0 else ''
            right = f'+{phones[pos + 1]}' if pos < final else ''
            labels.append(f'{left}{phone}{right}')

        pieces.append(' '.join(labels))
        # re.split yields one more segment than there are marks, so the
        # last segment has no trailing mark to re-attach.
        if idx < len(marks):
            pieces.append(unidecode(marks[idx]).replace(' ', ''))

    return ''.join(pieces)
|
|
|
|
| 1 |
import re
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
def japanese_cleaners(text):
    """Romanize Japanese text and make sure it ends with punctuation.

    The text is converted with ``japanese_to_romaji_with_accent`` (imported
    from ``text.japanese`` at call time). A ``'.'`` is appended when the
    converted string is empty or when its last character is an ASCII letter
    (``[A-Za-z]``); otherwise the converted string is returned unchanged.
    """
    from text.japanese import japanese_to_romaji_with_accent

    romaji = japanese_to_romaji_with_accent(text)
    if not romaji:
        return romaji + '.'
    # Only the final character is inspected; re.match anchors at its start.
    if re.match('[A-Za-z]', romaji[-1]):
        return romaji + '.'
    return romaji
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
def japanese_cleaners2(text):
    """Run ``japanese_cleaners`` with extra symbol normalisation.

    Before cleaning, ``'・・・'`` becomes ``'…'`` and any remaining ``'・'``
    becomes a space. After cleaning, ``'ts'`` is replaced by ``'ʦ'`` and
    ``'...'`` by ``'…'``; then the characters ``( ) [ ] { }`` are removed
    and ``'*'`` is turned into a space.
    """
    prepared = text.replace('・・・', '…').replace('・', ' ')

    cleaned = japanese_cleaners(prepared)
    cleaned = cleaned.replace('ts', 'ʦ').replace('...', '…')

    # Single-character substitutions happen after the multi-character ones,
    # matching the original ordering; '*' maps to ' ', brackets are deleted.
    single_chars = str.maketrans('*', ' ', '()[]{}')
    text = cleaned.translate(single_chars)
    return text
|