Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- data/.ipynb_checkpoints/OOD_dummy-checkpoint.txt +2 -0
- data/.ipynb_checkpoints/add_phones-checkpoint.py +42 -0
- data/.ipynb_checkpoints/train_list-checkpoint.txt +0 -0
- data/.ipynb_checkpoints/val_list-checkpoint.txt +0 -0
- data/train_list.txt +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part003_02.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part005_01.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part005_03.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part006_02.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part020_02.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_00.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_01.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_05.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_07.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_08.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_09.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_10.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part025_02.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part030_02.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part030_03.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part042_02.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part049_02.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part049_03.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part053_02.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part053_03.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part053_07.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_01.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_03.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_05.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_06.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_07.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_08.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part056_00.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part056_05.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part056_06.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part057_01.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part057_05.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part057_06.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part057_08.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part057_09.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part060_04.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part060_06.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part060_07.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part069_02.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part069_03.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part071_00.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part071_02.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part071_06.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part071_08.wav +0 -0
- data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part072_00.wav +0 -0
data/.ipynb_checkpoints/OOD_dummy-checkpoint.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
This is an out-of-domain sentence.|
|
| 2 |
+
This is an out-of-domain sentence.|
|
data/.ipynb_checkpoints/add_phones-checkpoint.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re, unicodedata
|
| 2 |
+
|
| 3 |
+
# Symbol inventory: one padding symbol, then punctuation, ASCII letters and
# IPA characters. Order matters, because a symbol's position in `symbols` is
# the integer ID stored for it in `dicts`.
_pad = "$"
_punctuation = ';:,.!?¡¿—…"«»“” '
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
_letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"

# Export all symbols:
symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)

# Map every symbol to its index in `symbols`. A symbol that occurs more than
# once keeps its last index, exactly as the earlier index-based loop did.
dicts = {symbol: index for index, symbol in enumerate(symbols)}
|
| 14 |
+
|
| 15 |
+
class TextCleaner:
    """Convert text into a list of integer symbol IDs.

    * Normalises text to NFC so pre-composed glyphs match the lookup keys.
    * Tokenises with `_EVENT_RE`: a whole ``<...>`` tag (e.g. ``<evt_gasp>``)
      is one token, otherwise every character is its own token.
    * Tokens missing from the lookup map to `unk_id`; nothing is printed.

    `unk_id` is the ID stored under ``"<unk>"`` when the lookup has that key,
    and 0 otherwise.
    """

    # Match <evt_xxx> or a single char. "." does not match "\n", so newline
    # characters yield no token (and therefore no ID) at all.
    _EVENT_RE = re.compile(r"<[^>]+>|.")

    def __init__(self, lookup=None):
        """Set up the token -> ID table.

        Args:
            lookup: Optional mapping from token to integer ID. When omitted,
                the module-level `dicts` table is used.
        """
        self.lookup = dicts if lookup is None else lookup
        # Resolve the unknown-token ID from the table instead of hard-coding
        # it. The fallback of 0 keeps the previous behaviour for tables that
        # have no "<unk>" entry.
        self.unk_id = self.lookup.get("<unk>", 0)

    def __call__(self, text: str):
        """Return the IDs of all tokens in `text`, in order of appearance.

        Args:
            text: Input string; it is NFC-normalised before tokenising.

        Returns:
            A list of integer IDs, one per token; unknown tokens contribute
            `unk_id`.
        """
        text = unicodedata.normalize("NFC", text)
        unk_id = self.unk_id
        return [self.lookup.get(tok, unk_id) for tok in self._EVENT_RE.findall(text)]
|
| 34 |
+
|
| 35 |
+
# Scan the training list and report every token that has no entry in the
# symbol table.
tc = TextCleaner()
miss = {}  # unknown token -> number of occurrences

with open("/home/ubuntu/styletts2-ft/data/train_list.txt", encoding="utf-8") as f:
    for line in f:
        fields = line.split("|")
        if len(fields) < 2:
            # Blank or malformed line: there is no second field to check.
            continue
        # Tokenise the same way TextCleaner.__call__ does, so the report
        # covers exactly the tokens that would fall back to the unknown ID.
        text = unicodedata.normalize("NFC", fields[1])
        for tok in tc._EVENT_RE.findall(text):
            if tok not in tc.lookup:
                miss[tok] = miss.get(tok, 0) + 1
print("Unknown chars left:", list(miss))
|
data/.ipynb_checkpoints/train_list-checkpoint.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/.ipynb_checkpoints/val_list-checkpoint.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/train_list.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part003_02.wav
ADDED
|
Binary file (45.2 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part005_01.wav
ADDED
|
Binary file (52.9 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part005_03.wav
ADDED
|
Binary file (53.9 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part006_02.wav
ADDED
|
Binary file (95.2 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part020_02.wav
ADDED
|
Binary file (74.1 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_00.wav
ADDED
|
Binary file (80.8 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_01.wav
ADDED
|
Binary file (79.8 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_05.wav
ADDED
|
Binary file (81.7 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_07.wav
ADDED
|
Binary file (69.3 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_08.wav
ADDED
|
Binary file (72.1 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_09.wav
ADDED
|
Binary file (37.5 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part024_10.wav
ADDED
|
Binary file (88.5 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part025_02.wav
ADDED
|
Binary file (82.7 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part030_02.wav
ADDED
|
Binary file (71.2 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part030_03.wav
ADDED
|
Binary file (71.2 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part042_02.wav
ADDED
|
Binary file (83.7 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part049_02.wav
ADDED
|
Binary file (99.1 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part049_03.wav
ADDED
|
Binary file (99.1 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part053_02.wav
ADDED
|
Binary file (90.4 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part053_03.wav
ADDED
|
Binary file (80.8 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part053_07.wav
ADDED
|
Binary file (32.7 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_01.wav
ADDED
|
Binary file (51.9 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_03.wav
ADDED
|
Binary file (53.9 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_05.wav
ADDED
|
Binary file (65.4 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_06.wav
ADDED
|
Binary file (77 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_07.wav
ADDED
|
Binary file (44.3 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part055_08.wav
ADDED
|
Binary file (77 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part056_00.wav
ADDED
|
Binary file (61.6 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part056_05.wav
ADDED
|
Binary file (95.2 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part056_06.wav
ADDED
|
Binary file (34.6 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part057_01.wav
ADDED
|
Binary file (64.5 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part057_05.wav
ADDED
|
Binary file (55.8 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part057_06.wav
ADDED
|
Binary file (48.1 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part057_08.wav
ADDED
|
Binary file (73.1 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part057_09.wav
ADDED
|
Binary file (73.1 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part060_04.wav
ADDED
|
Binary file (59.7 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part060_06.wav
ADDED
|
Binary file (18.3 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part060_07.wav
ADDED
|
Binary file (18.3 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part069_02.wav
ADDED
|
Binary file (94.3 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part069_03.wav
ADDED
|
Binary file (58.7 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part071_00.wav
ADDED
|
Binary file (70.2 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part071_02.wav
ADDED
|
Binary file (52 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part071_06.wav
ADDED
|
Binary file (61.6 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part071_08.wav
ADDED
|
Binary file (86.6 kB). View file
|
|
|
data/wavs/wavs/0024fb6512bb5f24dc4bb935302157f9_12f9320f_part072_00.wav
ADDED
|
Binary file (75 kB). View file
|
|
|