Spaces:
Sleeping
Sleeping
Update meldataset.py
Browse files - meldataset.py +8 -5
meldataset.py
CHANGED
|
@@ -97,6 +97,7 @@ def preprocess(wave: np.ndarray, sample_rate: int = 24000):
|
|
| 97 |
mel = (torch.log(mel + 1e-5) - mean) / std
|
| 98 |
return mel.unsqueeze(0) # (1, n_mels, T)
|
| 99 |
|
|
|
|
| 100 |
class TextCleaner:
|
| 101 |
def __init__(self, symbol_dict, debug=True):
|
| 102 |
self.symbol_dict = symbol_dict
|
|
@@ -105,15 +106,16 @@ class TextCleaner:
|
|
| 105 |
def __call__(self, text: str):
|
| 106 |
text = (text or "").strip()
|
| 107 |
|
| 108 |
-
|
|
|
|
| 109 |
|
| 110 |
ids = []
|
| 111 |
missing = []
|
| 112 |
-
for
|
| 113 |
-
if
|
| 114 |
-
ids.append(self.symbol_dict[
|
| 115 |
else:
|
| 116 |
-
missing.append(
|
| 117 |
|
| 118 |
if self.debug and missing:
|
| 119 |
print(f"[TextCleaner] missing {len(missing)} symbols. sample={missing[:30]}")
|
|
@@ -122,6 +124,7 @@ class TextCleaner:
|
|
| 122 |
|
| 123 |
|
| 124 |
|
|
|
|
| 125 |
class FilePathDataset(torch.utils.data.Dataset):
|
| 126 |
def __init__(
|
| 127 |
self,
|
|
|
|
| 97 |
mel = (torch.log(mel + 1e-5) - mean) / std
|
| 98 |
return mel.unsqueeze(0) # (1, n_mels, T)
|
| 99 |
|
| 100 |
+
|
| 101 |
class TextCleaner:
|
| 102 |
def __init__(self, symbol_dict, debug=True):
|
| 103 |
self.symbol_dict = symbol_dict
|
|
|
|
| 106 |
def __call__(self, text: str):
|
| 107 |
text = (text or "").strip()
|
| 108 |
|
| 109 |
+
# ✅ LUÔN char-level như lúc build dataset
|
| 110 |
+
tokens = list(text)
|
| 111 |
|
| 112 |
ids = []
|
| 113 |
missing = []
|
| 114 |
+
for ch in tokens:
|
| 115 |
+
if ch in self.symbol_dict:
|
| 116 |
+
ids.append(self.symbol_dict[ch])
|
| 117 |
else:
|
| 118 |
+
missing.append(ch)
|
| 119 |
|
| 120 |
if self.debug and missing:
|
| 121 |
print(f"[TextCleaner] missing {len(missing)} symbols. sample={missing[:30]}")
|
|
|
|
| 124 |
|
| 125 |
|
| 126 |
|
| 127 |
+
|
| 128 |
class FilePathDataset(torch.utils.data.Dataset):
|
| 129 |
def __init__(
|
| 130 |
self,
|