stephenhoang committed on
Commit
56d1e65
·
verified ·
1 Parent(s): 3b02d3a

Update meldataset.py

Browse files
Files changed (1) hide show
  1. meldataset.py +8 -5
meldataset.py CHANGED
@@ -97,6 +97,7 @@ def preprocess(wave: np.ndarray, sample_rate: int = 24000):
97
  mel = (torch.log(mel + 1e-5) - mean) / std
98
  return mel.unsqueeze(0) # (1, n_mels, T)
99
 
 
100
  class TextCleaner:
101
  def __init__(self, symbol_dict, debug=True):
102
  self.symbol_dict = symbol_dict
@@ -105,15 +106,16 @@ class TextCleaner:
105
  def __call__(self, text: str):
106
  text = (text or "").strip()
107
 
108
- tokens = list(text) # ✅ luôn char-level
 
109
 
110
  ids = []
111
  missing = []
112
- for t in tokens:
113
- if t in self.symbol_dict:
114
- ids.append(self.symbol_dict[t])
115
  else:
116
- missing.append(t)
117
 
118
  if self.debug and missing:
119
  print(f"[TextCleaner] missing {len(missing)} symbols. sample={missing[:30]}")
@@ -122,6 +124,7 @@ class TextCleaner:
122
 
123
 
124
 
 
125
  class FilePathDataset(torch.utils.data.Dataset):
126
  def __init__(
127
  self,
 
97
  mel = (torch.log(mel + 1e-5) - mean) / std
98
  return mel.unsqueeze(0) # (1, n_mels, T)
99
 
100
+
101
  class TextCleaner:
102
  def __init__(self, symbol_dict, debug=True):
103
  self.symbol_dict = symbol_dict
 
106
  def __call__(self, text: str):
107
  text = (text or "").strip()
108
 
109
+ # ✅ LUÔN char-level như lúc build dataset
110
+ tokens = list(text)
111
 
112
  ids = []
113
  missing = []
114
+ for ch in tokens:
115
+ if ch in self.symbol_dict:
116
+ ids.append(self.symbol_dict[ch])
117
  else:
118
+ missing.append(ch)
119
 
120
  if self.debug and missing:
121
  print(f"[TextCleaner] missing {len(missing)} symbols. sample={missing[:30]}")
 
124
 
125
 
126
 
127
+
128
  class FilePathDataset(torch.utils.data.Dataset):
129
  def __init__(
130
  self,