drixo committed on
Commit
f4d6261
·
verified ·
1 Parent(s): 430a758

Update tokenizer.py

Browse files
Files changed (1) hide show
  1. tokenizer.py +4 -10
tokenizer.py CHANGED
@@ -1,20 +1,14 @@
1
  import sentencepiece as spm
2
 
 
3
  class TTSTokenizer:
4
  def __init__(self, model_path):
5
  self.sp = spm.SentencePieceProcessor()
6
  self.sp.load(model_path)
7
 
8
  def encode(self, text):
9
- return self.sp.encode(text)
10
-
11
- def decode(self, ids):
12
- return self.sp.decode(ids)
13
 
14
- spm.SentencePieceTrainer.train(
15
- input="all_text.txt",
16
- model_prefix="tts_tokenizer",
17
- vocab_size=8000,
18
- model_type="unigram"
19
- )
20
 
 
1
  import sentencepiece as spm
2
 
3
+
4
  class TTSTokenizer:
5
  def __init__(self, model_path):
6
  self.sp = spm.SentencePieceProcessor()
7
  self.sp.load(model_path)
8
 
9
  def encode(self, text):
10
+ return self.sp.encode(text, out_type=int)
 
 
 
11
 
12
+ def decode(self, tokens):
13
+ return self.sp.decode(tokens)
 
 
 
 
14