codemurt committed on
Commit
55906f9
·
verified ·
1 Parent(s): 6c5b431

Update char_tokenizer.py

Browse files
Files changed (1) hide show
  1. char_tokenizer.py +2 -12
char_tokenizer.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- Copypasted from
3
  https://huggingface.co/IlyaGusev/ru-word-stress-transformer/blob/main/char_tokenizer.py
4
  with Apache 2.0 license
5
  """
@@ -39,17 +39,7 @@ class CharTokenizer(PreTrainedTokenizer):
39
  do_lower_case=False,
40
  *args,
41
  **kwargs
42
- ):
43
- self.do_lower_case = do_lower_case
44
- self.space_token = space_token
45
-
46
- if not vocab_file or not os.path.isfile(vocab_file):
47
- self.vocab = OrderedDict()
48
- self.ids_to_tokens = OrderedDict()
49
- else:
50
- self.vocab = load_vocab(vocab_file)
51
- self.ids_to_tokens = OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
52
-
53
  super().__init__(
54
  pad_token=pad_token,
55
  unk_token=unk_token,
 
1
  """
2
+ Copypasted and updated from
3
  https://huggingface.co/IlyaGusev/ru-word-stress-transformer/blob/main/char_tokenizer.py
4
  with Apache 2.0 license
5
  """
 
39
  do_lower_case=False,
40
  *args,
41
  **kwargs
42
+ ):
 
 
 
 
 
 
 
 
 
 
43
  super().__init__(
44
  pad_token=pad_token,
45
  unk_token=unk_token,