ace-1 committed on
Commit
16fed5d
·
verified ·
1 Parent(s): 24cf5c6

Fix transformers v5 auto_map + HF init

Browse files
tokenizer/hf_tokenizer.py CHANGED
@@ -16,6 +16,8 @@ class MGPT2Tokenizer(PreTrainedTokenizer):
16
  """
17
 
18
  model_input_names = ["input_ids", "attention_mask"]
 
 
19
 
20
  def __init__(self, model_file: str, **kwargs: Any):
21
  if not model_file.endswith(".model"):
 
16
  """
17
 
18
  model_input_names = ["input_ids", "attention_mask"]
19
+ # Let `PreTrainedTokenizer.from_pretrained()` know which file it should pass to `__init__`.
20
+ vocab_files_names = {"model_file": "tokenizer.model"}
21
 
22
  def __init__(self, model_file: str, **kwargs: Any):
23
  if not model_file.endswith(".model"):
tokenizer_config.json CHANGED
@@ -17,6 +17,9 @@
17
  "tokenizer_class": "MGPT2Tokenizer",
18
  "unk_token": null,
19
  "auto_map": {
20
- "AutoTokenizer": "tokenization_mgpt2.MGPT2Tokenizer"
 
 
 
21
  }
22
  }
 
17
  "tokenizer_class": "MGPT2Tokenizer",
18
  "unk_token": null,
19
  "auto_map": {
20
+ "AutoTokenizer": [
21
+ "tokenization_mgpt2.MGPT2Tokenizer",
22
+ null
23
+ ]
24
  }
25
  }