Fix transformers v5 auto_map + HF init

Browse files
- tokenizer/hf_tokenizer.py (+2 −0)
- tokenizer_config.json (+4 −1)
tokenizer/hf_tokenizer.py  CHANGED

@@ -16,6 +16,8 @@ class MGPT2Tokenizer(PreTrainedTokenizer):
     """

     model_input_names = ["input_ids", "attention_mask"]
+    # Let `PreTrainedTokenizer.from_pretrained()` know which file it should pass to `__init__`.
+    vocab_files_names = {"model_file": "tokenizer.model"}

     def __init__(self, model_file: str, **kwargs: Any):
         if not model_file.endswith(".model"):
tokenizer_config.json  CHANGED

@@ -17,6 +17,9 @@
   "tokenizer_class": "MGPT2Tokenizer",
   "unk_token": null,
   "auto_map": {
-    "AutoTokenizer": "tokenization_mgpt2.MGPT2Tokenizer"
+    "AutoTokenizer": [
+      "tokenization_mgpt2.MGPT2Tokenizer",
+      null
+    ]
   }
 }