chmielvu committed on
Commit
0b65f6d
·
verified ·
1 Parent(s): 322dce7

Add tokenizer compatibility shim

Browse files
Files changed (1) hide show
  1. sitecustomize.py +16 -0
sitecustomize.py CHANGED
@@ -5,6 +5,13 @@ try:
5
  except Exception: # pragma: no cover
6
  huggingface_hub = None
7
 
 
 
 
 
 
 
 
8
 
9
  if huggingface_hub is not None and not hasattr(huggingface_hub, "HfFolder"):
10
  class HfFolder:
@@ -27,3 +34,12 @@ if huggingface_hub is not None and not hasattr(huggingface_hub, "HfFolder"):
27
  cls.path_token.unlink()
28
 
29
  huggingface_hub.HfFolder = HfFolder
 
 
 
 
 
 
 
 
 
 
5
  except Exception: # pragma: no cover
6
  huggingface_hub = None
7
 
8
# Optionally pull in the tokenizer base classes; this shim must import
# cleanly even on machines where transformers is absent or broken, so any
# failure simply leaves all three names as None.
try:
    from transformers import (
        PreTrainedTokenizer,
        PreTrainedTokenizerBase,
        PreTrainedTokenizerFast,
    )
except Exception:  # pragma: no cover
    PreTrainedTokenizer = PreTrainedTokenizerBase = PreTrainedTokenizerFast = None
15
 
16
  if huggingface_hub is not None and not hasattr(huggingface_hub, "HfFolder"):
17
  class HfFolder:
 
34
  cls.path_token.unlink()
35
 
36
  huggingface_hub.HfFolder = HfFolder
37
+
38
+
39
def _compat_batch_encode_plus(self, *args, **kwargs):
    """Compatibility stand-in for the removed ``batch_encode_plus``.

    Delegates straight to the tokenizer's ``__call__``, which in modern
    transformers accepts the same batch inputs, and returns its result
    unchanged.
    """
    encoded = self(*args, **kwargs)
    return encoded
41
+
42
+
43
# Reattach batch_encode_plus onto every tokenizer class that imported
# successfully but lost the method in a newer transformers release.
for tokenizer_cls in (PreTrainedTokenizerBase, PreTrainedTokenizer, PreTrainedTokenizerFast):
    if tokenizer_cls is None or hasattr(tokenizer_cls, "batch_encode_plus"):
        continue
    tokenizer_cls.batch_encode_plus = _compat_batch_encode_plus