Sayan01 committed on
Commit
212787b
·
verified ·
1 Parent(s): 1cad518

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +1 -10
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +10 -78
added_tokens.json CHANGED
@@ -1,12 +1,3 @@
1
  {
2
- "\t": 32109,
3
- "\n": 32103,
4
- " ": 32106,
5
- "<": 32101,
6
- "[PAD]": 32100,
7
- "\\": 32107,
8
- "^": 32108,
9
- "`": 32105,
10
- "{": 32102,
11
- "}": 32104
12
  }
 
1
  {
2
+ "[PAD]": 32100
 
 
 
 
 
 
 
 
 
3
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,6 +1,13 @@
1
  {
2
- "add_prefix_space": true,
3
  "added_tokens_decoder": {
 
 
 
 
 
 
 
 
4
  "1": {
5
  "content": "</s>",
6
  "lstrip": false,
@@ -824,78 +831,6 @@
824
  "rstrip": false,
825
  "single_word": false,
826
  "special": true
827
- },
828
- "32101": {
829
- "content": "<",
830
- "lstrip": false,
831
- "normalized": true,
832
- "rstrip": false,
833
- "single_word": false,
834
- "special": false
835
- },
836
- "32102": {
837
- "content": "{",
838
- "lstrip": false,
839
- "normalized": true,
840
- "rstrip": false,
841
- "single_word": false,
842
- "special": false
843
- },
844
- "32103": {
845
- "content": "\n",
846
- "lstrip": false,
847
- "normalized": true,
848
- "rstrip": false,
849
- "single_word": false,
850
- "special": false
851
- },
852
- "32104": {
853
- "content": "}",
854
- "lstrip": false,
855
- "normalized": true,
856
- "rstrip": false,
857
- "single_word": false,
858
- "special": false
859
- },
860
- "32105": {
861
- "content": "`",
862
- "lstrip": false,
863
- "normalized": true,
864
- "rstrip": false,
865
- "single_word": false,
866
- "special": false
867
- },
868
- "32106": {
869
- "content": " ",
870
- "lstrip": false,
871
- "normalized": true,
872
- "rstrip": false,
873
- "single_word": false,
874
- "special": false
875
- },
876
- "32107": {
877
- "content": "\\",
878
- "lstrip": false,
879
- "normalized": true,
880
- "rstrip": false,
881
- "single_word": false,
882
- "special": false
883
- },
884
- "32108": {
885
- "content": "^",
886
- "lstrip": false,
887
- "normalized": true,
888
- "rstrip": false,
889
- "single_word": false,
890
- "special": false
891
- },
892
- "32109": {
893
- "content": "\t",
894
- "lstrip": false,
895
- "normalized": true,
896
- "rstrip": false,
897
- "single_word": false,
898
- "special": false
899
  }
900
  },
901
  "additional_special_tokens": [
@@ -1003,12 +938,9 @@
1003
  "clean_up_tokenization_spaces": true,
1004
  "eos_token": "</s>",
1005
  "extra_ids": 100,
1006
- "legacy": true,
1007
- "model_max_length": 2048,
1008
  "pad_token": "[PAD]",
1009
- "padding_side": "right",
1010
  "sp_model_kwargs": {},
1011
  "tokenizer_class": "T5Tokenizer",
1012
- "unk_token": "<unk>",
1013
- "use_fast": false
1014
  }
 
1
  {
 
2
  "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
  "1": {
12
  "content": "</s>",
13
  "lstrip": false,
 
831
  "rstrip": false,
832
  "single_word": false,
833
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
834
  }
835
  },
836
  "additional_special_tokens": [
 
938
  "clean_up_tokenization_spaces": true,
939
  "eos_token": "</s>",
940
  "extra_ids": 100,
941
+ "model_max_length": 512,
 
942
  "pad_token": "[PAD]",
 
943
  "sp_model_kwargs": {},
944
  "tokenizer_class": "T5Tokenizer",
945
+ "unk_token": "<unk>"
 
946
  }