amnae committed on
Commit
7676e98
·
verified ·
1 Parent(s): 5863ca1

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +27 -0
  2. tokenizer_config.json +2 -1
tokenizer.json CHANGED
@@ -6964,6 +6964,12 @@
6964
  "id": "A",
6965
  "type_id": 0
6966
  }
 
 
 
 
 
 
6967
  }
6968
  ],
6969
  "pair": [
@@ -6979,6 +6985,12 @@
6979
  "type_id": 0
6980
  }
6981
  },
 
 
 
 
 
 
6982
  {
6983
  "SpecialToken": {
6984
  "id": "<s>",
@@ -6990,9 +7002,24 @@
6990
  "id": "B",
6991
  "type_id": 1
6992
  }
 
 
 
 
 
 
6993
  }
6994
  ],
6995
  "special_tokens": {
 
 
 
 
 
 
 
 
 
6996
  "<s>": {
6997
  "id": "<s>",
6998
  "ids": [
 
6964
  "id": "A",
6965
  "type_id": 0
6966
  }
6967
+ },
6968
+ {
6969
+ "SpecialToken": {
6970
+ "id": "</s>",
6971
+ "type_id": 0
6972
+ }
6973
  }
6974
  ],
6975
  "pair": [
 
6985
  "type_id": 0
6986
  }
6987
  },
6988
+ {
6989
+ "SpecialToken": {
6990
+ "id": "</s>",
6991
+ "type_id": 0
6992
+ }
6993
+ },
6994
  {
6995
  "SpecialToken": {
6996
  "id": "<s>",
 
7002
  "id": "B",
7003
  "type_id": 1
7004
  }
7005
+ },
7006
+ {
7007
+ "SpecialToken": {
7008
+ "id": "</s>",
7009
+ "type_id": 1
7010
+ }
7011
  }
7012
  ],
7013
  "special_tokens": {
7014
+ "</s>": {
7015
+ "id": "</s>",
7016
+ "ids": [
7017
+ 2
7018
+ ],
7019
+ "tokens": [
7020
+ "</s>"
7021
+ ]
7022
+ },
7023
  "<s>": {
7024
  "id": "<s>",
7025
  "ids": [
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": false,
4
  "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
@@ -6179,6 +6179,7 @@
6179
  "legacy": false,
6180
  "model_max_length": 1000000000000000019884624838656,
6181
  "pad_token": "</s>",
 
6182
  "sp_model_kwargs": {},
6183
  "spaces_between_special_tokens": false,
6184
  "tokenizer_class": "LlamaTokenizer",
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": true,
4
  "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
 
6179
  "legacy": false,
6180
  "model_max_length": 1000000000000000019884624838656,
6181
  "pad_token": "</s>",
6182
+ "padding_side": "right",
6183
  "sp_model_kwargs": {},
6184
  "spaces_between_special_tokens": false,
6185
  "tokenizer_class": "LlamaTokenizer",