Wimflorijn committed on
Commit
df85935
·
verified ·
1 Parent(s): aca99c3

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +4 -2
  2. tokenizer_config.json +2 -0
tokenizer.json CHANGED
@@ -1023,7 +1023,8 @@
1023
  {
1024
  "type": "Metaspace",
1025
  "replacement": "▁",
1026
- "add_prefix_space": true
 
1027
  },
1028
  {
1029
  "type": "Digits",
@@ -1080,7 +1081,8 @@
1080
  "decoder": {
1081
  "type": "Metaspace",
1082
  "replacement": "▁",
1083
- "add_prefix_space": true
 
1084
  },
1085
  "model": {
1086
  "type": "Unigram",
 
1023
  {
1024
  "type": "Metaspace",
1025
  "replacement": "▁",
1026
+ "prepend_scheme": "always",
1027
+ "split": true
1028
  },
1029
  {
1030
  "type": "Digits",
 
1081
  "decoder": {
1082
  "type": "Metaspace",
1083
  "replacement": "▁",
1084
+ "prepend_scheme": "always",
1085
+ "split": true
1086
  },
1087
  "model": {
1088
  "type": "Unigram",
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<pad>",
@@ -987,6 +988,7 @@
987
  "clean_up_tokenization_spaces": true,
988
  "eos_token": "</s>",
989
  "extra_ids": 100,
 
990
  "max_length": 1024,
991
  "model_max_length": 1000000000000000019884624838656,
992
  "pad_to_multiple_of": null,
 
1
  {
2
+ "add_prefix_space": null,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
 
988
  "clean_up_tokenization_spaces": true,
989
  "eos_token": "</s>",
990
  "extra_ids": 100,
991
+ "extra_special_tokens": {},
992
  "max_length": 1024,
993
  "model_max_length": 1000000000000000019884624838656,
994
  "pad_to_multiple_of": null,