themathys committed on
Commit
68e1591
·
verified ·
1 Parent(s): 3e4b9aa

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +1 -29
  2. tokenizer_config.json +2 -2
tokenizer.json CHANGED
@@ -6953,12 +6953,6 @@
6953
  "post_processor": {
6954
  "type": "TemplateProcessing",
6955
  "single": [
6956
- {
6957
- "SpecialToken": {
6958
- "id": "<s>",
6959
- "type_id": 0
6960
- }
6961
- },
6962
  {
6963
  "Sequence": {
6964
  "id": "A",
@@ -6967,24 +6961,12 @@
6967
  }
6968
  ],
6969
  "pair": [
6970
- {
6971
- "SpecialToken": {
6972
- "id": "<s>",
6973
- "type_id": 0
6974
- }
6975
- },
6976
  {
6977
  "Sequence": {
6978
  "id": "A",
6979
  "type_id": 0
6980
  }
6981
  },
6982
- {
6983
- "SpecialToken": {
6984
- "id": "<s>",
6985
- "type_id": 1
6986
- }
6987
- },
6988
  {
6989
  "Sequence": {
6990
  "id": "B",
@@ -6992,17 +6974,7 @@
6992
  }
6993
  }
6994
  ],
6995
- "special_tokens": {
6996
- "<s>": {
6997
- "id": "<s>",
6998
- "ids": [
6999
- 1
7000
- ],
7001
- "tokens": [
7002
- "<s>"
7003
- ]
7004
- }
7005
- }
7006
  },
7007
  "decoder": {
7008
  "type": "Sequence",
 
6953
  "post_processor": {
6954
  "type": "TemplateProcessing",
6955
  "single": [
 
 
 
 
 
 
6956
  {
6957
  "Sequence": {
6958
  "id": "A",
 
6961
  }
6962
  ],
6963
  "pair": [
 
 
 
 
 
 
6964
  {
6965
  "Sequence": {
6966
  "id": "A",
6967
  "type_id": 0
6968
  }
6969
  },
 
 
 
 
 
 
6970
  {
6971
  "Sequence": {
6972
  "id": "B",
 
6974
  }
6975
  }
6976
  ],
6977
+ "special_tokens": {}
 
 
 
 
 
 
 
 
 
 
6978
  },
6979
  "decoder": {
6980
  "type": "Sequence",
tokenizer_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "add_bos_token": true,
3
  "add_eos_token": false,
4
  "add_prefix_space": true,
5
  "added_tokens_decoder": {
@@ -6174,7 +6174,7 @@
6174
  },
6175
  "bos_token": "<s>",
6176
  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + '' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{{ bos_token }}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + eos_token }}{% endif %}{% endfor %}",
6177
- "clean_up_tokenization_spaces": false,
6178
  "eos_token": "</s>",
6179
  "legacy": false,
6180
  "model_max_length": 1000000000000000019884624838656,
 
1
  {
2
+ "add_bos_token": false,
3
  "add_eos_token": false,
4
  "add_prefix_space": true,
5
  "added_tokens_decoder": {
 
6174
  },
6175
  "bos_token": "<s>",
6176
  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + '' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{{ bos_token }}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + eos_token }}{% endif %}{% endfor %}",
6177
+ "clean_up_tokenization_spaces": true,
6178
  "eos_token": "</s>",
6179
  "legacy": false,
6180
  "model_max_length": 1000000000000000019884624838656,