kaistlayner committed on
Commit
42dc577
·
verified ·
1 Parent(s): bf49622

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +0 -7
tokenizer_config.json CHANGED
@@ -2003,18 +2003,11 @@
2003
  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
2004
  "clean_up_tokenization_spaces": false,
2005
  "eos_token": "<eos>",
2006
- "max_length": 512,
2007
  "model_max_length": 1000000000000000019884624838656,
2008
- "pad_to_multiple_of": null,
2009
  "pad_token": "<pad>",
2010
- "pad_token_type_id": 0,
2011
- "padding_side": "left",
2012
  "sp_model_kwargs": {},
2013
  "spaces_between_special_tokens": false,
2014
- "stride": 0,
2015
  "tokenizer_class": "GemmaTokenizer",
2016
- "truncation_side": "right",
2017
- "truncation_strategy": "longest_first",
2018
  "unk_token": "<unk>",
2019
  "use_default_system_prompt": false
2020
  }
 
2003
  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
2004
  "clean_up_tokenization_spaces": false,
2005
  "eos_token": "<eos>",
 
2006
  "model_max_length": 1000000000000000019884624838656,
 
2007
  "pad_token": "<pad>",
 
 
2008
  "sp_model_kwargs": {},
2009
  "spaces_between_special_tokens": false,
 
2010
  "tokenizer_class": "GemmaTokenizer",
 
 
2011
  "unk_token": "<unk>",
2012
  "use_default_system_prompt": false
2013
  }