higgsfield committed on
Commit
544b080
·
1 Parent(s): cac413b

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +4 -3
tokenizer_config.json CHANGED
@@ -28,9 +28,9 @@
28
  },
29
  "32000": {
30
  "content": "<|pad|>",
31
- "lstrip": false,
32
  "normalized": false,
33
- "rstrip": false,
34
  "single_word": false,
35
  "special": true
36
  }
@@ -41,6 +41,7 @@
41
  "</s>"
42
  ],
43
  "bos_token": "<s>",
 
44
  "clean_up_tokenization_spaces": false,
45
  "eos_token": "</s>",
46
  "legacy": true,
@@ -50,7 +51,7 @@
50
  "sp_model_kwargs": {},
51
  "spaces_between_special_tokens": false,
52
  "tokenizer_class": "LlamaTokenizer",
53
- "tokenizer_file": "/home/nonroot/.cache/trainagents/mistralai/Mistral-7B-v0.1/models--mistralai--Mistral-7B-v0.1/snapshots/5e9c98b96d071dce59368012254c55b0ec6f8658/tokenizer.json",
54
  "unk_token": "<unk>",
55
  "use_default_system_prompt": true
56
  }
 
28
  },
29
  "32000": {
30
  "content": "<|pad|>",
31
+ "lstrip": true,
32
  "normalized": false,
33
+ "rstrip": true,
34
  "single_word": false,
35
  "special": true
36
  }
 
41
  "</s>"
42
  ],
43
  "bos_token": "<s>",
44
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
45
  "clean_up_tokenization_spaces": false,
46
  "eos_token": "</s>",
47
  "legacy": true,
 
51
  "sp_model_kwargs": {},
52
  "spaces_between_special_tokens": false,
53
  "tokenizer_class": "LlamaTokenizer",
54
+ "tokenizer_file": "/home/nonroot/.cache/huggingface/hub/models--mistralai--Mistral-7B-v0.1/snapshots/5e9c98b96d071dce59368012254c55b0ec6f8658/tokenizer.json",
55
  "unk_token": "<unk>",
56
  "use_default_system_prompt": true
57
  }