Upload tokenizer
Browse files
- README.md +1 -1
- tokenizer_config.json +1 -0
README.md
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
---
|
| 2 |
-
license: cc-by-nc-4.0
|
| 3 |
language:
|
| 4 |
- en
|
| 5 |
- de
|
|
@@ -11,6 +10,7 @@ language:
|
|
| 11 |
- ko
|
| 12 |
- it
|
| 13 |
- es
|
|
|
|
| 14 |
metrics:
|
| 15 |
- comet
|
| 16 |
pipeline_tag: translation
|
|
|
|
| 1 |
---
|
|
|
|
| 2 |
language:
|
| 3 |
- en
|
| 4 |
- de
|
|
|
|
| 10 |
- ko
|
| 11 |
- it
|
| 12 |
- es
|
| 13 |
+
license: cc-by-nc-4.0
|
| 14 |
metrics:
|
| 15 |
- comet
|
| 16 |
pipeline_tag: translation
|
tokenizer_config.json
CHANGED
|
@@ -84,6 +84,7 @@
|
|
| 84 |
}
|
| 85 |
},
|
| 86 |
"bos_token": "<s>",
|
|
|
|
| 87 |
"clean_up_tokenization_spaces": false,
|
| 88 |
"cls_token": "<CLS>",
|
| 89 |
"eos_token": "<|im_end|>",
|
|
|
|
| 84 |
}
|
| 85 |
},
|
| 86 |
"bos_token": "<s>",
|
| 87 |
+
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
| 88 |
"clean_up_tokenization_spaces": false,
|
| 89 |
"cls_token": "<CLS>",
|
| 90 |
"eos_token": "<|im_end|>",
|