添加 QiTianTokenizerFast 类及其配置，移除 QiTianTokenizer 类的引用并完善 QiTianTokenizerFast 的配置
Browse files
- chat_template.jinja +5 -0
- tokenizer.py +1 -6
- tokenizer_config.json +1 -1
chat_template.jinja
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% for message in messages %}<|{{ message['role'] }}|>:
|
| 2 |
+
{% if bos_token %}{{ bos_token }}{% endif %}{{ message['content'] }}{% if eos_token %}{{ eos_token }}
|
| 3 |
+
{% else %}
|
| 4 |
+
{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>:
|
| 5 |
+
{% if bos_token %}{{ bos_token }}{% endif %}{% endif %}
|
tokenizer.py
CHANGED
|
@@ -1,9 +1,4 @@
|
|
| 1 |
-
from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
class QiTianTokenizer(PreTrainedTokenizer):
|
| 5 |
-
""" QiTianTokenizer """
|
| 6 |
-
model_input_names: list[str] = ["input_ids", "attention_mask"]
|
| 7 |
|
| 8 |
|
| 9 |
class QiTianTokenizerFast(PreTrainedTokenizerFast):
|
|
|
|
| 1 |
+
from transformers import PreTrainedTokenizerFast
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
class QiTianTokenizerFast(PreTrainedTokenizerFast):
|
tokenizer_config.json
CHANGED
|
@@ -73,7 +73,7 @@
|
|
| 73 |
],
|
| 74 |
"auto_map": {
|
| 75 |
"AutoTokenizer": [
|
| 76 |
-
|
| 77 |
"tokenizer.QiTianTokenizerFast"
|
| 78 |
]
|
| 79 |
},
|
|
|
|
| 73 |
],
|
| 74 |
"auto_map": {
|
| 75 |
"AutoTokenizer": [
|
| 76 |
+
null,
|
| 77 |
"tokenizer.QiTianTokenizerFast"
|
| 78 |
]
|
| 79 |
},
|