ccui46 committed on
Commit
8cd478e
·
verified ·
1 Parent(s): 053df30

Tokenizer for checkpoint at step 6000 (eval_loss=0.5764)

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. chat_template.jinja +45 -0
  3. tokenizer.json +3 -0
  4. tokenizer_config.json +16 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [gMASK]<sop>
2
+ {%- if tools -%}
3
+ <|system|>
4
+ 你是一个名为 ChatGLM 的人工智能助手。你是基于智谱 AI 公司训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。
5
+
6
+ # 可用工具
7
+ {%- for tool in tools %}
8
+ {%- set function = tool.function if tool.get("function") else tool %}
9
+
10
+ ## {{ function.name }}
11
+
12
+ {{ function | tojson(indent=4, ensure_ascii=False) }}
13
+ 在调用上述函数时,请使用 Json 格式表示调用的参数。
14
+ {%- endfor %}
15
+ {%- endif -%}
16
+
17
+ {%- for msg in messages %}
18
+ {%- if msg.role == 'system' %}
19
+ <|system|>
20
+ {{ msg.content }}
21
+ {%- endif %}
22
+ {%- endfor %}
23
+
24
+ {%- for message in messages if message.role != 'system' %}
25
+ {%- set role = message['role'] %}
26
+ {%- set content = message['content'] %}
27
+ {%- set visible = content.split('</think>')[-1].strip() %}
28
+ {%- set meta = message.get("metadata", "") %}
29
+
30
+ {%- if role == 'user' %}
31
+ <|user|>
32
+ {{ visible }}
33
+ {%- elif role == 'assistant' and not meta %}
34
+ <|assistant|>
35
+ {{ visible }}
36
+ {%- elif role == 'assistant' and meta %}
37
+ <|assistant|>{{ meta }}
38
+ {{ visible }}
39
+ {%- elif role == 'observation' %}
40
+ <|observation|>
41
+ {{ visible }}
42
+ {%- endif %}
43
+ {%- endfor %}
44
+ {% if add_generation_prompt %}<|assistant|>
45
+ <think>{% endif %}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c
3
+ size 19966496
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "clean_up_tokenization_spaces": false,
4
+ "do_lower_case": false,
5
+ "eos_token": "<|endoftext|>",
6
+ "is_local": false,
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 128000,
12
+ "pad_token": "<|endoftext|>",
13
+ "padding_side": "left",
14
+ "remove_space": false,
15
+ "tokenizer_class": "TokenizersBackend"
16
+ }