sanjeevnv committed on
Commit
26f81d5
·
verified ·
1 Parent(s): 7a287ad

Add generation markers around answer content for optional SFT loss masking

Browse files
Files changed (2) hide show
  1. chat_template.jinja +3 -3
  2. tokenizer_config.json +1 -1
chat_template.jinja CHANGED
@@ -1,11 +1,11 @@
1
- {#- Simple pretraining chat template: question/answer format, all tokens trainable.
2
- No chat-ML markers, no thinking tags, no loss masking. -#}
3
  {%- for message in messages %}
4
  {%- if message.role == "system" %}
5
  {{- message.content }}
6
  {%- elif message.role == "user" %}
7
  {{- "\nquestion: " + message.content }}
8
  {%- elif message.role == "assistant" %}
9
- {{- "\nanswer: " + message.content }}
10
  {%- endif %}
11
  {%- endfor %}
 
1
+ {#- Pretraining chat template: question/answer format with generation markers.
2
+ Energon chooses whether to apply SFT loss masking or train on all tokens. -#}
3
  {%- for message in messages %}
4
  {%- if message.role == "system" %}
5
  {{- message.content }}
6
  {%- elif message.role == "user" %}
7
  {{- "\nquestion: " + message.content }}
8
  {%- elif message.role == "assistant" %}
9
+ {{- "\nanswer: " }}{% generation %}{{- message.content }}{% endgeneration %}
10
  {%- endif %}
11
  {%- endfor %}
tokenizer_config.json CHANGED
@@ -8015,5 +8015,5 @@
8015
  "model_max_length": 262144,
8016
  "tokenizer_class": "PreTrainedTokenizerFast",
8017
  "unk_token": "<unk>",
8018
- "chat_template": "{#- Simple pretraining chat template: question/answer format, all tokens trainable.\n No chat-ML markers, no thinking tags, no loss masking. -#}\n{%- for message in messages %}\n{%- if message.role == \"system\" %}\n{{- message.content }}\n{%- elif message.role == \"user\" %}\n{{- \"\\nquestion: \" + message.content }}\n{%- elif message.role == \"assistant\" %}\n{{- \"\\nanswer: \" + message.content }}\n{%- endif %}\n{%- endfor %}\n"
8019
  }
 
8015
  "model_max_length": 262144,
8016
  "tokenizer_class": "PreTrainedTokenizerFast",
8017
  "unk_token": "<unk>",
8018
+ "chat_template": "{#- Pretraining chat template: question/answer format with generation markers.\n Energon chooses whether to apply SFT loss masking or train on all tokens. -#}\n{%- for message in messages %}\n{%- if message.role == \"system\" %}\n{{- message.content }}\n{%- elif message.role == \"user\" %}\n{{- \"\\nquestion: \" + message.content }}\n{%- elif message.role == \"assistant\" %}\n{{- \"\\nanswer: \" }}{% generation %}{{- message.content }}{% endgeneration %}\n{%- endif %}\n{%- endfor %}\n"
8019
  }