Salma204 committed on
Commit
c429f1e
·
verified ·
1 Parent(s): d7e7165

M3: GK SFT v3 fixed format + 99K MMLU examples

Browse files
chat_template.jinja CHANGED
@@ -1,5 +1,4 @@
1
  {%- set enable_thinking = true %}
2
- {%- set enable_thinking = false %}
3
  {%- if tools %}
4
  {{- '<|im_start|>system\n' }}
5
  {%- if messages[0].role == 'system' %}
 
1
  {%- set enable_thinking = true %}
 
2
  {%- if tools %}
3
  {{- '<|im_start|>system\n' }}
4
  {%- if messages[0].role == 'system' %}
config.json CHANGED
@@ -4,7 +4,7 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "bos_token_id": null,
8
  "dtype": "bfloat16",
9
  "eos_token_id": 151645,
10
  "head_dim": 128,
@@ -48,7 +48,7 @@
48
  "num_attention_heads": 16,
49
  "num_hidden_layers": 28,
50
  "num_key_value_heads": 8,
51
- "pad_token_id": 151643,
52
  "rms_norm_eps": 1e-06,
53
  "rope_parameters": {
54
  "rope_theta": 1000000,
@@ -57,7 +57,7 @@
57
  "sliding_window": null,
58
  "tie_word_embeddings": true,
59
  "transformers_version": "5.7.0",
60
- "use_cache": false,
61
  "use_sliding_window": false,
62
  "vocab_size": 151936
63
  }
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
  "dtype": "bfloat16",
9
  "eos_token_id": 151645,
10
  "head_dim": 128,
 
48
  "num_attention_heads": 16,
49
  "num_hidden_layers": 28,
50
  "num_key_value_heads": 8,
51
+ "pad_token_id": null,
52
  "rms_norm_eps": 1e-06,
53
  "rope_parameters": {
54
  "rope_theta": 1000000,
 
57
  "sliding_window": null,
58
  "tie_word_embeddings": true,
59
  "transformers_version": "5.7.0",
60
+ "use_cache": true,
61
  "use_sliding_window": false,
62
  "vocab_size": 151936
63
  }
generation_config.json CHANGED
@@ -1,13 +1,9 @@
1
  {
2
- "bos_token_id": 151643,
3
  "do_sample": true,
4
- "eos_token_id": [
5
- 151645,
6
- 151643
7
- ],
8
- "pad_token_id": 151643,
9
- "temperature": 0.6,
10
- "top_k": 20,
11
  "top_p": 0.9,
12
- "transformers_version": "4.51.0"
 
 
 
13
  }
 
1
  {
 
2
  "do_sample": true,
3
+ "temperature": 0.4,
 
 
 
 
 
 
4
  "top_p": 0.9,
5
+ "top_k": 20,
6
+ "max_new_tokens": 96,
7
+ "pad_token_id": 151643,
8
+ "eos_token_id": 151645
9
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247b247aa1c5fe1a1ba15ed8a5ce1f206b8ceb9ef5e91674b53713280396604f
3
  size 3441185608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41914a093b6bd7ca3c2354ef94548a46c084ab2907d3c67c3d059c05ec3a387f
3
  size 3441185608