Nahush-27 committed on
Commit
e3c9ac3
·
verified ·
1 Parent(s): 65b0a27

Push SFT GK model (MMLU 3k + NaturalReasoning 3k, LoRA r=64)

Browse files
Files changed (3) hide show
  1. config.json +3 -3
  2. generation_config.json +4 -3
  3. model.safetensors +1 -1
config.json CHANGED
@@ -4,7 +4,7 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "bos_token_id": null,
8
  "dtype": "bfloat16",
9
  "eos_token_id": 151645,
10
  "head_dim": 128,
@@ -48,7 +48,7 @@
48
  "num_attention_heads": 16,
49
  "num_hidden_layers": 28,
50
  "num_key_value_heads": 8,
51
- "pad_token_id": 151643,
52
  "rms_norm_eps": 1e-06,
53
  "rope_parameters": {
54
  "rope_theta": 1000000,
@@ -57,7 +57,7 @@
57
  "sliding_window": null,
58
  "tie_word_embeddings": true,
59
  "transformers_version": "5.7.0",
60
- "use_cache": false,
61
  "use_sliding_window": false,
62
  "vocab_size": 151936
63
  }
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
  "dtype": "bfloat16",
9
  "eos_token_id": 151645,
10
  "head_dim": 128,
 
48
  "num_attention_heads": 16,
49
  "num_hidden_layers": 28,
50
  "num_key_value_heads": 8,
51
+ "pad_token_id": null,
52
  "rms_norm_eps": 1e-06,
53
  "rope_parameters": {
54
  "rope_theta": 1000000,
 
57
  "sliding_window": null,
58
  "tie_word_embeddings": true,
59
  "transformers_version": "5.7.0",
60
+ "use_cache": true,
61
  "use_sliding_window": false,
62
  "vocab_size": 151936
63
  }
generation_config.json CHANGED
@@ -1,12 +1,13 @@
1
  {
2
  "bos_token_id": 151643,
3
- "do_sample": false,
4
  "eos_token_id": [
5
  151645,
6
  151643
7
  ],
8
- "max_new_tokens": 4096,
9
  "pad_token_id": 151643,
10
- "temperature": 1.0,
 
 
11
  "transformers_version": "5.7.0"
12
  }
 
1
  {
2
  "bos_token_id": 151643,
3
+ "do_sample": true,
4
  "eos_token_id": [
5
  151645,
6
  151643
7
  ],
 
8
  "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
  "transformers_version": "5.7.0"
13
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3c236dc027072dd89cc3f6efbe0fad372a064c28fe8b028d6603f9823b39acb
3
  size 3441185608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:169ad53ec313c3a34b06c0809216e4fc072cce444a5d4ff2b59690d064130ed5
3
  size 3441185608