notzero committed on
Commit
8459591
·
verified ·
1 Parent(s): e9f8350

Trained with Unsloth

Browse files
config.json CHANGED
@@ -18,7 +18,7 @@
18
  "hidden_size": 3584,
19
  "initializer_range": 0.02,
20
  "intermediate_size": 14336,
21
- "max_position_embeddings": 8192,
22
  "model_type": "gemma2",
23
  "num_attention_heads": 16,
24
  "num_hidden_layers": 42,
@@ -26,12 +26,15 @@
26
  "pad_token_id": 0,
27
  "query_pre_attn_scalar": 256,
28
  "rms_norm_eps": 1e-06,
29
- "rope_scaling": null,
 
 
 
30
  "rope_theta": 10000.0,
31
  "sliding_window": 4096,
32
  "sliding_window_size": 4096,
33
  "torch_dtype": "float16",
34
- "transformers_version": "4.50.3",
35
  "unsloth_version": "2025.3.19",
36
  "use_cache": true,
37
  "vocab_size": 256000
 
18
  "hidden_size": 3584,
19
  "initializer_range": 0.02,
20
  "intermediate_size": 14336,
21
+ "max_position_embeddings": 20000,
22
  "model_type": "gemma2",
23
  "num_attention_heads": 16,
24
  "num_hidden_layers": 42,
 
26
  "pad_token_id": 0,
27
  "query_pre_attn_scalar": 256,
28
  "rms_norm_eps": 1e-06,
29
+ "rope_scaling": {
30
+ "factor": 2.0,
31
+ "type": "linear"
32
+ },
33
  "rope_theta": 10000.0,
34
  "sliding_window": 4096,
35
  "sliding_window_size": 4096,
36
  "torch_dtype": "float16",
37
+ "transformers_version": "4.51.0",
38
  "unsloth_version": "2025.3.19",
39
  "use_cache": true,
40
  "vocab_size": 256000
generation_config.json CHANGED
@@ -6,7 +6,7 @@
6
  1,
7
  107
8
  ],
9
- "max_length": 8192,
10
  "pad_token_id": 0,
11
- "transformers_version": "4.50.3"
12
  }
 
6
  1,
7
  107
8
  ],
9
+ "max_length": 20000,
10
  "pad_token_id": 0,
11
+ "transformers_version": "4.51.0"
12
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f7f20f5b85ade5813e32e9303e581d687c7f3d7c14be739b28a7d5627b0bf85
3
  size 4903351824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35bee6f5e48a50392c3d73dc0af91508c756fb67a88a604768659da6697010fe
3
  size 4903351824
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1bd7b2e67e4ec437788de35de02eee72a48fe049514177704b42d5c00018d34
3
  size 4947570728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2517d5d7fce7cecb80be7cae127f8c36e1265d6db3ed82e0d457f12b35ffaa18
3
  size 4947570728
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b53ed02c09f523752e4608415f084b2a5918d0b09cd97b45b54dc577b590f27
3
  size 4962221328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b63ec077bc27a8f0ea0cbbd80176b0acfb8b750e0860a39a54752e5466fef14e
3
  size 4962221328
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c820d80f93d186a675d102fe04dcd0bcbf1448656c9f8529f21507519e7b7acb
3
  size 3670322096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2251161a07f923203632153ec296ade679f247c32d179efb909e95549e30e403
3
  size 3670322096