chenjiel committed on
Commit
8fb2682
·
unverified ·
1 Parent(s): b903984
config.json CHANGED
@@ -87,7 +87,7 @@
87
  "router_aux_loss_coef": 0.001,
88
  "shared_expert_intermediate_size": 512,
89
  "tie_word_embeddings": false,
90
- "transformers_version": "4.57.3",
91
  "use_cache": true,
92
  "use_sliding_window": false,
93
  "vocab_size": 151936,
@@ -352,7 +352,8 @@
352
  "model.layers.9.linear_attn.in_proj_ba",
353
  "model.layers.9.linear_attn.in_proj_qkvz",
354
  "model.layers.9.mlp.gate",
355
- "model.layers.9.mlp.shared_expert_gate"
 
356
  ],
357
  "quant_algo": "NVFP4",
358
  "kv_cache_scheme": {
@@ -362,7 +363,7 @@
362
  },
363
  "producer": {
364
  "name": "modelopt",
365
- "version": "0.0.1.dev320+g53a2ddeba.d20251211"
366
  },
367
  "quant_method": "modelopt"
368
  }
 
87
  "router_aux_loss_coef": 0.001,
88
  "shared_expert_intermediate_size": 512,
89
  "tie_word_embeddings": false,
90
+ "transformers_version": "4.57.1",
91
  "use_cache": true,
92
  "use_sliding_window": false,
93
  "vocab_size": 151936,
 
352
  "model.layers.9.linear_attn.in_proj_ba",
353
  "model.layers.9.linear_attn.in_proj_qkvz",
354
  "model.layers.9.mlp.gate",
355
+ "model.layers.9.mlp.shared_expert_gate",
356
+ "mtp.layers.0*"
357
  ],
358
  "quant_algo": "NVFP4",
359
  "kv_cache_scheme": {
 
363
  },
364
  "producer": {
365
  "name": "modelopt",
366
+ "version": "0.0.1.dev443+gc9e06886e.d20260206"
367
  },
368
  "quant_method": "modelopt"
369
  }
generation_config.json CHANGED
@@ -2,12 +2,12 @@
2
  "bos_token_id": 151643,
3
  "do_sample": true,
4
  "eos_token_id": [
5
- 151645,
6
- 151643
7
  ],
8
  "pad_token_id": 151643,
9
  "temperature": 0.7,
10
  "top_k": 20,
11
  "top_p": 0.8,
12
- "transformers_version": "4.57.3"
13
- }
 
2
  "bos_token_id": 151643,
3
  "do_sample": true,
4
  "eos_token_id": [
5
+ 151645,
6
+ 151643
7
  ],
8
  "pad_token_id": 151643,
9
  "temperature": 0.7,
10
  "top_k": 20,
11
  "top_p": 0.8,
12
+ "transformers_version": "4.57.0.dev0"
13
+ }
hf_quant_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "producer": {
3
  "name": "modelopt",
4
- "version": "0.0.1.dev320+g53a2ddeba.d20251211"
5
  },
6
  "quantization": {
7
  "quant_algo": "NVFP4",
@@ -248,7 +248,8 @@
248
  "model.layers.9.linear_attn.in_proj_ba",
249
  "model.layers.9.linear_attn.in_proj_qkvz",
250
  "model.layers.9.mlp.gate",
251
- "model.layers.9.mlp.shared_expert_gate"
 
252
  ]
253
  }
254
  }
 
1
  {
2
  "producer": {
3
  "name": "modelopt",
4
+ "version": "0.0.1.dev443+gc9e06886e.d20260206"
5
  },
6
  "quantization": {
7
  "quant_algo": "NVFP4",
 
248
  "model.layers.9.linear_attn.in_proj_ba",
249
  "model.layers.9.linear_attn.in_proj_qkvz",
250
  "model.layers.9.mlp.gate",
251
+ "model.layers.9.mlp.shared_expert_gate",
252
+ "mtp.layers.0*"
253
  ]
254
  }
255
  }
model-00001-of-00010.safetensors → model-00001-of-00011.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ea7ef4556da0354da45327dbfb73b3745af537268f6f139a6d9b8c6ab615085
3
  size 5003036968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2432c420ab12ae5109e63bec3a12076c9651bf717245a59ceee00d299725a08b
3
  size 5003036968
model-00002-of-00010.safetensors → model-00002-of-00011.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c1ea498dc43e31131ec03b490462034b3cf88cfbe83258042633855b0ec3a78
3
  size 5003483960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0be1603d7a3512b643e0af4632b2cc5e568a8cc07c618d779646e91a42a60086
3
  size 5003483960
model-00003-of-00010.safetensors → model-00003-of-00011.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3018ed11a41eed94ced4d54a7701d2212d490fdd5ed4f194a0848b2a5bdcd21f
3
  size 5003514400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f38337b058eb06d6290a5d3c407b452921bfc351c25293882f77fabe6936f4c9
3
  size 5003514400
model-00004-of-00010.safetensors → model-00004-of-00011.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a156886d74cec515b33b6cb3854526f35778f3e0b92f7c582bf866d677a244a3
3
  size 5003755712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ad60f38a2fbfe2aea0bae6d6efafa80bc496984d832430a08503a0349c824e3
3
  size 5003755712
model-00005-of-00010.safetensors → model-00005-of-00011.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a8cacbaa6024b00a2590ba19bfbe1a6a4b56330dcc50cf96191759dccfb4eb1
3
  size 5003581304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91683c98c4594079005cb2eaf35f45945f1a1a22a12daba8962203665893436f
3
  size 5003581304
model-00006-of-00010.safetensors → model-00006-of-00011.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fafcd653de96fd4864acada3b8abe4000de6e5eeaf1a848775ef598bdde39bf
3
  size 5003516056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c19190847278b7ba2bafe1928bf7566162bcb1534825accb98116bc7ddfff8fd
3
  size 5003516056
model-00007-of-00010.safetensors → model-00007-of-00011.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8a5d226c87ade2dfa1b3a99f1521fb19ec7f8e9c4c7165c450a45f7457a17d6
3
  size 5003593008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35c51ab71581678a014b0c9350495439ac7b662d90b7fe02fee8ed1c05cfb7a8
3
  size 5003593008
model-00008-of-00010.safetensors → model-00008-of-00011.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b43b15d1da43f381ce283ae9e728438caf38093f73175d0fca8c08f45599605
3
  size 5003516072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c20ca2472189edba57b2e7ee410da91782f293654f29fa82cce6390d3e05591e
3
  size 5003516072
model-00009-of-00010.safetensors → model-00009-of-00011.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5c72461f3b0b76a5125c88f24a4e080916e3cd816569ad22ca644c0d538d714
3
  size 5003744824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59c7b3a6c88d7a1e6979858dcee4e40dbdf0e005bb8b284d62beaa70efc84f22
3
  size 5003744824
model-00010-of-00010.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ec3f7e8aa5cd222cfc0e0f44c51152cfad491312c4f1b073e1928693c6eebb3
3
- size 2424868248
 
 
 
 
model-00010-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d5a6161676e0c3b7f398edb9424d3476391933a73cb44401bdcd3d12ff3eea
3
+ size 5000330496
model-00011-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57215c4d31f794dd3868e62941492d33df28054feb38dee1a1f32d65481216e1
3
+ size 725675520
model.safetensors.index.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50f99846ba972853a3171baf1349e5b51be76effe30d5ec1e412e714439ea64e
3
- size 28326603
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9c2f67f083110def2b0d727e2b90b9eeb5179b7cc400a450880da4d04abb47d
3
+ size 28463294