Update config.json
Browse files - config.json +5 -5
config.json
CHANGED
|
@@ -29,7 +29,7 @@
|
|
| 29 |
"AutoModel": "modeling_deepseek.DeepseekV3Model",
|
| 30 |
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
|
| 31 |
},
|
| 32 |
-
"aux_loss_alpha": 0.
|
| 33 |
"bad_words_ids": null,
|
| 34 |
"begin_suppress_tokens": null,
|
| 35 |
"bos_token_id": 163584,
|
|
@@ -69,16 +69,16 @@
|
|
| 69 |
"min_length": 0,
|
| 70 |
"model_type": "kimi_k2",
|
| 71 |
"moe_intermediate_size": 2048,
|
| 72 |
-
"moe_layer_freq":
|
| 73 |
"n_group": 1,
|
| 74 |
-
"n_routed_experts":
|
| 75 |
"n_shared_experts": 1,
|
| 76 |
"no_repeat_ngram_size": 0,
|
| 77 |
"norm_topk_prob": true,
|
| 78 |
"num_attention_heads": 64,
|
| 79 |
"num_beam_groups": 1,
|
| 80 |
"num_beams": 1,
|
| 81 |
-
"num_experts_per_tok":
|
| 82 |
"num_hidden_layers": 61,
|
| 83 |
"num_key_value_heads": 64,
|
| 84 |
"num_nextn_predict_layers": 0,
|
|
@@ -112,7 +112,7 @@
|
|
| 112 |
"routed_scaling_factor": 2.827,
|
| 113 |
"scoring_func": "sigmoid",
|
| 114 |
"sep_token_id": null,
|
| 115 |
-
"seq_aux":
|
| 116 |
"suppress_tokens": null,
|
| 117 |
"task_specific_params": null,
|
| 118 |
"temperature": 1.0,
|
|
|
|
| 29 |
"AutoModel": "modeling_deepseek.DeepseekV3Model",
|
| 30 |
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
|
| 31 |
},
|
| 32 |
+
"aux_loss_alpha": 0.0,
|
| 33 |
"bad_words_ids": null,
|
| 34 |
"begin_suppress_tokens": null,
|
| 35 |
"bos_token_id": 163584,
|
|
|
|
| 69 |
"min_length": 0,
|
| 70 |
"model_type": "kimi_k2",
|
| 71 |
"moe_intermediate_size": 2048,
|
| 72 |
+
"moe_layer_freq": 0,
|
| 73 |
"n_group": 1,
|
| 74 |
+
"n_routed_experts": 0,
|
| 75 |
"n_shared_experts": 1,
|
| 76 |
"no_repeat_ngram_size": 0,
|
| 77 |
"norm_topk_prob": true,
|
| 78 |
"num_attention_heads": 64,
|
| 79 |
"num_beam_groups": 1,
|
| 80 |
"num_beams": 1,
|
| 81 |
+
"num_experts_per_tok": 0,
|
| 82 |
"num_hidden_layers": 61,
|
| 83 |
"num_key_value_heads": 64,
|
| 84 |
"num_nextn_predict_layers": 0,
|
|
|
|
| 112 |
"routed_scaling_factor": 2.827,
|
| 113 |
"scoring_func": "sigmoid",
|
| 114 |
"sep_token_id": null,
|
| 115 |
+
"seq_aux": false,
|
| 116 |
"suppress_tokens": null,
|
| 117 |
"task_specific_params": null,
|
| 118 |
"temperature": 1.0,
|