Upload config.json with huggingface_hub
Browse files
- config.json +0 -3
config.json
CHANGED
|
@@ -9,7 +9,6 @@
|
|
| 9 |
"AutoModel": "modeling_deepseek.DeepseekV3Model",
|
| 10 |
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
|
| 11 |
},
|
| 12 |
- "aux_loss_alpha": 0.001,
|
| 13 |
"bos_token_id": 0,
|
| 14 |
"eos_token_id": 1,
|
| 15 |
"ep_size": 1,
|
|
@@ -32,7 +31,6 @@
|
|
| 32 |
"num_hidden_layers": 61,
|
| 33 |
"num_key_value_heads": 128,
|
| 34 |
"num_nextn_predict_layers": 1,
|
| 35 |
- "pretraining_tp": 1,
|
| 36 |
"q_lora_rank": 1536,
|
| 37 |
"qk_nope_head_dim": 128,
|
| 38 |
"qk_rope_head_dim": 64,
|
|
@@ -58,7 +56,6 @@
|
|
| 58 |
"rope_theta": 10000,
|
| 59 |
"routed_scaling_factor": 2.5,
|
| 60 |
"scoring_func": "sigmoid",
|
| 61 |
- "seq_aux": true,
|
| 62 |
"tie_word_embeddings": false,
|
| 63 |
"topk_group": 4,
|
| 64 |
"topk_method": "noaux_tc",
|
|
|
|
| 9 |
"AutoModel": "modeling_deepseek.DeepseekV3Model",
|
| 10 |
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
|
| 11 |
},
|
|
|
|
| 12 |
"bos_token_id": 0,
|
| 13 |
"eos_token_id": 1,
|
| 14 |
"ep_size": 1,
|
|
|
|
| 31 |
"num_hidden_layers": 61,
|
| 32 |
"num_key_value_heads": 128,
|
| 33 |
"num_nextn_predict_layers": 1,
|
|
|
|
| 34 |
"q_lora_rank": 1536,
|
| 35 |
"qk_nope_head_dim": 128,
|
| 36 |
"qk_rope_head_dim": 64,
|
|
|
|
| 56 |
"rope_theta": 10000,
|
| 57 |
"routed_scaling_factor": 2.5,
|
| 58 |
"scoring_func": "sigmoid",
|
|
|
|
| 59 |
"tie_word_embeddings": false,
|
| 60 |
"topk_group": 4,
|
| 61 |
"topk_method": "noaux_tc",
|