rajthakkar123 committed on
Commit
21c2331
·
verified ·
1 Parent(s): 0497d7b

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +5 -5
config.json CHANGED
@@ -29,7 +29,7 @@
29
  "AutoModel": "modeling_deepseek.DeepseekV3Model",
30
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
31
  },
32
- "aux_loss_alpha": 0.001,
33
  "bad_words_ids": null,
34
  "begin_suppress_tokens": null,
35
  "bos_token_id": 163584,
@@ -69,16 +69,16 @@
69
  "min_length": 0,
70
  "model_type": "kimi_k2",
71
  "moe_intermediate_size": 2048,
72
- "moe_layer_freq": 1,
73
  "n_group": 1,
74
- "n_routed_experts": 384,
75
  "n_shared_experts": 1,
76
  "no_repeat_ngram_size": 0,
77
  "norm_topk_prob": true,
78
  "num_attention_heads": 64,
79
  "num_beam_groups": 1,
80
  "num_beams": 1,
81
- "num_experts_per_tok": 8,
82
  "num_hidden_layers": 61,
83
  "num_key_value_heads": 64,
84
  "num_nextn_predict_layers": 0,
@@ -112,7 +112,7 @@
112
  "routed_scaling_factor": 2.827,
113
  "scoring_func": "sigmoid",
114
  "sep_token_id": null,
115
- "seq_aux": true,
116
  "suppress_tokens": null,
117
  "task_specific_params": null,
118
  "temperature": 1.0,
 
29
  "AutoModel": "modeling_deepseek.DeepseekV3Model",
30
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
31
  },
32
+ "aux_loss_alpha": 0.0,
33
  "bad_words_ids": null,
34
  "begin_suppress_tokens": null,
35
  "bos_token_id": 163584,
 
69
  "min_length": 0,
70
  "model_type": "kimi_k2",
71
  "moe_intermediate_size": 2048,
72
+ "moe_layer_freq": 0,
73
  "n_group": 1,
74
+ "n_routed_experts": 0,
75
  "n_shared_experts": 1,
76
  "no_repeat_ngram_size": 0,
77
  "norm_topk_prob": true,
78
  "num_attention_heads": 64,
79
  "num_beam_groups": 1,
80
  "num_beams": 1,
81
+ "num_experts_per_tok": 0,
82
  "num_hidden_layers": 61,
83
  "num_key_value_heads": 64,
84
  "num_nextn_predict_layers": 0,
 
112
  "routed_scaling_factor": 2.827,
113
  "scoring_func": "sigmoid",
114
  "sep_token_id": null,
115
+ "seq_aux": false,
116
  "suppress_tokens": null,
117
  "task_specific_params": null,
118
  "temperature": 1.0,