Victor1o committed on
Commit
8936d79
·
verified ·
1 Parent(s): 3db1c3d

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +0 -3
config.json CHANGED
@@ -9,7 +9,6 @@
9
  "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
  },
12
- "aux_loss_alpha": 0.001,
13
  "bos_token_id": 0,
14
  "eos_token_id": 1,
15
  "ep_size": 1,
@@ -32,7 +31,6 @@
32
  "num_hidden_layers": 61,
33
  "num_key_value_heads": 128,
34
  "num_nextn_predict_layers": 1,
35
- "pretraining_tp": 1,
36
  "q_lora_rank": 1536,
37
  "qk_nope_head_dim": 128,
38
  "qk_rope_head_dim": 64,
@@ -58,7 +56,6 @@
58
  "rope_theta": 10000,
59
  "routed_scaling_factor": 2.5,
60
  "scoring_func": "sigmoid",
61
- "seq_aux": true,
62
  "tie_word_embeddings": false,
63
  "topk_group": 4,
64
  "topk_method": "noaux_tc",
 
9
  "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
  },
 
12
  "bos_token_id": 0,
13
  "eos_token_id": 1,
14
  "ep_size": 1,
 
31
  "num_hidden_layers": 61,
32
  "num_key_value_heads": 128,
33
  "num_nextn_predict_layers": 1,
 
34
  "q_lora_rank": 1536,
35
  "qk_nope_head_dim": 128,
36
  "qk_rope_head_dim": 64,
 
56
  "rope_theta": 10000,
57
  "routed_scaling_factor": 2.5,
58
  "scoring_func": "sigmoid",
 
59
  "tie_word_embeddings": false,
60
  "topk_group": 4,
61
  "topk_method": "noaux_tc",