utdawn committed on
Commit
d439464
·
verified ·
1 Parent(s): e763a7e

Update configuration_llada2_moe.py

Browse files
Files changed (1) hide show
  1. configuration_llada2_moe.py +3 -2
configuration_llada2_moe.py CHANGED
@@ -16,7 +16,7 @@ class LLaDA2MoeConfig(PretrainedConfig):
16
  num_key_value_heads=0,
17
  hidden_act="silu",
18
  use_qkv_bias=False, # llada2 only
19
- use_qk_norm=False,
20
  use_bias=True, # llada2 only
21
  rms_norm_eps=1e-05,
22
  norm_head=False, # llada2 only
@@ -54,6 +54,7 @@ class LLaDA2MoeConfig(PretrainedConfig):
54
  self.num_key_value_heads = num_key_value_heads
55
  self.hidden_act = hidden_act
56
  self.use_qkv_bias = use_qkv_bias
 
57
  self.use_bias = use_bias
58
  self.norm_head = norm_head
59
  self.rms_norm_eps = rms_norm_eps
@@ -84,4 +85,4 @@ class LLaDA2MoeConfig(PretrainedConfig):
84
 
85
  super().__init__(
86
  pad_token_id=pad_token_id, tie_word_embeddings=tie_word_embeddings, **kwargs
87
- )
 
16
  num_key_value_heads=0,
17
  hidden_act="silu",
18
  use_qkv_bias=False, # llada2 only
19
+ use_qk_norm=True,
20
  use_bias=True, # llada2 only
21
  rms_norm_eps=1e-05,
22
  norm_head=False, # llada2 only
 
54
  self.num_key_value_heads = num_key_value_heads
55
  self.hidden_act = hidden_act
56
  self.use_qkv_bias = use_qkv_bias
57
+ self.use_qk_norm = use_qk_norm
58
  self.use_bias = use_bias
59
  self.norm_head = norm_head
60
  self.rms_norm_eps = rms_norm_eps
 
85
 
86
  super().__init__(
87
  pad_token_id=pad_token_id, tie_word_embeddings=tie_word_embeddings, **kwargs
88
+ )