fxmeng committed on
Commit
8c38f72
·
verified ·
1 Parent(s): 6fc2211

Upload configuration_llamamla.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. configuration_llamamla.py +25 -0
configuration_llamamla.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers.models.llama.configuration_llama import LlamaConfig
2
+
3
class LlamaMLAConfig(LlamaConfig):
    """Llama configuration extended with extra low-rank / head-dimension fields.

    All positional and keyword arguments not listed below are forwarded
    unchanged to ``LlamaConfig.__init__``.

    NOTE(review): ``model_type`` is set to ``"deepseek_v3"`` even though the
    class derives from ``LlamaConfig`` -- presumably so that checkpoints are
    dispatched to a DeepSeek-V3-style implementation; confirm with the author.

    Args:
        kv_lora_rank: Stored as ``self.kv_lora_rank`` (default ``512``).
        q_lora_rank: Stored as ``self.q_lora_rank`` (default ``None``).
        qk_rope_head_dim: Stored as ``self.qk_rope_head_dim`` (default ``64``).
        qk_nope_head_dim: Stored as ``self.qk_nope_head_dim`` (default ``128``).
        v_head_dim: Stored as ``self.v_head_dim`` (default ``128``).
        qk_latent_layernorm: Stored as ``self.qk_latent_layernorm``
            (default ``True``).

    In addition, ``self.qk_head_dim`` is derived as
    ``qk_rope_head_dim + qk_nope_head_dim``.
    """

    model_type = "deepseek_v3"

    def __init__(
        self,
        *args,
        kv_lora_rank=512,
        q_lora_rank=None,
        qk_rope_head_dim=64,
        qk_nope_head_dim=128,
        v_head_dim=128,
        qk_latent_layernorm=True,
        **kwargs,
    ):
        # Let the parent class consume every standard Llama option first.
        super().__init__(*args, **kwargs)

        # Low-rank projection sizes.
        self.kv_lora_rank = kv_lora_rank
        self.q_lora_rank = q_lora_rank

        # Per-head dimensions; qk_head_dim is the sum of its two parts.
        self.qk_rope_head_dim = qk_rope_head_dim
        self.qk_nope_head_dim = qk_nope_head_dim
        self.qk_head_dim = qk_rope_head_dim + qk_nope_head_dim
        self.v_head_dim = v_head_dim

        # Flag stored verbatim for the modeling code to interpret.
        self.qk_latent_layernorm = qk_latent_layernorm