mohamedahraf273 committed on
Commit
c10a920
·
verified ·
1 Parent(s): 6f1aa1e

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. config.json +13 -14
  2. generation_config.json +1 -1
  3. model.safetensors +2 -2
  4. tokenizer.json +2 -2
config.json CHANGED
@@ -9,30 +9,30 @@
9
  "eos_token_id": 2,
10
  "ep_size": 1,
11
  "first_k_dense_replace": 1,
12
- "head_dim": 16,
13
  "hidden_act": "silu",
14
- "hidden_size": 256,
15
  "initializer_range": 0.006,
16
- "intermediate_size": 512,
17
- "kv_lora_rank": 64,
18
  "max_position_embeddings": 262144,
19
  "model_type": "deepseek_v3",
20
- "moe_intermediate_size": 256,
21
  "moe_layer_freq": 1,
22
  "n_group": 1,
23
  "n_routed_experts": 4,
24
  "n_shared_experts": 1,
25
  "norm_topk_prob": true,
26
- "num_attention_heads": 4,
27
  "num_experts_per_tok": 2,
28
  "num_hidden_layers": 2,
29
- "num_key_value_heads": 4,
30
  "num_nextn_predict_layers": 1,
31
  "pretraining_tp": 1,
32
  "q_lora_rank": null,
33
- "qk_head_dim": 48,
34
- "qk_nope_head_dim": 32,
35
- "qk_rope_head_dim": 16,
36
  "rms_norm_eps": 1e-06,
37
  "rope_interleave": true,
38
  "rope_scaling": {
@@ -51,9 +51,8 @@
51
  "tie_word_embeddings": false,
52
  "topk_group": 1,
53
  "topk_method": "noaux_tc",
54
- "torch_dtype": "float32",
55
- "transformers_version": "4.53.3",
56
  "use_cache": true,
57
- "v_head_dim": 32,
58
- "vocab_size": 128256
59
  }
 
9
  "eos_token_id": 2,
10
  "ep_size": 1,
11
  "first_k_dense_replace": 1,
12
+ "head_dim": 2,
13
  "hidden_act": "silu",
14
+ "hidden_size": 32,
15
  "initializer_range": 0.006,
16
+ "intermediate_size": 64,
17
+ "kv_lora_rank": 8,
18
  "max_position_embeddings": 262144,
19
  "model_type": "deepseek_v3",
20
+ "moe_intermediate_size": 32,
21
  "moe_layer_freq": 1,
22
  "n_group": 1,
23
  "n_routed_experts": 4,
24
  "n_shared_experts": 1,
25
  "norm_topk_prob": true,
26
+ "num_attention_heads": 2,
27
  "num_experts_per_tok": 2,
28
  "num_hidden_layers": 2,
29
+ "num_key_value_heads": 2,
30
  "num_nextn_predict_layers": 1,
31
  "pretraining_tp": 1,
32
  "q_lora_rank": null,
33
+ "qk_head_dim": 6,
34
+ "qk_nope_head_dim": 4,
35
+ "qk_rope_head_dim": 2,
36
  "rms_norm_eps": 1e-06,
37
  "rope_interleave": true,
38
  "rope_scaling": {
 
51
  "tie_word_embeddings": false,
52
  "topk_group": 1,
53
  "topk_method": "noaux_tc",
54
+ "transformers_version": "4.57.6",
 
55
  "use_cache": true,
56
+ "v_head_dim": 4,
57
+ "vocab_size": 32000
58
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
- "transformers_version": "4.53.3"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
+ "transformers_version": "4.57.6"
6
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:632aadf26d1207997826637896a4d61bfb3c68a1658d51ba9bc59927b2abaf9e
3
- size 269137576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4849d415de47276e86a8a8e094fd22d511a27fe17a4d0f927620df3629fbf63d
3
+ size 4148048
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fdb3c045850a565109244ef3b89475cc2582769417647a41c46da14a3782aa5
3
- size 10680605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c28a86aef25706f0664c264e436b8ff99d29187ea70ebb291097307400c3cb5
3
+ size 673497