jackJessada committed on
Commit
222225b
·
verified ·
1 Parent(s): 42c8b86

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -1,25 +1,33 @@
1
  {
2
  "architectures": [
3
- "Qwen3ForCausalLM"
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 151643,
 
8
  "eos_token_id": 151643,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
11
- "hidden_size": 4096,
12
  "initializer_range": 0.02,
13
- "intermediate_size": 12288,
14
  "max_position_embeddings": 32768,
15
- "max_window_layers": 36,
16
- "model_type": "qwen3",
 
 
 
17
  "num_attention_heads": 32,
18
- "num_hidden_layers": 36,
19
- "num_key_value_heads": 8,
 
 
 
20
  "rms_norm_eps": 1e-06,
21
  "rope_scaling": null,
22
- "rope_theta": 1000000,
 
23
  "sliding_window": null,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "bfloat16",
 
1
  {
2
  "architectures": [
3
+ "Qwen3MoeForCausalLM"
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 151643,
8
+ "decoder_sparse_step": 1,
9
  "eos_token_id": 151643,
10
  "head_dim": 128,
11
  "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
  "initializer_range": 0.02,
14
+ "intermediate_size": 6144,
15
  "max_position_embeddings": 32768,
16
+ "max_window_layers": 48,
17
+ "mlp_only_layers": [],
18
+ "model_type": "qwen3_moe",
19
+ "moe_intermediate_size": 768,
20
+ "norm_topk_prob": true,
21
  "num_attention_heads": 32,
22
+ "num_experts": 128,
23
+ "num_experts_per_tok": 8,
24
+ "num_hidden_layers": 48,
25
+ "num_key_value_heads": 4,
26
+ "output_router_logits": false,
27
  "rms_norm_eps": 1e-06,
28
  "rope_scaling": null,
29
+ "rope_theta": 1000000.0,
30
+ "router_aux_loss_coef": 0.001,
31
  "sliding_window": null,
32
  "tie_word_embeddings": false,
33
  "torch_dtype": "bfloat16",
model-00001-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4161384146bc3e62cb0f1efaae7841838324a3e8b0739b9655499c92db557228
3
+ size 4997184968
model-00002-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5298cdf3cd6d0a00b103df9f39f643b17c7047c13c0decde26e4945d044ed403
3
+ size 4997741608
model-00003-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:481e61bc5023ccd5e6c38eba603365294522aab7bf5f61cebf2227b2669ec1e9
3
+ size 4997742208
model-00004-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64cc9f6654435f2ed1f5356b1120dd0a0b106718b95b7ae1febef85471a7ec61
3
+ size 4997743184
model-00005-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c81e15adef813ea437d3c40642833fec0ff4c7537c2bf91e6efe8fdea01a6afa
3
+ size 4997743184
model-00006-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38afa87191977540673b2be403088c6972bb0fe4e88f7eec220940002897a7fa
3
+ size 4997743184
model-00007-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bdd2c3549237e02098fc5704afa64e839b0a708820e4955142462c4e7ef4487
3
+ size 4997743184
model-00008-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18870ecf16e36bd91e301efa95a10baabe7e9f4f58a5d7fe07817bc8a57cac0b
3
+ size 4997743184
model-00009-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589b2ba8a81eb98de7eb5740e6543f293bed0c29fe9acc9724f8d24bc4f2a066
3
+ size 4997743184
model-00010-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec9ce76579d5a94aba31b46b5556ff12bda3b122324911a0f0fbc970a1abb655
3
+ size 4997743184
model-00011-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f02e6bb22cb188a9c3d0ac2dcd913592bc4eb74cc9bdb482ebc45cf7fe950a1e
3
+ size 4997743184
model-00012-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda89bb8181d57315dc14564e8b8d7a74eb0b67bc87514af21e63e8ead185a06
3
+ size 4997743184
model-00013-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23b1436d23345116a1ba334610af17eb8f5ef01ad7ab8491f4fba07e9a6bdf13
3
+ size 1094220288
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff