prasadsachin committed on
Commit
84ce90f
·
verified ·
1 Parent(s): da2fb55

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: keras-hub
3
+ pipeline_tag: text-generation
4
+ ---
5
+ This is a [`Qwen3Moe` model](https://keras.io/api/keras_hub/models/qwen3_moe) uploaded using the KerasHub library and can be used with JAX, TensorFlow, and PyTorch backends.
6
+ This model is related to a `CausalLM` task.
7
+
8
+ Model config:
9
+ * **name:** qwen3_moe_backbone
10
+ * **trainable:** True
11
+ * **dtype:** {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'bfloat16'}, 'registered_name': None}
12
+ * **vocabulary_size:** 151936
13
+ * **num_layers:** 94
14
+ * **num_query_heads:** 64
15
+ * **head_dim:** 128
16
+ * **hidden_dim:** 4096
17
+ * **intermediate_dim:** 12288
18
+ * **moe_intermediate_dim:** 1536
19
+ * **rope_max_wavelength:** 1000000.0
20
+ * **num_key_value_heads:** 4
21
+ * **rope_scaling_factor:** 1.0
22
+ * **layer_norm_epsilon:** 1e-06
23
+ * **dropout:** 0
24
+ * **tie_word_embeddings:** False
25
+ * **sliding_window_size:** None
26
+ * **num_experts:** 128
27
+ * **top_k:** 8
28
+ * **norm_top_k_prob:** True
29
+ * **decoder_sparse_step:** 1
30
+ * **mlp_only_layers:** []
31
+ * **router_aux_loss_coefficient:** 0.001
32
+
33
+ This model card has been generated automatically and should be completed by the model author. See [Model Cards documentation](https://huggingface.co/docs/hub/model-cards) for more information.
assets/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
assets/tokenizer/vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.qwen3_moe.qwen3_moe_backbone",
3
+ "class_name": "Qwen3MoeBackbone",
4
+ "config": {
5
+ "name": "qwen3_moe_backbone",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "bfloat16"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "vocabulary_size": 151936,
16
+ "num_layers": 94,
17
+ "num_query_heads": 64,
18
+ "head_dim": 128,
19
+ "hidden_dim": 4096,
20
+ "intermediate_dim": 12288,
21
+ "moe_intermediate_dim": 1536,
22
+ "rope_max_wavelength": 1000000.0,
23
+ "num_key_value_heads": 4,
24
+ "rope_scaling_factor": 1.0,
25
+ "layer_norm_epsilon": 1e-06,
26
+ "dropout": 0,
27
+ "tie_word_embeddings": false,
28
+ "sliding_window_size": null,
29
+ "num_experts": 128,
30
+ "top_k": 8,
31
+ "norm_top_k_prob": true,
32
+ "decoder_sparse_step": 1,
33
+ "mlp_only_layers": [],
34
+ "router_aux_loss_coefficient": 0.001
35
+ },
36
+ "registered_name": "keras_hub>Qwen3MoeBackbone"
37
+ }
metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "keras_version": "3.11.3",
3
+ "keras_hub_version": "0.23.0.dev0",
4
+ "parameter_count": 235093634560,
5
+ "date_saved": "2025-10-07@19:39:37",
6
+ "tasks": [
7
+ "CausalLM"
8
+ ]
9
+ }
model.weights.json ADDED
The diff for this file is too large to render. See raw diff
 
model_00000.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bdaeb3b608a5b6c341c3a3bd43d0c3c9d11e6dc6fbf69679b7957dc3907c03a
3
+ size 15661661840
model_00001.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af259ee5a5e10864e6b8548725b6fd2772d94be971168e4677914da4017b43e
3
+ size 14926655440
model_00002.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d2c2b9062cd9f88990d7ea723a91b85849f3a2efe484e2542138959e0ea3c0
3
+ size 14926655440
model_00003.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70a4721a6a9b36a4e4ece65009aaec72ba498cf97e7fcf43c9761eeab807e1f7
3
+ size 14926655440
model_00004.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91de0ac1095ad69de2e118c1864dbe207d3f09c6a8f7ab2366a3b50362c14464
3
+ size 14926655632
model_00005.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1493d873dcb343ab14f6d58c7d8c919f0b32c18bf6fae80243a2fe9161d72a57
3
+ size 14926655632
model_00006.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d02093cde47b388375f09fadcb2c8c718d4269b4c7eca2beddf80f0dd85adb9a
3
+ size 14926655632
model_00007.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3dafccaa6e49ca82de4c44f0bba468c6dfb6d1f2c79039924bac80fcbd4f0f1
3
+ size 14926655632
model_00008.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ce10e5219dfe8c291474add9a4bad2e75f3efb6e33054a94a2a7ab72dcda43c
3
+ size 14926655632
model_00009.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6482129e5764aec32234a327a794204ba13440912e22cffddb9e29710099da89
3
+ size 14926655632
model_00010.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5566033e9c92ef0834558d7c9d25344914729b507f538de5cd528ec6b149c18a
3
+ size 14926655632
model_00011.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eb4339025664291877a1c6eccad36dd940e6a5ac142b01b45f4e0dfc304950f
3
+ size 14926655632
model_00012.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:548ae0c5fbe1bbcaf543ee6fcbb854f88eb80bf801668af737d2f14903fabe40
3
+ size 14926655632
model_00013.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad57dd7382d5683ccd6d12dac6257d9f7acfe07ab8ec3c4799d4b51c00ccf6d
3
+ size 14926655632
model_00014.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72459f56f8d37a32ce9321ed7157d9f74f0cf8541e085cbbe2a9539b755cefa6
3
+ size 14926655632
model_00015.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9569b8b5c76fc8c8cda211fdaa5cd72b6ce84d6f0931ff23896a58c3d5b9cf5e
3
+ size 14926655632
model_00016.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bac9b4fdae119a12bd3b930268c6a4d7c9d2bc79ecb54fc2d34a07ae8fa704d
3
+ size 14926655632
model_00017.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:397f9957d7031bad6a1483597e1d3121bfd12fdb5b5a9f652a8bd963c063b888
3
+ size 14926655632
model_00018.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20db29d8470f890cfb81e3442caa521e3a2789f99897b95a1224c30c67054358
3
+ size 14926655632
model_00019.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1efc486bccc14be04d2faa7daf07693ae16e8138967607b80f26b881709305
3
+ size 14926655632
model_00020.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:368228482d93ab3a0fc170cf87e995e4881095dd3c3fb80155fa7e5678862121
3
+ size 14926655632
model_00021.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:702e84aa67bfa1c2fc67c23d3ac7b80e54b775d488c64771d1a5f290944fd9f7
3
+ size 14926655632
model_00022.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a59efa422de21640373547cedcdc47410bcd8cb73748f4bca44d25695502b87
3
+ size 14926655632
model_00023.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cc784c9df80327ed6cf7f46a678f9e4d9ffb402b34c4f2c143d497bd93fa96
3
+ size 14926655632
model_00024.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76a00a77144006f6a2372a6024498969e21a4dbb2c36d8254b25b217970e9501
3
+ size 14926655632
model_00025.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d46920d80b31edcfdafe1f69ec22ebf9a03ca67f5d9514b29389a9e0d9e037b4
3
+ size 14926655632
model_00026.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5118c5c3765a6c90b6c5a1300935f66c8ff61186e7fe839e87118eb68481ec9d
3
+ size 14926655632
model_00027.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0f0aaa7b6294d899e41d08c0a1cae3f0e3bf651bf655b2431094a39db14b966
3
+ size 14926655632
model_00028.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de00734da638f303bbf73a99d5e279fe29ec98c7f10857b759436c4bd2102fa0
3
+ size 14926655632
model_00029.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5be5b36711efad4c5b25ffd8343a6baff6a5c64f95ecbe1a0daa2565dc6e5e2
3
+ size 14926655632
model_00030.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16a02528b26f9d2493eb04ffacfbeb7685ff5bd57af0e7ccc015642a1d242068
3
+ size 14926655632
model_00031.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d68c96e63daf05fdaacbc707b6b3eee24d3e184f4802015a9a54a80f7dbfe940
3
+ size 6729886896
preprocessor.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm_preprocessor",
3
+ "class_name": "Qwen3MoeCausalLMPreprocessor",
4
+ "config": {
5
+ "name": "qwen3_moe_causal_lm_preprocessor_2",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "float32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "tokenizer": {
16
+ "module": "keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer",
17
+ "class_name": "Qwen3MoeTokenizer",
18
+ "config": {
19
+ "name": "qwen3_moe_tokenizer",
20
+ "trainable": true,
21
+ "dtype": {
22
+ "module": "keras",
23
+ "class_name": "DTypePolicy",
24
+ "config": {
25
+ "name": "int32"
26
+ },
27
+ "registered_name": null
28
+ },
29
+ "config_file": "tokenizer.json",
30
+ "sequence_length": null,
31
+ "add_prefix_space": false,
32
+ "unsplittable_tokens": [
33
+ "</tool_call>",
34
+ "<|endoftext|>",
35
+ "<|video_pad|>",
36
+ "<think>",
37
+ "<|im_end|>",
38
+ "</think>",
39
+ "<|file_sep|>",
40
+ "<tool_call>",
41
+ "<|object_ref_end|>",
42
+ "<|fim_suffix|>",
43
+ "<|quad_start|>",
44
+ "<|image_pad|>",
45
+ "<|object_ref_start|>",
46
+ "<|im_start|>",
47
+ "<|box_start|>",
48
+ "<|fim_middle|>",
49
+ "<|vision_end|>",
50
+ "<|fim_pad|>",
51
+ "<|box_end|>",
52
+ "</tool_response>",
53
+ "<|quad_end|>",
54
+ "<|vision_pad|>",
55
+ "<|fim_prefix|>",
56
+ "<|vision_start|>",
57
+ "<tool_response>",
58
+ "<|repo_name|>"
59
+ ]
60
+ },
61
+ "registered_name": "keras_hub>Qwen3MoeTokenizer"
62
+ },
63
+ "config_file": "preprocessor.json",
64
+ "sequence_length": 1024,
65
+ "add_start_token": true,
66
+ "add_end_token": true
67
+ },
68
+ "registered_name": "keras_hub>Qwen3MoeCausalLMPreprocessor"
69
+ }
task.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm",
3
+ "class_name": "Qwen3MoeCausalLM",
4
+ "config": {
5
+ "backbone": {
6
+ "module": "keras_hub.src.models.qwen3_moe.qwen3_moe_backbone",
7
+ "class_name": "Qwen3MoeBackbone",
8
+ "config": {
9
+ "name": "qwen3_moe_backbone",
10
+ "trainable": true,
11
+ "dtype": {
12
+ "module": "keras",
13
+ "class_name": "DTypePolicy",
14
+ "config": {
15
+ "name": "bfloat16"
16
+ },
17
+ "registered_name": null
18
+ },
19
+ "vocabulary_size": 151936,
20
+ "num_layers": 94,
21
+ "num_query_heads": 64,
22
+ "head_dim": 128,
23
+ "hidden_dim": 4096,
24
+ "intermediate_dim": 12288,
25
+ "moe_intermediate_dim": 1536,
26
+ "rope_max_wavelength": 1000000.0,
27
+ "num_key_value_heads": 4,
28
+ "rope_scaling_factor": 1.0,
29
+ "layer_norm_epsilon": 1e-06,
30
+ "dropout": 0,
31
+ "tie_word_embeddings": false,
32
+ "sliding_window_size": null,
33
+ "num_experts": 128,
34
+ "top_k": 8,
35
+ "norm_top_k_prob": true,
36
+ "decoder_sparse_step": 1,
37
+ "mlp_only_layers": [],
38
+ "router_aux_loss_coefficient": 0.001
39
+ },
40
+ "registered_name": "keras_hub>Qwen3MoeBackbone"
41
+ },
42
+ "preprocessor": {
43
+ "module": "keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm_preprocessor",
44
+ "class_name": "Qwen3MoeCausalLMPreprocessor",
45
+ "config": {
46
+ "name": "qwen3_moe_causal_lm_preprocessor_2",
47
+ "trainable": true,
48
+ "dtype": {
49
+ "module": "keras",
50
+ "class_name": "DTypePolicy",
51
+ "config": {
52
+ "name": "float32"
53
+ },
54
+ "registered_name": null
55
+ },
56
+ "tokenizer": {
57
+ "module": "keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer",
58
+ "class_name": "Qwen3MoeTokenizer",
59
+ "config": {
60
+ "name": "qwen3_moe_tokenizer",
61
+ "trainable": true,
62
+ "dtype": {
63
+ "module": "keras",
64
+ "class_name": "DTypePolicy",
65
+ "config": {
66
+ "name": "int32"
67
+ },
68
+ "registered_name": null
69
+ },
70
+ "config_file": "tokenizer.json",
71
+ "sequence_length": null,
72
+ "add_prefix_space": false,
73
+ "unsplittable_tokens": [
74
+ "</tool_call>",
75
+ "<|endoftext|>",
76
+ "<|video_pad|>",
77
+ "<think>",
78
+ "<|im_end|>",
79
+ "</think>",
80
+ "<|file_sep|>",
81
+ "<tool_call>",
82
+ "<|object_ref_end|>",
83
+ "<|fim_suffix|>",
84
+ "<|quad_start|>",
85
+ "<|image_pad|>",
86
+ "<|object_ref_start|>",
87
+ "<|im_start|>",
88
+ "<|box_start|>",
89
+ "<|fim_middle|>",
90
+ "<|vision_end|>",
91
+ "<|fim_pad|>",
92
+ "<|box_end|>",
93
+ "</tool_response>",
94
+ "<|quad_end|>",
95
+ "<|vision_pad|>",
96
+ "<|fim_prefix|>",
97
+ "<|vision_start|>",
98
+ "<tool_response>",
99
+ "<|repo_name|>"
100
+ ]
101
+ },
102
+ "registered_name": "keras_hub>Qwen3MoeTokenizer"
103
+ },
104
+ "config_file": "preprocessor.json",
105
+ "sequence_length": 1024,
106
+ "add_start_token": true,
107
+ "add_end_token": true
108
+ },
109
+ "registered_name": "keras_hub>Qwen3MoeCausalLMPreprocessor"
110
+ },
111
+ "name": "qwen3_moe_causal_lm"
112
+ },
113
+ "registered_name": "keras_hub>Qwen3MoeCausalLM"
114
+ }
tokenizer.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer",
3
+ "class_name": "Qwen3MoeTokenizer",
4
+ "config": {
5
+ "name": "qwen3_moe_tokenizer",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "int32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "config_file": "tokenizer.json",
16
+ "sequence_length": null,
17
+ "add_prefix_space": false,
18
+ "unsplittable_tokens": [
19
+ "</tool_call>",
20
+ "<|endoftext|>",
21
+ "<|video_pad|>",
22
+ "<think>",
23
+ "<|im_end|>",
24
+ "</think>",
25
+ "<|file_sep|>",
26
+ "<tool_call>",
27
+ "<|object_ref_end|>",
28
+ "<|fim_suffix|>",
29
+ "<|quad_start|>",
30
+ "<|image_pad|>",
31
+ "<|object_ref_start|>",
32
+ "<|im_start|>",
33
+ "<|box_start|>",
34
+ "<|fim_middle|>",
35
+ "<|vision_end|>",
36
+ "<|fim_pad|>",
37
+ "<|box_end|>",
38
+ "</tool_response>",
39
+ "<|quad_end|>",
40
+ "<|vision_pad|>",
41
+ "<|fim_prefix|>",
42
+ "<|vision_start|>",
43
+ "<tool_response>",
44
+ "<|repo_name|>"
45
+ ]
46
+ },
47
+ "registered_name": "keras_hub>Qwen3MoeTokenizer"
48
+ }