Upload folder using huggingface_hub
Browse files- README.md +33 -0
- assets/tokenizer/merges.txt +0 -0
- assets/tokenizer/vocabulary.json +0 -0
- config.json +37 -0
- metadata.json +9 -0
- model.weights.json +0 -0
- model_00000.weights.h5 +3 -0
- model_00001.weights.h5 +3 -0
- model_00002.weights.h5 +3 -0
- model_00003.weights.h5 +3 -0
- model_00004.weights.h5 +3 -0
- model_00005.weights.h5 +3 -0
- model_00006.weights.h5 +3 -0
- model_00007.weights.h5 +3 -0
- model_00008.weights.h5 +3 -0
- model_00009.weights.h5 +3 -0
- model_00010.weights.h5 +3 -0
- model_00011.weights.h5 +3 -0
- model_00012.weights.h5 +3 -0
- model_00013.weights.h5 +3 -0
- model_00014.weights.h5 +3 -0
- model_00015.weights.h5 +3 -0
- model_00016.weights.h5 +3 -0
- model_00017.weights.h5 +3 -0
- model_00018.weights.h5 +3 -0
- model_00019.weights.h5 +3 -0
- model_00020.weights.h5 +3 -0
- model_00021.weights.h5 +3 -0
- model_00022.weights.h5 +3 -0
- model_00023.weights.h5 +3 -0
- model_00024.weights.h5 +3 -0
- model_00025.weights.h5 +3 -0
- model_00026.weights.h5 +3 -0
- model_00027.weights.h5 +3 -0
- model_00028.weights.h5 +3 -0
- model_00029.weights.h5 +3 -0
- model_00030.weights.h5 +3 -0
- model_00031.weights.h5 +3 -0
- preprocessor.json +69 -0
- task.json +114 -0
- tokenizer.json +48 -0
README.md
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: keras-hub
|
| 3 |
+
pipeline_tag: text-generation
|
| 4 |
+
---
|
| 5 |
+
This is a [`Qwen3Moe` model](https://keras.io/api/keras_hub/models/qwen3_moe) uploaded using the KerasHub library. It can be used with the JAX, TensorFlow, and PyTorch backends.
|
| 6 |
+
This model is configured for the `CausalLM` task.
|
| 7 |
+
|
| 8 |
+
Model config:
|
| 9 |
+
* **name:** qwen3_moe_backbone
|
| 10 |
+
* **trainable:** True
|
| 11 |
+
* **dtype:** {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'bfloat16'}, 'registered_name': None}
|
| 12 |
+
* **vocabulary_size:** 151936
|
| 13 |
+
* **num_layers:** 94
|
| 14 |
+
* **num_query_heads:** 64
|
| 15 |
+
* **head_dim:** 128
|
| 16 |
+
* **hidden_dim:** 4096
|
| 17 |
+
* **intermediate_dim:** 12288
|
| 18 |
+
* **moe_intermediate_dim:** 1536
|
| 19 |
+
* **rope_max_wavelength:** 1000000.0
|
| 20 |
+
* **num_key_value_heads:** 4
|
| 21 |
+
* **rope_scaling_factor:** 1.0
|
| 22 |
+
* **layer_norm_epsilon:** 1e-06
|
| 23 |
+
* **dropout:** 0
|
| 24 |
+
* **tie_word_embeddings:** False
|
| 25 |
+
* **sliding_window_size:** None
|
| 26 |
+
* **num_experts:** 128
|
| 27 |
+
* **top_k:** 8
|
| 28 |
+
* **norm_top_k_prob:** True
|
| 29 |
+
* **decoder_sparse_step:** 1
|
| 30 |
+
* **mlp_only_layers:** []
|
| 31 |
+
* **router_aux_loss_coefficient:** 0.001
|
| 32 |
+
|
| 33 |
+
This model card has been generated automatically and should be completed by the model author. See [Model Cards documentation](https://huggingface.co/docs/hub/model-cards) for more information.
|
assets/tokenizer/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
assets/tokenizer/vocabulary.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"module": "keras_hub.src.models.qwen3_moe.qwen3_moe_backbone",
|
| 3 |
+
"class_name": "Qwen3MoeBackbone",
|
| 4 |
+
"config": {
|
| 5 |
+
"name": "qwen3_moe_backbone",
|
| 6 |
+
"trainable": true,
|
| 7 |
+
"dtype": {
|
| 8 |
+
"module": "keras",
|
| 9 |
+
"class_name": "DTypePolicy",
|
| 10 |
+
"config": {
|
| 11 |
+
"name": "bfloat16"
|
| 12 |
+
},
|
| 13 |
+
"registered_name": null
|
| 14 |
+
},
|
| 15 |
+
"vocabulary_size": 151936,
|
| 16 |
+
"num_layers": 94,
|
| 17 |
+
"num_query_heads": 64,
|
| 18 |
+
"head_dim": 128,
|
| 19 |
+
"hidden_dim": 4096,
|
| 20 |
+
"intermediate_dim": 12288,
|
| 21 |
+
"moe_intermediate_dim": 1536,
|
| 22 |
+
"rope_max_wavelength": 1000000.0,
|
| 23 |
+
"num_key_value_heads": 4,
|
| 24 |
+
"rope_scaling_factor": 1.0,
|
| 25 |
+
"layer_norm_epsilon": 1e-06,
|
| 26 |
+
"dropout": 0,
|
| 27 |
+
"tie_word_embeddings": false,
|
| 28 |
+
"sliding_window_size": null,
|
| 29 |
+
"num_experts": 128,
|
| 30 |
+
"top_k": 8,
|
| 31 |
+
"norm_top_k_prob": true,
|
| 32 |
+
"decoder_sparse_step": 1,
|
| 33 |
+
"mlp_only_layers": [],
|
| 34 |
+
"router_aux_loss_coefficient": 0.001
|
| 35 |
+
},
|
| 36 |
+
"registered_name": "keras_hub>Qwen3MoeBackbone"
|
| 37 |
+
}
|
metadata.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"keras_version": "3.11.3",
|
| 3 |
+
"keras_hub_version": "0.23.0.dev0",
|
| 4 |
+
"parameter_count": 235093634560,
|
| 5 |
+
"date_saved": "2025-10-07@19:39:37",
|
| 6 |
+
"tasks": [
|
| 7 |
+
"CausalLM"
|
| 8 |
+
]
|
| 9 |
+
}
|
model.weights.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model_00000.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bdaeb3b608a5b6c341c3a3bd43d0c3c9d11e6dc6fbf69679b7957dc3907c03a
|
| 3 |
+
size 15661661840
|
model_00001.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4af259ee5a5e10864e6b8548725b6fd2772d94be971168e4677914da4017b43e
|
| 3 |
+
size 14926655440
|
model_00002.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2d2c2b9062cd9f88990d7ea723a91b85849f3a2efe484e2542138959e0ea3c0
|
| 3 |
+
size 14926655440
|
model_00003.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70a4721a6a9b36a4e4ece65009aaec72ba498cf97e7fcf43c9761eeab807e1f7
|
| 3 |
+
size 14926655440
|
model_00004.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91de0ac1095ad69de2e118c1864dbe207d3f09c6a8f7ab2366a3b50362c14464
|
| 3 |
+
size 14926655632
|
model_00005.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1493d873dcb343ab14f6d58c7d8c919f0b32c18bf6fae80243a2fe9161d72a57
|
| 3 |
+
size 14926655632
|
model_00006.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d02093cde47b388375f09fadcb2c8c718d4269b4c7eca2beddf80f0dd85adb9a
|
| 3 |
+
size 14926655632
|
model_00007.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3dafccaa6e49ca82de4c44f0bba468c6dfb6d1f2c79039924bac80fcbd4f0f1
|
| 3 |
+
size 14926655632
|
model_00008.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ce10e5219dfe8c291474add9a4bad2e75f3efb6e33054a94a2a7ab72dcda43c
|
| 3 |
+
size 14926655632
|
model_00009.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6482129e5764aec32234a327a794204ba13440912e22cffddb9e29710099da89
|
| 3 |
+
size 14926655632
|
model_00010.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5566033e9c92ef0834558d7c9d25344914729b507f538de5cd528ec6b149c18a
|
| 3 |
+
size 14926655632
|
model_00011.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5eb4339025664291877a1c6eccad36dd940e6a5ac142b01b45f4e0dfc304950f
|
| 3 |
+
size 14926655632
|
model_00012.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:548ae0c5fbe1bbcaf543ee6fcbb854f88eb80bf801668af737d2f14903fabe40
|
| 3 |
+
size 14926655632
|
model_00013.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aad57dd7382d5683ccd6d12dac6257d9f7acfe07ab8ec3c4799d4b51c00ccf6d
|
| 3 |
+
size 14926655632
|
model_00014.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72459f56f8d37a32ce9321ed7157d9f74f0cf8541e085cbbe2a9539b755cefa6
|
| 3 |
+
size 14926655632
|
model_00015.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9569b8b5c76fc8c8cda211fdaa5cd72b6ce84d6f0931ff23896a58c3d5b9cf5e
|
| 3 |
+
size 14926655632
|
model_00016.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bac9b4fdae119a12bd3b930268c6a4d7c9d2bc79ecb54fc2d34a07ae8fa704d
|
| 3 |
+
size 14926655632
|
model_00017.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:397f9957d7031bad6a1483597e1d3121bfd12fdb5b5a9f652a8bd963c063b888
|
| 3 |
+
size 14926655632
|
model_00018.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20db29d8470f890cfb81e3442caa521e3a2789f99897b95a1224c30c67054358
|
| 3 |
+
size 14926655632
|
model_00019.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae1efc486bccc14be04d2faa7daf07693ae16e8138967607b80f26b881709305
|
| 3 |
+
size 14926655632
|
model_00020.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:368228482d93ab3a0fc170cf87e995e4881095dd3c3fb80155fa7e5678862121
|
| 3 |
+
size 14926655632
|
model_00021.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:702e84aa67bfa1c2fc67c23d3ac7b80e54b775d488c64771d1a5f290944fd9f7
|
| 3 |
+
size 14926655632
|
model_00022.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a59efa422de21640373547cedcdc47410bcd8cb73748f4bca44d25695502b87
|
| 3 |
+
size 14926655632
|
model_00023.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5cc784c9df80327ed6cf7f46a678f9e4d9ffb402b34c4f2c143d497bd93fa96
|
| 3 |
+
size 14926655632
|
model_00024.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76a00a77144006f6a2372a6024498969e21a4dbb2c36d8254b25b217970e9501
|
| 3 |
+
size 14926655632
|
model_00025.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d46920d80b31edcfdafe1f69ec22ebf9a03ca67f5d9514b29389a9e0d9e037b4
|
| 3 |
+
size 14926655632
|
model_00026.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5118c5c3765a6c90b6c5a1300935f66c8ff61186e7fe839e87118eb68481ec9d
|
| 3 |
+
size 14926655632
|
model_00027.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0f0aaa7b6294d899e41d08c0a1cae3f0e3bf651bf655b2431094a39db14b966
|
| 3 |
+
size 14926655632
|
model_00028.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de00734da638f303bbf73a99d5e279fe29ec98c7f10857b759436c4bd2102fa0
|
| 3 |
+
size 14926655632
|
model_00029.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5be5b36711efad4c5b25ffd8343a6baff6a5c64f95ecbe1a0daa2565dc6e5e2
|
| 3 |
+
size 14926655632
|
model_00030.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16a02528b26f9d2493eb04ffacfbeb7685ff5bd57af0e7ccc015642a1d242068
|
| 3 |
+
size 14926655632
|
model_00031.weights.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d68c96e63daf05fdaacbc707b6b3eee24d3e184f4802015a9a54a80f7dbfe940
|
| 3 |
+
size 6729886896
|
preprocessor.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"module": "keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm_preprocessor",
|
| 3 |
+
"class_name": "Qwen3MoeCausalLMPreprocessor",
|
| 4 |
+
"config": {
|
| 5 |
+
"name": "qwen3_moe_causal_lm_preprocessor_2",
|
| 6 |
+
"trainable": true,
|
| 7 |
+
"dtype": {
|
| 8 |
+
"module": "keras",
|
| 9 |
+
"class_name": "DTypePolicy",
|
| 10 |
+
"config": {
|
| 11 |
+
"name": "float32"
|
| 12 |
+
},
|
| 13 |
+
"registered_name": null
|
| 14 |
+
},
|
| 15 |
+
"tokenizer": {
|
| 16 |
+
"module": "keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer",
|
| 17 |
+
"class_name": "Qwen3MoeTokenizer",
|
| 18 |
+
"config": {
|
| 19 |
+
"name": "qwen3_moe_tokenizer",
|
| 20 |
+
"trainable": true,
|
| 21 |
+
"dtype": {
|
| 22 |
+
"module": "keras",
|
| 23 |
+
"class_name": "DTypePolicy",
|
| 24 |
+
"config": {
|
| 25 |
+
"name": "int32"
|
| 26 |
+
},
|
| 27 |
+
"registered_name": null
|
| 28 |
+
},
|
| 29 |
+
"config_file": "tokenizer.json",
|
| 30 |
+
"sequence_length": null,
|
| 31 |
+
"add_prefix_space": false,
|
| 32 |
+
"unsplittable_tokens": [
|
| 33 |
+
"</tool_call>",
|
| 34 |
+
"<|endoftext|>",
|
| 35 |
+
"<|video_pad|>",
|
| 36 |
+
"<think>",
|
| 37 |
+
"<|im_end|>",
|
| 38 |
+
"</think>",
|
| 39 |
+
"<|file_sep|>",
|
| 40 |
+
"<tool_call>",
|
| 41 |
+
"<|object_ref_end|>",
|
| 42 |
+
"<|fim_suffix|>",
|
| 43 |
+
"<|quad_start|>",
|
| 44 |
+
"<|image_pad|>",
|
| 45 |
+
"<|object_ref_start|>",
|
| 46 |
+
"<|im_start|>",
|
| 47 |
+
"<|box_start|>",
|
| 48 |
+
"<|fim_middle|>",
|
| 49 |
+
"<|vision_end|>",
|
| 50 |
+
"<|fim_pad|>",
|
| 51 |
+
"<|box_end|>",
|
| 52 |
+
"</tool_response>",
|
| 53 |
+
"<|quad_end|>",
|
| 54 |
+
"<|vision_pad|>",
|
| 55 |
+
"<|fim_prefix|>",
|
| 56 |
+
"<|vision_start|>",
|
| 57 |
+
"<tool_response>",
|
| 58 |
+
"<|repo_name|>"
|
| 59 |
+
]
|
| 60 |
+
},
|
| 61 |
+
"registered_name": "keras_hub>Qwen3MoeTokenizer"
|
| 62 |
+
},
|
| 63 |
+
"config_file": "preprocessor.json",
|
| 64 |
+
"sequence_length": 1024,
|
| 65 |
+
"add_start_token": true,
|
| 66 |
+
"add_end_token": true
|
| 67 |
+
},
|
| 68 |
+
"registered_name": "keras_hub>Qwen3MoeCausalLMPreprocessor"
|
| 69 |
+
}
|
task.json
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"module": "keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm",
|
| 3 |
+
"class_name": "Qwen3MoeCausalLM",
|
| 4 |
+
"config": {
|
| 5 |
+
"backbone": {
|
| 6 |
+
"module": "keras_hub.src.models.qwen3_moe.qwen3_moe_backbone",
|
| 7 |
+
"class_name": "Qwen3MoeBackbone",
|
| 8 |
+
"config": {
|
| 9 |
+
"name": "qwen3_moe_backbone",
|
| 10 |
+
"trainable": true,
|
| 11 |
+
"dtype": {
|
| 12 |
+
"module": "keras",
|
| 13 |
+
"class_name": "DTypePolicy",
|
| 14 |
+
"config": {
|
| 15 |
+
"name": "bfloat16"
|
| 16 |
+
},
|
| 17 |
+
"registered_name": null
|
| 18 |
+
},
|
| 19 |
+
"vocabulary_size": 151936,
|
| 20 |
+
"num_layers": 94,
|
| 21 |
+
"num_query_heads": 64,
|
| 22 |
+
"head_dim": 128,
|
| 23 |
+
"hidden_dim": 4096,
|
| 24 |
+
"intermediate_dim": 12288,
|
| 25 |
+
"moe_intermediate_dim": 1536,
|
| 26 |
+
"rope_max_wavelength": 1000000.0,
|
| 27 |
+
"num_key_value_heads": 4,
|
| 28 |
+
"rope_scaling_factor": 1.0,
|
| 29 |
+
"layer_norm_epsilon": 1e-06,
|
| 30 |
+
"dropout": 0,
|
| 31 |
+
"tie_word_embeddings": false,
|
| 32 |
+
"sliding_window_size": null,
|
| 33 |
+
"num_experts": 128,
|
| 34 |
+
"top_k": 8,
|
| 35 |
+
"norm_top_k_prob": true,
|
| 36 |
+
"decoder_sparse_step": 1,
|
| 37 |
+
"mlp_only_layers": [],
|
| 38 |
+
"router_aux_loss_coefficient": 0.001
|
| 39 |
+
},
|
| 40 |
+
"registered_name": "keras_hub>Qwen3MoeBackbone"
|
| 41 |
+
},
|
| 42 |
+
"preprocessor": {
|
| 43 |
+
"module": "keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm_preprocessor",
|
| 44 |
+
"class_name": "Qwen3MoeCausalLMPreprocessor",
|
| 45 |
+
"config": {
|
| 46 |
+
"name": "qwen3_moe_causal_lm_preprocessor_2",
|
| 47 |
+
"trainable": true,
|
| 48 |
+
"dtype": {
|
| 49 |
+
"module": "keras",
|
| 50 |
+
"class_name": "DTypePolicy",
|
| 51 |
+
"config": {
|
| 52 |
+
"name": "float32"
|
| 53 |
+
},
|
| 54 |
+
"registered_name": null
|
| 55 |
+
},
|
| 56 |
+
"tokenizer": {
|
| 57 |
+
"module": "keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer",
|
| 58 |
+
"class_name": "Qwen3MoeTokenizer",
|
| 59 |
+
"config": {
|
| 60 |
+
"name": "qwen3_moe_tokenizer",
|
| 61 |
+
"trainable": true,
|
| 62 |
+
"dtype": {
|
| 63 |
+
"module": "keras",
|
| 64 |
+
"class_name": "DTypePolicy",
|
| 65 |
+
"config": {
|
| 66 |
+
"name": "int32"
|
| 67 |
+
},
|
| 68 |
+
"registered_name": null
|
| 69 |
+
},
|
| 70 |
+
"config_file": "tokenizer.json",
|
| 71 |
+
"sequence_length": null,
|
| 72 |
+
"add_prefix_space": false,
|
| 73 |
+
"unsplittable_tokens": [
|
| 74 |
+
"</tool_call>",
|
| 75 |
+
"<|endoftext|>",
|
| 76 |
+
"<|video_pad|>",
|
| 77 |
+
"<think>",
|
| 78 |
+
"<|im_end|>",
|
| 79 |
+
"</think>",
|
| 80 |
+
"<|file_sep|>",
|
| 81 |
+
"<tool_call>",
|
| 82 |
+
"<|object_ref_end|>",
|
| 83 |
+
"<|fim_suffix|>",
|
| 84 |
+
"<|quad_start|>",
|
| 85 |
+
"<|image_pad|>",
|
| 86 |
+
"<|object_ref_start|>",
|
| 87 |
+
"<|im_start|>",
|
| 88 |
+
"<|box_start|>",
|
| 89 |
+
"<|fim_middle|>",
|
| 90 |
+
"<|vision_end|>",
|
| 91 |
+
"<|fim_pad|>",
|
| 92 |
+
"<|box_end|>",
|
| 93 |
+
"</tool_response>",
|
| 94 |
+
"<|quad_end|>",
|
| 95 |
+
"<|vision_pad|>",
|
| 96 |
+
"<|fim_prefix|>",
|
| 97 |
+
"<|vision_start|>",
|
| 98 |
+
"<tool_response>",
|
| 99 |
+
"<|repo_name|>"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
"registered_name": "keras_hub>Qwen3MoeTokenizer"
|
| 103 |
+
},
|
| 104 |
+
"config_file": "preprocessor.json",
|
| 105 |
+
"sequence_length": 1024,
|
| 106 |
+
"add_start_token": true,
|
| 107 |
+
"add_end_token": true
|
| 108 |
+
},
|
| 109 |
+
"registered_name": "keras_hub>Qwen3MoeCausalLMPreprocessor"
|
| 110 |
+
},
|
| 111 |
+
"name": "qwen3_moe_causal_lm"
|
| 112 |
+
},
|
| 113 |
+
"registered_name": "keras_hub>Qwen3MoeCausalLM"
|
| 114 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"module": "keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer",
|
| 3 |
+
"class_name": "Qwen3MoeTokenizer",
|
| 4 |
+
"config": {
|
| 5 |
+
"name": "qwen3_moe_tokenizer",
|
| 6 |
+
"trainable": true,
|
| 7 |
+
"dtype": {
|
| 8 |
+
"module": "keras",
|
| 9 |
+
"class_name": "DTypePolicy",
|
| 10 |
+
"config": {
|
| 11 |
+
"name": "int32"
|
| 12 |
+
},
|
| 13 |
+
"registered_name": null
|
| 14 |
+
},
|
| 15 |
+
"config_file": "tokenizer.json",
|
| 16 |
+
"sequence_length": null,
|
| 17 |
+
"add_prefix_space": false,
|
| 18 |
+
"unsplittable_tokens": [
|
| 19 |
+
"</tool_call>",
|
| 20 |
+
"<|endoftext|>",
|
| 21 |
+
"<|video_pad|>",
|
| 22 |
+
"<think>",
|
| 23 |
+
"<|im_end|>",
|
| 24 |
+
"</think>",
|
| 25 |
+
"<|file_sep|>",
|
| 26 |
+
"<tool_call>",
|
| 27 |
+
"<|object_ref_end|>",
|
| 28 |
+
"<|fim_suffix|>",
|
| 29 |
+
"<|quad_start|>",
|
| 30 |
+
"<|image_pad|>",
|
| 31 |
+
"<|object_ref_start|>",
|
| 32 |
+
"<|im_start|>",
|
| 33 |
+
"<|box_start|>",
|
| 34 |
+
"<|fim_middle|>",
|
| 35 |
+
"<|vision_end|>",
|
| 36 |
+
"<|fim_pad|>",
|
| 37 |
+
"<|box_end|>",
|
| 38 |
+
"</tool_response>",
|
| 39 |
+
"<|quad_end|>",
|
| 40 |
+
"<|vision_pad|>",
|
| 41 |
+
"<|fim_prefix|>",
|
| 42 |
+
"<|vision_start|>",
|
| 43 |
+
"<tool_response>",
|
| 44 |
+
"<|repo_name|>"
|
| 45 |
+
]
|
| 46 |
+
},
|
| 47 |
+
"registered_name": "keras_hub>Qwen3MoeTokenizer"
|
| 48 |
+
}
|