qwen3_coder_instruct_30b_a3b_en / model.weights.json
prasadsachin's picture
Upload folder using huggingface_hub
2f57005 verified
{
"metadata": {
"total_size": 122128490496.0
},
"weight_map": {
"/layers/reversible_embedding/vars": [
"model_00000.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder/mlp/expert_bank/vars": [
"model_00000.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder/mlp/_sparse_feedforward_gate_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder/_feedforward_layernorm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder/_self_attention_layer/_key_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder/_self_attention_layer/_key_dense_layer_norm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder/_self_attention_layer/_output_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder/_self_attention_layer/_query_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder/_self_attention_layer/_query_dense_layer_norm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder/_self_attention_layer/_value_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder/_self_attention_layernorm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_1/mlp/expert_bank/vars": [
"model_00000.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_1/mlp/_sparse_feedforward_gate_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_1/_feedforward_layernorm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_1/_self_attention_layer/_key_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_1/_self_attention_layer/_key_dense_layer_norm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_1/_self_attention_layer/_output_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_1/_self_attention_layer/_query_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_1/_self_attention_layer/_query_dense_layer_norm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_1/_self_attention_layer/_value_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_1/_self_attention_layernorm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_2/mlp/expert_bank/vars": [
"model_00000.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_2/mlp/_sparse_feedforward_gate_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_2/_feedforward_layernorm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_2/_self_attention_layer/_key_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_2/_self_attention_layer/_key_dense_layer_norm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_2/_self_attention_layer/_output_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_2/_self_attention_layer/_query_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_2/_self_attention_layer/_query_dense_layer_norm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_2/_self_attention_layer/_value_dense/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_2/_self_attention_layernorm/vars": "model_00000.weights.h5",
"/layers/qwen3_moe_transformer_decoder_3/mlp/expert_bank/vars": [
"model_00001.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_3/mlp/_sparse_feedforward_gate_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_3/_feedforward_layernorm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_3/_self_attention_layer/_key_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_3/_self_attention_layer/_key_dense_layer_norm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_3/_self_attention_layer/_output_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_3/_self_attention_layer/_query_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_3/_self_attention_layer/_query_dense_layer_norm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_3/_self_attention_layer/_value_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_3/_self_attention_layernorm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_4/mlp/expert_bank/vars": [
"model_00001.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_4/mlp/_sparse_feedforward_gate_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_4/_feedforward_layernorm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_4/_self_attention_layer/_key_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_4/_self_attention_layer/_key_dense_layer_norm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_4/_self_attention_layer/_output_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_4/_self_attention_layer/_query_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_4/_self_attention_layer/_query_dense_layer_norm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_4/_self_attention_layer/_value_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_4/_self_attention_layernorm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_5/mlp/expert_bank/vars": [
"model_00001.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_5/mlp/_sparse_feedforward_gate_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_5/_feedforward_layernorm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_5/_self_attention_layer/_key_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_5/_self_attention_layer/_key_dense_layer_norm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_5/_self_attention_layer/_output_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_5/_self_attention_layer/_query_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_5/_self_attention_layer/_query_dense_layer_norm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_5/_self_attention_layer/_value_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_5/_self_attention_layernorm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_6/mlp/expert_bank/vars": [
"model_00001.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_6/mlp/_sparse_feedforward_gate_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_6/_feedforward_layernorm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_6/_self_attention_layer/_key_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_6/_self_attention_layer/_key_dense_layer_norm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_6/_self_attention_layer/_output_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_6/_self_attention_layer/_query_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_6/_self_attention_layer/_query_dense_layer_norm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_6/_self_attention_layer/_value_dense/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_6/_self_attention_layernorm/vars": "model_00001.weights.h5",
"/layers/qwen3_moe_transformer_decoder_7/mlp/expert_bank/vars": [
"model_00002.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_7/mlp/_sparse_feedforward_gate_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_7/_feedforward_layernorm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_7/_self_attention_layer/_key_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_7/_self_attention_layer/_key_dense_layer_norm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_7/_self_attention_layer/_output_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_7/_self_attention_layer/_query_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_7/_self_attention_layer/_query_dense_layer_norm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_7/_self_attention_layer/_value_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_7/_self_attention_layernorm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_8/mlp/expert_bank/vars": [
"model_00002.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_8/mlp/_sparse_feedforward_gate_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_8/_feedforward_layernorm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_8/_self_attention_layer/_key_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_8/_self_attention_layer/_key_dense_layer_norm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_8/_self_attention_layer/_output_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_8/_self_attention_layer/_query_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_8/_self_attention_layer/_query_dense_layer_norm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_8/_self_attention_layer/_value_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_8/_self_attention_layernorm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_9/mlp/expert_bank/vars": [
"model_00002.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_9/mlp/_sparse_feedforward_gate_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_9/_feedforward_layernorm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_9/_self_attention_layer/_key_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_9/_self_attention_layer/_key_dense_layer_norm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_9/_self_attention_layer/_output_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_9/_self_attention_layer/_query_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_9/_self_attention_layer/_query_dense_layer_norm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_9/_self_attention_layer/_value_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_9/_self_attention_layernorm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_10/mlp/expert_bank/vars": [
"model_00002.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_10/mlp/_sparse_feedforward_gate_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_10/_feedforward_layernorm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_10/_self_attention_layer/_key_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_10/_self_attention_layer/_key_dense_layer_norm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_10/_self_attention_layer/_output_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_10/_self_attention_layer/_query_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_10/_self_attention_layer/_query_dense_layer_norm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_10/_self_attention_layer/_value_dense/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_10/_self_attention_layernorm/vars": "model_00002.weights.h5",
"/layers/qwen3_moe_transformer_decoder_11/mlp/expert_bank/vars": [
"model_00003.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_11/mlp/_sparse_feedforward_gate_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_11/_feedforward_layernorm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_11/_self_attention_layer/_key_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_11/_self_attention_layer/_key_dense_layer_norm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_11/_self_attention_layer/_output_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_11/_self_attention_layer/_query_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_11/_self_attention_layer/_query_dense_layer_norm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_11/_self_attention_layer/_value_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_11/_self_attention_layernorm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_12/mlp/expert_bank/vars": [
"model_00003.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_12/mlp/_sparse_feedforward_gate_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_12/_feedforward_layernorm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_12/_self_attention_layer/_key_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_12/_self_attention_layer/_key_dense_layer_norm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_12/_self_attention_layer/_output_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_12/_self_attention_layer/_query_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_12/_self_attention_layer/_query_dense_layer_norm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_12/_self_attention_layer/_value_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_12/_self_attention_layernorm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_13/mlp/expert_bank/vars": [
"model_00003.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_13/mlp/_sparse_feedforward_gate_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_13/_feedforward_layernorm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_13/_self_attention_layer/_key_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_13/_self_attention_layer/_key_dense_layer_norm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_13/_self_attention_layer/_output_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_13/_self_attention_layer/_query_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_13/_self_attention_layer/_query_dense_layer_norm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_13/_self_attention_layer/_value_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_13/_self_attention_layernorm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_14/mlp/expert_bank/vars": [
"model_00003.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_14/mlp/_sparse_feedforward_gate_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_14/_feedforward_layernorm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_14/_self_attention_layer/_key_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_14/_self_attention_layer/_key_dense_layer_norm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_14/_self_attention_layer/_output_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_14/_self_attention_layer/_query_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_14/_self_attention_layer/_query_dense_layer_norm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_14/_self_attention_layer/_value_dense/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_14/_self_attention_layernorm/vars": "model_00003.weights.h5",
"/layers/qwen3_moe_transformer_decoder_15/mlp/expert_bank/vars": [
"model_00004.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_15/mlp/_sparse_feedforward_gate_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_15/_feedforward_layernorm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_15/_self_attention_layer/_key_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_15/_self_attention_layer/_key_dense_layer_norm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_15/_self_attention_layer/_output_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_15/_self_attention_layer/_query_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_15/_self_attention_layer/_query_dense_layer_norm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_15/_self_attention_layer/_value_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_15/_self_attention_layernorm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_16/mlp/expert_bank/vars": [
"model_00004.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_16/mlp/_sparse_feedforward_gate_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_16/_feedforward_layernorm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_16/_self_attention_layer/_key_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_16/_self_attention_layer/_key_dense_layer_norm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_16/_self_attention_layer/_output_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_16/_self_attention_layer/_query_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_16/_self_attention_layer/_query_dense_layer_norm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_16/_self_attention_layer/_value_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_16/_self_attention_layernorm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_17/mlp/expert_bank/vars": [
"model_00004.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_17/mlp/_sparse_feedforward_gate_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_17/_feedforward_layernorm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_17/_self_attention_layer/_key_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_17/_self_attention_layer/_key_dense_layer_norm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_17/_self_attention_layer/_output_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_17/_self_attention_layer/_query_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_17/_self_attention_layer/_query_dense_layer_norm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_17/_self_attention_layer/_value_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_17/_self_attention_layernorm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_18/mlp/expert_bank/vars": [
"model_00004.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_18/mlp/_sparse_feedforward_gate_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_18/_feedforward_layernorm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_18/_self_attention_layer/_key_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_18/_self_attention_layer/_key_dense_layer_norm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_18/_self_attention_layer/_output_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_18/_self_attention_layer/_query_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_18/_self_attention_layer/_query_dense_layer_norm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_18/_self_attention_layer/_value_dense/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_18/_self_attention_layernorm/vars": "model_00004.weights.h5",
"/layers/qwen3_moe_transformer_decoder_19/mlp/expert_bank/vars": [
"model_00005.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_19/mlp/_sparse_feedforward_gate_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_19/_feedforward_layernorm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_19/_self_attention_layer/_key_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_19/_self_attention_layer/_key_dense_layer_norm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_19/_self_attention_layer/_output_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_19/_self_attention_layer/_query_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_19/_self_attention_layer/_query_dense_layer_norm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_19/_self_attention_layer/_value_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_19/_self_attention_layernorm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_20/mlp/expert_bank/vars": [
"model_00005.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_20/mlp/_sparse_feedforward_gate_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_20/_feedforward_layernorm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_20/_self_attention_layer/_key_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_20/_self_attention_layer/_key_dense_layer_norm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_20/_self_attention_layer/_output_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_20/_self_attention_layer/_query_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_20/_self_attention_layer/_query_dense_layer_norm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_20/_self_attention_layer/_value_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_20/_self_attention_layernorm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_21/mlp/expert_bank/vars": [
"model_00005.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_21/mlp/_sparse_feedforward_gate_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_21/_feedforward_layernorm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_21/_self_attention_layer/_key_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_21/_self_attention_layer/_key_dense_layer_norm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_21/_self_attention_layer/_output_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_21/_self_attention_layer/_query_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_21/_self_attention_layer/_query_dense_layer_norm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_21/_self_attention_layer/_value_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_21/_self_attention_layernorm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_22/mlp/expert_bank/vars": [
"model_00005.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_22/mlp/_sparse_feedforward_gate_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_22/_feedforward_layernorm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_22/_self_attention_layer/_key_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_22/_self_attention_layer/_key_dense_layer_norm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_22/_self_attention_layer/_output_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_22/_self_attention_layer/_query_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_22/_self_attention_layer/_query_dense_layer_norm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_22/_self_attention_layer/_value_dense/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_22/_self_attention_layernorm/vars": "model_00005.weights.h5",
"/layers/qwen3_moe_transformer_decoder_23/mlp/expert_bank/vars": [
"model_00006.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_23/mlp/_sparse_feedforward_gate_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_23/_feedforward_layernorm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_23/_self_attention_layer/_key_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_23/_self_attention_layer/_key_dense_layer_norm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_23/_self_attention_layer/_output_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_23/_self_attention_layer/_query_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_23/_self_attention_layer/_query_dense_layer_norm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_23/_self_attention_layer/_value_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_23/_self_attention_layernorm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_24/mlp/expert_bank/vars": [
"model_00006.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_24/mlp/_sparse_feedforward_gate_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_24/_feedforward_layernorm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_24/_self_attention_layer/_key_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_24/_self_attention_layer/_key_dense_layer_norm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_24/_self_attention_layer/_output_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_24/_self_attention_layer/_query_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_24/_self_attention_layer/_query_dense_layer_norm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_24/_self_attention_layer/_value_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_24/_self_attention_layernorm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_25/mlp/expert_bank/vars": [
"model_00006.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_25/mlp/_sparse_feedforward_gate_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_25/_feedforward_layernorm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_25/_self_attention_layer/_key_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_25/_self_attention_layer/_key_dense_layer_norm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_25/_self_attention_layer/_output_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_25/_self_attention_layer/_query_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_25/_self_attention_layer/_query_dense_layer_norm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_25/_self_attention_layer/_value_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_25/_self_attention_layernorm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_26/mlp/expert_bank/vars": [
"model_00006.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_26/mlp/_sparse_feedforward_gate_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_26/_feedforward_layernorm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_26/_self_attention_layer/_key_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_26/_self_attention_layer/_key_dense_layer_norm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_26/_self_attention_layer/_output_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_26/_self_attention_layer/_query_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_26/_self_attention_layer/_query_dense_layer_norm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_26/_self_attention_layer/_value_dense/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_26/_self_attention_layernorm/vars": "model_00006.weights.h5",
"/layers/qwen3_moe_transformer_decoder_27/mlp/expert_bank/vars": [
"model_00007.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_27/mlp/_sparse_feedforward_gate_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_27/_feedforward_layernorm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_27/_self_attention_layer/_key_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_27/_self_attention_layer/_key_dense_layer_norm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_27/_self_attention_layer/_output_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_27/_self_attention_layer/_query_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_27/_self_attention_layer/_query_dense_layer_norm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_27/_self_attention_layer/_value_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_27/_self_attention_layernorm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_28/mlp/expert_bank/vars": [
"model_00007.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_28/mlp/_sparse_feedforward_gate_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_28/_feedforward_layernorm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_28/_self_attention_layer/_key_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_28/_self_attention_layer/_key_dense_layer_norm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_28/_self_attention_layer/_output_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_28/_self_attention_layer/_query_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_28/_self_attention_layer/_query_dense_layer_norm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_28/_self_attention_layer/_value_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_28/_self_attention_layernorm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_29/mlp/expert_bank/vars": [
"model_00007.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_29/mlp/_sparse_feedforward_gate_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_29/_feedforward_layernorm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_29/_self_attention_layer/_key_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_29/_self_attention_layer/_key_dense_layer_norm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_29/_self_attention_layer/_output_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_29/_self_attention_layer/_query_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_29/_self_attention_layer/_query_dense_layer_norm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_29/_self_attention_layer/_value_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_29/_self_attention_layernorm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_30/mlp/expert_bank/vars": [
"model_00007.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_30/mlp/_sparse_feedforward_gate_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_30/_feedforward_layernorm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_30/_self_attention_layer/_key_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_30/_self_attention_layer/_key_dense_layer_norm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_30/_self_attention_layer/_output_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_30/_self_attention_layer/_query_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_30/_self_attention_layer/_query_dense_layer_norm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_30/_self_attention_layer/_value_dense/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_30/_self_attention_layernorm/vars": "model_00007.weights.h5",
"/layers/qwen3_moe_transformer_decoder_31/mlp/expert_bank/vars": [
"model_00008.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_31/mlp/_sparse_feedforward_gate_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_31/_feedforward_layernorm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_31/_self_attention_layer/_key_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_31/_self_attention_layer/_key_dense_layer_norm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_31/_self_attention_layer/_output_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_31/_self_attention_layer/_query_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_31/_self_attention_layer/_query_dense_layer_norm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_31/_self_attention_layer/_value_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_31/_self_attention_layernorm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_32/mlp/expert_bank/vars": [
"model_00008.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_32/mlp/_sparse_feedforward_gate_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_32/_feedforward_layernorm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_32/_self_attention_layer/_key_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_32/_self_attention_layer/_key_dense_layer_norm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_32/_self_attention_layer/_output_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_32/_self_attention_layer/_query_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_32/_self_attention_layer/_query_dense_layer_norm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_32/_self_attention_layer/_value_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_32/_self_attention_layernorm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_33/mlp/expert_bank/vars": [
"model_00008.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_33/mlp/_sparse_feedforward_gate_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_33/_feedforward_layernorm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_33/_self_attention_layer/_key_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_33/_self_attention_layer/_key_dense_layer_norm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_33/_self_attention_layer/_output_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_33/_self_attention_layer/_query_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_33/_self_attention_layer/_query_dense_layer_norm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_33/_self_attention_layer/_value_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_33/_self_attention_layernorm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_34/mlp/expert_bank/vars": [
"model_00008.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_34/mlp/_sparse_feedforward_gate_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_34/_feedforward_layernorm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_34/_self_attention_layer/_key_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_34/_self_attention_layer/_key_dense_layer_norm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_34/_self_attention_layer/_output_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_34/_self_attention_layer/_query_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_34/_self_attention_layer/_query_dense_layer_norm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_34/_self_attention_layer/_value_dense/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_34/_self_attention_layernorm/vars": "model_00008.weights.h5",
"/layers/qwen3_moe_transformer_decoder_35/mlp/expert_bank/vars": [
"model_00009.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_35/mlp/_sparse_feedforward_gate_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_35/_feedforward_layernorm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_35/_self_attention_layer/_key_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_35/_self_attention_layer/_key_dense_layer_norm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_35/_self_attention_layer/_output_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_35/_self_attention_layer/_query_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_35/_self_attention_layer/_query_dense_layer_norm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_35/_self_attention_layer/_value_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_35/_self_attention_layernorm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_36/mlp/expert_bank/vars": [
"model_00009.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_36/mlp/_sparse_feedforward_gate_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_36/_feedforward_layernorm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_36/_self_attention_layer/_key_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_36/_self_attention_layer/_key_dense_layer_norm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_36/_self_attention_layer/_output_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_36/_self_attention_layer/_query_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_36/_self_attention_layer/_query_dense_layer_norm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_36/_self_attention_layer/_value_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_36/_self_attention_layernorm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_37/mlp/expert_bank/vars": [
"model_00009.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_37/mlp/_sparse_feedforward_gate_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_37/_feedforward_layernorm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_37/_self_attention_layer/_key_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_37/_self_attention_layer/_key_dense_layer_norm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_37/_self_attention_layer/_output_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_37/_self_attention_layer/_query_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_37/_self_attention_layer/_query_dense_layer_norm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_37/_self_attention_layer/_value_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_37/_self_attention_layernorm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_38/mlp/expert_bank/vars": [
"model_00009.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_38/mlp/_sparse_feedforward_gate_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_38/_feedforward_layernorm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_38/_self_attention_layer/_key_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_38/_self_attention_layer/_key_dense_layer_norm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_38/_self_attention_layer/_output_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_38/_self_attention_layer/_query_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_38/_self_attention_layer/_query_dense_layer_norm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_38/_self_attention_layer/_value_dense/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_38/_self_attention_layernorm/vars": "model_00009.weights.h5",
"/layers/qwen3_moe_transformer_decoder_39/mlp/expert_bank/vars": [
"model_00010.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_39/mlp/_sparse_feedforward_gate_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_39/_feedforward_layernorm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_39/_self_attention_layer/_key_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_39/_self_attention_layer/_key_dense_layer_norm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_39/_self_attention_layer/_output_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_39/_self_attention_layer/_query_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_39/_self_attention_layer/_query_dense_layer_norm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_39/_self_attention_layer/_value_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_39/_self_attention_layernorm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_40/mlp/expert_bank/vars": [
"model_00010.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_40/mlp/_sparse_feedforward_gate_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_40/_feedforward_layernorm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_40/_self_attention_layer/_key_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_40/_self_attention_layer/_key_dense_layer_norm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_40/_self_attention_layer/_output_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_40/_self_attention_layer/_query_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_40/_self_attention_layer/_query_dense_layer_norm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_40/_self_attention_layer/_value_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_40/_self_attention_layernorm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_41/mlp/expert_bank/vars": [
"model_00010.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_41/mlp/_sparse_feedforward_gate_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_41/_feedforward_layernorm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_41/_self_attention_layer/_key_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_41/_self_attention_layer/_key_dense_layer_norm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_41/_self_attention_layer/_output_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_41/_self_attention_layer/_query_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_41/_self_attention_layer/_query_dense_layer_norm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_41/_self_attention_layer/_value_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_41/_self_attention_layernorm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_42/mlp/expert_bank/vars": [
"model_00010.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_42/mlp/_sparse_feedforward_gate_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_42/_feedforward_layernorm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_42/_self_attention_layer/_key_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_42/_self_attention_layer/_key_dense_layer_norm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_42/_self_attention_layer/_output_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_42/_self_attention_layer/_query_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_42/_self_attention_layer/_query_dense_layer_norm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_42/_self_attention_layer/_value_dense/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_42/_self_attention_layernorm/vars": "model_00010.weights.h5",
"/layers/qwen3_moe_transformer_decoder_43/mlp/expert_bank/vars": [
"model_00011.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_43/mlp/_sparse_feedforward_gate_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_43/_feedforward_layernorm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_43/_self_attention_layer/_key_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_43/_self_attention_layer/_key_dense_layer_norm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_43/_self_attention_layer/_output_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_43/_self_attention_layer/_query_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_43/_self_attention_layer/_query_dense_layer_norm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_43/_self_attention_layer/_value_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_43/_self_attention_layernorm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_44/mlp/expert_bank/vars": [
"model_00011.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_44/mlp/_sparse_feedforward_gate_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_44/_feedforward_layernorm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_44/_self_attention_layer/_key_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_44/_self_attention_layer/_key_dense_layer_norm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_44/_self_attention_layer/_output_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_44/_self_attention_layer/_query_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_44/_self_attention_layer/_query_dense_layer_norm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_44/_self_attention_layer/_value_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_44/_self_attention_layernorm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_45/mlp/expert_bank/vars": [
"model_00011.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_45/mlp/_sparse_feedforward_gate_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_45/_feedforward_layernorm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_45/_self_attention_layer/_key_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_45/_self_attention_layer/_key_dense_layer_norm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_45/_self_attention_layer/_output_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_45/_self_attention_layer/_query_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_45/_self_attention_layer/_query_dense_layer_norm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_45/_self_attention_layer/_value_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_45/_self_attention_layernorm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_46/mlp/expert_bank/vars": [
"model_00011.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_46/mlp/_sparse_feedforward_gate_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_46/_feedforward_layernorm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_46/_self_attention_layer/_key_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_46/_self_attention_layer/_key_dense_layer_norm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_46/_self_attention_layer/_output_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_46/_self_attention_layer/_query_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_46/_self_attention_layer/_query_dense_layer_norm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_46/_self_attention_layer/_value_dense/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_46/_self_attention_layernorm/vars": "model_00011.weights.h5",
"/layers/qwen3_moe_transformer_decoder_47/mlp/expert_bank/vars": [
"model_00012.weights.h5"
],
"/layers/qwen3_moe_transformer_decoder_47/mlp/_sparse_feedforward_gate_dense/vars": "model_00012.weights.h5",
"/layers/qwen3_moe_transformer_decoder_47/_feedforward_layernorm/vars": "model_00012.weights.h5",
"/layers/qwen3_moe_transformer_decoder_47/_self_attention_layer/_key_dense/vars": "model_00012.weights.h5",
"/layers/qwen3_moe_transformer_decoder_47/_self_attention_layer/_key_dense_layer_norm/vars": "model_00012.weights.h5",
"/layers/qwen3_moe_transformer_decoder_47/_self_attention_layer/_output_dense/vars": "model_00012.weights.h5",
"/layers/qwen3_moe_transformer_decoder_47/_self_attention_layer/_query_dense/vars": "model_00012.weights.h5",
"/layers/qwen3_moe_transformer_decoder_47/_self_attention_layer/_query_dense_layer_norm/vars": "model_00012.weights.h5",
"/layers/qwen3_moe_transformer_decoder_47/_self_attention_layer/_value_dense/vars": "model_00012.weights.h5",
"/layers/qwen3_moe_transformer_decoder_47/_self_attention_layernorm/vars": "model_00012.weights.h5",
"/layers/qwen3_moe_layer_norm/vars": "model_00012.weights.h5"
}
}