gpt_oss_120b_en / model.weights.json
prasadsachin's picture
Upload folder using huggingface_hub
ff5b0a7 verified
{
"metadata": {
"total_size": 467316626688.0
},
"weight_map": {
"/layers/reversible_embedding/vars": [
"model_00000.weights.h5"
],
"/layers/gpt_oss_transformer_decoder/input_layernorm/vars": "model_00000.weights.h5",
"/layers/gpt_oss_transformer_decoder/post_attention_layernorm/vars": "model_00000.weights.h5",
"/layers/gpt_oss_transformer_decoder/self_attention_layer/vars": "model_00000.weights.h5",
"/layers/gpt_oss_transformer_decoder/self_attention_layer/key_dense/vars": [
"model_00000.weights.h5"
],
"/layers/gpt_oss_transformer_decoder/self_attention_layer/output_dense/vars": [
"model_00000.weights.h5"
],
"/layers/gpt_oss_transformer_decoder/self_attention_layer/query_dense/vars": [
"model_00000.weights.h5"
],
"/layers/gpt_oss_transformer_decoder/self_attention_layer/value_dense/vars": [
"model_00000.weights.h5"
],
"/layers/gpt_oss_transformer_decoder/sparse_moe_block/experts/vars": [
"model_00000.weights.h5",
"model_00001.weights.h5"
],
"/layers/gpt_oss_transformer_decoder/sparse_moe_block/router/router_dense/vars": [
"model_00001.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_1/input_layernorm/vars": "model_00001.weights.h5",
"/layers/gpt_oss_transformer_decoder_1/post_attention_layernorm/vars": "model_00001.weights.h5",
"/layers/gpt_oss_transformer_decoder_1/self_attention_layer/vars": "model_00001.weights.h5",
"/layers/gpt_oss_transformer_decoder_1/self_attention_layer/key_dense/vars": [
"model_00001.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_1/self_attention_layer/output_dense/vars": [
"model_00001.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_1/self_attention_layer/query_dense/vars": [
"model_00001.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_1/self_attention_layer/value_dense/vars": [
"model_00001.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_1/sparse_moe_block/experts/vars": [
"model_00001.weights.h5",
"model_00002.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_1/sparse_moe_block/router/router_dense/vars": [
"model_00002.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_2/input_layernorm/vars": "model_00002.weights.h5",
"/layers/gpt_oss_transformer_decoder_2/post_attention_layernorm/vars": "model_00002.weights.h5",
"/layers/gpt_oss_transformer_decoder_2/self_attention_layer/vars": "model_00002.weights.h5",
"/layers/gpt_oss_transformer_decoder_2/self_attention_layer/key_dense/vars": [
"model_00002.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_2/self_attention_layer/output_dense/vars": [
"model_00002.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_2/self_attention_layer/query_dense/vars": [
"model_00002.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_2/self_attention_layer/value_dense/vars": [
"model_00002.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_2/sparse_moe_block/experts/vars": [
"model_00002.weights.h5",
"model_00003.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_2/sparse_moe_block/router/router_dense/vars": [
"model_00003.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_3/input_layernorm/vars": "model_00003.weights.h5",
"/layers/gpt_oss_transformer_decoder_3/post_attention_layernorm/vars": "model_00003.weights.h5",
"/layers/gpt_oss_transformer_decoder_3/self_attention_layer/vars": "model_00003.weights.h5",
"/layers/gpt_oss_transformer_decoder_3/self_attention_layer/key_dense/vars": [
"model_00003.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_3/self_attention_layer/output_dense/vars": [
"model_00003.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_3/self_attention_layer/query_dense/vars": [
"model_00003.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_3/self_attention_layer/value_dense/vars": [
"model_00003.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_3/sparse_moe_block/experts/vars": [
"model_00003.weights.h5",
"model_00004.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_3/sparse_moe_block/router/router_dense/vars": [
"model_00004.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_4/input_layernorm/vars": "model_00004.weights.h5",
"/layers/gpt_oss_transformer_decoder_4/post_attention_layernorm/vars": "model_00004.weights.h5",
"/layers/gpt_oss_transformer_decoder_4/self_attention_layer/vars": "model_00004.weights.h5",
"/layers/gpt_oss_transformer_decoder_4/self_attention_layer/key_dense/vars": [
"model_00004.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_4/self_attention_layer/output_dense/vars": [
"model_00004.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_4/self_attention_layer/query_dense/vars": [
"model_00004.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_4/self_attention_layer/value_dense/vars": [
"model_00004.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_4/sparse_moe_block/experts/vars": [
"model_00004.weights.h5",
"model_00005.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_4/sparse_moe_block/router/router_dense/vars": [
"model_00005.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_5/input_layernorm/vars": "model_00005.weights.h5",
"/layers/gpt_oss_transformer_decoder_5/post_attention_layernorm/vars": "model_00005.weights.h5",
"/layers/gpt_oss_transformer_decoder_5/self_attention_layer/vars": "model_00005.weights.h5",
"/layers/gpt_oss_transformer_decoder_5/self_attention_layer/key_dense/vars": [
"model_00005.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_5/self_attention_layer/output_dense/vars": [
"model_00005.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_5/self_attention_layer/query_dense/vars": [
"model_00005.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_5/self_attention_layer/value_dense/vars": [
"model_00005.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_5/sparse_moe_block/experts/vars": [
"model_00005.weights.h5",
"model_00006.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_5/sparse_moe_block/router/router_dense/vars": [
"model_00006.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_6/input_layernorm/vars": "model_00006.weights.h5",
"/layers/gpt_oss_transformer_decoder_6/post_attention_layernorm/vars": "model_00006.weights.h5",
"/layers/gpt_oss_transformer_decoder_6/self_attention_layer/vars": "model_00006.weights.h5",
"/layers/gpt_oss_transformer_decoder_6/self_attention_layer/key_dense/vars": [
"model_00006.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_6/self_attention_layer/output_dense/vars": [
"model_00006.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_6/self_attention_layer/query_dense/vars": [
"model_00006.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_6/self_attention_layer/value_dense/vars": [
"model_00006.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_6/sparse_moe_block/experts/vars": [
"model_00006.weights.h5",
"model_00007.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_6/sparse_moe_block/router/router_dense/vars": [
"model_00007.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_7/input_layernorm/vars": "model_00007.weights.h5",
"/layers/gpt_oss_transformer_decoder_7/post_attention_layernorm/vars": "model_00007.weights.h5",
"/layers/gpt_oss_transformer_decoder_7/self_attention_layer/vars": "model_00007.weights.h5",
"/layers/gpt_oss_transformer_decoder_7/self_attention_layer/key_dense/vars": [
"model_00007.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_7/self_attention_layer/output_dense/vars": [
"model_00007.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_7/self_attention_layer/query_dense/vars": [
"model_00007.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_7/self_attention_layer/value_dense/vars": [
"model_00007.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_7/sparse_moe_block/experts/vars": [
"model_00007.weights.h5",
"model_00008.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_7/sparse_moe_block/router/router_dense/vars": [
"model_00008.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_8/input_layernorm/vars": "model_00008.weights.h5",
"/layers/gpt_oss_transformer_decoder_8/post_attention_layernorm/vars": "model_00008.weights.h5",
"/layers/gpt_oss_transformer_decoder_8/self_attention_layer/vars": "model_00008.weights.h5",
"/layers/gpt_oss_transformer_decoder_8/self_attention_layer/key_dense/vars": [
"model_00008.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_8/self_attention_layer/output_dense/vars": [
"model_00008.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_8/self_attention_layer/query_dense/vars": [
"model_00008.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_8/self_attention_layer/value_dense/vars": [
"model_00008.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_8/sparse_moe_block/experts/vars": [
"model_00008.weights.h5",
"model_00009.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_8/sparse_moe_block/router/router_dense/vars": [
"model_00009.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_9/input_layernorm/vars": "model_00009.weights.h5",
"/layers/gpt_oss_transformer_decoder_9/post_attention_layernorm/vars": "model_00009.weights.h5",
"/layers/gpt_oss_transformer_decoder_9/self_attention_layer/vars": "model_00009.weights.h5",
"/layers/gpt_oss_transformer_decoder_9/self_attention_layer/key_dense/vars": [
"model_00009.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_9/self_attention_layer/output_dense/vars": [
"model_00009.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_9/self_attention_layer/query_dense/vars": [
"model_00009.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_9/self_attention_layer/value_dense/vars": [
"model_00009.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_9/sparse_moe_block/experts/vars": [
"model_00009.weights.h5",
"model_00010.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_9/sparse_moe_block/router/router_dense/vars": [
"model_00010.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_10/input_layernorm/vars": "model_00010.weights.h5",
"/layers/gpt_oss_transformer_decoder_10/post_attention_layernorm/vars": "model_00010.weights.h5",
"/layers/gpt_oss_transformer_decoder_10/self_attention_layer/vars": "model_00010.weights.h5",
"/layers/gpt_oss_transformer_decoder_10/self_attention_layer/key_dense/vars": [
"model_00010.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_10/self_attention_layer/output_dense/vars": [
"model_00010.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_10/self_attention_layer/query_dense/vars": [
"model_00010.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_10/self_attention_layer/value_dense/vars": [
"model_00010.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_10/sparse_moe_block/experts/vars": [
"model_00010.weights.h5",
"model_00011.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_10/sparse_moe_block/router/router_dense/vars": [
"model_00011.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_11/input_layernorm/vars": "model_00011.weights.h5",
"/layers/gpt_oss_transformer_decoder_11/post_attention_layernorm/vars": "model_00011.weights.h5",
"/layers/gpt_oss_transformer_decoder_11/self_attention_layer/vars": "model_00011.weights.h5",
"/layers/gpt_oss_transformer_decoder_11/self_attention_layer/key_dense/vars": [
"model_00011.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_11/self_attention_layer/output_dense/vars": [
"model_00011.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_11/self_attention_layer/query_dense/vars": [
"model_00011.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_11/self_attention_layer/value_dense/vars": [
"model_00011.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_11/sparse_moe_block/experts/vars": [
"model_00011.weights.h5",
"model_00012.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_11/sparse_moe_block/router/router_dense/vars": [
"model_00012.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_12/input_layernorm/vars": "model_00012.weights.h5",
"/layers/gpt_oss_transformer_decoder_12/post_attention_layernorm/vars": "model_00012.weights.h5",
"/layers/gpt_oss_transformer_decoder_12/self_attention_layer/vars": "model_00012.weights.h5",
"/layers/gpt_oss_transformer_decoder_12/self_attention_layer/key_dense/vars": [
"model_00012.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_12/self_attention_layer/output_dense/vars": [
"model_00012.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_12/self_attention_layer/query_dense/vars": [
"model_00012.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_12/self_attention_layer/value_dense/vars": [
"model_00012.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_12/sparse_moe_block/experts/vars": [
"model_00012.weights.h5",
"model_00013.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_12/sparse_moe_block/router/router_dense/vars": [
"model_00013.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_13/input_layernorm/vars": "model_00013.weights.h5",
"/layers/gpt_oss_transformer_decoder_13/post_attention_layernorm/vars": "model_00013.weights.h5",
"/layers/gpt_oss_transformer_decoder_13/self_attention_layer/vars": "model_00013.weights.h5",
"/layers/gpt_oss_transformer_decoder_13/self_attention_layer/key_dense/vars": [
"model_00013.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_13/self_attention_layer/output_dense/vars": [
"model_00013.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_13/self_attention_layer/query_dense/vars": [
"model_00013.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_13/self_attention_layer/value_dense/vars": [
"model_00013.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_13/sparse_moe_block/experts/vars": [
"model_00013.weights.h5",
"model_00014.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_13/sparse_moe_block/router/router_dense/vars": [
"model_00014.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_14/input_layernorm/vars": "model_00014.weights.h5",
"/layers/gpt_oss_transformer_decoder_14/post_attention_layernorm/vars": "model_00014.weights.h5",
"/layers/gpt_oss_transformer_decoder_14/self_attention_layer/vars": "model_00014.weights.h5",
"/layers/gpt_oss_transformer_decoder_14/self_attention_layer/key_dense/vars": [
"model_00014.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_14/self_attention_layer/output_dense/vars": [
"model_00014.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_14/self_attention_layer/query_dense/vars": [
"model_00014.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_14/self_attention_layer/value_dense/vars": [
"model_00014.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_14/sparse_moe_block/experts/vars": [
"model_00014.weights.h5",
"model_00015.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_14/sparse_moe_block/router/router_dense/vars": [
"model_00015.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_15/input_layernorm/vars": "model_00015.weights.h5",
"/layers/gpt_oss_transformer_decoder_15/post_attention_layernorm/vars": "model_00015.weights.h5",
"/layers/gpt_oss_transformer_decoder_15/self_attention_layer/vars": "model_00015.weights.h5",
"/layers/gpt_oss_transformer_decoder_15/self_attention_layer/key_dense/vars": [
"model_00015.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_15/self_attention_layer/output_dense/vars": [
"model_00015.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_15/self_attention_layer/query_dense/vars": [
"model_00015.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_15/self_attention_layer/value_dense/vars": [
"model_00015.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_15/sparse_moe_block/experts/vars": [
"model_00015.weights.h5",
"model_00016.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_15/sparse_moe_block/router/router_dense/vars": [
"model_00016.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_16/input_layernorm/vars": "model_00016.weights.h5",
"/layers/gpt_oss_transformer_decoder_16/post_attention_layernorm/vars": "model_00016.weights.h5",
"/layers/gpt_oss_transformer_decoder_16/self_attention_layer/vars": "model_00016.weights.h5",
"/layers/gpt_oss_transformer_decoder_16/self_attention_layer/key_dense/vars": [
"model_00016.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_16/self_attention_layer/output_dense/vars": [
"model_00016.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_16/self_attention_layer/query_dense/vars": [
"model_00016.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_16/self_attention_layer/value_dense/vars": [
"model_00016.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_16/sparse_moe_block/experts/vars": [
"model_00016.weights.h5",
"model_00017.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_16/sparse_moe_block/router/router_dense/vars": [
"model_00017.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_17/input_layernorm/vars": "model_00017.weights.h5",
"/layers/gpt_oss_transformer_decoder_17/post_attention_layernorm/vars": "model_00017.weights.h5",
"/layers/gpt_oss_transformer_decoder_17/self_attention_layer/vars": "model_00017.weights.h5",
"/layers/gpt_oss_transformer_decoder_17/self_attention_layer/key_dense/vars": [
"model_00017.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_17/self_attention_layer/output_dense/vars": [
"model_00017.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_17/self_attention_layer/query_dense/vars": [
"model_00017.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_17/self_attention_layer/value_dense/vars": [
"model_00017.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_17/sparse_moe_block/experts/vars": [
"model_00017.weights.h5",
"model_00018.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_17/sparse_moe_block/router/router_dense/vars": [
"model_00018.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_18/input_layernorm/vars": "model_00018.weights.h5",
"/layers/gpt_oss_transformer_decoder_18/post_attention_layernorm/vars": "model_00018.weights.h5",
"/layers/gpt_oss_transformer_decoder_18/self_attention_layer/vars": "model_00018.weights.h5",
"/layers/gpt_oss_transformer_decoder_18/self_attention_layer/key_dense/vars": [
"model_00018.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_18/self_attention_layer/output_dense/vars": [
"model_00018.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_18/self_attention_layer/query_dense/vars": [
"model_00018.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_18/self_attention_layer/value_dense/vars": [
"model_00018.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_18/sparse_moe_block/experts/vars": [
"model_00018.weights.h5",
"model_00019.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_18/sparse_moe_block/router/router_dense/vars": [
"model_00019.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_19/input_layernorm/vars": "model_00019.weights.h5",
"/layers/gpt_oss_transformer_decoder_19/post_attention_layernorm/vars": "model_00019.weights.h5",
"/layers/gpt_oss_transformer_decoder_19/self_attention_layer/vars": "model_00019.weights.h5",
"/layers/gpt_oss_transformer_decoder_19/self_attention_layer/key_dense/vars": [
"model_00019.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_19/self_attention_layer/output_dense/vars": [
"model_00019.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_19/self_attention_layer/query_dense/vars": [
"model_00019.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_19/self_attention_layer/value_dense/vars": [
"model_00019.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_19/sparse_moe_block/experts/vars": [
"model_00019.weights.h5",
"model_00020.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_19/sparse_moe_block/router/router_dense/vars": [
"model_00020.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_20/input_layernorm/vars": "model_00020.weights.h5",
"/layers/gpt_oss_transformer_decoder_20/post_attention_layernorm/vars": "model_00020.weights.h5",
"/layers/gpt_oss_transformer_decoder_20/self_attention_layer/vars": "model_00020.weights.h5",
"/layers/gpt_oss_transformer_decoder_20/self_attention_layer/key_dense/vars": [
"model_00020.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_20/self_attention_layer/output_dense/vars": [
"model_00020.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_20/self_attention_layer/query_dense/vars": [
"model_00020.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_20/self_attention_layer/value_dense/vars": [
"model_00020.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_20/sparse_moe_block/experts/vars": [
"model_00020.weights.h5",
"model_00021.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_20/sparse_moe_block/router/router_dense/vars": [
"model_00021.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_21/input_layernorm/vars": "model_00021.weights.h5",
"/layers/gpt_oss_transformer_decoder_21/post_attention_layernorm/vars": "model_00021.weights.h5",
"/layers/gpt_oss_transformer_decoder_21/self_attention_layer/vars": "model_00021.weights.h5",
"/layers/gpt_oss_transformer_decoder_21/self_attention_layer/key_dense/vars": [
"model_00021.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_21/self_attention_layer/output_dense/vars": [
"model_00021.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_21/self_attention_layer/query_dense/vars": [
"model_00021.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_21/self_attention_layer/value_dense/vars": [
"model_00021.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_21/sparse_moe_block/experts/vars": [
"model_00021.weights.h5",
"model_00022.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_21/sparse_moe_block/router/router_dense/vars": [
"model_00022.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_22/input_layernorm/vars": "model_00022.weights.h5",
"/layers/gpt_oss_transformer_decoder_22/post_attention_layernorm/vars": "model_00022.weights.h5",
"/layers/gpt_oss_transformer_decoder_22/self_attention_layer/vars": "model_00022.weights.h5",
"/layers/gpt_oss_transformer_decoder_22/self_attention_layer/key_dense/vars": [
"model_00022.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_22/self_attention_layer/output_dense/vars": [
"model_00022.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_22/self_attention_layer/query_dense/vars": [
"model_00022.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_22/self_attention_layer/value_dense/vars": [
"model_00022.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_22/sparse_moe_block/experts/vars": [
"model_00022.weights.h5",
"model_00023.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_22/sparse_moe_block/router/router_dense/vars": [
"model_00023.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_23/input_layernorm/vars": "model_00023.weights.h5",
"/layers/gpt_oss_transformer_decoder_23/post_attention_layernorm/vars": "model_00023.weights.h5",
"/layers/gpt_oss_transformer_decoder_23/self_attention_layer/vars": "model_00023.weights.h5",
"/layers/gpt_oss_transformer_decoder_23/self_attention_layer/key_dense/vars": [
"model_00023.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_23/self_attention_layer/output_dense/vars": [
"model_00023.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_23/self_attention_layer/query_dense/vars": [
"model_00023.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_23/self_attention_layer/value_dense/vars": [
"model_00023.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_23/sparse_moe_block/experts/vars": [
"model_00023.weights.h5",
"model_00024.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_23/sparse_moe_block/router/router_dense/vars": [
"model_00024.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_24/input_layernorm/vars": "model_00024.weights.h5",
"/layers/gpt_oss_transformer_decoder_24/post_attention_layernorm/vars": "model_00024.weights.h5",
"/layers/gpt_oss_transformer_decoder_24/self_attention_layer/vars": "model_00024.weights.h5",
"/layers/gpt_oss_transformer_decoder_24/self_attention_layer/key_dense/vars": [
"model_00024.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_24/self_attention_layer/output_dense/vars": [
"model_00024.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_24/self_attention_layer/query_dense/vars": [
"model_00024.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_24/self_attention_layer/value_dense/vars": [
"model_00024.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_24/sparse_moe_block/experts/vars": [
"model_00024.weights.h5",
"model_00025.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_24/sparse_moe_block/router/router_dense/vars": [
"model_00025.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_25/input_layernorm/vars": "model_00025.weights.h5",
"/layers/gpt_oss_transformer_decoder_25/post_attention_layernorm/vars": "model_00025.weights.h5",
"/layers/gpt_oss_transformer_decoder_25/self_attention_layer/vars": "model_00025.weights.h5",
"/layers/gpt_oss_transformer_decoder_25/self_attention_layer/key_dense/vars": [
"model_00025.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_25/self_attention_layer/output_dense/vars": [
"model_00025.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_25/self_attention_layer/query_dense/vars": [
"model_00025.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_25/self_attention_layer/value_dense/vars": [
"model_00025.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_25/sparse_moe_block/experts/vars": [
"model_00025.weights.h5",
"model_00026.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_25/sparse_moe_block/router/router_dense/vars": [
"model_00026.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_26/input_layernorm/vars": "model_00026.weights.h5",
"/layers/gpt_oss_transformer_decoder_26/post_attention_layernorm/vars": "model_00026.weights.h5",
"/layers/gpt_oss_transformer_decoder_26/self_attention_layer/vars": "model_00026.weights.h5",
"/layers/gpt_oss_transformer_decoder_26/self_attention_layer/key_dense/vars": [
"model_00026.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_26/self_attention_layer/output_dense/vars": [
"model_00026.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_26/self_attention_layer/query_dense/vars": [
"model_00026.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_26/self_attention_layer/value_dense/vars": [
"model_00026.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_26/sparse_moe_block/experts/vars": [
"model_00026.weights.h5",
"model_00027.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_26/sparse_moe_block/router/router_dense/vars": [
"model_00027.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_27/input_layernorm/vars": "model_00027.weights.h5",
"/layers/gpt_oss_transformer_decoder_27/post_attention_layernorm/vars": "model_00027.weights.h5",
"/layers/gpt_oss_transformer_decoder_27/self_attention_layer/vars": "model_00027.weights.h5",
"/layers/gpt_oss_transformer_decoder_27/self_attention_layer/key_dense/vars": [
"model_00027.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_27/self_attention_layer/output_dense/vars": [
"model_00027.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_27/self_attention_layer/query_dense/vars": [
"model_00027.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_27/self_attention_layer/value_dense/vars": [
"model_00027.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_27/sparse_moe_block/experts/vars": [
"model_00027.weights.h5",
"model_00028.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_27/sparse_moe_block/router/router_dense/vars": [
"model_00028.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_28/input_layernorm/vars": "model_00028.weights.h5",
"/layers/gpt_oss_transformer_decoder_28/post_attention_layernorm/vars": "model_00028.weights.h5",
"/layers/gpt_oss_transformer_decoder_28/self_attention_layer/vars": "model_00028.weights.h5",
"/layers/gpt_oss_transformer_decoder_28/self_attention_layer/key_dense/vars": [
"model_00028.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_28/self_attention_layer/output_dense/vars": [
"model_00028.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_28/self_attention_layer/query_dense/vars": [
"model_00028.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_28/self_attention_layer/value_dense/vars": [
"model_00028.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_28/sparse_moe_block/experts/vars": [
"model_00028.weights.h5",
"model_00029.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_28/sparse_moe_block/router/router_dense/vars": [
"model_00029.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_29/input_layernorm/vars": "model_00029.weights.h5",
"/layers/gpt_oss_transformer_decoder_29/post_attention_layernorm/vars": "model_00029.weights.h5",
"/layers/gpt_oss_transformer_decoder_29/self_attention_layer/vars": "model_00029.weights.h5",
"/layers/gpt_oss_transformer_decoder_29/self_attention_layer/key_dense/vars": [
"model_00029.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_29/self_attention_layer/output_dense/vars": [
"model_00029.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_29/self_attention_layer/query_dense/vars": [
"model_00029.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_29/self_attention_layer/value_dense/vars": [
"model_00029.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_29/sparse_moe_block/experts/vars": [
"model_00029.weights.h5",
"model_00030.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_29/sparse_moe_block/router/router_dense/vars": [
"model_00030.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_30/input_layernorm/vars": "model_00030.weights.h5",
"/layers/gpt_oss_transformer_decoder_30/post_attention_layernorm/vars": "model_00030.weights.h5",
"/layers/gpt_oss_transformer_decoder_30/self_attention_layer/vars": "model_00030.weights.h5",
"/layers/gpt_oss_transformer_decoder_30/self_attention_layer/key_dense/vars": [
"model_00030.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_30/self_attention_layer/output_dense/vars": [
"model_00030.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_30/self_attention_layer/query_dense/vars": [
"model_00030.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_30/self_attention_layer/value_dense/vars": [
"model_00030.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_30/sparse_moe_block/experts/vars": [
"model_00030.weights.h5",
"model_00031.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_30/sparse_moe_block/router/router_dense/vars": [
"model_00031.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_31/input_layernorm/vars": "model_00031.weights.h5",
"/layers/gpt_oss_transformer_decoder_31/post_attention_layernorm/vars": "model_00031.weights.h5",
"/layers/gpt_oss_transformer_decoder_31/self_attention_layer/vars": "model_00031.weights.h5",
"/layers/gpt_oss_transformer_decoder_31/self_attention_layer/key_dense/vars": [
"model_00031.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_31/self_attention_layer/output_dense/vars": [
"model_00031.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_31/self_attention_layer/query_dense/vars": [
"model_00031.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_31/self_attention_layer/value_dense/vars": [
"model_00031.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_31/sparse_moe_block/experts/vars": [
"model_00031.weights.h5",
"model_00032.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_31/sparse_moe_block/router/router_dense/vars": [
"model_00032.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_32/input_layernorm/vars": "model_00032.weights.h5",
"/layers/gpt_oss_transformer_decoder_32/post_attention_layernorm/vars": "model_00032.weights.h5",
"/layers/gpt_oss_transformer_decoder_32/self_attention_layer/vars": "model_00032.weights.h5",
"/layers/gpt_oss_transformer_decoder_32/self_attention_layer/key_dense/vars": [
"model_00032.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_32/self_attention_layer/output_dense/vars": [
"model_00032.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_32/self_attention_layer/query_dense/vars": [
"model_00032.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_32/self_attention_layer/value_dense/vars": [
"model_00032.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_32/sparse_moe_block/experts/vars": [
"model_00032.weights.h5",
"model_00033.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_32/sparse_moe_block/router/router_dense/vars": [
"model_00033.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_33/input_layernorm/vars": "model_00033.weights.h5",
"/layers/gpt_oss_transformer_decoder_33/post_attention_layernorm/vars": "model_00033.weights.h5",
"/layers/gpt_oss_transformer_decoder_33/self_attention_layer/vars": "model_00033.weights.h5",
"/layers/gpt_oss_transformer_decoder_33/self_attention_layer/key_dense/vars": [
"model_00033.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_33/self_attention_layer/output_dense/vars": [
"model_00033.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_33/self_attention_layer/query_dense/vars": [
"model_00033.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_33/self_attention_layer/value_dense/vars": [
"model_00033.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_33/sparse_moe_block/experts/vars": [
"model_00033.weights.h5",
"model_00034.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_33/sparse_moe_block/router/router_dense/vars": [
"model_00034.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_34/input_layernorm/vars": "model_00034.weights.h5",
"/layers/gpt_oss_transformer_decoder_34/post_attention_layernorm/vars": "model_00034.weights.h5",
"/layers/gpt_oss_transformer_decoder_34/self_attention_layer/vars": "model_00034.weights.h5",
"/layers/gpt_oss_transformer_decoder_34/self_attention_layer/key_dense/vars": [
"model_00034.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_34/self_attention_layer/output_dense/vars": [
"model_00034.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_34/self_attention_layer/query_dense/vars": [
"model_00034.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_34/self_attention_layer/value_dense/vars": [
"model_00034.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_34/sparse_moe_block/experts/vars": [
"model_00034.weights.h5",
"model_00035.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_34/sparse_moe_block/router/router_dense/vars": [
"model_00035.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_35/input_layernorm/vars": "model_00035.weights.h5",
"/layers/gpt_oss_transformer_decoder_35/post_attention_layernorm/vars": "model_00035.weights.h5",
"/layers/gpt_oss_transformer_decoder_35/self_attention_layer/vars": "model_00035.weights.h5",
"/layers/gpt_oss_transformer_decoder_35/self_attention_layer/key_dense/vars": [
"model_00035.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_35/self_attention_layer/output_dense/vars": [
"model_00035.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_35/self_attention_layer/query_dense/vars": [
"model_00035.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_35/self_attention_layer/value_dense/vars": [
"model_00035.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_35/sparse_moe_block/experts/vars": [
"model_00035.weights.h5",
"model_00036.weights.h5"
],
"/layers/gpt_oss_transformer_decoder_35/sparse_moe_block/router/router_dense/vars": [
"model_00036.weights.h5"
],
"/layers/gpt_oss_layer_normalization/vars": "model_00036.weights.h5"
}
}