| { | |
| "metadata": { | |
| "total_size": 83659028736.0 | |
| }, | |
| "weight_map": { | |
| "/layers/reversible_embedding/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder/input_layernorm/vars": "model_00000.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder/post_attention_layernorm/vars": "model_00000.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder/self_attention_layer/vars": "model_00000.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder/self_attention_layer/key_dense/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder/self_attention_layer/output_dense/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder/self_attention_layer/query_dense/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder/self_attention_layer/value_dense/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder/sparse_moe_block/experts/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_1/input_layernorm/vars": "model_00000.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_1/post_attention_layernorm/vars": "model_00000.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_1/self_attention_layer/vars": "model_00000.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_1/self_attention_layer/key_dense/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_1/self_attention_layer/output_dense/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_1/self_attention_layer/query_dense/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_1/self_attention_layer/value_dense/vars": [ | |
| "model_00000.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_1/sparse_moe_block/experts/vars": [ | |
| "model_00000.weights.h5", | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_1/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_2/input_layernorm/vars": "model_00001.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_2/post_attention_layernorm/vars": "model_00001.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_2/self_attention_layer/vars": "model_00001.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_2/self_attention_layer/key_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_2/self_attention_layer/output_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_2/self_attention_layer/query_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_2/self_attention_layer/value_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_2/sparse_moe_block/experts/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_2/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_3/input_layernorm/vars": "model_00001.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_3/post_attention_layernorm/vars": "model_00001.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_3/self_attention_layer/vars": "model_00001.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_3/self_attention_layer/key_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_3/self_attention_layer/output_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_3/self_attention_layer/query_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_3/self_attention_layer/value_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_3/sparse_moe_block/experts/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_3/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_4/input_layernorm/vars": "model_00001.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_4/post_attention_layernorm/vars": "model_00001.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_4/self_attention_layer/vars": "model_00001.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_4/self_attention_layer/key_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_4/self_attention_layer/output_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_4/self_attention_layer/query_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_4/self_attention_layer/value_dense/vars": [ | |
| "model_00001.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_4/sparse_moe_block/experts/vars": [ | |
| "model_00001.weights.h5", | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_4/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_5/input_layernorm/vars": "model_00002.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_5/post_attention_layernorm/vars": "model_00002.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_5/self_attention_layer/vars": "model_00002.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_5/self_attention_layer/key_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_5/self_attention_layer/output_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_5/self_attention_layer/query_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_5/self_attention_layer/value_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_5/sparse_moe_block/experts/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_5/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_6/input_layernorm/vars": "model_00002.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_6/post_attention_layernorm/vars": "model_00002.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_6/self_attention_layer/vars": "model_00002.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_6/self_attention_layer/key_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_6/self_attention_layer/output_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_6/self_attention_layer/query_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_6/self_attention_layer/value_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_6/sparse_moe_block/experts/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_6/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_7/input_layernorm/vars": "model_00002.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_7/post_attention_layernorm/vars": "model_00002.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_7/self_attention_layer/vars": "model_00002.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_7/self_attention_layer/key_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_7/self_attention_layer/output_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_7/self_attention_layer/query_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_7/self_attention_layer/value_dense/vars": [ | |
| "model_00002.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_7/sparse_moe_block/experts/vars": [ | |
| "model_00002.weights.h5", | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_7/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_8/input_layernorm/vars": "model_00003.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_8/post_attention_layernorm/vars": "model_00003.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_8/self_attention_layer/vars": "model_00003.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_8/self_attention_layer/key_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_8/self_attention_layer/output_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_8/self_attention_layer/query_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_8/self_attention_layer/value_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_8/sparse_moe_block/experts/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_8/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_9/input_layernorm/vars": "model_00003.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_9/post_attention_layernorm/vars": "model_00003.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_9/self_attention_layer/vars": "model_00003.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_9/self_attention_layer/key_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_9/self_attention_layer/output_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_9/self_attention_layer/query_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_9/self_attention_layer/value_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_9/sparse_moe_block/experts/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_9/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_10/input_layernorm/vars": "model_00003.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_10/post_attention_layernorm/vars": "model_00003.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_10/self_attention_layer/vars": "model_00003.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_10/self_attention_layer/key_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_10/self_attention_layer/output_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_10/self_attention_layer/query_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_10/self_attention_layer/value_dense/vars": [ | |
| "model_00003.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_10/sparse_moe_block/experts/vars": [ | |
| "model_00003.weights.h5", | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_10/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_11/input_layernorm/vars": "model_00004.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_11/post_attention_layernorm/vars": "model_00004.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_11/self_attention_layer/vars": "model_00004.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_11/self_attention_layer/key_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_11/self_attention_layer/output_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_11/self_attention_layer/query_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_11/self_attention_layer/value_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_11/sparse_moe_block/experts/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_11/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_12/input_layernorm/vars": "model_00004.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_12/post_attention_layernorm/vars": "model_00004.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_12/self_attention_layer/vars": "model_00004.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_12/self_attention_layer/key_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_12/self_attention_layer/output_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_12/self_attention_layer/query_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_12/self_attention_layer/value_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_12/sparse_moe_block/experts/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_12/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_13/input_layernorm/vars": "model_00004.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_13/post_attention_layernorm/vars": "model_00004.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_13/self_attention_layer/vars": "model_00004.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_13/self_attention_layer/key_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_13/self_attention_layer/output_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_13/self_attention_layer/query_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_13/self_attention_layer/value_dense/vars": [ | |
| "model_00004.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_13/sparse_moe_block/experts/vars": [ | |
| "model_00004.weights.h5", | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_13/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_14/input_layernorm/vars": "model_00005.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_14/post_attention_layernorm/vars": "model_00005.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_14/self_attention_layer/vars": "model_00005.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_14/self_attention_layer/key_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_14/self_attention_layer/output_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_14/self_attention_layer/query_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_14/self_attention_layer/value_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_14/sparse_moe_block/experts/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_14/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_15/input_layernorm/vars": "model_00005.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_15/post_attention_layernorm/vars": "model_00005.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_15/self_attention_layer/vars": "model_00005.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_15/self_attention_layer/key_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_15/self_attention_layer/output_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_15/self_attention_layer/query_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_15/self_attention_layer/value_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_15/sparse_moe_block/experts/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_15/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_16/input_layernorm/vars": "model_00005.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_16/post_attention_layernorm/vars": "model_00005.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_16/self_attention_layer/vars": "model_00005.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_16/self_attention_layer/key_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_16/self_attention_layer/output_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_16/self_attention_layer/query_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_16/self_attention_layer/value_dense/vars": [ | |
| "model_00005.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_16/sparse_moe_block/experts/vars": [ | |
| "model_00005.weights.h5", | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_16/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_17/input_layernorm/vars": "model_00006.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_17/post_attention_layernorm/vars": "model_00006.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_17/self_attention_layer/vars": "model_00006.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_17/self_attention_layer/key_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_17/self_attention_layer/output_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_17/self_attention_layer/query_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_17/self_attention_layer/value_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_17/sparse_moe_block/experts/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_17/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_18/input_layernorm/vars": "model_00006.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_18/post_attention_layernorm/vars": "model_00006.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_18/self_attention_layer/vars": "model_00006.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_18/self_attention_layer/key_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_18/self_attention_layer/output_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_18/self_attention_layer/query_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_18/self_attention_layer/value_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_18/sparse_moe_block/experts/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_18/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_19/input_layernorm/vars": "model_00006.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_19/post_attention_layernorm/vars": "model_00006.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_19/self_attention_layer/vars": "model_00006.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_19/self_attention_layer/key_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_19/self_attention_layer/output_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_19/self_attention_layer/query_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_19/self_attention_layer/value_dense/vars": [ | |
| "model_00006.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_19/sparse_moe_block/experts/vars": [ | |
| "model_00006.weights.h5", | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_19/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_20/input_layernorm/vars": "model_00007.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_20/post_attention_layernorm/vars": "model_00007.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_20/self_attention_layer/vars": "model_00007.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_20/self_attention_layer/key_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_20/self_attention_layer/output_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_20/self_attention_layer/query_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_20/self_attention_layer/value_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_20/sparse_moe_block/experts/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_20/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_21/input_layernorm/vars": "model_00007.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_21/post_attention_layernorm/vars": "model_00007.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_21/self_attention_layer/vars": "model_00007.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_21/self_attention_layer/key_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_21/self_attention_layer/output_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_21/self_attention_layer/query_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_21/self_attention_layer/value_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_21/sparse_moe_block/experts/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_21/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_22/input_layernorm/vars": "model_00007.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_22/post_attention_layernorm/vars": "model_00007.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_22/self_attention_layer/vars": "model_00007.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_22/self_attention_layer/key_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_22/self_attention_layer/output_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_22/self_attention_layer/query_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_22/self_attention_layer/value_dense/vars": [ | |
| "model_00007.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_22/sparse_moe_block/experts/vars": [ | |
| "model_00007.weights.h5", | |
| "model_00008.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_22/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00008.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_23/input_layernorm/vars": "model_00008.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_23/post_attention_layernorm/vars": "model_00008.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_23/self_attention_layer/vars": "model_00008.weights.h5", | |
| "/layers/gpt_oss_transformer_decoder_23/self_attention_layer/key_dense/vars": [ | |
| "model_00008.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_23/self_attention_layer/output_dense/vars": [ | |
| "model_00008.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_23/self_attention_layer/query_dense/vars": [ | |
| "model_00008.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_23/self_attention_layer/value_dense/vars": [ | |
| "model_00008.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_23/sparse_moe_block/experts/vars": [ | |
| "model_00008.weights.h5" | |
| ], | |
| "/layers/gpt_oss_transformer_decoder_23/sparse_moe_block/router/router_dense/vars": [ | |
| "model_00008.weights.h5" | |
| ], | |
| "/layers/gpt_oss_layer_normalization/vars": "model_00008.weights.h5" | |
| } | |
| } |