{
  "metadata": {
    "total_size": 15520397312.0
  },
  "weight_map": {
    "/layers/reversible_embedding/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_1/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_2/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_3/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_4/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_5/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_6/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_7/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_8/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_9/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_10/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_11/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_12/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_13/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_14/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_15/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_16/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_17/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_18/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_19/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/ffw_linear/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/gating_ffw/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/gating_ffw_2/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/post_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/post_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/pre_attention_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_20/pre_ffw_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_21/attention/key_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_21/attention/key_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_21/attention/output_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_21/attention/query_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_21/attention/query_norm/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_21/attention/value_dense/vars": "model_00000.weights.h5",
    "/layers/gemma3_decoder_block_21/ffw_linear/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_21/gating_ffw/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_21/gating_ffw_2/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_21/post_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_21/post_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_21/pre_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_21/pre_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/attention/key_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/attention/key_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/attention/output_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/attention/query_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/attention/query_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/attention/value_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/ffw_linear/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/gating_ffw/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/gating_ffw_2/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/post_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/post_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/pre_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_22/pre_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/attention/key_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/attention/key_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/attention/output_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/attention/query_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/attention/query_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/attention/value_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/ffw_linear/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/gating_ffw/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/gating_ffw_2/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/post_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/post_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/pre_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_23/pre_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/attention/key_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/attention/key_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/attention/output_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/attention/query_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/attention/query_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/attention/value_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/ffw_linear/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/gating_ffw/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/gating_ffw_2/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/post_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/post_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/pre_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_24/pre_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/attention/key_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/attention/key_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/attention/output_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/attention/query_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/attention/query_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/attention/value_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/ffw_linear/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/gating_ffw/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/gating_ffw_2/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/post_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/post_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/pre_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_25/pre_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/attention/key_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/attention/key_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/attention/output_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/attention/query_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/attention/query_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/attention/value_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/ffw_linear/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/gating_ffw/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/gating_ffw_2/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/post_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/post_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/pre_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_26/pre_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/attention/key_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/attention/key_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/attention/output_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/attention/query_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/attention/query_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/attention/value_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/ffw_linear/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/gating_ffw/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/gating_ffw_2/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/post_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/post_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/pre_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_27/pre_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/attention/key_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/attention/key_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/attention/output_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/attention/query_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/attention/query_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/attention/value_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/ffw_linear/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/gating_ffw/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/gating_ffw_2/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/post_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/post_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/pre_attention_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_28/pre_ffw_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_29/attention/key_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_29/attention/key_norm/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_29/attention/output_dense/vars": "model_00001.weights.h5",
    "/layers/gemma3_decoder_block_29/attention/query_dense/vars": "model_00001.weights.h5",
| "/layers/gemma3_decoder_block_29/attention/query_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_29/attention/value_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_29/ffw_linear/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_29/gating_ffw/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_29/gating_ffw_2/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_29/post_attention_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_29/post_ffw_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_29/pre_attention_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_29/pre_ffw_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/attention/key_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/attention/key_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/attention/output_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/attention/query_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/attention/query_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/attention/value_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/ffw_linear/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/gating_ffw/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/gating_ffw_2/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/post_attention_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/post_ffw_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/pre_attention_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_30/pre_ffw_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/attention/key_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/attention/key_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/attention/output_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/attention/query_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/attention/query_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/attention/value_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/ffw_linear/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/gating_ffw/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/gating_ffw_2/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/post_attention_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/post_ffw_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/pre_attention_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_31/pre_ffw_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/attention/key_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/attention/key_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/attention/output_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/attention/query_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/attention/query_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/attention/value_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/ffw_linear/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/gating_ffw/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/gating_ffw_2/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/post_attention_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/post_ffw_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/pre_attention_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_32/pre_ffw_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/attention/key_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/attention/key_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/attention/output_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/attention/query_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/attention/query_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/attention/value_dense/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/ffw_linear/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/gating_ffw/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/gating_ffw_2/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/post_attention_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/post_ffw_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/pre_attention_norm/vars": "model_00001.weights.h5", | |
| "/layers/gemma3_decoder_block_33/pre_ffw_norm/vars": "model_00001.weights.h5", | |
| "/layers/rms_normalization/vars": "model_00001.weights.h5" | |
| } | |
| } |