{"tree_metadata": {"('batch_stats', 'teacher')": {"key_metadata": [{"key": "batch_stats", "key_type": 2}, {"key": "teacher", "key_type": 2}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '0', 'count')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "count", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_0', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_1', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_2', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_3', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_4', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_5', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_6', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_7', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'blocks_8', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_3', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_3", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_3', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_3", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_4', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_4", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_4', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_4", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_5', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_5", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_5', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_5", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_6', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_6", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_6', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_6", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_7', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_7", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_7', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_7", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_8', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_8", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'heads_8', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_8", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'x_embedder', 'proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "x_embedder", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'x_embedder', 'proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "x_embedder", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'y_embedder_0', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_0", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'y_embedder_1', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_1", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'y_embedder_2', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_2", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'y_embedder_3', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_3", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'y_embedder_4', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_4", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'y_embedder_5', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_5", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'y_embedder_6', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_6", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'y_embedder_7', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_7", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'mu', 'student', 'y_embedder_8', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_8", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_0', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_1', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_2', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_3', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_4', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_5', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_6', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_7', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'class_cond_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'blocks_8', 'guidance_tokens')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_3', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_3", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_3', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_3", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_4', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_4", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_4', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_4", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_5', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_5", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_5', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_5", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_6', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_6", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_6', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_6", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_7', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_7", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_7', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_7", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_8', '_flax_linear', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_8", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'heads_8', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_8", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'x_embedder', 'proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "x_embedder", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'x_embedder', 'proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "x_embedder", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'y_embedder_0', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_0", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'y_embedder_1', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_1", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'y_embedder_2', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_2", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'y_embedder_3', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_3", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'y_embedder_4', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_4", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'y_embedder_5', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_5", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'y_embedder_6', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_6", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'y_embedder_7', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_7", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '0', 'nu', 'student', 'y_embedder_8', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_8", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('opt_state', '1')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '2', 'count')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "count", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_0', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_1', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_2', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_3', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_4', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_5', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_6', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_7', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'blocks_8', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_3', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_3", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_3', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_3", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_4', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_4", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_4', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_4", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_5', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_5", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_5', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_5", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_6', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_6", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_6', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_6", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_7', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_7", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_7', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_7", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_8', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_8", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'heads_8', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "heads_8", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'x_embedder', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "x_embedder", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'x_embedder', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "x_embedder", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'y_embedder_0', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_0", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'y_embedder_1', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_1", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'y_embedder_2', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_2", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'y_embedder_3', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_3", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'y_embedder_4', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_4", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'y_embedder_5', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_5", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'y_embedder_6', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_6", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'y_embedder_7', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_7", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student', 'y_embedder_8', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student", "key_type": 2}, {"key": "y_embedder_8", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_0', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_1', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_2', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_3', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_4', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_5', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_6', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_7', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_0', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_0', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_0', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_1', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_1', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_1', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_2', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_2', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_2', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_3', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_3', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_3', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_4', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_4', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_4', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_5', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_5', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_5', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_6', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_6', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_6', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_7', 'attn', 'proj', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_7', 'attn', 'qkv', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'blocks', 'layers_7', 'mlp', 'fc2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "blocks", "key_type": 2}, {"key": "layers_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "fc2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'class_cond_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "class_cond_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'guidance_embedder', 'mlp', 'layers_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_embedder", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "layers_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'blocks_8', 'guidance_tokens')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "blocks_8", "key_type": 2}, {"key": "guidance_tokens", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_0', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_0', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_0", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_2', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_2', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_2", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_3', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_3", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_3', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_3", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_4', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_4", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_4', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_4", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_5', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_5", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_5', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_5", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_6', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_6", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_6', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_6", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_7', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_7", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_7', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_7", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_8', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_8", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'heads_8', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "heads_8", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'x_embedder', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "x_embedder", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'x_embedder', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "x_embedder", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'y_embedder_0', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "y_embedder_0", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'y_embedder_1', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "y_embedder_1", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'y_embedder_2', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "y_embedder_2", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'y_embedder_3', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "y_embedder_3", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'y_embedder_4', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "y_embedder_4", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'y_embedder_5', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "y_embedder_5", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'y_embedder_6', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "y_embedder_6", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'y_embedder_7', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "y_embedder_7", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'student_ema', 'y_embedder_8', 'embedding_table', '_flax_embedding', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "student_ema", "key_type": 2}, {"key": "y_embedder_8", "key_type": 2}, {"key": "embedding_table", "key_type": 2}, {"key": "_flax_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_0', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_1', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_2', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_3', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_4', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_5', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_6', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'blocks_7', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'class_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "class_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'pos_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'proj_in', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'proj_in', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'proj_out', 'proj_out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_0', 'proj_out', 'proj_out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_0', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_1', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_2', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_3', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_4', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_5', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_6', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'blocks_7', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'class_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "class_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'pos_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'proj_in', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'proj_in', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'proj_out', 'proj_out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_1', 'proj_out', 'proj_out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_0', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_1', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_2', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_3', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_4', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_5', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_6', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'blocks_7', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'class_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "class_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'pos_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'proj_in', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'proj_in', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'proj_out', 'proj_out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_2', 'proj_out', 'proj_out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_0', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_1', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_2', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_3', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_4', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_5', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_6', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'blocks_7', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'class_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "class_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'pos_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'proj_in', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'proj_in', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'proj_out', 'proj_out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_3', 'proj_out', 'proj_out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_0', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_1', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_2', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_3', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_4', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_5', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_6', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'blocks_7', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'class_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "class_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'pos_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'proj_in', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'proj_in', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'proj_out', 'proj_out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_4', 'proj_out', 'proj_out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_0', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_1', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_2', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_3', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_4', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_5', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_6', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'blocks_7', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'class_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "class_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'pos_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'proj_in', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'proj_in', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'proj_out', 'proj_out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_5', 'proj_out', 'proj_out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_0', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_1', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_2', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_3', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_4', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_5', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_6', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'blocks_7', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'class_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "class_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'pos_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'proj_in', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'proj_in', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'proj_out', 'proj_out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_6', 'proj_out', 'proj_out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_0', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_1', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_2', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_3', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_4', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_5', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_6', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'attn', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'attn', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'attn', 'proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'attn', 'proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'attn', 'qkv', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'mlp', 'net1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'mlp', 'net1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'mlp', 'net2', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'mlp', 'net2', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "net2", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'mlp', 'norm', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'blocks_7', 'mlp', 'norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'class_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "class_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'pos_embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'proj_in', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'proj_in', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "proj_in", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'proj_out', 'adaLN_modulation', 'layers_1', '_flax_linear', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "adaLN_modulation", "key_type": 2}, {"key": "layers_1", "key_type": 2}, {"key": "_flax_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'proj_out', 'proj_out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('params', 'teacher', 'blocks_7', 'proj_out', 'proj_out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "teacher", "key_type": 2}, {"key": "blocks_7", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "proj_out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('step',)": {"key_metadata": [{"key": "step", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}}, "use_zarr3": false}