diff --git a/.gitattributes b/.gitattributes index 704deea1b9e4d8b96c55d4c6e3f0bd9383a270ad..ca064b4706a13c51c378c049d1a514436ea0b476 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2527,3 +2527,51 @@ checkpoint_1007000/target.decoder.layers_3.mlp.wi_1.kernel/0.1 filter=lfs diff=l checkpoint_1007000/target.decoder.layers_3.mlp.wi_1.kernel/0.7 filter=lfs diff=lfs merge=lfs -text checkpoint_1007000/target.decoder.layers_3.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text checkpoint_1007000/target.decoder.layers_3.mlp.wi_1.kernel/0.6 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_3.mlp.wi_1.kernel/0.5 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_3.mlp.wi_1.kernel/0.3 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_3.mlp.wi_1.kernel/0.2 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_3.mlp.wi_1.kernel/0.4 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_2.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_2.self_attention.query.kernel/0.7 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_2.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_2.self_attention.query.kernel/0.6 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_2.self_attention.query.kernel/0.5 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_2.self_attention.query.kernel/0.3 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_2.self_attention.query.kernel/0.2 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_2.self_attention.query.kernel/0.4 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_20.mlp.wo.kernel/6.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_20.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_20.mlp.wo.kernel/3.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_20.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_20.mlp.wo.kernel/7.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_20.mlp.wo.kernel/5.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_20.mlp.wo.kernel/2.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_20.mlp.wo.kernel/4.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.key.kernel/0.7 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.key.kernel/0.6 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.key.kernel/0.5 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.key.kernel/0.3 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.key.kernel/0.2 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.key.kernel/0.4 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_23.mlp.wo.kernel/6.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_23.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_23.mlp.wo.kernel/3.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_23.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_23.mlp.wo.kernel/7.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_23.mlp.wo.kernel/5.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_23.mlp.wo.kernel/2.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_23.mlp.wo.kernel/4.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.self_attention.key.kernel/0.7 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.self_attention.key.kernel/0.6 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.self_attention.key.kernel/0.5 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.self_attention.key.kernel/0.3 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.self_attention.key.kernel/0.2 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.self_attention.key.kernel/0.4 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.mlp.wi_1.kernel/0.7 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_13.mlp.wi_1.kernel/0.6 filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoint_1007000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray b/checkpoint_1007000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..2d0ddea1f0046008cb8eb77c5f75d9445443f01b --- /dev/null +++ b/checkpoint_1007000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray @@ -0,0 +1 @@ +{"chunks":[4096],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"