diff --git a/.gitattributes b/.gitattributes index 637fa167e56685c01bc97f08a420ea76330cf6df..10a5c75385f2a4d4e658a84f55f4e574cfdb6d15 100644 --- a/.gitattributes +++ b/.gitattributes @@ -30,3 +30,48 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/train_ds-003-of-004.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/train_ds-001-of-004.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/checkpoint filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/train_ds-000-of-004.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/train_ds-002-of-004.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_22.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_22.attention.query.kernel/0.7 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_22.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_22.attention.query.kernel/0.6 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_22.attention.query.kernel/0.5 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_22.attention.query.kernel/0.3 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_22.attention.query.kernel/0.2 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_22.attention.query.kernel/0.4 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_3.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_3.mlp.wi_0.kernel/0.7 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_3.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_3.mlp.wi_0.kernel/0.6 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_3.mlp.wi_0.kernel/0.5 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_3.mlp.wi_0.kernel/0.3 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_3.mlp.wi_0.kernel/0.2 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_3.mlp.wi_0.kernel/0.4 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.self_attention.query.kernel/0.7 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.self_attention.query.kernel/0.6 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.self_attention.query.kernel/0.5 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.self_attention.query.kernel/0.3 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.self_attention.query.kernel/0.2 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.self_attention.query.kernel/0.4 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.out.kernel/6.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.out.kernel/3.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.out.kernel/7.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.out.kernel/5.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.out.kernel/2.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.decoder.layers_21.encoder_decoder_attention.out.kernel/4.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_2.mlp.wo.kernel/6.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_2.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_2.mlp.wo.kernel/3.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_2.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_2.mlp.wo.kernel/7.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_2.mlp.wo.kernel/5.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_2.mlp.wo.kernel/2.0 filter=lfs diff=lfs merge=lfs -text +checkpoint_1007000/target.encoder.layers_2.mlp.wo.kernel/4.0 filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoint_1007000/checkpoint b/checkpoint_1007000/checkpoint new file mode 100644 index 0000000000000000000000000000000000000000..24c59f3725b97ecdd2cffd94c80331f23b9c31d6 --- /dev/null +++ b/checkpoint_1007000/checkpoint @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dd585cc57ec6ce27ec16da81a66e38b816d4c341a807471d5618fbe798d9873 +size 19920805 diff --git a/checkpoint_1007000/state.param_states.decoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_1007000/state.param_states.decoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..2d0ddea1f0046008cb8eb77c5f75d9445443f01b --- /dev/null +++ b/checkpoint_1007000/state.param_states.decoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1 @@ +{"chunks":[4096],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"