End of training
f396038
verified
-
attn_loss_fn=cos, attn_weight=25.0, layer_mapper=all, projector=linear
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=25.0, layer_mapper=all, projector=orthogonal
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=25.0, layer_mapper=last, projector=linear
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=25.0, layer_mapper=last, projector=orthogonal
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=25.0, layer_mapper=last_k_2, projector=linear
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=25.0, layer_mapper=last_k_2, projector=orthogonal
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=25.0, layer_mapper=layer-2, projector=linear
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=25.0, layer_mapper=layer-2, projector=orthogonal
End of training
-
attn_loss_fn=cos, attn_weight=5, layer_mapper=all, projector=linear
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=5, layer_mapper=all, projector=orthogonal
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=5, layer_mapper=last, projector=linear
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=5, layer_mapper=last, projector=orthogonal
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=5, layer_mapper=last_k_2, projector=linear
End of training
-
attn_loss_fn=cos, attn_weight=5, layer_mapper=last_k_2, projector=orthogonal
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=5, layer_mapper=layer-2, projector=linear
Training in progress, step 61875
-
attn_loss_fn=cos, attn_weight=5, layer_mapper=layer-2, projector=orthogonal
Training in progress, step 61875
-
attn_loss_fn=kl, attn_weight=25.0, layer_mapper=all, projector=linear
Training in progress, step 61875
-
attn_loss_fn=kl, attn_weight=25.0, layer_mapper=last_k_2, projector=linear
End of training
-
attn_loss_fn=kl, attn_weight=5, layer_mapper=all, projector=linear
Training in progress, step 61875
-
attn_loss_fn=kl, attn_weight=5, layer_mapper=last_k_2, projector=linear
Training in progress, step 61875