Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +238 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8574164bb4d11eaf453dcb6ad3966428cd591430ae9c31f0937299ed1a487081
|
| 3 |
size 1037269336
|
last-checkpoint/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a09647bd6cf33248479b856f41c1b82c476851b19566acd615bb9266f2b1b0ee
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f60fae928a89055d942fe282de8f7700321637408cadeae0860f984db5297c7
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5e367c94a7f4f9ffb0449a994883423b0ee48aa8ed33c52f9254bc053771053
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22ebe4a089211b888c7db1c62207d175ec46c124ad172ec679adaea45438cb12
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5000/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6eda741d4f4d77768028d65e555ce867c47e40bb8497ec8a08f5c144c7be204e
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step5000
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd00f37ba9aa2f280e60110d762d55bd77f2e19074544210642612fc0d0c6aed
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da7f2a246e741148e024dc29f274d353214e019d5f548b483c4905c46044d9c6
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59fe33085db221039a6aa12c757a1cedc0cc5b1d3be922c202529c8eb1b8058a
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15166ad530c105df387795709025f21626f6ea307321c73af1fa12ffc3d040d0
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ccb65ec1efdeb7bb899bcfdbd59da40edf4d90e5de5df4ddf919745dfd59ebe
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -7574,6 +7574,240 @@
|
|
| 7574 |
"eval_samples_per_second": 173.724,
|
| 7575 |
"eval_steps_per_second": 10.894,
|
| 7576 |
"step": 4850
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7577 |
}
|
| 7578 |
],
|
| 7579 |
"logging_steps": 5,
|
|
@@ -7602,7 +7836,7 @@
|
|
| 7602 |
"attributes": {}
|
| 7603 |
}
|
| 7604 |
},
|
| 7605 |
-
"total_flos": 1.
|
| 7606 |
"train_batch_size": 4,
|
| 7607 |
"trial_name": null,
|
| 7608 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.8494781255722046,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.7268498328245384,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 5000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 7574 |
"eval_samples_per_second": 173.724,
|
| 7575 |
"eval_steps_per_second": 10.894,
|
| 7576 |
"step": 4850
|
| 7577 |
+
},
|
| 7578 |
+
{
|
| 7579 |
+
"epoch": 0.7057711876726268,
|
| 7580 |
+
"grad_norm": 2.359971761703491,
|
| 7581 |
+
"learning_rate": 5.321233654341051e-05,
|
| 7582 |
+
"loss": 2.0426,
|
| 7583 |
+
"step": 4855
|
| 7584 |
+
},
|
| 7585 |
+
{
|
| 7586 |
+
"epoch": 0.7064980375054514,
|
| 7587 |
+
"grad_norm": 2.5104758739471436,
|
| 7588 |
+
"learning_rate": 5.3132997135038396e-05,
|
| 7589 |
+
"loss": 2.075,
|
| 7590 |
+
"step": 4860
|
| 7591 |
+
},
|
| 7592 |
+
{
|
| 7593 |
+
"epoch": 0.7072248873382759,
|
| 7594 |
+
"grad_norm": 2.3607850074768066,
|
| 7595 |
+
"learning_rate": 5.305365008699002e-05,
|
| 7596 |
+
"loss": 2.184,
|
| 7597 |
+
"step": 4865
|
| 7598 |
+
},
|
| 7599 |
+
{
|
| 7600 |
+
"epoch": 0.7079517371711005,
|
| 7601 |
+
"grad_norm": 2.6986582279205322,
|
| 7602 |
+
"learning_rate": 5.2974295599021475e-05,
|
| 7603 |
+
"loss": 2.0019,
|
| 7604 |
+
"step": 4870
|
| 7605 |
+
},
|
| 7606 |
+
{
|
| 7607 |
+
"epoch": 0.7086785870039249,
|
| 7608 |
+
"grad_norm": 2.2969441413879395,
|
| 7609 |
+
"learning_rate": 5.289493387090762e-05,
|
| 7610 |
+
"loss": 2.1051,
|
| 7611 |
+
"step": 4875
|
| 7612 |
+
},
|
| 7613 |
+
{
|
| 7614 |
+
"epoch": 0.7094054368367495,
|
| 7615 |
+
"grad_norm": 2.4311702251434326,
|
| 7616 |
+
"learning_rate": 5.2815565102441487e-05,
|
| 7617 |
+
"loss": 2.0222,
|
| 7618 |
+
"step": 4880
|
| 7619 |
+
},
|
| 7620 |
+
{
|
| 7621 |
+
"epoch": 0.7101322866695741,
|
| 7622 |
+
"grad_norm": 2.284479856491089,
|
| 7623 |
+
"learning_rate": 5.273618949343387e-05,
|
| 7624 |
+
"loss": 2.0578,
|
| 7625 |
+
"step": 4885
|
| 7626 |
+
},
|
| 7627 |
+
{
|
| 7628 |
+
"epoch": 0.7108591365023986,
|
| 7629 |
+
"grad_norm": 2.054469108581543,
|
| 7630 |
+
"learning_rate": 5.265680724371276e-05,
|
| 7631 |
+
"loss": 2.0806,
|
| 7632 |
+
"step": 4890
|
| 7633 |
+
},
|
| 7634 |
+
{
|
| 7635 |
+
"epoch": 0.7115859863352232,
|
| 7636 |
+
"grad_norm": 2.0409023761749268,
|
| 7637 |
+
"learning_rate": 5.257741855312288e-05,
|
| 7638 |
+
"loss": 2.1366,
|
| 7639 |
+
"step": 4895
|
| 7640 |
+
},
|
| 7641 |
+
{
|
| 7642 |
+
"epoch": 0.7123128361680476,
|
| 7643 |
+
"grad_norm": 2.3130247592926025,
|
| 7644 |
+
"learning_rate": 5.2498023621525144e-05,
|
| 7645 |
+
"loss": 1.9231,
|
| 7646 |
+
"step": 4900
|
| 7647 |
+
},
|
| 7648 |
+
{
|
| 7649 |
+
"epoch": 0.7123128361680476,
|
| 7650 |
+
"eval_loss": 1.85334312915802,
|
| 7651 |
+
"eval_runtime": 21.9469,
|
| 7652 |
+
"eval_samples_per_second": 150.409,
|
| 7653 |
+
"eval_steps_per_second": 9.432,
|
| 7654 |
+
"step": 4900
|
| 7655 |
+
},
|
| 7656 |
+
{
|
| 7657 |
+
"epoch": 0.7130396860008722,
|
| 7658 |
+
"grad_norm": 2.8905739784240723,
|
| 7659 |
+
"learning_rate": 5.241862264879624e-05,
|
| 7660 |
+
"loss": 2.1506,
|
| 7661 |
+
"step": 4905
|
| 7662 |
+
},
|
| 7663 |
+
{
|
| 7664 |
+
"epoch": 0.7137665358336968,
|
| 7665 |
+
"grad_norm": 1.8220387697219849,
|
| 7666 |
+
"learning_rate": 5.2339215834828e-05,
|
| 7667 |
+
"loss": 1.8484,
|
| 7668 |
+
"step": 4910
|
| 7669 |
+
},
|
| 7670 |
+
{
|
| 7671 |
+
"epoch": 0.7144933856665213,
|
| 7672 |
+
"grad_norm": 2.53902530670166,
|
| 7673 |
+
"learning_rate": 5.225980337952697e-05,
|
| 7674 |
+
"loss": 1.9491,
|
| 7675 |
+
"step": 4915
|
| 7676 |
+
},
|
| 7677 |
+
{
|
| 7678 |
+
"epoch": 0.7152202354993459,
|
| 7679 |
+
"grad_norm": 2.232422351837158,
|
| 7680 |
+
"learning_rate": 5.2180385482813935e-05,
|
| 7681 |
+
"loss": 1.9356,
|
| 7682 |
+
"step": 4920
|
| 7683 |
+
},
|
| 7684 |
+
{
|
| 7685 |
+
"epoch": 0.7159470853321703,
|
| 7686 |
+
"grad_norm": 2.471998691558838,
|
| 7687 |
+
"learning_rate": 5.210096234462335e-05,
|
| 7688 |
+
"loss": 2.0199,
|
| 7689 |
+
"step": 4925
|
| 7690 |
+
},
|
| 7691 |
+
{
|
| 7692 |
+
"epoch": 0.7166739351649949,
|
| 7693 |
+
"grad_norm": 2.3903968334198,
|
| 7694 |
+
"learning_rate": 5.202153416490285e-05,
|
| 7695 |
+
"loss": 2.0745,
|
| 7696 |
+
"step": 4930
|
| 7697 |
+
},
|
| 7698 |
+
{
|
| 7699 |
+
"epoch": 0.7174007849978195,
|
| 7700 |
+
"grad_norm": 2.582702159881592,
|
| 7701 |
+
"learning_rate": 5.1942101143612804e-05,
|
| 7702 |
+
"loss": 2.1917,
|
| 7703 |
+
"step": 4935
|
| 7704 |
+
},
|
| 7705 |
+
{
|
| 7706 |
+
"epoch": 0.718127634830644,
|
| 7707 |
+
"grad_norm": 2.2047088146209717,
|
| 7708 |
+
"learning_rate": 5.186266348072575e-05,
|
| 7709 |
+
"loss": 2.0905,
|
| 7710 |
+
"step": 4940
|
| 7711 |
+
},
|
| 7712 |
+
{
|
| 7713 |
+
"epoch": 0.7188544846634686,
|
| 7714 |
+
"grad_norm": 2.3632895946502686,
|
| 7715 |
+
"learning_rate": 5.178322137622589e-05,
|
| 7716 |
+
"loss": 1.8037,
|
| 7717 |
+
"step": 4945
|
| 7718 |
+
},
|
| 7719 |
+
{
|
| 7720 |
+
"epoch": 0.719581334496293,
|
| 7721 |
+
"grad_norm": 2.1407690048217773,
|
| 7722 |
+
"learning_rate": 5.170377503010865e-05,
|
| 7723 |
+
"loss": 1.9275,
|
| 7724 |
+
"step": 4950
|
| 7725 |
+
},
|
| 7726 |
+
{
|
| 7727 |
+
"epoch": 0.719581334496293,
|
| 7728 |
+
"eval_loss": 1.8587294816970825,
|
| 7729 |
+
"eval_runtime": 19.3641,
|
| 7730 |
+
"eval_samples_per_second": 170.47,
|
| 7731 |
+
"eval_steps_per_second": 10.69,
|
| 7732 |
+
"step": 4950
|
| 7733 |
+
},
|
| 7734 |
+
{
|
| 7735 |
+
"epoch": 0.7203081843291176,
|
| 7736 |
+
"grad_norm": 2.4468822479248047,
|
| 7737 |
+
"learning_rate": 5.16243246423801e-05,
|
| 7738 |
+
"loss": 2.0012,
|
| 7739 |
+
"step": 4955
|
| 7740 |
+
},
|
| 7741 |
+
{
|
| 7742 |
+
"epoch": 0.7210350341619421,
|
| 7743 |
+
"grad_norm": 2.2367379665374756,
|
| 7744 |
+
"learning_rate": 5.15448704130565e-05,
|
| 7745 |
+
"loss": 2.1336,
|
| 7746 |
+
"step": 4960
|
| 7747 |
+
},
|
| 7748 |
+
{
|
| 7749 |
+
"epoch": 0.7217618839947667,
|
| 7750 |
+
"grad_norm": 2.382683515548706,
|
| 7751 |
+
"learning_rate": 5.1465412542163777e-05,
|
| 7752 |
+
"loss": 2.0299,
|
| 7753 |
+
"step": 4965
|
| 7754 |
+
},
|
| 7755 |
+
{
|
| 7756 |
+
"epoch": 0.7224887338275913,
|
| 7757 |
+
"grad_norm": 2.802795648574829,
|
| 7758 |
+
"learning_rate": 5.138595122973702e-05,
|
| 7759 |
+
"loss": 2.1449,
|
| 7760 |
+
"step": 4970
|
| 7761 |
+
},
|
| 7762 |
+
{
|
| 7763 |
+
"epoch": 0.7232155836604157,
|
| 7764 |
+
"grad_norm": 2.422428846359253,
|
| 7765 |
+
"learning_rate": 5.130648667582e-05,
|
| 7766 |
+
"loss": 1.9257,
|
| 7767 |
+
"step": 4975
|
| 7768 |
+
},
|
| 7769 |
+
{
|
| 7770 |
+
"epoch": 0.7239424334932403,
|
| 7771 |
+
"grad_norm": 2.619701862335205,
|
| 7772 |
+
"learning_rate": 5.1227019080464614e-05,
|
| 7773 |
+
"loss": 2.1349,
|
| 7774 |
+
"step": 4980
|
| 7775 |
+
},
|
| 7776 |
+
{
|
| 7777 |
+
"epoch": 0.7246692833260648,
|
| 7778 |
+
"grad_norm": 2.259448289871216,
|
| 7779 |
+
"learning_rate": 5.114754864373048e-05,
|
| 7780 |
+
"loss": 1.9518,
|
| 7781 |
+
"step": 4985
|
| 7782 |
+
},
|
| 7783 |
+
{
|
| 7784 |
+
"epoch": 0.7253961331588894,
|
| 7785 |
+
"grad_norm": 2.466169834136963,
|
| 7786 |
+
"learning_rate": 5.106807556568429e-05,
|
| 7787 |
+
"loss": 2.0608,
|
| 7788 |
+
"step": 4990
|
| 7789 |
+
},
|
| 7790 |
+
{
|
| 7791 |
+
"epoch": 0.726122982991714,
|
| 7792 |
+
"grad_norm": 2.4360663890838623,
|
| 7793 |
+
"learning_rate": 5.098860004639943e-05,
|
| 7794 |
+
"loss": 2.0255,
|
| 7795 |
+
"step": 4995
|
| 7796 |
+
},
|
| 7797 |
+
{
|
| 7798 |
+
"epoch": 0.7268498328245384,
|
| 7799 |
+
"grad_norm": 2.5744364261627197,
|
| 7800 |
+
"learning_rate": 5.0909122285955454e-05,
|
| 7801 |
+
"loss": 2.0253,
|
| 7802 |
+
"step": 5000
|
| 7803 |
+
},
|
| 7804 |
+
{
|
| 7805 |
+
"epoch": 0.7268498328245384,
|
| 7806 |
+
"eval_loss": 1.8494781255722046,
|
| 7807 |
+
"eval_runtime": 19.054,
|
| 7808 |
+
"eval_samples_per_second": 173.245,
|
| 7809 |
+
"eval_steps_per_second": 10.864,
|
| 7810 |
+
"step": 5000
|
| 7811 |
}
|
| 7812 |
],
|
| 7813 |
"logging_steps": 5,
|
|
|
|
| 7836 |
"attributes": {}
|
| 7837 |
}
|
| 7838 |
},
|
| 7839 |
+
"total_flos": 1.3038086059374674e+18,
|
| 7840 |
"train_batch_size": 4,
|
| 7841 |
"trial_name": null,
|
| 7842 |
"trial_params": null
|