Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step3050/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3050/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3050/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3050/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3050/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d30836e6cc8e9bc7e6ac4860b9a47462bf2b505763ca8f1f016f00dba39dd063
|
| 3 |
size 98088784
|
last-checkpoint/global_step3050/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ec2d173ff3f21519c19c4529cf49015f7351b455919aabd2f92d617dfea800e
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step3050/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9878bd0908e7cc394ee31cc2cf96c0d61401413d535c5e837df907554c2c3be8
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step3050/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:679f30e0acdc8235795db4f308847a419b26f177124c1a9af50cace3d7f17a2b
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step3050/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef8aa167f419865f888953e5a460286af57c9fd0ce1367970c1958895bac59da
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step3050/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54e7843e59f08e3aa7b5e877697b7647ba9834905436322ba9404fa0a4364927
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step3050
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea14b9c6b3a6f295496ef7304910c6b324fa957ee6a873aa9c7ba3e19dee7493
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e7e6647b80c88b8b07c192378c0e9cf459bfbb39240b1f97ee2fd33a11b2d76
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb99390398ed2d2ad006f7fe54297d0a309628ca1217f738a1c29766f3fd0e64
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8f06d95fcd0ec8a7f1f05ddeeb19bb5d11c5c887705f6ecb787b504d8cc2514
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9b613fc00b8ea63138f8d70379fa6d9a68d4e6248cdb72d56dd145fcca5cc7d
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4688,6 +4688,84 @@
|
|
| 4688 |
"eval_samples_per_second": 126.096,
|
| 4689 |
"eval_steps_per_second": 15.77,
|
| 4690 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4691 |
}
|
| 4692 |
],
|
| 4693 |
"logging_steps": 5,
|
|
@@ -4716,7 +4794,7 @@
|
|
| 4716 |
"attributes": {}
|
| 4717 |
}
|
| 4718 |
},
|
| 4719 |
-
"total_flos": 1.
|
| 4720 |
"train_batch_size": 2,
|
| 4721 |
"trial_name": null,
|
| 4722 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6222960948944092,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.9895897121861603,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 3050,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4688 |
"eval_samples_per_second": 126.096,
|
| 4689 |
"eval_steps_per_second": 15.77,
|
| 4690 |
"step": 3000
|
| 4691 |
+
},
|
| 4692 |
+
{
|
| 4693 |
+
"epoch": 2.9454990814451927,
|
| 4694 |
+
"grad_norm": 0.20588572323322296,
|
| 4695 |
+
"learning_rate": 3.95000485078324e-05,
|
| 4696 |
+
"loss": 0.6741,
|
| 4697 |
+
"step": 3005
|
| 4698 |
+
},
|
| 4699 |
+
{
|
| 4700 |
+
"epoch": 2.9503980404164114,
|
| 4701 |
+
"grad_norm": 0.21284903585910797,
|
| 4702 |
+
"learning_rate": 3.9330009094856485e-05,
|
| 4703 |
+
"loss": 0.6438,
|
| 4704 |
+
"step": 3010
|
| 4705 |
+
},
|
| 4706 |
+
{
|
| 4707 |
+
"epoch": 2.95529699938763,
|
| 4708 |
+
"grad_norm": 0.22855360805988312,
|
| 4709 |
+
"learning_rate": 3.916013481191187e-05,
|
| 4710 |
+
"loss": 0.65,
|
| 4711 |
+
"step": 3015
|
| 4712 |
+
},
|
| 4713 |
+
{
|
| 4714 |
+
"epoch": 2.960195958358849,
|
| 4715 |
+
"grad_norm": 0.19158318638801575,
|
| 4716 |
+
"learning_rate": 3.899042740471964e-05,
|
| 4717 |
+
"loss": 0.6593,
|
| 4718 |
+
"step": 3020
|
| 4719 |
+
},
|
| 4720 |
+
{
|
| 4721 |
+
"epoch": 2.9650949173300676,
|
| 4722 |
+
"grad_norm": 0.22519658505916595,
|
| 4723 |
+
"learning_rate": 3.8820888617286e-05,
|
| 4724 |
+
"loss": 0.6542,
|
| 4725 |
+
"step": 3025
|
| 4726 |
+
},
|
| 4727 |
+
{
|
| 4728 |
+
"epoch": 2.969993876301286,
|
| 4729 |
+
"grad_norm": 0.20841963589191437,
|
| 4730 |
+
"learning_rate": 3.865152019188429e-05,
|
| 4731 |
+
"loss": 0.6636,
|
| 4732 |
+
"step": 3030
|
| 4733 |
+
},
|
| 4734 |
+
{
|
| 4735 |
+
"epoch": 2.9748928352725046,
|
| 4736 |
+
"grad_norm": 0.22229060530662537,
|
| 4737 |
+
"learning_rate": 3.8482323869037134e-05,
|
| 4738 |
+
"loss": 0.6698,
|
| 4739 |
+
"step": 3035
|
| 4740 |
+
},
|
| 4741 |
+
{
|
| 4742 |
+
"epoch": 2.9797917942437233,
|
| 4743 |
+
"grad_norm": 0.24960780143737793,
|
| 4744 |
+
"learning_rate": 3.831330138749852e-05,
|
| 4745 |
+
"loss": 0.6707,
|
| 4746 |
+
"step": 3040
|
| 4747 |
+
},
|
| 4748 |
+
{
|
| 4749 |
+
"epoch": 2.9846907532149416,
|
| 4750 |
+
"grad_norm": 0.2418794333934784,
|
| 4751 |
+
"learning_rate": 3.814445448423598e-05,
|
| 4752 |
+
"loss": 0.6524,
|
| 4753 |
+
"step": 3045
|
| 4754 |
+
},
|
| 4755 |
+
{
|
| 4756 |
+
"epoch": 2.9895897121861603,
|
| 4757 |
+
"grad_norm": 0.22541861236095428,
|
| 4758 |
+
"learning_rate": 3.7975784894412676e-05,
|
| 4759 |
+
"loss": 0.659,
|
| 4760 |
+
"step": 3050
|
| 4761 |
+
},
|
| 4762 |
+
{
|
| 4763 |
+
"epoch": 2.9895897121861603,
|
| 4764 |
+
"eval_loss": 0.6222960948944092,
|
| 4765 |
+
"eval_runtime": 15.5064,
|
| 4766 |
+
"eval_samples_per_second": 126.335,
|
| 4767 |
+
"eval_steps_per_second": 15.8,
|
| 4768 |
+
"step": 3050
|
| 4769 |
}
|
| 4770 |
],
|
| 4771 |
"logging_steps": 5,
|
|
|
|
| 4794 |
"attributes": {}
|
| 4795 |
}
|
| 4796 |
},
|
| 4797 |
+
"total_flos": 1.5731372043460936e+18,
|
| 4798 |
"train_batch_size": 2,
|
| 4799 |
"trial_name": null,
|
| 4800 |
"trial_params": null
|