Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2550/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:770e6d839526903ca6b846f534df65727b662e5b56664f96fe6cd955f1dc8822
|
| 3 |
size 98088784
|
last-checkpoint/global_step2550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21172ee054d5fd4119c31aa22ced2cf7cbc0ccab716a4028aba5d541db875786
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step2550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eca3c8c5fcefdf91dd66a32d4edbd9178ca63659b1d2966ed79a0e72e43f2093
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step2550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c3242e817ad02e18d3095218502a7cde7a66ac1c7dff5ab716bd0196cc13af0
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step2550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acc076e4938086944fc370a10b991cb2888b4963abb17aba648d408c6310ca63
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step2550/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad4c693e060ff3c438a9b58e892ba4991060d88da653f73e491d17545f3c2957
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step2550
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66211be425d4da3831f8ac51b4ac9b76c6a16a9fd9028ee5f537668644b51397
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0da79439109a7ca4ff852c66d6d76b157ff1736400ce389258ded492e285cb6
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:454d44cf834b4dd8a3667976ff0a436e09d2cfecad46b47a5d331c03fbc98f20
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85a76d6d4ecc8a1764784d40c16f9803e82e82729aea9b18d4e1bbc32327d327
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7df3463c38c65942460e58d5ffbf4db4eb0600c71f42d3180931fbb2157e70d
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3908,6 +3908,84 @@
|
|
| 3908 |
"eval_samples_per_second": 126.026,
|
| 3909 |
"eval_steps_per_second": 15.761,
|
| 3910 |
"step": 2500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3911 |
}
|
| 3912 |
],
|
| 3913 |
"logging_steps": 5,
|
|
@@ -3936,7 +4014,7 @@
|
|
| 3936 |
"attributes": {}
|
| 3937 |
}
|
| 3938 |
},
|
| 3939 |
-
"total_flos": 1.
|
| 3940 |
"train_batch_size": 2,
|
| 3941 |
"trial_name": null,
|
| 3942 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6343480944633484,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.499693815064299,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 2550,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3908 |
"eval_samples_per_second": 126.026,
|
| 3909 |
"eval_steps_per_second": 15.761,
|
| 3910 |
"step": 2500
|
| 3911 |
+
},
|
| 3912 |
+
{
|
| 3913 |
+
"epoch": 2.4556031843233312,
|
| 3914 |
+
"grad_norm": 0.2111200988292694,
|
| 3915 |
+
"learning_rate": 5.7032246386046973e-05,
|
| 3916 |
+
"loss": 0.6545,
|
| 3917 |
+
"step": 2505
|
| 3918 |
+
},
|
| 3919 |
+
{
|
| 3920 |
+
"epoch": 2.46050214329455,
|
| 3921 |
+
"grad_norm": 0.22759990394115448,
|
| 3922 |
+
"learning_rate": 5.685472373358426e-05,
|
| 3923 |
+
"loss": 0.6724,
|
| 3924 |
+
"step": 2510
|
| 3925 |
+
},
|
| 3926 |
+
{
|
| 3927 |
+
"epoch": 2.4654011022657687,
|
| 3928 |
+
"grad_norm": 0.23847265541553497,
|
| 3929 |
+
"learning_rate": 5.667718611759539e-05,
|
| 3930 |
+
"loss": 0.6799,
|
| 3931 |
+
"step": 2515
|
| 3932 |
+
},
|
| 3933 |
+
{
|
| 3934 |
+
"epoch": 2.470300061236987,
|
| 3935 |
+
"grad_norm": 0.2177450954914093,
|
| 3936 |
+
"learning_rate": 5.649963536255404e-05,
|
| 3937 |
+
"loss": 0.6525,
|
| 3938 |
+
"step": 2520
|
| 3939 |
+
},
|
| 3940 |
+
{
|
| 3941 |
+
"epoch": 2.4751990202082057,
|
| 3942 |
+
"grad_norm": 0.22332416474819183,
|
| 3943 |
+
"learning_rate": 5.6322073293068914e-05,
|
| 3944 |
+
"loss": 0.6713,
|
| 3945 |
+
"step": 2525
|
| 3946 |
+
},
|
| 3947 |
+
{
|
| 3948 |
+
"epoch": 2.4800979791794244,
|
| 3949 |
+
"grad_norm": 0.19824838638305664,
|
| 3950 |
+
"learning_rate": 5.6144501733865016e-05,
|
| 3951 |
+
"loss": 0.6556,
|
| 3952 |
+
"step": 2530
|
| 3953 |
+
},
|
| 3954 |
+
{
|
| 3955 |
+
"epoch": 2.484996938150643,
|
| 3956 |
+
"grad_norm": 0.20995038747787476,
|
| 3957 |
+
"learning_rate": 5.5966922509764866e-05,
|
| 3958 |
+
"loss": 0.6598,
|
| 3959 |
+
"step": 2535
|
| 3960 |
+
},
|
| 3961 |
+
{
|
| 3962 |
+
"epoch": 2.4898958971218614,
|
| 3963 |
+
"grad_norm": 0.21509043872356415,
|
| 3964 |
+
"learning_rate": 5.578933744566973e-05,
|
| 3965 |
+
"loss": 0.6775,
|
| 3966 |
+
"step": 2540
|
| 3967 |
+
},
|
| 3968 |
+
{
|
| 3969 |
+
"epoch": 2.49479485609308,
|
| 3970 |
+
"grad_norm": 0.2389044612646103,
|
| 3971 |
+
"learning_rate": 5.561174836654091e-05,
|
| 3972 |
+
"loss": 0.6813,
|
| 3973 |
+
"step": 2545
|
| 3974 |
+
},
|
| 3975 |
+
{
|
| 3976 |
+
"epoch": 2.499693815064299,
|
| 3977 |
+
"grad_norm": 0.1885528862476349,
|
| 3978 |
+
"learning_rate": 5.543415709738095e-05,
|
| 3979 |
+
"loss": 0.6614,
|
| 3980 |
+
"step": 2550
|
| 3981 |
+
},
|
| 3982 |
+
{
|
| 3983 |
+
"epoch": 2.499693815064299,
|
| 3984 |
+
"eval_loss": 0.6343480944633484,
|
| 3985 |
+
"eval_runtime": 15.474,
|
| 3986 |
+
"eval_samples_per_second": 126.599,
|
| 3987 |
+
"eval_steps_per_second": 15.833,
|
| 3988 |
+
"step": 2550
|
| 3989 |
}
|
| 3990 |
],
|
| 3991 |
"logging_steps": 5,
|
|
|
|
| 4014 |
"attributes": {}
|
| 4015 |
}
|
| 4016 |
},
|
| 4017 |
+
"total_flos": 1.3143227784211988e+18,
|
| 4018 |
"train_batch_size": 2,
|
| 4019 |
"trial_name": null,
|
| 4020 |
"trial_params": null
|