Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/global_step1581/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +2 -2
- last-checkpoint/global_step1581/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +2 -2
- last-checkpoint/global_step1581/zero_pp_rank_0_mp_rank_00_model_states.pt +2 -2
- last-checkpoint/global_step1581/zero_pp_rank_1_mp_rank_00_model_states.pt +2 -2
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/trainer_state.json +36 -5
last-checkpoint/global_step1581/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91eacaa84680aa82b20a3df8182dc93d0ce047ed089f25b4731f846316837cb6
|
| 3 |
+
size 28315088
|
last-checkpoint/global_step1581/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c297b2e76489c10915e73396d64e91d8ba6c2dd74c95b2df3b59bea16e5b8948
|
| 3 |
+
size 28315088
|
last-checkpoint/global_step1581/zero_pp_rank_0_mp_rank_00_model_states.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0cfb139e4c8441e6ab6518b594be2262638a6362ae882d8fa66311577c967c5
|
| 3 |
+
size 2850543502
|
last-checkpoint/global_step1581/zero_pp_rank_1_mp_rank_00_model_states.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a28252787a7c5302116de0d3ed69eaebf466c0d5f045b2ab3ef114b4d7e96000
|
| 3 |
+
size 2850543310
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step1581
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14512
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f08f5222c04ba8a25b70996b406c1e95dc834c73f15f3e7248da94c55fa3d6d3
|
| 3 |
size 14512
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14512
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:707f46a874ccec694d9a9ebdb8230159a79dc68e5bca12742f90f6e6d892b27e
|
| 3 |
size 14512
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 2.
|
| 3 |
-
"best_model_checkpoint": "dq158/pingusPongus/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -32,13 +32,44 @@
|
|
| 32 |
"eval_steps_per_second": 0.126,
|
| 33 |
"eval_translation_length": 53098,
|
| 34 |
"step": 790
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
}
|
| 36 |
],
|
| 37 |
"logging_steps": 500,
|
| 38 |
"max_steps": 23700,
|
| 39 |
"num_train_epochs": 30,
|
| 40 |
"save_steps": 500,
|
| 41 |
-
"total_flos":
|
| 42 |
"trial_name": null,
|
| 43 |
"trial_params": null
|
| 44 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 2.288722038269043,
|
| 3 |
+
"best_model_checkpoint": "dq158/pingusPongus/checkpoint-1581",
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1581,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 32 |
"eval_steps_per_second": 0.126,
|
| 33 |
"eval_translation_length": 53098,
|
| 34 |
"step": 790
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"epoch": 1.27,
|
| 38 |
+
"learning_rate": 9.445572420019074e-05,
|
| 39 |
+
"loss": 2.5652,
|
| 40 |
+
"step": 1000
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"epoch": 1.9,
|
| 44 |
+
"learning_rate": 9.999999999999999e-05,
|
| 45 |
+
"loss": 2.468,
|
| 46 |
+
"step": 1500
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"epoch": 2.0,
|
| 50 |
+
"eval_bleu": 1.0,
|
| 51 |
+
"eval_brevity_penalty": 1.0,
|
| 52 |
+
"eval_length_ratio": 1.0,
|
| 53 |
+
"eval_loss": 2.288722038269043,
|
| 54 |
+
"eval_precisions": [
|
| 55 |
+
1.0,
|
| 56 |
+
1.0,
|
| 57 |
+
1.0,
|
| 58 |
+
1.0
|
| 59 |
+
],
|
| 60 |
+
"eval_reference_length": 53391,
|
| 61 |
+
"eval_runtime": 1340.9293,
|
| 62 |
+
"eval_samples_per_second": 2.096,
|
| 63 |
+
"eval_steps_per_second": 0.131,
|
| 64 |
+
"eval_translation_length": 53391,
|
| 65 |
+
"step": 1581
|
| 66 |
}
|
| 67 |
],
|
| 68 |
"logging_steps": 500,
|
| 69 |
"max_steps": 23700,
|
| 70 |
"num_train_epochs": 30,
|
| 71 |
"save_steps": 500,
|
| 72 |
+
"total_flos": 771945142419456.0,
|
| 73 |
"trial_name": null,
|
| 74 |
"trial_params": null
|
| 75 |
}
|