Training in progress, step 550000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c26dfdb95640433391e289b9d54c29ba637889e22ac54f90b0e4758926e0b34
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:affd4afd21029b52701e8d0046ed5d64853de0037c32a35ed3d7e452fd7c0e84
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:668a667c0141b3007fbd5f23f3195d50d9a952d3b42af4d471955e90b35901bf
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed9a47d2a307e89814d3463ee5fc48f3b3365083fd9c82a5f581a3087e2941c8
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e8a638d577539d28ce2053ec5b75e3eed1d5ad4a147b887facb578c40f25088
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30723df40dcd4e911f2e4b8fee07e2767c8ee7ced5c90fa064aabaf279f01230
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8de7df7ff53e44669a043f69e39b55baa82d81ac1777f09e2f6159ffcb51dd66
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 8.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -4002,11 +4002,85 @@
|
|
| 4002 |
"eval_samples_per_second": 849.063,
|
| 4003 |
"eval_steps_per_second": 13.585,
|
| 4004 |
"step": 540000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4005 |
}
|
| 4006 |
],
|
| 4007 |
"max_steps": 1000000,
|
| 4008 |
"num_train_epochs": 16,
|
| 4009 |
-
"total_flos": 3.
|
| 4010 |
"trial_name": null,
|
| 4011 |
"trial_params": null
|
| 4012 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.398613465267916,
|
| 5 |
+
"global_step": 550000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 4002 |
"eval_samples_per_second": 849.063,
|
| 4003 |
"eval_steps_per_second": 13.585,
|
| 4004 |
"step": 540000
|
| 4005 |
+
},
|
| 4006 |
+
{
|
| 4007 |
+
"epoch": 8.26,
|
| 4008 |
+
"learning_rate": 7.629795553284005e-05,
|
| 4009 |
+
"loss": 0.2602,
|
| 4010 |
+
"step": 541000
|
| 4011 |
+
},
|
| 4012 |
+
{
|
| 4013 |
+
"epoch": 8.28,
|
| 4014 |
+
"learning_rate": 7.606681437777081e-05,
|
| 4015 |
+
"loss": 0.2605,
|
| 4016 |
+
"step": 542000
|
| 4017 |
+
},
|
| 4018 |
+
{
|
| 4019 |
+
"epoch": 8.29,
|
| 4020 |
+
"learning_rate": 7.583571623538939e-05,
|
| 4021 |
+
"loss": 0.26,
|
| 4022 |
+
"step": 543000
|
| 4023 |
+
},
|
| 4024 |
+
{
|
| 4025 |
+
"epoch": 8.31,
|
| 4026 |
+
"learning_rate": 7.560466363294806e-05,
|
| 4027 |
+
"loss": 0.2596,
|
| 4028 |
+
"step": 544000
|
| 4029 |
+
},
|
| 4030 |
+
{
|
| 4031 |
+
"epoch": 8.32,
|
| 4032 |
+
"learning_rate": 7.537365909720104e-05,
|
| 4033 |
+
"loss": 0.2595,
|
| 4034 |
+
"step": 545000
|
| 4035 |
+
},
|
| 4036 |
+
{
|
| 4037 |
+
"epoch": 8.32,
|
| 4038 |
+
"eval_runtime": 1.1629,
|
| 4039 |
+
"eval_samples_per_second": 859.911,
|
| 4040 |
+
"eval_steps_per_second": 13.759,
|
| 4041 |
+
"step": 545000
|
| 4042 |
+
},
|
| 4043 |
+
{
|
| 4044 |
+
"epoch": 8.34,
|
| 4045 |
+
"learning_rate": 7.514270515437691e-05,
|
| 4046 |
+
"loss": 0.2595,
|
| 4047 |
+
"step": 546000
|
| 4048 |
+
},
|
| 4049 |
+
{
|
| 4050 |
+
"epoch": 8.35,
|
| 4051 |
+
"learning_rate": 7.491180433015101e-05,
|
| 4052 |
+
"loss": 0.2594,
|
| 4053 |
+
"step": 547000
|
| 4054 |
+
},
|
| 4055 |
+
{
|
| 4056 |
+
"epoch": 8.37,
|
| 4057 |
+
"learning_rate": 7.468095914961777e-05,
|
| 4058 |
+
"loss": 0.2596,
|
| 4059 |
+
"step": 548000
|
| 4060 |
+
},
|
| 4061 |
+
{
|
| 4062 |
+
"epoch": 8.38,
|
| 4063 |
+
"learning_rate": 7.445017213726307e-05,
|
| 4064 |
+
"loss": 0.2596,
|
| 4065 |
+
"step": 549000
|
| 4066 |
+
},
|
| 4067 |
+
{
|
| 4068 |
+
"epoch": 8.4,
|
| 4069 |
+
"learning_rate": 7.421944581693674e-05,
|
| 4070 |
+
"loss": 0.2594,
|
| 4071 |
+
"step": 550000
|
| 4072 |
+
},
|
| 4073 |
+
{
|
| 4074 |
+
"epoch": 8.4,
|
| 4075 |
+
"eval_runtime": 0.9899,
|
| 4076 |
+
"eval_samples_per_second": 1010.184,
|
| 4077 |
+
"eval_steps_per_second": 16.163,
|
| 4078 |
+
"step": 550000
|
| 4079 |
}
|
| 4080 |
],
|
| 4081 |
"max_steps": 1000000,
|
| 4082 |
"num_train_epochs": 16,
|
| 4083 |
+
"total_flos": 3.855509100123903e+22,
|
| 4084 |
"trial_name": null,
|
| 4085 |
"trial_params": null
|
| 4086 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:affd4afd21029b52701e8d0046ed5d64853de0037c32a35ed3d7e452fd7c0e84
|
| 3 |
size 449471589
|