Training in progress, step 810000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:937c7c0f70a279ee503aacdb71e2ed944ca0935d892236cbc194ebb0a487078d
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b11d3700c444e5be7a81b810567d1ec12594038b95620633a1bd2bdea5720a82
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14439
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae78156aa426e9b178cf6fee9de070cdf8f42ae880d982b2d7744b9aef0151e2
|
| 3 |
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21facfb39767781c851d9e48c4797705b198dc5e6b8cbf69054469662fef7a97
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e6cb051a738441d99712588916bc3687b4f28c951201e05ef9935c6498e2add
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9386b24abfba6bc92416379b1f9ea9f1d4b9d09be1e413347b26a358344ede31
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80ff99717fc66d7e2670093ac4b787c0d4e68c8bb6b50d5d8a0a59479daaf2a3
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 12.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5926,11 +5926,85 @@
|
|
| 5926 |
"eval_samples_per_second": 1349.61,
|
| 5927 |
"eval_steps_per_second": 21.594,
|
| 5928 |
"step": 800000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5929 |
}
|
| 5930 |
],
|
| 5931 |
"max_steps": 1000000,
|
| 5932 |
"num_train_epochs": 16,
|
| 5933 |
-
"total_flos": 5.
|
| 5934 |
"trial_name": null,
|
| 5935 |
"trial_params": null
|
| 5936 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 12.368867103394567,
|
| 5 |
+
"global_step": 810000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5926 |
"eval_samples_per_second": 1349.61,
|
| 5927 |
"eval_steps_per_second": 21.594,
|
| 5928 |
"step": 800000
|
| 5929 |
+
},
|
| 5930 |
+
{
|
| 5931 |
+
"epoch": 12.23,
|
| 5932 |
+
"learning_rate": 2.461828514290513e-05,
|
| 5933 |
+
"loss": 0.2364,
|
| 5934 |
+
"step": 801000
|
| 5935 |
+
},
|
| 5936 |
+
{
|
| 5937 |
+
"epoch": 12.25,
|
| 5938 |
+
"learning_rate": 2.447701158911855e-05,
|
| 5939 |
+
"loss": 0.2373,
|
| 5940 |
+
"step": 802000
|
| 5941 |
+
},
|
| 5942 |
+
{
|
| 5943 |
+
"epoch": 12.26,
|
| 5944 |
+
"learning_rate": 2.4336345225837658e-05,
|
| 5945 |
+
"loss": 0.2369,
|
| 5946 |
+
"step": 803000
|
| 5947 |
+
},
|
| 5948 |
+
{
|
| 5949 |
+
"epoch": 12.28,
|
| 5950 |
+
"learning_rate": 2.4196287591367296e-05,
|
| 5951 |
+
"loss": 0.2363,
|
| 5952 |
+
"step": 804000
|
| 5953 |
+
},
|
| 5954 |
+
{
|
| 5955 |
+
"epoch": 12.29,
|
| 5956 |
+
"learning_rate": 2.405684021735527e-05,
|
| 5957 |
+
"loss": 0.2366,
|
| 5958 |
+
"step": 805000
|
| 5959 |
+
},
|
| 5960 |
+
{
|
| 5961 |
+
"epoch": 12.29,
|
| 5962 |
+
"eval_runtime": 0.7797,
|
| 5963 |
+
"eval_samples_per_second": 1282.575,
|
| 5964 |
+
"eval_steps_per_second": 20.521,
|
| 5965 |
+
"step": 805000
|
| 5966 |
+
},
|
| 5967 |
+
{
|
| 5968 |
+
"epoch": 12.31,
|
| 5969 |
+
"learning_rate": 2.3918004628775736e-05,
|
| 5970 |
+
"loss": 0.2366,
|
| 5971 |
+
"step": 806000
|
| 5972 |
+
},
|
| 5973 |
+
{
|
| 5974 |
+
"epoch": 12.32,
|
| 5975 |
+
"learning_rate": 2.3779782343912463e-05,
|
| 5976 |
+
"loss": 0.2367,
|
| 5977 |
+
"step": 807000
|
| 5978 |
+
},
|
| 5979 |
+
{
|
| 5980 |
+
"epoch": 12.34,
|
| 5981 |
+
"learning_rate": 2.364217487434221e-05,
|
| 5982 |
+
"loss": 0.24,
|
| 5983 |
+
"step": 808000
|
| 5984 |
+
},
|
| 5985 |
+
{
|
| 5986 |
+
"epoch": 12.35,
|
| 5987 |
+
"learning_rate": 2.3505183724918196e-05,
|
| 5988 |
+
"loss": 0.2369,
|
| 5989 |
+
"step": 809000
|
| 5990 |
+
},
|
| 5991 |
+
{
|
| 5992 |
+
"epoch": 12.37,
|
| 5993 |
+
"learning_rate": 2.3368810393753687e-05,
|
| 5994 |
+
"loss": 0.2365,
|
| 5995 |
+
"step": 810000
|
| 5996 |
+
},
|
| 5997 |
+
{
|
| 5998 |
+
"epoch": 12.37,
|
| 5999 |
+
"eval_runtime": 0.7457,
|
| 6000 |
+
"eval_samples_per_second": 1341.078,
|
| 6001 |
+
"eval_steps_per_second": 21.457,
|
| 6002 |
+
"step": 810000
|
| 6003 |
}
|
| 6004 |
],
|
| 6005 |
"max_steps": 1000000,
|
| 6006 |
"num_train_epochs": 16,
|
| 6007 |
+
"total_flos": 5.678113258640334e+22,
|
| 6008 |
"trial_name": null,
|
| 6009 |
"trial_params": null
|
| 6010 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b11d3700c444e5be7a81b810567d1ec12594038b95620633a1bd2bdea5720a82
|
| 3 |
size 449471589
|