Training in progress, epoch 8
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +846 -3
- pytorch_model.bin +1 -1
- runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 236469913
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a95e9685f7cd1888a964404d3a12c7901a29b2df58dbc1af6ad81e5615e00ee4
|
| 3 |
size 236469913
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118242180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08fa1d11b1e8d17acf511a08375f85229d0f5fbc9ac05460b78b3d6eae608ef7
|
| 3 |
size 118242180
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d870ec0989535dca2b80429983b6169a4cd3e8ae4acc035c5e27e16231b19367
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37238f485a747bdf411140f3ae786aa1ea193668eeb905f979f7244c03f830a9
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5049691cebbb5a2ee68a53b40104e081fc2c143f2cb3c29341094315e9153721
|
| 3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f732e87d3656ba3aff9a6c3eb0f2055ff5280fda57513995d4337e7c7b5ef089
|
| 3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5833d29c5bb0965b6ed6a386cf1f3ec2da591b9c1d9bdeb335707a9bec2c66f
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5901,11 +5901,854 @@
|
|
| 5901 |
"eval_samples_per_second": 970.412,
|
| 5902 |
"eval_steps_per_second": 40.434,
|
| 5903 |
"step": 486311
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5904 |
}
|
| 5905 |
],
|
| 5906 |
"max_steps": 972622,
|
| 5907 |
"num_train_epochs": 14,
|
| 5908 |
-
"total_flos":
|
| 5909 |
"trial_name": null,
|
| 5910 |
"trial_params": null
|
| 5911 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.0,
|
| 5 |
+
"global_step": 555784,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5901 |
"eval_samples_per_second": 970.412,
|
| 5902 |
"eval_steps_per_second": 40.434,
|
| 5903 |
"step": 486311
|
| 5904 |
+
},
|
| 5905 |
+
{
|
| 5906 |
+
"epoch": 7.0,
|
| 5907 |
+
"learning_rate": 5.050841472850103e-05,
|
| 5908 |
+
"loss": 2.8813,
|
| 5909 |
+
"step": 486500
|
| 5910 |
+
},
|
| 5911 |
+
{
|
| 5912 |
+
"epoch": 7.01,
|
| 5913 |
+
"learning_rate": 5.04564879867483e-05,
|
| 5914 |
+
"loss": 2.8798,
|
| 5915 |
+
"step": 487000
|
| 5916 |
+
},
|
| 5917 |
+
{
|
| 5918 |
+
"epoch": 7.02,
|
| 5919 |
+
"learning_rate": 5.040456124499556e-05,
|
| 5920 |
+
"loss": 2.8848,
|
| 5921 |
+
"step": 487500
|
| 5922 |
+
},
|
| 5923 |
+
{
|
| 5924 |
+
"epoch": 7.02,
|
| 5925 |
+
"learning_rate": 5.035263450324282e-05,
|
| 5926 |
+
"loss": 2.8811,
|
| 5927 |
+
"step": 488000
|
| 5928 |
+
},
|
| 5929 |
+
{
|
| 5930 |
+
"epoch": 7.03,
|
| 5931 |
+
"learning_rate": 5.03007077614901e-05,
|
| 5932 |
+
"loss": 2.8786,
|
| 5933 |
+
"step": 488500
|
| 5934 |
+
},
|
| 5935 |
+
{
|
| 5936 |
+
"epoch": 7.04,
|
| 5937 |
+
"learning_rate": 5.024878101973736e-05,
|
| 5938 |
+
"loss": 2.8777,
|
| 5939 |
+
"step": 489000
|
| 5940 |
+
},
|
| 5941 |
+
{
|
| 5942 |
+
"epoch": 7.05,
|
| 5943 |
+
"learning_rate": 5.019685427798462e-05,
|
| 5944 |
+
"loss": 2.8776,
|
| 5945 |
+
"step": 489500
|
| 5946 |
+
},
|
| 5947 |
+
{
|
| 5948 |
+
"epoch": 7.05,
|
| 5949 |
+
"learning_rate": 5.0145031389715393e-05,
|
| 5950 |
+
"loss": 2.8799,
|
| 5951 |
+
"step": 490000
|
| 5952 |
+
},
|
| 5953 |
+
{
|
| 5954 |
+
"epoch": 7.06,
|
| 5955 |
+
"learning_rate": 5.0093208501446166e-05,
|
| 5956 |
+
"loss": 2.883,
|
| 5957 |
+
"step": 490500
|
| 5958 |
+
},
|
| 5959 |
+
{
|
| 5960 |
+
"epoch": 7.07,
|
| 5961 |
+
"learning_rate": 5.004128175969343e-05,
|
| 5962 |
+
"loss": 2.8822,
|
| 5963 |
+
"step": 491000
|
| 5964 |
+
},
|
| 5965 |
+
{
|
| 5966 |
+
"epoch": 7.07,
|
| 5967 |
+
"learning_rate": 4.998935501794069e-05,
|
| 5968 |
+
"loss": 2.8739,
|
| 5969 |
+
"step": 491500
|
| 5970 |
+
},
|
| 5971 |
+
{
|
| 5972 |
+
"epoch": 7.08,
|
| 5973 |
+
"learning_rate": 4.993742827618796e-05,
|
| 5974 |
+
"loss": 2.8832,
|
| 5975 |
+
"step": 492000
|
| 5976 |
+
},
|
| 5977 |
+
{
|
| 5978 |
+
"epoch": 7.09,
|
| 5979 |
+
"learning_rate": 4.988550153443522e-05,
|
| 5980 |
+
"loss": 2.8728,
|
| 5981 |
+
"step": 492500
|
| 5982 |
+
},
|
| 5983 |
+
{
|
| 5984 |
+
"epoch": 7.1,
|
| 5985 |
+
"learning_rate": 4.983357479268249e-05,
|
| 5986 |
+
"loss": 2.8796,
|
| 5987 |
+
"step": 493000
|
| 5988 |
+
},
|
| 5989 |
+
{
|
| 5990 |
+
"epoch": 7.1,
|
| 5991 |
+
"learning_rate": 4.978164805092975e-05,
|
| 5992 |
+
"loss": 2.8773,
|
| 5993 |
+
"step": 493500
|
| 5994 |
+
},
|
| 5995 |
+
{
|
| 5996 |
+
"epoch": 7.11,
|
| 5997 |
+
"learning_rate": 4.972972130917701e-05,
|
| 5998 |
+
"loss": 2.8783,
|
| 5999 |
+
"step": 494000
|
| 6000 |
+
},
|
| 6001 |
+
{
|
| 6002 |
+
"epoch": 7.12,
|
| 6003 |
+
"learning_rate": 4.9677898420907784e-05,
|
| 6004 |
+
"loss": 2.8848,
|
| 6005 |
+
"step": 494500
|
| 6006 |
+
},
|
| 6007 |
+
{
|
| 6008 |
+
"epoch": 7.13,
|
| 6009 |
+
"learning_rate": 4.962607553263856e-05,
|
| 6010 |
+
"loss": 2.879,
|
| 6011 |
+
"step": 495000
|
| 6012 |
+
},
|
| 6013 |
+
{
|
| 6014 |
+
"epoch": 7.13,
|
| 6015 |
+
"learning_rate": 4.957414879088582e-05,
|
| 6016 |
+
"loss": 2.879,
|
| 6017 |
+
"step": 495500
|
| 6018 |
+
},
|
| 6019 |
+
{
|
| 6020 |
+
"epoch": 7.14,
|
| 6021 |
+
"learning_rate": 4.9522222049133086e-05,
|
| 6022 |
+
"loss": 2.8763,
|
| 6023 |
+
"step": 496000
|
| 6024 |
+
},
|
| 6025 |
+
{
|
| 6026 |
+
"epoch": 7.15,
|
| 6027 |
+
"learning_rate": 4.947029530738035e-05,
|
| 6028 |
+
"loss": 2.8815,
|
| 6029 |
+
"step": 496500
|
| 6030 |
+
},
|
| 6031 |
+
{
|
| 6032 |
+
"epoch": 7.15,
|
| 6033 |
+
"learning_rate": 4.9418368565627616e-05,
|
| 6034 |
+
"loss": 2.8765,
|
| 6035 |
+
"step": 497000
|
| 6036 |
+
},
|
| 6037 |
+
{
|
| 6038 |
+
"epoch": 7.16,
|
| 6039 |
+
"learning_rate": 4.9366441823874885e-05,
|
| 6040 |
+
"loss": 2.8775,
|
| 6041 |
+
"step": 497500
|
| 6042 |
+
},
|
| 6043 |
+
{
|
| 6044 |
+
"epoch": 7.17,
|
| 6045 |
+
"learning_rate": 4.931451508212214e-05,
|
| 6046 |
+
"loss": 2.8765,
|
| 6047 |
+
"step": 498000
|
| 6048 |
+
},
|
| 6049 |
+
{
|
| 6050 |
+
"epoch": 7.18,
|
| 6051 |
+
"learning_rate": 4.926258834036941e-05,
|
| 6052 |
+
"loss": 2.8789,
|
| 6053 |
+
"step": 498500
|
| 6054 |
+
},
|
| 6055 |
+
{
|
| 6056 |
+
"epoch": 7.18,
|
| 6057 |
+
"learning_rate": 4.9210765452100174e-05,
|
| 6058 |
+
"loss": 2.8767,
|
| 6059 |
+
"step": 499000
|
| 6060 |
+
},
|
| 6061 |
+
{
|
| 6062 |
+
"epoch": 7.19,
|
| 6063 |
+
"learning_rate": 4.915883871034744e-05,
|
| 6064 |
+
"loss": 2.8755,
|
| 6065 |
+
"step": 499500
|
| 6066 |
+
},
|
| 6067 |
+
{
|
| 6068 |
+
"epoch": 7.2,
|
| 6069 |
+
"learning_rate": 4.910691196859471e-05,
|
| 6070 |
+
"loss": 2.8766,
|
| 6071 |
+
"step": 500000
|
| 6072 |
+
},
|
| 6073 |
+
{
|
| 6074 |
+
"epoch": 7.2,
|
| 6075 |
+
"learning_rate": 4.905498522684198e-05,
|
| 6076 |
+
"loss": 2.8829,
|
| 6077 |
+
"step": 500500
|
| 6078 |
+
},
|
| 6079 |
+
{
|
| 6080 |
+
"epoch": 7.21,
|
| 6081 |
+
"learning_rate": 4.9003162338572745e-05,
|
| 6082 |
+
"loss": 2.8784,
|
| 6083 |
+
"step": 501000
|
| 6084 |
+
},
|
| 6085 |
+
{
|
| 6086 |
+
"epoch": 7.22,
|
| 6087 |
+
"learning_rate": 4.895123559682001e-05,
|
| 6088 |
+
"loss": 2.8789,
|
| 6089 |
+
"step": 501500
|
| 6090 |
+
},
|
| 6091 |
+
{
|
| 6092 |
+
"epoch": 7.23,
|
| 6093 |
+
"learning_rate": 4.889941270855078e-05,
|
| 6094 |
+
"loss": 2.8769,
|
| 6095 |
+
"step": 502000
|
| 6096 |
+
},
|
| 6097 |
+
{
|
| 6098 |
+
"epoch": 7.23,
|
| 6099 |
+
"learning_rate": 4.884748596679805e-05,
|
| 6100 |
+
"loss": 2.8792,
|
| 6101 |
+
"step": 502500
|
| 6102 |
+
},
|
| 6103 |
+
{
|
| 6104 |
+
"epoch": 7.24,
|
| 6105 |
+
"learning_rate": 4.879555922504531e-05,
|
| 6106 |
+
"loss": 2.8722,
|
| 6107 |
+
"step": 503000
|
| 6108 |
+
},
|
| 6109 |
+
{
|
| 6110 |
+
"epoch": 7.25,
|
| 6111 |
+
"learning_rate": 4.874363248329257e-05,
|
| 6112 |
+
"loss": 2.8766,
|
| 6113 |
+
"step": 503500
|
| 6114 |
+
},
|
| 6115 |
+
{
|
| 6116 |
+
"epoch": 7.25,
|
| 6117 |
+
"learning_rate": 4.869170574153984e-05,
|
| 6118 |
+
"loss": 2.8786,
|
| 6119 |
+
"step": 504000
|
| 6120 |
+
},
|
| 6121 |
+
{
|
| 6122 |
+
"epoch": 7.26,
|
| 6123 |
+
"learning_rate": 4.86397789997871e-05,
|
| 6124 |
+
"loss": 2.8784,
|
| 6125 |
+
"step": 504500
|
| 6126 |
+
},
|
| 6127 |
+
{
|
| 6128 |
+
"epoch": 7.27,
|
| 6129 |
+
"learning_rate": 4.858785225803437e-05,
|
| 6130 |
+
"loss": 2.8774,
|
| 6131 |
+
"step": 505000
|
| 6132 |
+
},
|
| 6133 |
+
{
|
| 6134 |
+
"epoch": 7.28,
|
| 6135 |
+
"learning_rate": 4.853592551628163e-05,
|
| 6136 |
+
"loss": 2.8709,
|
| 6137 |
+
"step": 505500
|
| 6138 |
+
},
|
| 6139 |
+
{
|
| 6140 |
+
"epoch": 7.28,
|
| 6141 |
+
"learning_rate": 4.8484102628012403e-05,
|
| 6142 |
+
"loss": 2.8734,
|
| 6143 |
+
"step": 506000
|
| 6144 |
+
},
|
| 6145 |
+
{
|
| 6146 |
+
"epoch": 7.29,
|
| 6147 |
+
"learning_rate": 4.8432175886259665e-05,
|
| 6148 |
+
"loss": 2.874,
|
| 6149 |
+
"step": 506500
|
| 6150 |
+
},
|
| 6151 |
+
{
|
| 6152 |
+
"epoch": 7.3,
|
| 6153 |
+
"learning_rate": 4.838024914450693e-05,
|
| 6154 |
+
"loss": 2.8691,
|
| 6155 |
+
"step": 507000
|
| 6156 |
+
},
|
| 6157 |
+
{
|
| 6158 |
+
"epoch": 7.3,
|
| 6159 |
+
"learning_rate": 4.8328322402754195e-05,
|
| 6160 |
+
"loss": 2.8739,
|
| 6161 |
+
"step": 507500
|
| 6162 |
+
},
|
| 6163 |
+
{
|
| 6164 |
+
"epoch": 7.31,
|
| 6165 |
+
"learning_rate": 4.827649951448497e-05,
|
| 6166 |
+
"loss": 2.8743,
|
| 6167 |
+
"step": 508000
|
| 6168 |
+
},
|
| 6169 |
+
{
|
| 6170 |
+
"epoch": 7.32,
|
| 6171 |
+
"learning_rate": 4.8224676626215734e-05,
|
| 6172 |
+
"loss": 2.8738,
|
| 6173 |
+
"step": 508500
|
| 6174 |
+
},
|
| 6175 |
+
{
|
| 6176 |
+
"epoch": 7.33,
|
| 6177 |
+
"learning_rate": 4.8172749884463e-05,
|
| 6178 |
+
"loss": 2.8711,
|
| 6179 |
+
"step": 509000
|
| 6180 |
+
},
|
| 6181 |
+
{
|
| 6182 |
+
"epoch": 7.33,
|
| 6183 |
+
"learning_rate": 4.8120823142710264e-05,
|
| 6184 |
+
"loss": 2.8737,
|
| 6185 |
+
"step": 509500
|
| 6186 |
+
},
|
| 6187 |
+
{
|
| 6188 |
+
"epoch": 7.34,
|
| 6189 |
+
"learning_rate": 4.806889640095753e-05,
|
| 6190 |
+
"loss": 2.8775,
|
| 6191 |
+
"step": 510000
|
| 6192 |
+
},
|
| 6193 |
+
{
|
| 6194 |
+
"epoch": 7.35,
|
| 6195 |
+
"learning_rate": 4.8016969659204794e-05,
|
| 6196 |
+
"loss": 2.8719,
|
| 6197 |
+
"step": 510500
|
| 6198 |
+
},
|
| 6199 |
+
{
|
| 6200 |
+
"epoch": 7.36,
|
| 6201 |
+
"learning_rate": 4.796504291745206e-05,
|
| 6202 |
+
"loss": 2.8738,
|
| 6203 |
+
"step": 511000
|
| 6204 |
+
},
|
| 6205 |
+
{
|
| 6206 |
+
"epoch": 7.36,
|
| 6207 |
+
"learning_rate": 4.791322002918283e-05,
|
| 6208 |
+
"loss": 2.8789,
|
| 6209 |
+
"step": 511500
|
| 6210 |
+
},
|
| 6211 |
+
{
|
| 6212 |
+
"epoch": 7.37,
|
| 6213 |
+
"learning_rate": 4.7861293287430096e-05,
|
| 6214 |
+
"loss": 2.873,
|
| 6215 |
+
"step": 512000
|
| 6216 |
+
},
|
| 6217 |
+
{
|
| 6218 |
+
"epoch": 7.38,
|
| 6219 |
+
"learning_rate": 4.780936654567736e-05,
|
| 6220 |
+
"loss": 2.8702,
|
| 6221 |
+
"step": 512500
|
| 6222 |
+
},
|
| 6223 |
+
{
|
| 6224 |
+
"epoch": 7.38,
|
| 6225 |
+
"learning_rate": 4.7757439803924626e-05,
|
| 6226 |
+
"loss": 2.8784,
|
| 6227 |
+
"step": 513000
|
| 6228 |
+
},
|
| 6229 |
+
{
|
| 6230 |
+
"epoch": 7.39,
|
| 6231 |
+
"learning_rate": 4.7705513062171895e-05,
|
| 6232 |
+
"loss": 2.868,
|
| 6233 |
+
"step": 513500
|
| 6234 |
+
},
|
| 6235 |
+
{
|
| 6236 |
+
"epoch": 7.4,
|
| 6237 |
+
"learning_rate": 4.765358632041915e-05,
|
| 6238 |
+
"loss": 2.8754,
|
| 6239 |
+
"step": 514000
|
| 6240 |
+
},
|
| 6241 |
+
{
|
| 6242 |
+
"epoch": 7.41,
|
| 6243 |
+
"learning_rate": 4.760176343214993e-05,
|
| 6244 |
+
"loss": 2.8758,
|
| 6245 |
+
"step": 514500
|
| 6246 |
+
},
|
| 6247 |
+
{
|
| 6248 |
+
"epoch": 7.41,
|
| 6249 |
+
"learning_rate": 4.754983669039719e-05,
|
| 6250 |
+
"loss": 2.8736,
|
| 6251 |
+
"step": 515000
|
| 6252 |
+
},
|
| 6253 |
+
{
|
| 6254 |
+
"epoch": 7.42,
|
| 6255 |
+
"learning_rate": 4.749790994864445e-05,
|
| 6256 |
+
"loss": 2.8699,
|
| 6257 |
+
"step": 515500
|
| 6258 |
+
},
|
| 6259 |
+
{
|
| 6260 |
+
"epoch": 7.43,
|
| 6261 |
+
"learning_rate": 4.744598320689172e-05,
|
| 6262 |
+
"loss": 2.8705,
|
| 6263 |
+
"step": 516000
|
| 6264 |
+
},
|
| 6265 |
+
{
|
| 6266 |
+
"epoch": 7.43,
|
| 6267 |
+
"learning_rate": 4.739405646513899e-05,
|
| 6268 |
+
"loss": 2.8713,
|
| 6269 |
+
"step": 516500
|
| 6270 |
+
},
|
| 6271 |
+
{
|
| 6272 |
+
"epoch": 7.44,
|
| 6273 |
+
"learning_rate": 4.734212972338625e-05,
|
| 6274 |
+
"loss": 2.8763,
|
| 6275 |
+
"step": 517000
|
| 6276 |
+
},
|
| 6277 |
+
{
|
| 6278 |
+
"epoch": 7.45,
|
| 6279 |
+
"learning_rate": 4.729020298163351e-05,
|
| 6280 |
+
"loss": 2.8744,
|
| 6281 |
+
"step": 517500
|
| 6282 |
+
},
|
| 6283 |
+
{
|
| 6284 |
+
"epoch": 7.46,
|
| 6285 |
+
"learning_rate": 4.7238380093364285e-05,
|
| 6286 |
+
"loss": 2.8726,
|
| 6287 |
+
"step": 518000
|
| 6288 |
+
},
|
| 6289 |
+
{
|
| 6290 |
+
"epoch": 7.46,
|
| 6291 |
+
"learning_rate": 4.7186453351611546e-05,
|
| 6292 |
+
"loss": 2.8754,
|
| 6293 |
+
"step": 518500
|
| 6294 |
+
},
|
| 6295 |
+
{
|
| 6296 |
+
"epoch": 7.47,
|
| 6297 |
+
"learning_rate": 4.7134526609858815e-05,
|
| 6298 |
+
"loss": 2.8741,
|
| 6299 |
+
"step": 519000
|
| 6300 |
+
},
|
| 6301 |
+
{
|
| 6302 |
+
"epoch": 7.48,
|
| 6303 |
+
"learning_rate": 4.708259986810608e-05,
|
| 6304 |
+
"loss": 2.8763,
|
| 6305 |
+
"step": 519500
|
| 6306 |
+
},
|
| 6307 |
+
{
|
| 6308 |
+
"epoch": 7.48,
|
| 6309 |
+
"learning_rate": 4.7030673126353344e-05,
|
| 6310 |
+
"loss": 2.8752,
|
| 6311 |
+
"step": 520000
|
| 6312 |
+
},
|
| 6313 |
+
{
|
| 6314 |
+
"epoch": 7.49,
|
| 6315 |
+
"learning_rate": 4.6978746384600606e-05,
|
| 6316 |
+
"loss": 2.8681,
|
| 6317 |
+
"step": 520500
|
| 6318 |
+
},
|
| 6319 |
+
{
|
| 6320 |
+
"epoch": 7.5,
|
| 6321 |
+
"learning_rate": 4.6926819642847874e-05,
|
| 6322 |
+
"loss": 2.8696,
|
| 6323 |
+
"step": 521000
|
| 6324 |
+
},
|
| 6325 |
+
{
|
| 6326 |
+
"epoch": 7.51,
|
| 6327 |
+
"learning_rate": 4.6874892901095136e-05,
|
| 6328 |
+
"loss": 2.8672,
|
| 6329 |
+
"step": 521500
|
| 6330 |
+
},
|
| 6331 |
+
{
|
| 6332 |
+
"epoch": 7.51,
|
| 6333 |
+
"learning_rate": 4.6823173866309413e-05,
|
| 6334 |
+
"loss": 2.8729,
|
| 6335 |
+
"step": 522000
|
| 6336 |
+
},
|
| 6337 |
+
{
|
| 6338 |
+
"epoch": 7.52,
|
| 6339 |
+
"learning_rate": 4.6771247124556675e-05,
|
| 6340 |
+
"loss": 2.8745,
|
| 6341 |
+
"step": 522500
|
| 6342 |
+
},
|
| 6343 |
+
{
|
| 6344 |
+
"epoch": 7.53,
|
| 6345 |
+
"learning_rate": 4.671932038280394e-05,
|
| 6346 |
+
"loss": 2.8692,
|
| 6347 |
+
"step": 523000
|
| 6348 |
+
},
|
| 6349 |
+
{
|
| 6350 |
+
"epoch": 7.54,
|
| 6351 |
+
"learning_rate": 4.6667393641051205e-05,
|
| 6352 |
+
"loss": 2.8736,
|
| 6353 |
+
"step": 523500
|
| 6354 |
+
},
|
| 6355 |
+
{
|
| 6356 |
+
"epoch": 7.54,
|
| 6357 |
+
"learning_rate": 4.661546689929847e-05,
|
| 6358 |
+
"loss": 2.8702,
|
| 6359 |
+
"step": 524000
|
| 6360 |
+
},
|
| 6361 |
+
{
|
| 6362 |
+
"epoch": 7.55,
|
| 6363 |
+
"learning_rate": 4.6563540157545735e-05,
|
| 6364 |
+
"loss": 2.8747,
|
| 6365 |
+
"step": 524500
|
| 6366 |
+
},
|
| 6367 |
+
{
|
| 6368 |
+
"epoch": 7.56,
|
| 6369 |
+
"learning_rate": 4.6511613415793e-05,
|
| 6370 |
+
"loss": 2.8673,
|
| 6371 |
+
"step": 525000
|
| 6372 |
+
},
|
| 6373 |
+
{
|
| 6374 |
+
"epoch": 7.56,
|
| 6375 |
+
"learning_rate": 4.6459686674040264e-05,
|
| 6376 |
+
"loss": 2.8686,
|
| 6377 |
+
"step": 525500
|
| 6378 |
+
},
|
| 6379 |
+
{
|
| 6380 |
+
"epoch": 7.57,
|
| 6381 |
+
"learning_rate": 4.640786378577104e-05,
|
| 6382 |
+
"loss": 2.8671,
|
| 6383 |
+
"step": 526000
|
| 6384 |
+
},
|
| 6385 |
+
{
|
| 6386 |
+
"epoch": 7.58,
|
| 6387 |
+
"learning_rate": 4.63559370440183e-05,
|
| 6388 |
+
"loss": 2.8714,
|
| 6389 |
+
"step": 526500
|
| 6390 |
+
},
|
| 6391 |
+
{
|
| 6392 |
+
"epoch": 7.59,
|
| 6393 |
+
"learning_rate": 4.630401030226557e-05,
|
| 6394 |
+
"loss": 2.8695,
|
| 6395 |
+
"step": 527000
|
| 6396 |
+
},
|
| 6397 |
+
{
|
| 6398 |
+
"epoch": 7.59,
|
| 6399 |
+
"learning_rate": 4.6252083560512835e-05,
|
| 6400 |
+
"loss": 2.8713,
|
| 6401 |
+
"step": 527500
|
| 6402 |
+
},
|
| 6403 |
+
{
|
| 6404 |
+
"epoch": 7.6,
|
| 6405 |
+
"learning_rate": 4.62002606722436e-05,
|
| 6406 |
+
"loss": 2.8692,
|
| 6407 |
+
"step": 528000
|
| 6408 |
+
},
|
| 6409 |
+
{
|
| 6410 |
+
"epoch": 7.61,
|
| 6411 |
+
"learning_rate": 4.614833393049087e-05,
|
| 6412 |
+
"loss": 2.8701,
|
| 6413 |
+
"step": 528500
|
| 6414 |
+
},
|
| 6415 |
+
{
|
| 6416 |
+
"epoch": 7.61,
|
| 6417 |
+
"learning_rate": 4.6096511042221636e-05,
|
| 6418 |
+
"loss": 2.8685,
|
| 6419 |
+
"step": 529000
|
| 6420 |
+
},
|
| 6421 |
+
{
|
| 6422 |
+
"epoch": 7.62,
|
| 6423 |
+
"learning_rate": 4.6044584300468905e-05,
|
| 6424 |
+
"loss": 2.8695,
|
| 6425 |
+
"step": 529500
|
| 6426 |
+
},
|
| 6427 |
+
{
|
| 6428 |
+
"epoch": 7.63,
|
| 6429 |
+
"learning_rate": 4.599276141219967e-05,
|
| 6430 |
+
"loss": 2.8624,
|
| 6431 |
+
"step": 530000
|
| 6432 |
+
},
|
| 6433 |
+
{
|
| 6434 |
+
"epoch": 7.64,
|
| 6435 |
+
"learning_rate": 4.594083467044694e-05,
|
| 6436 |
+
"loss": 2.8658,
|
| 6437 |
+
"step": 530500
|
| 6438 |
+
},
|
| 6439 |
+
{
|
| 6440 |
+
"epoch": 7.64,
|
| 6441 |
+
"learning_rate": 4.58889079286942e-05,
|
| 6442 |
+
"loss": 2.8725,
|
| 6443 |
+
"step": 531000
|
| 6444 |
+
},
|
| 6445 |
+
{
|
| 6446 |
+
"epoch": 7.65,
|
| 6447 |
+
"learning_rate": 4.583698118694146e-05,
|
| 6448 |
+
"loss": 2.8695,
|
| 6449 |
+
"step": 531500
|
| 6450 |
+
},
|
| 6451 |
+
{
|
| 6452 |
+
"epoch": 7.66,
|
| 6453 |
+
"learning_rate": 4.578505444518873e-05,
|
| 6454 |
+
"loss": 2.862,
|
| 6455 |
+
"step": 532000
|
| 6456 |
+
},
|
| 6457 |
+
{
|
| 6458 |
+
"epoch": 7.66,
|
| 6459 |
+
"learning_rate": 4.5733127703436e-05,
|
| 6460 |
+
"loss": 2.8737,
|
| 6461 |
+
"step": 532500
|
| 6462 |
+
},
|
| 6463 |
+
{
|
| 6464 |
+
"epoch": 7.67,
|
| 6465 |
+
"learning_rate": 4.568120096168326e-05,
|
| 6466 |
+
"loss": 2.8715,
|
| 6467 |
+
"step": 533000
|
| 6468 |
+
},
|
| 6469 |
+
{
|
| 6470 |
+
"epoch": 7.68,
|
| 6471 |
+
"learning_rate": 4.562927421993052e-05,
|
| 6472 |
+
"loss": 2.8704,
|
| 6473 |
+
"step": 533500
|
| 6474 |
+
},
|
| 6475 |
+
{
|
| 6476 |
+
"epoch": 7.69,
|
| 6477 |
+
"learning_rate": 4.557734747817779e-05,
|
| 6478 |
+
"loss": 2.8641,
|
| 6479 |
+
"step": 534000
|
| 6480 |
+
},
|
| 6481 |
+
{
|
| 6482 |
+
"epoch": 7.69,
|
| 6483 |
+
"learning_rate": 4.552542073642506e-05,
|
| 6484 |
+
"loss": 2.8709,
|
| 6485 |
+
"step": 534500
|
| 6486 |
+
},
|
| 6487 |
+
{
|
| 6488 |
+
"epoch": 7.7,
|
| 6489 |
+
"learning_rate": 4.547349399467232e-05,
|
| 6490 |
+
"loss": 2.8693,
|
| 6491 |
+
"step": 535000
|
| 6492 |
+
},
|
| 6493 |
+
{
|
| 6494 |
+
"epoch": 7.71,
|
| 6495 |
+
"learning_rate": 4.542156725291958e-05,
|
| 6496 |
+
"loss": 2.8679,
|
| 6497 |
+
"step": 535500
|
| 6498 |
+
},
|
| 6499 |
+
{
|
| 6500 |
+
"epoch": 7.72,
|
| 6501 |
+
"learning_rate": 4.5369744364650354e-05,
|
| 6502 |
+
"loss": 2.8706,
|
| 6503 |
+
"step": 536000
|
| 6504 |
+
},
|
| 6505 |
+
{
|
| 6506 |
+
"epoch": 7.72,
|
| 6507 |
+
"learning_rate": 4.5317817622897616e-05,
|
| 6508 |
+
"loss": 2.8684,
|
| 6509 |
+
"step": 536500
|
| 6510 |
+
},
|
| 6511 |
+
{
|
| 6512 |
+
"epoch": 7.73,
|
| 6513 |
+
"learning_rate": 4.5265890881144884e-05,
|
| 6514 |
+
"loss": 2.8682,
|
| 6515 |
+
"step": 537000
|
| 6516 |
+
},
|
| 6517 |
+
{
|
| 6518 |
+
"epoch": 7.74,
|
| 6519 |
+
"learning_rate": 4.5213964139392146e-05,
|
| 6520 |
+
"loss": 2.8671,
|
| 6521 |
+
"step": 537500
|
| 6522 |
+
},
|
| 6523 |
+
{
|
| 6524 |
+
"epoch": 7.74,
|
| 6525 |
+
"learning_rate": 4.516214125112292e-05,
|
| 6526 |
+
"loss": 2.8749,
|
| 6527 |
+
"step": 538000
|
| 6528 |
+
},
|
| 6529 |
+
{
|
| 6530 |
+
"epoch": 7.75,
|
| 6531 |
+
"learning_rate": 4.511021450937018e-05,
|
| 6532 |
+
"loss": 2.8687,
|
| 6533 |
+
"step": 538500
|
| 6534 |
+
},
|
| 6535 |
+
{
|
| 6536 |
+
"epoch": 7.76,
|
| 6537 |
+
"learning_rate": 4.505828776761745e-05,
|
| 6538 |
+
"loss": 2.8699,
|
| 6539 |
+
"step": 539000
|
| 6540 |
+
},
|
| 6541 |
+
{
|
| 6542 |
+
"epoch": 7.77,
|
| 6543 |
+
"learning_rate": 4.500646487934822e-05,
|
| 6544 |
+
"loss": 2.8659,
|
| 6545 |
+
"step": 539500
|
| 6546 |
+
},
|
| 6547 |
+
{
|
| 6548 |
+
"epoch": 7.77,
|
| 6549 |
+
"learning_rate": 4.495453813759548e-05,
|
| 6550 |
+
"loss": 2.8665,
|
| 6551 |
+
"step": 540000
|
| 6552 |
+
},
|
| 6553 |
+
{
|
| 6554 |
+
"epoch": 7.78,
|
| 6555 |
+
"learning_rate": 4.4902611395842745e-05,
|
| 6556 |
+
"loss": 2.8668,
|
| 6557 |
+
"step": 540500
|
| 6558 |
+
},
|
| 6559 |
+
{
|
| 6560 |
+
"epoch": 7.79,
|
| 6561 |
+
"learning_rate": 4.485068465409001e-05,
|
| 6562 |
+
"loss": 2.872,
|
| 6563 |
+
"step": 541000
|
| 6564 |
+
},
|
| 6565 |
+
{
|
| 6566 |
+
"epoch": 7.79,
|
| 6567 |
+
"learning_rate": 4.4798757912337274e-05,
|
| 6568 |
+
"loss": 2.8627,
|
| 6569 |
+
"step": 541500
|
| 6570 |
+
},
|
| 6571 |
+
{
|
| 6572 |
+
"epoch": 7.8,
|
| 6573 |
+
"learning_rate": 4.474683117058454e-05,
|
| 6574 |
+
"loss": 2.8638,
|
| 6575 |
+
"step": 542000
|
| 6576 |
+
},
|
| 6577 |
+
{
|
| 6578 |
+
"epoch": 7.81,
|
| 6579 |
+
"learning_rate": 4.4694904428831804e-05,
|
| 6580 |
+
"loss": 2.8647,
|
| 6581 |
+
"step": 542500
|
| 6582 |
+
},
|
| 6583 |
+
{
|
| 6584 |
+
"epoch": 7.82,
|
| 6585 |
+
"learning_rate": 4.4642977687079066e-05,
|
| 6586 |
+
"loss": 2.8621,
|
| 6587 |
+
"step": 543000
|
| 6588 |
+
},
|
| 6589 |
+
{
|
| 6590 |
+
"epoch": 7.82,
|
| 6591 |
+
"learning_rate": 4.4591154798809845e-05,
|
| 6592 |
+
"loss": 2.8669,
|
| 6593 |
+
"step": 543500
|
| 6594 |
+
},
|
| 6595 |
+
{
|
| 6596 |
+
"epoch": 7.83,
|
| 6597 |
+
"learning_rate": 4.453922805705711e-05,
|
| 6598 |
+
"loss": 2.8648,
|
| 6599 |
+
"step": 544000
|
| 6600 |
+
},
|
| 6601 |
+
{
|
| 6602 |
+
"epoch": 7.84,
|
| 6603 |
+
"learning_rate": 4.448730131530437e-05,
|
| 6604 |
+
"loss": 2.8584,
|
| 6605 |
+
"step": 544500
|
| 6606 |
+
},
|
| 6607 |
+
{
|
| 6608 |
+
"epoch": 7.84,
|
| 6609 |
+
"learning_rate": 4.443537457355164e-05,
|
| 6610 |
+
"loss": 2.8608,
|
| 6611 |
+
"step": 545000
|
| 6612 |
+
},
|
| 6613 |
+
{
|
| 6614 |
+
"epoch": 7.85,
|
| 6615 |
+
"learning_rate": 4.43835516852824e-05,
|
| 6616 |
+
"loss": 2.8663,
|
| 6617 |
+
"step": 545500
|
| 6618 |
+
},
|
| 6619 |
+
{
|
| 6620 |
+
"epoch": 7.86,
|
| 6621 |
+
"learning_rate": 4.433162494352967e-05,
|
| 6622 |
+
"loss": 2.8612,
|
| 6623 |
+
"step": 546000
|
| 6624 |
+
},
|
| 6625 |
+
{
|
| 6626 |
+
"epoch": 7.87,
|
| 6627 |
+
"learning_rate": 4.427969820177694e-05,
|
| 6628 |
+
"loss": 2.8626,
|
| 6629 |
+
"step": 546500
|
| 6630 |
+
},
|
| 6631 |
+
{
|
| 6632 |
+
"epoch": 7.87,
|
| 6633 |
+
"learning_rate": 4.4227771460024194e-05,
|
| 6634 |
+
"loss": 2.8659,
|
| 6635 |
+
"step": 547000
|
| 6636 |
+
},
|
| 6637 |
+
{
|
| 6638 |
+
"epoch": 7.88,
|
| 6639 |
+
"learning_rate": 4.4175948571754974e-05,
|
| 6640 |
+
"loss": 2.8647,
|
| 6641 |
+
"step": 547500
|
| 6642 |
+
},
|
| 6643 |
+
{
|
| 6644 |
+
"epoch": 7.89,
|
| 6645 |
+
"learning_rate": 4.4124021830002236e-05,
|
| 6646 |
+
"loss": 2.8618,
|
| 6647 |
+
"step": 548000
|
| 6648 |
+
},
|
| 6649 |
+
{
|
| 6650 |
+
"epoch": 7.9,
|
| 6651 |
+
"learning_rate": 4.407219894173301e-05,
|
| 6652 |
+
"loss": 2.8597,
|
| 6653 |
+
"step": 548500
|
| 6654 |
+
},
|
| 6655 |
+
{
|
| 6656 |
+
"epoch": 7.9,
|
| 6657 |
+
"learning_rate": 4.402027219998027e-05,
|
| 6658 |
+
"loss": 2.8669,
|
| 6659 |
+
"step": 549000
|
| 6660 |
+
},
|
| 6661 |
+
{
|
| 6662 |
+
"epoch": 7.91,
|
| 6663 |
+
"learning_rate": 4.396834545822753e-05,
|
| 6664 |
+
"loss": 2.8605,
|
| 6665 |
+
"step": 549500
|
| 6666 |
+
},
|
| 6667 |
+
{
|
| 6668 |
+
"epoch": 7.92,
|
| 6669 |
+
"learning_rate": 4.39164187164748e-05,
|
| 6670 |
+
"loss": 2.8683,
|
| 6671 |
+
"step": 550000
|
| 6672 |
+
},
|
| 6673 |
+
{
|
| 6674 |
+
"epoch": 7.92,
|
| 6675 |
+
"learning_rate": 4.386449197472207e-05,
|
| 6676 |
+
"loss": 2.8648,
|
| 6677 |
+
"step": 550500
|
| 6678 |
+
},
|
| 6679 |
+
{
|
| 6680 |
+
"epoch": 7.93,
|
| 6681 |
+
"learning_rate": 4.381256523296933e-05,
|
| 6682 |
+
"loss": 2.8607,
|
| 6683 |
+
"step": 551000
|
| 6684 |
+
},
|
| 6685 |
+
{
|
| 6686 |
+
"epoch": 7.94,
|
| 6687 |
+
"learning_rate": 4.376063849121659e-05,
|
| 6688 |
+
"loss": 2.8627,
|
| 6689 |
+
"step": 551500
|
| 6690 |
+
},
|
| 6691 |
+
{
|
| 6692 |
+
"epoch": 7.95,
|
| 6693 |
+
"learning_rate": 4.370871174946386e-05,
|
| 6694 |
+
"loss": 2.8654,
|
| 6695 |
+
"step": 552000
|
| 6696 |
+
},
|
| 6697 |
+
{
|
| 6698 |
+
"epoch": 7.95,
|
| 6699 |
+
"learning_rate": 4.3656888861194626e-05,
|
| 6700 |
+
"loss": 2.8593,
|
| 6701 |
+
"step": 552500
|
| 6702 |
+
},
|
| 6703 |
+
{
|
| 6704 |
+
"epoch": 7.96,
|
| 6705 |
+
"learning_rate": 4.36050659729254e-05,
|
| 6706 |
+
"loss": 2.8665,
|
| 6707 |
+
"step": 553000
|
| 6708 |
+
},
|
| 6709 |
+
{
|
| 6710 |
+
"epoch": 7.97,
|
| 6711 |
+
"learning_rate": 4.355313923117266e-05,
|
| 6712 |
+
"loss": 2.8612,
|
| 6713 |
+
"step": 553500
|
| 6714 |
+
},
|
| 6715 |
+
{
|
| 6716 |
+
"epoch": 7.97,
|
| 6717 |
+
"learning_rate": 4.350121248941993e-05,
|
| 6718 |
+
"loss": 2.8664,
|
| 6719 |
+
"step": 554000
|
| 6720 |
+
},
|
| 6721 |
+
{
|
| 6722 |
+
"epoch": 7.98,
|
| 6723 |
+
"learning_rate": 4.34492857476672e-05,
|
| 6724 |
+
"loss": 2.8526,
|
| 6725 |
+
"step": 554500
|
| 6726 |
+
},
|
| 6727 |
+
{
|
| 6728 |
+
"epoch": 7.99,
|
| 6729 |
+
"learning_rate": 4.339735900591446e-05,
|
| 6730 |
+
"loss": 2.8589,
|
| 6731 |
+
"step": 555000
|
| 6732 |
+
},
|
| 6733 |
+
{
|
| 6734 |
+
"epoch": 8.0,
|
| 6735 |
+
"learning_rate": 4.334553611764523e-05,
|
| 6736 |
+
"loss": 2.8609,
|
| 6737 |
+
"step": 555500
|
| 6738 |
+
},
|
| 6739 |
+
{
|
| 6740 |
+
"epoch": 8.0,
|
| 6741 |
+
"eval_accuracy": 0.49625970451459295,
|
| 6742 |
+
"eval_loss": 2.6880686283111572,
|
| 6743 |
+
"eval_runtime": 555.8516,
|
| 6744 |
+
"eval_samples_per_second": 969.57,
|
| 6745 |
+
"eval_steps_per_second": 40.399,
|
| 6746 |
+
"step": 555784
|
| 6747 |
}
|
| 6748 |
],
|
| 6749 |
"max_steps": 972622,
|
| 6750 |
"num_train_epochs": 14,
|
| 6751 |
+
"total_flos": 3.418009686270542e+18,
|
| 6752 |
"trial_name": null,
|
| 6753 |
"trial_params": null
|
| 6754 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118242180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08fa1d11b1e8d17acf511a08375f85229d0f5fbc9ac05460b78b3d6eae608ef7
|
| 3 |
size 118242180
|
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:066b48db75cbbc3697b8e7f9e071c6b08dc6498bd4e37d936177ef5dfe4b202f
|
| 3 |
+
size 183718
|