Training in progress, epoch 12
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +846 -3
- pytorch_model.bin +1 -1
- runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 236469913
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef2ed6e9c5769612ee83bbcff1434945866238417fae08918c197f74b46418b8
|
| 3 |
size 236469913
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118242180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c098f75814bf0d67b7b2990383c442d386c5dedd85129e03b179a63a78cf10e
|
| 3 |
size 118242180
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd9765584191118ad624ada4294aa4c7c4b446e8275a08afe002751786a580c3
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85a3eb859005f1a2e2b3cdf3c05d91017c106fb92e1c7a7c1999929d8f053b63
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76b7fc01a18c41ee4d5f6a3a44379550533c02e5ae590743297a4e13668b3d82
|
| 3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb87d42e1d664298b59b2f6667ddacc7adf88687ba59853e16a623f40650b2d6
|
| 3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd0be3c29b40bb10e427fd603007010cb5d019dfe3909eb92cb96939b7adcda3
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9273,11 +9273,854 @@
|
|
| 9273 |
"eval_samples_per_second": 969.38,
|
| 9274 |
"eval_steps_per_second": 40.391,
|
| 9275 |
"step": 764203
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9276 |
}
|
| 9277 |
],
|
| 9278 |
"max_steps": 972622,
|
| 9279 |
"num_train_epochs": 14,
|
| 9280 |
-
"total_flos":
|
| 9281 |
"trial_name": null,
|
| 9282 |
"trial_params": null
|
| 9283 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 12.0,
|
| 5 |
+
"global_step": 833676,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9273 |
"eval_samples_per_second": 969.38,
|
| 9274 |
"eval_steps_per_second": 40.391,
|
| 9275 |
"step": 764203
|
| 9276 |
+
},
|
| 9277 |
+
{
|
| 9278 |
+
"epoch": 11.0,
|
| 9279 |
+
"learning_rate": 2.1648362490198828e-05,
|
| 9280 |
+
"loss": 2.7987,
|
| 9281 |
+
"step": 764500
|
| 9282 |
+
},
|
| 9283 |
+
{
|
| 9284 |
+
"epoch": 11.01,
|
| 9285 |
+
"learning_rate": 2.15965396019296e-05,
|
| 9286 |
+
"loss": 2.7989,
|
| 9287 |
+
"step": 765000
|
| 9288 |
+
},
|
| 9289 |
+
{
|
| 9290 |
+
"epoch": 11.02,
|
| 9291 |
+
"learning_rate": 2.1544612860176862e-05,
|
| 9292 |
+
"loss": 2.7949,
|
| 9293 |
+
"step": 765500
|
| 9294 |
+
},
|
| 9295 |
+
{
|
| 9296 |
+
"epoch": 11.03,
|
| 9297 |
+
"learning_rate": 2.149268611842413e-05,
|
| 9298 |
+
"loss": 2.8003,
|
| 9299 |
+
"step": 766000
|
| 9300 |
+
},
|
| 9301 |
+
{
|
| 9302 |
+
"epoch": 11.03,
|
| 9303 |
+
"learning_rate": 2.1440759376671392e-05,
|
| 9304 |
+
"loss": 2.7988,
|
| 9305 |
+
"step": 766500
|
| 9306 |
+
},
|
| 9307 |
+
{
|
| 9308 |
+
"epoch": 11.04,
|
| 9309 |
+
"learning_rate": 2.1388936488402165e-05,
|
| 9310 |
+
"loss": 2.796,
|
| 9311 |
+
"step": 767000
|
| 9312 |
+
},
|
| 9313 |
+
{
|
| 9314 |
+
"epoch": 11.05,
|
| 9315 |
+
"learning_rate": 2.1337009746649427e-05,
|
| 9316 |
+
"loss": 2.7986,
|
| 9317 |
+
"step": 767500
|
| 9318 |
+
},
|
| 9319 |
+
{
|
| 9320 |
+
"epoch": 11.05,
|
| 9321 |
+
"learning_rate": 2.128508300489669e-05,
|
| 9322 |
+
"loss": 2.7976,
|
| 9323 |
+
"step": 768000
|
| 9324 |
+
},
|
| 9325 |
+
{
|
| 9326 |
+
"epoch": 11.06,
|
| 9327 |
+
"learning_rate": 2.1233156263143956e-05,
|
| 9328 |
+
"loss": 2.7985,
|
| 9329 |
+
"step": 768500
|
| 9330 |
+
},
|
| 9331 |
+
{
|
| 9332 |
+
"epoch": 11.07,
|
| 9333 |
+
"learning_rate": 2.118122952139122e-05,
|
| 9334 |
+
"loss": 2.7967,
|
| 9335 |
+
"step": 769000
|
| 9336 |
+
},
|
| 9337 |
+
{
|
| 9338 |
+
"epoch": 11.08,
|
| 9339 |
+
"learning_rate": 2.112930277963849e-05,
|
| 9340 |
+
"loss": 2.7995,
|
| 9341 |
+
"step": 769500
|
| 9342 |
+
},
|
| 9343 |
+
{
|
| 9344 |
+
"epoch": 11.08,
|
| 9345 |
+
"learning_rate": 2.1077479891369256e-05,
|
| 9346 |
+
"loss": 2.7985,
|
| 9347 |
+
"step": 770000
|
| 9348 |
+
},
|
| 9349 |
+
{
|
| 9350 |
+
"epoch": 11.09,
|
| 9351 |
+
"learning_rate": 2.1025553149616524e-05,
|
| 9352 |
+
"loss": 2.7961,
|
| 9353 |
+
"step": 770500
|
| 9354 |
+
},
|
| 9355 |
+
{
|
| 9356 |
+
"epoch": 11.1,
|
| 9357 |
+
"learning_rate": 2.0973626407863786e-05,
|
| 9358 |
+
"loss": 2.7965,
|
| 9359 |
+
"step": 771000
|
| 9360 |
+
},
|
| 9361 |
+
{
|
| 9362 |
+
"epoch": 11.11,
|
| 9363 |
+
"learning_rate": 2.0921699666111054e-05,
|
| 9364 |
+
"loss": 2.7944,
|
| 9365 |
+
"step": 771500
|
| 9366 |
+
},
|
| 9367 |
+
{
|
| 9368 |
+
"epoch": 11.11,
|
| 9369 |
+
"learning_rate": 2.0869772924358315e-05,
|
| 9370 |
+
"loss": 2.7952,
|
| 9371 |
+
"step": 772000
|
| 9372 |
+
},
|
| 9373 |
+
{
|
| 9374 |
+
"epoch": 11.12,
|
| 9375 |
+
"learning_rate": 2.081784618260558e-05,
|
| 9376 |
+
"loss": 2.7922,
|
| 9377 |
+
"step": 772500
|
| 9378 |
+
},
|
| 9379 |
+
{
|
| 9380 |
+
"epoch": 11.13,
|
| 9381 |
+
"learning_rate": 2.0765919440852845e-05,
|
| 9382 |
+
"loss": 2.7979,
|
| 9383 |
+
"step": 773000
|
| 9384 |
+
},
|
| 9385 |
+
{
|
| 9386 |
+
"epoch": 11.13,
|
| 9387 |
+
"learning_rate": 2.0714096552583618e-05,
|
| 9388 |
+
"loss": 2.7943,
|
| 9389 |
+
"step": 773500
|
| 9390 |
+
},
|
| 9391 |
+
{
|
| 9392 |
+
"epoch": 11.14,
|
| 9393 |
+
"learning_rate": 2.066216981083088e-05,
|
| 9394 |
+
"loss": 2.7962,
|
| 9395 |
+
"step": 774000
|
| 9396 |
+
},
|
| 9397 |
+
{
|
| 9398 |
+
"epoch": 11.15,
|
| 9399 |
+
"learning_rate": 2.0610243069078145e-05,
|
| 9400 |
+
"loss": 2.7974,
|
| 9401 |
+
"step": 774500
|
| 9402 |
+
},
|
| 9403 |
+
{
|
| 9404 |
+
"epoch": 11.16,
|
| 9405 |
+
"learning_rate": 2.055831632732541e-05,
|
| 9406 |
+
"loss": 2.7969,
|
| 9407 |
+
"step": 775000
|
| 9408 |
+
},
|
| 9409 |
+
{
|
| 9410 |
+
"epoch": 11.16,
|
| 9411 |
+
"learning_rate": 2.050649343905618e-05,
|
| 9412 |
+
"loss": 2.7929,
|
| 9413 |
+
"step": 775500
|
| 9414 |
+
},
|
| 9415 |
+
{
|
| 9416 |
+
"epoch": 11.17,
|
| 9417 |
+
"learning_rate": 2.0454566697303444e-05,
|
| 9418 |
+
"loss": 2.7957,
|
| 9419 |
+
"step": 776000
|
| 9420 |
+
},
|
| 9421 |
+
{
|
| 9422 |
+
"epoch": 11.18,
|
| 9423 |
+
"learning_rate": 2.040263995555071e-05,
|
| 9424 |
+
"loss": 2.7974,
|
| 9425 |
+
"step": 776500
|
| 9426 |
+
},
|
| 9427 |
+
{
|
| 9428 |
+
"epoch": 11.18,
|
| 9429 |
+
"learning_rate": 2.0350713213797977e-05,
|
| 9430 |
+
"loss": 2.8017,
|
| 9431 |
+
"step": 777000
|
| 9432 |
+
},
|
| 9433 |
+
{
|
| 9434 |
+
"epoch": 11.19,
|
| 9435 |
+
"learning_rate": 2.029878647204524e-05,
|
| 9436 |
+
"loss": 2.7986,
|
| 9437 |
+
"step": 777500
|
| 9438 |
+
},
|
| 9439 |
+
{
|
| 9440 |
+
"epoch": 11.2,
|
| 9441 |
+
"learning_rate": 2.0246859730292507e-05,
|
| 9442 |
+
"loss": 2.7928,
|
| 9443 |
+
"step": 778000
|
| 9444 |
+
},
|
| 9445 |
+
{
|
| 9446 |
+
"epoch": 11.21,
|
| 9447 |
+
"learning_rate": 2.019493298853977e-05,
|
| 9448 |
+
"loss": 2.7981,
|
| 9449 |
+
"step": 778500
|
| 9450 |
+
},
|
| 9451 |
+
{
|
| 9452 |
+
"epoch": 11.21,
|
| 9453 |
+
"learning_rate": 2.014311010027054e-05,
|
| 9454 |
+
"loss": 2.7985,
|
| 9455 |
+
"step": 779000
|
| 9456 |
+
},
|
| 9457 |
+
{
|
| 9458 |
+
"epoch": 11.22,
|
| 9459 |
+
"learning_rate": 2.0091183358517803e-05,
|
| 9460 |
+
"loss": 2.7979,
|
| 9461 |
+
"step": 779500
|
| 9462 |
+
},
|
| 9463 |
+
{
|
| 9464 |
+
"epoch": 11.23,
|
| 9465 |
+
"learning_rate": 2.0039256616765068e-05,
|
| 9466 |
+
"loss": 2.8037,
|
| 9467 |
+
"step": 780000
|
| 9468 |
+
},
|
| 9469 |
+
{
|
| 9470 |
+
"epoch": 11.23,
|
| 9471 |
+
"learning_rate": 1.9987329875012333e-05,
|
| 9472 |
+
"loss": 2.7996,
|
| 9473 |
+
"step": 780500
|
| 9474 |
+
},
|
| 9475 |
+
{
|
| 9476 |
+
"epoch": 11.24,
|
| 9477 |
+
"learning_rate": 1.9935403133259598e-05,
|
| 9478 |
+
"loss": 2.7954,
|
| 9479 |
+
"step": 781000
|
| 9480 |
+
},
|
| 9481 |
+
{
|
| 9482 |
+
"epoch": 11.25,
|
| 9483 |
+
"learning_rate": 1.9883476391506863e-05,
|
| 9484 |
+
"loss": 2.8006,
|
| 9485 |
+
"step": 781500
|
| 9486 |
+
},
|
| 9487 |
+
{
|
| 9488 |
+
"epoch": 11.26,
|
| 9489 |
+
"learning_rate": 1.9831549649754128e-05,
|
| 9490 |
+
"loss": 2.7932,
|
| 9491 |
+
"step": 782000
|
| 9492 |
+
},
|
| 9493 |
+
{
|
| 9494 |
+
"epoch": 11.26,
|
| 9495 |
+
"learning_rate": 1.9779622908001393e-05,
|
| 9496 |
+
"loss": 2.7921,
|
| 9497 |
+
"step": 782500
|
| 9498 |
+
},
|
| 9499 |
+
{
|
| 9500 |
+
"epoch": 11.27,
|
| 9501 |
+
"learning_rate": 1.972790387321567e-05,
|
| 9502 |
+
"loss": 2.7929,
|
| 9503 |
+
"step": 783000
|
| 9504 |
+
},
|
| 9505 |
+
{
|
| 9506 |
+
"epoch": 11.28,
|
| 9507 |
+
"learning_rate": 1.9675977131462932e-05,
|
| 9508 |
+
"loss": 2.7967,
|
| 9509 |
+
"step": 783500
|
| 9510 |
+
},
|
| 9511 |
+
{
|
| 9512 |
+
"epoch": 11.28,
|
| 9513 |
+
"learning_rate": 1.9624050389710197e-05,
|
| 9514 |
+
"loss": 2.7964,
|
| 9515 |
+
"step": 784000
|
| 9516 |
+
},
|
| 9517 |
+
{
|
| 9518 |
+
"epoch": 11.29,
|
| 9519 |
+
"learning_rate": 1.957212364795746e-05,
|
| 9520 |
+
"loss": 2.7967,
|
| 9521 |
+
"step": 784500
|
| 9522 |
+
},
|
| 9523 |
+
{
|
| 9524 |
+
"epoch": 11.3,
|
| 9525 |
+
"learning_rate": 1.9520196906204726e-05,
|
| 9526 |
+
"loss": 2.7948,
|
| 9527 |
+
"step": 785000
|
| 9528 |
+
},
|
| 9529 |
+
{
|
| 9530 |
+
"epoch": 11.31,
|
| 9531 |
+
"learning_rate": 1.94683740179355e-05,
|
| 9532 |
+
"loss": 2.7907,
|
| 9533 |
+
"step": 785500
|
| 9534 |
+
},
|
| 9535 |
+
{
|
| 9536 |
+
"epoch": 11.31,
|
| 9537 |
+
"learning_rate": 1.941644727618276e-05,
|
| 9538 |
+
"loss": 2.7975,
|
| 9539 |
+
"step": 786000
|
| 9540 |
+
},
|
| 9541 |
+
{
|
| 9542 |
+
"epoch": 11.32,
|
| 9543 |
+
"learning_rate": 1.936452053443003e-05,
|
| 9544 |
+
"loss": 2.7976,
|
| 9545 |
+
"step": 786500
|
| 9546 |
+
},
|
| 9547 |
+
{
|
| 9548 |
+
"epoch": 11.33,
|
| 9549 |
+
"learning_rate": 1.931259379267729e-05,
|
| 9550 |
+
"loss": 2.797,
|
| 9551 |
+
"step": 787000
|
| 9552 |
+
},
|
| 9553 |
+
{
|
| 9554 |
+
"epoch": 11.34,
|
| 9555 |
+
"learning_rate": 1.926066705092456e-05,
|
| 9556 |
+
"loss": 2.7934,
|
| 9557 |
+
"step": 787500
|
| 9558 |
+
},
|
| 9559 |
+
{
|
| 9560 |
+
"epoch": 11.34,
|
| 9561 |
+
"learning_rate": 1.920874030917182e-05,
|
| 9562 |
+
"loss": 2.7977,
|
| 9563 |
+
"step": 788000
|
| 9564 |
+
},
|
| 9565 |
+
{
|
| 9566 |
+
"epoch": 11.35,
|
| 9567 |
+
"learning_rate": 1.9156917420902594e-05,
|
| 9568 |
+
"loss": 2.7952,
|
| 9569 |
+
"step": 788500
|
| 9570 |
+
},
|
| 9571 |
+
{
|
| 9572 |
+
"epoch": 11.36,
|
| 9573 |
+
"learning_rate": 1.9104990679149855e-05,
|
| 9574 |
+
"loss": 2.7913,
|
| 9575 |
+
"step": 789000
|
| 9576 |
+
},
|
| 9577 |
+
{
|
| 9578 |
+
"epoch": 11.36,
|
| 9579 |
+
"learning_rate": 1.905306393739712e-05,
|
| 9580 |
+
"loss": 2.7928,
|
| 9581 |
+
"step": 789500
|
| 9582 |
+
},
|
| 9583 |
+
{
|
| 9584 |
+
"epoch": 11.37,
|
| 9585 |
+
"learning_rate": 1.9001137195644385e-05,
|
| 9586 |
+
"loss": 2.7925,
|
| 9587 |
+
"step": 790000
|
| 9588 |
+
},
|
| 9589 |
+
{
|
| 9590 |
+
"epoch": 11.38,
|
| 9591 |
+
"learning_rate": 1.894921045389165e-05,
|
| 9592 |
+
"loss": 2.7978,
|
| 9593 |
+
"step": 790500
|
| 9594 |
+
},
|
| 9595 |
+
{
|
| 9596 |
+
"epoch": 11.39,
|
| 9597 |
+
"learning_rate": 1.8897283712138915e-05,
|
| 9598 |
+
"loss": 2.7974,
|
| 9599 |
+
"step": 791000
|
| 9600 |
+
},
|
| 9601 |
+
{
|
| 9602 |
+
"epoch": 11.39,
|
| 9603 |
+
"learning_rate": 1.884535697038618e-05,
|
| 9604 |
+
"loss": 2.7968,
|
| 9605 |
+
"step": 791500
|
| 9606 |
+
},
|
| 9607 |
+
{
|
| 9608 |
+
"epoch": 11.4,
|
| 9609 |
+
"learning_rate": 1.8793430228633445e-05,
|
| 9610 |
+
"loss": 2.7926,
|
| 9611 |
+
"step": 792000
|
| 9612 |
+
},
|
| 9613 |
+
{
|
| 9614 |
+
"epoch": 11.41,
|
| 9615 |
+
"learning_rate": 1.8741607340364214e-05,
|
| 9616 |
+
"loss": 2.7963,
|
| 9617 |
+
"step": 792500
|
| 9618 |
+
},
|
| 9619 |
+
{
|
| 9620 |
+
"epoch": 11.41,
|
| 9621 |
+
"learning_rate": 1.8689680598611483e-05,
|
| 9622 |
+
"loss": 2.7893,
|
| 9623 |
+
"step": 793000
|
| 9624 |
+
},
|
| 9625 |
+
{
|
| 9626 |
+
"epoch": 11.42,
|
| 9627 |
+
"learning_rate": 1.863785771034225e-05,
|
| 9628 |
+
"loss": 2.7877,
|
| 9629 |
+
"step": 793500
|
| 9630 |
+
},
|
| 9631 |
+
{
|
| 9632 |
+
"epoch": 11.43,
|
| 9633 |
+
"learning_rate": 1.8585930968589517e-05,
|
| 9634 |
+
"loss": 2.796,
|
| 9635 |
+
"step": 794000
|
| 9636 |
+
},
|
| 9637 |
+
{
|
| 9638 |
+
"epoch": 11.44,
|
| 9639 |
+
"learning_rate": 1.853400422683678e-05,
|
| 9640 |
+
"loss": 2.7906,
|
| 9641 |
+
"step": 794500
|
| 9642 |
+
},
|
| 9643 |
+
{
|
| 9644 |
+
"epoch": 11.44,
|
| 9645 |
+
"learning_rate": 1.8482077485084047e-05,
|
| 9646 |
+
"loss": 2.7987,
|
| 9647 |
+
"step": 795000
|
| 9648 |
+
},
|
| 9649 |
+
{
|
| 9650 |
+
"epoch": 11.45,
|
| 9651 |
+
"learning_rate": 1.843015074333131e-05,
|
| 9652 |
+
"loss": 2.7931,
|
| 9653 |
+
"step": 795500
|
| 9654 |
+
},
|
| 9655 |
+
{
|
| 9656 |
+
"epoch": 11.46,
|
| 9657 |
+
"learning_rate": 1.837832785506208e-05,
|
| 9658 |
+
"loss": 2.7866,
|
| 9659 |
+
"step": 796000
|
| 9660 |
+
},
|
| 9661 |
+
{
|
| 9662 |
+
"epoch": 11.46,
|
| 9663 |
+
"learning_rate": 1.8326401113309343e-05,
|
| 9664 |
+
"loss": 2.7934,
|
| 9665 |
+
"step": 796500
|
| 9666 |
+
},
|
| 9667 |
+
{
|
| 9668 |
+
"epoch": 11.47,
|
| 9669 |
+
"learning_rate": 1.8274474371556608e-05,
|
| 9670 |
+
"loss": 2.7929,
|
| 9671 |
+
"step": 797000
|
| 9672 |
+
},
|
| 9673 |
+
{
|
| 9674 |
+
"epoch": 11.48,
|
| 9675 |
+
"learning_rate": 1.8222547629803873e-05,
|
| 9676 |
+
"loss": 2.7929,
|
| 9677 |
+
"step": 797500
|
| 9678 |
+
},
|
| 9679 |
+
{
|
| 9680 |
+
"epoch": 11.49,
|
| 9681 |
+
"learning_rate": 1.8170620888051138e-05,
|
| 9682 |
+
"loss": 2.7977,
|
| 9683 |
+
"step": 798000
|
| 9684 |
+
},
|
| 9685 |
+
{
|
| 9686 |
+
"epoch": 11.49,
|
| 9687 |
+
"learning_rate": 1.8118694146298403e-05,
|
| 9688 |
+
"loss": 2.7898,
|
| 9689 |
+
"step": 798500
|
| 9690 |
+
},
|
| 9691 |
+
{
|
| 9692 |
+
"epoch": 11.5,
|
| 9693 |
+
"learning_rate": 1.8066871258029172e-05,
|
| 9694 |
+
"loss": 2.787,
|
| 9695 |
+
"step": 799000
|
| 9696 |
+
},
|
| 9697 |
+
{
|
| 9698 |
+
"epoch": 11.51,
|
| 9699 |
+
"learning_rate": 1.8014944516276437e-05,
|
| 9700 |
+
"loss": 2.7933,
|
| 9701 |
+
"step": 799500
|
| 9702 |
+
},
|
| 9703 |
+
{
|
| 9704 |
+
"epoch": 11.52,
|
| 9705 |
+
"learning_rate": 1.7963017774523702e-05,
|
| 9706 |
+
"loss": 2.7907,
|
| 9707 |
+
"step": 800000
|
| 9708 |
+
},
|
| 9709 |
+
{
|
| 9710 |
+
"epoch": 11.52,
|
| 9711 |
+
"learning_rate": 1.791109103277097e-05,
|
| 9712 |
+
"loss": 2.7924,
|
| 9713 |
+
"step": 800500
|
| 9714 |
+
},
|
| 9715 |
+
{
|
| 9716 |
+
"epoch": 11.53,
|
| 9717 |
+
"learning_rate": 1.7859164291018232e-05,
|
| 9718 |
+
"loss": 2.7948,
|
| 9719 |
+
"step": 801000
|
| 9720 |
+
},
|
| 9721 |
+
{
|
| 9722 |
+
"epoch": 11.54,
|
| 9723 |
+
"learning_rate": 1.7807237549265497e-05,
|
| 9724 |
+
"loss": 2.7944,
|
| 9725 |
+
"step": 801500
|
| 9726 |
+
},
|
| 9727 |
+
{
|
| 9728 |
+
"epoch": 11.54,
|
| 9729 |
+
"learning_rate": 1.775531080751276e-05,
|
| 9730 |
+
"loss": 2.795,
|
| 9731 |
+
"step": 802000
|
| 9732 |
+
},
|
| 9733 |
+
{
|
| 9734 |
+
"epoch": 11.55,
|
| 9735 |
+
"learning_rate": 1.7703487919243535e-05,
|
| 9736 |
+
"loss": 2.7972,
|
| 9737 |
+
"step": 802500
|
| 9738 |
+
},
|
| 9739 |
+
{
|
| 9740 |
+
"epoch": 11.56,
|
| 9741 |
+
"learning_rate": 1.7651561177490796e-05,
|
| 9742 |
+
"loss": 2.7953,
|
| 9743 |
+
"step": 803000
|
| 9744 |
+
},
|
| 9745 |
+
{
|
| 9746 |
+
"epoch": 11.57,
|
| 9747 |
+
"learning_rate": 1.759963443573806e-05,
|
| 9748 |
+
"loss": 2.7893,
|
| 9749 |
+
"step": 803500
|
| 9750 |
+
},
|
| 9751 |
+
{
|
| 9752 |
+
"epoch": 11.57,
|
| 9753 |
+
"learning_rate": 1.7547707693985326e-05,
|
| 9754 |
+
"loss": 2.7897,
|
| 9755 |
+
"step": 804000
|
| 9756 |
+
},
|
| 9757 |
+
{
|
| 9758 |
+
"epoch": 11.58,
|
| 9759 |
+
"learning_rate": 1.749578095223259e-05,
|
| 9760 |
+
"loss": 2.792,
|
| 9761 |
+
"step": 804500
|
| 9762 |
+
},
|
| 9763 |
+
{
|
| 9764 |
+
"epoch": 11.59,
|
| 9765 |
+
"learning_rate": 1.7443854210479856e-05,
|
| 9766 |
+
"loss": 2.791,
|
| 9767 |
+
"step": 805000
|
| 9768 |
+
},
|
| 9769 |
+
{
|
| 9770 |
+
"epoch": 11.59,
|
| 9771 |
+
"learning_rate": 1.7392031322210625e-05,
|
| 9772 |
+
"loss": 2.7957,
|
| 9773 |
+
"step": 805500
|
| 9774 |
+
},
|
| 9775 |
+
{
|
| 9776 |
+
"epoch": 11.6,
|
| 9777 |
+
"learning_rate": 1.734010458045789e-05,
|
| 9778 |
+
"loss": 2.7909,
|
| 9779 |
+
"step": 806000
|
| 9780 |
+
},
|
| 9781 |
+
{
|
| 9782 |
+
"epoch": 11.61,
|
| 9783 |
+
"learning_rate": 1.7288177838705155e-05,
|
| 9784 |
+
"loss": 2.7908,
|
| 9785 |
+
"step": 806500
|
| 9786 |
+
},
|
| 9787 |
+
{
|
| 9788 |
+
"epoch": 11.62,
|
| 9789 |
+
"learning_rate": 1.723625109695242e-05,
|
| 9790 |
+
"loss": 2.7928,
|
| 9791 |
+
"step": 807000
|
| 9792 |
+
},
|
| 9793 |
+
{
|
| 9794 |
+
"epoch": 11.62,
|
| 9795 |
+
"learning_rate": 1.7184324355199685e-05,
|
| 9796 |
+
"loss": 2.7933,
|
| 9797 |
+
"step": 807500
|
| 9798 |
+
},
|
| 9799 |
+
{
|
| 9800 |
+
"epoch": 11.63,
|
| 9801 |
+
"learning_rate": 1.713239761344695e-05,
|
| 9802 |
+
"loss": 2.7879,
|
| 9803 |
+
"step": 808000
|
| 9804 |
+
},
|
| 9805 |
+
{
|
| 9806 |
+
"epoch": 11.64,
|
| 9807 |
+
"learning_rate": 1.7080470871694215e-05,
|
| 9808 |
+
"loss": 2.7841,
|
| 9809 |
+
"step": 808500
|
| 9810 |
+
},
|
| 9811 |
+
{
|
| 9812 |
+
"epoch": 11.64,
|
| 9813 |
+
"learning_rate": 1.7028647983424984e-05,
|
| 9814 |
+
"loss": 2.7956,
|
| 9815 |
+
"step": 809000
|
| 9816 |
+
},
|
| 9817 |
+
{
|
| 9818 |
+
"epoch": 11.65,
|
| 9819 |
+
"learning_rate": 1.697672124167225e-05,
|
| 9820 |
+
"loss": 2.789,
|
| 9821 |
+
"step": 809500
|
| 9822 |
+
},
|
| 9823 |
+
{
|
| 9824 |
+
"epoch": 11.66,
|
| 9825 |
+
"learning_rate": 1.6924794499919514e-05,
|
| 9826 |
+
"loss": 2.7915,
|
| 9827 |
+
"step": 810000
|
| 9828 |
+
},
|
| 9829 |
+
{
|
| 9830 |
+
"epoch": 11.67,
|
| 9831 |
+
"learning_rate": 1.687286775816678e-05,
|
| 9832 |
+
"loss": 2.7944,
|
| 9833 |
+
"step": 810500
|
| 9834 |
+
},
|
| 9835 |
+
{
|
| 9836 |
+
"epoch": 11.67,
|
| 9837 |
+
"learning_rate": 1.6820941016414044e-05,
|
| 9838 |
+
"loss": 2.7884,
|
| 9839 |
+
"step": 811000
|
| 9840 |
+
},
|
| 9841 |
+
{
|
| 9842 |
+
"epoch": 11.68,
|
| 9843 |
+
"learning_rate": 1.676901427466131e-05,
|
| 9844 |
+
"loss": 2.786,
|
| 9845 |
+
"step": 811500
|
| 9846 |
+
},
|
| 9847 |
+
{
|
| 9848 |
+
"epoch": 11.69,
|
| 9849 |
+
"learning_rate": 1.6717087532908574e-05,
|
| 9850 |
+
"loss": 2.791,
|
| 9851 |
+
"step": 812000
|
| 9852 |
+
},
|
| 9853 |
+
{
|
| 9854 |
+
"epoch": 11.7,
|
| 9855 |
+
"learning_rate": 1.666516079115584e-05,
|
| 9856 |
+
"loss": 2.7934,
|
| 9857 |
+
"step": 812500
|
| 9858 |
+
},
|
| 9859 |
+
{
|
| 9860 |
+
"epoch": 11.7,
|
| 9861 |
+
"learning_rate": 1.661333790288661e-05,
|
| 9862 |
+
"loss": 2.7899,
|
| 9863 |
+
"step": 813000
|
| 9864 |
+
},
|
| 9865 |
+
{
|
| 9866 |
+
"epoch": 11.71,
|
| 9867 |
+
"learning_rate": 1.6561515014617378e-05,
|
| 9868 |
+
"loss": 2.7997,
|
| 9869 |
+
"step": 813500
|
| 9870 |
+
},
|
| 9871 |
+
{
|
| 9872 |
+
"epoch": 11.72,
|
| 9873 |
+
"learning_rate": 1.6509588272864643e-05,
|
| 9874 |
+
"loss": 2.7874,
|
| 9875 |
+
"step": 814000
|
| 9876 |
+
},
|
| 9877 |
+
{
|
| 9878 |
+
"epoch": 11.72,
|
| 9879 |
+
"learning_rate": 1.6457661531111908e-05,
|
| 9880 |
+
"loss": 2.788,
|
| 9881 |
+
"step": 814500
|
| 9882 |
+
},
|
| 9883 |
+
{
|
| 9884 |
+
"epoch": 11.73,
|
| 9885 |
+
"learning_rate": 1.6405734789359173e-05,
|
| 9886 |
+
"loss": 2.7897,
|
| 9887 |
+
"step": 815000
|
| 9888 |
+
},
|
| 9889 |
+
{
|
| 9890 |
+
"epoch": 11.74,
|
| 9891 |
+
"learning_rate": 1.6353911901089942e-05,
|
| 9892 |
+
"loss": 2.7917,
|
| 9893 |
+
"step": 815500
|
| 9894 |
+
},
|
| 9895 |
+
{
|
| 9896 |
+
"epoch": 11.75,
|
| 9897 |
+
"learning_rate": 1.6301985159337207e-05,
|
| 9898 |
+
"loss": 2.7895,
|
| 9899 |
+
"step": 816000
|
| 9900 |
+
},
|
| 9901 |
+
{
|
| 9902 |
+
"epoch": 11.75,
|
| 9903 |
+
"learning_rate": 1.6250058417584475e-05,
|
| 9904 |
+
"loss": 2.7891,
|
| 9905 |
+
"step": 816500
|
| 9906 |
+
},
|
| 9907 |
+
{
|
| 9908 |
+
"epoch": 11.76,
|
| 9909 |
+
"learning_rate": 1.6198131675831737e-05,
|
| 9910 |
+
"loss": 2.7911,
|
| 9911 |
+
"step": 817000
|
| 9912 |
+
},
|
| 9913 |
+
{
|
| 9914 |
+
"epoch": 11.77,
|
| 9915 |
+
"learning_rate": 1.614630878756251e-05,
|
| 9916 |
+
"loss": 2.7892,
|
| 9917 |
+
"step": 817500
|
| 9918 |
+
},
|
| 9919 |
+
{
|
| 9920 |
+
"epoch": 11.77,
|
| 9921 |
+
"learning_rate": 1.609438204580977e-05,
|
| 9922 |
+
"loss": 2.7903,
|
| 9923 |
+
"step": 818000
|
| 9924 |
+
},
|
| 9925 |
+
{
|
| 9926 |
+
"epoch": 11.78,
|
| 9927 |
+
"learning_rate": 1.6042559157540545e-05,
|
| 9928 |
+
"loss": 2.7909,
|
| 9929 |
+
"step": 818500
|
| 9930 |
+
},
|
| 9931 |
+
{
|
| 9932 |
+
"epoch": 11.79,
|
| 9933 |
+
"learning_rate": 1.5990632415787806e-05,
|
| 9934 |
+
"loss": 2.7937,
|
| 9935 |
+
"step": 819000
|
| 9936 |
+
},
|
| 9937 |
+
{
|
| 9938 |
+
"epoch": 11.8,
|
| 9939 |
+
"learning_rate": 1.5938705674035074e-05,
|
| 9940 |
+
"loss": 2.7903,
|
| 9941 |
+
"step": 819500
|
| 9942 |
+
},
|
| 9943 |
+
{
|
| 9944 |
+
"epoch": 11.8,
|
| 9945 |
+
"learning_rate": 1.5886778932282336e-05,
|
| 9946 |
+
"loss": 2.7915,
|
| 9947 |
+
"step": 820000
|
| 9948 |
+
},
|
| 9949 |
+
{
|
| 9950 |
+
"epoch": 11.81,
|
| 9951 |
+
"learning_rate": 1.58348521905296e-05,
|
| 9952 |
+
"loss": 2.7842,
|
| 9953 |
+
"step": 820500
|
| 9954 |
+
},
|
| 9955 |
+
{
|
| 9956 |
+
"epoch": 11.82,
|
| 9957 |
+
"learning_rate": 1.5782925448776866e-05,
|
| 9958 |
+
"loss": 2.7848,
|
| 9959 |
+
"step": 821000
|
| 9960 |
+
},
|
| 9961 |
+
{
|
| 9962 |
+
"epoch": 11.82,
|
| 9963 |
+
"learning_rate": 1.573099870702413e-05,
|
| 9964 |
+
"loss": 2.7886,
|
| 9965 |
+
"step": 821500
|
| 9966 |
+
},
|
| 9967 |
+
{
|
| 9968 |
+
"epoch": 11.83,
|
| 9969 |
+
"learning_rate": 1.5679071965271395e-05,
|
| 9970 |
+
"loss": 2.7883,
|
| 9971 |
+
"step": 822000
|
| 9972 |
+
},
|
| 9973 |
+
{
|
| 9974 |
+
"epoch": 11.84,
|
| 9975 |
+
"learning_rate": 1.562714522351866e-05,
|
| 9976 |
+
"loss": 2.787,
|
| 9977 |
+
"step": 822500
|
| 9978 |
+
},
|
| 9979 |
+
{
|
| 9980 |
+
"epoch": 11.85,
|
| 9981 |
+
"learning_rate": 1.5575218481765925e-05,
|
| 9982 |
+
"loss": 2.7858,
|
| 9983 |
+
"step": 823000
|
| 9984 |
+
},
|
| 9985 |
+
{
|
| 9986 |
+
"epoch": 11.85,
|
| 9987 |
+
"learning_rate": 1.552329174001319e-05,
|
| 9988 |
+
"loss": 2.7867,
|
| 9989 |
+
"step": 823500
|
| 9990 |
+
},
|
| 9991 |
+
{
|
| 9992 |
+
"epoch": 11.86,
|
| 9993 |
+
"learning_rate": 1.5471468851743963e-05,
|
| 9994 |
+
"loss": 2.7886,
|
| 9995 |
+
"step": 824000
|
| 9996 |
+
},
|
| 9997 |
+
{
|
| 9998 |
+
"epoch": 11.87,
|
| 9999 |
+
"learning_rate": 1.5419542109991225e-05,
|
| 10000 |
+
"loss": 2.7878,
|
| 10001 |
+
"step": 824500
|
| 10002 |
+
},
|
| 10003 |
+
{
|
| 10004 |
+
"epoch": 11.88,
|
| 10005 |
+
"learning_rate": 1.536761536823849e-05,
|
| 10006 |
+
"loss": 2.7858,
|
| 10007 |
+
"step": 825000
|
| 10008 |
+
},
|
| 10009 |
+
{
|
| 10010 |
+
"epoch": 11.88,
|
| 10011 |
+
"learning_rate": 1.5315688626485755e-05,
|
| 10012 |
+
"loss": 2.7834,
|
| 10013 |
+
"step": 825500
|
| 10014 |
+
},
|
| 10015 |
+
{
|
| 10016 |
+
"epoch": 11.89,
|
| 10017 |
+
"learning_rate": 1.5263865738216528e-05,
|
| 10018 |
+
"loss": 2.7869,
|
| 10019 |
+
"step": 826000
|
| 10020 |
+
},
|
| 10021 |
+
{
|
| 10022 |
+
"epoch": 11.9,
|
| 10023 |
+
"learning_rate": 1.5211938996463789e-05,
|
| 10024 |
+
"loss": 2.7848,
|
| 10025 |
+
"step": 826500
|
| 10026 |
+
},
|
| 10027 |
+
{
|
| 10028 |
+
"epoch": 11.9,
|
| 10029 |
+
"learning_rate": 1.5160012254711054e-05,
|
| 10030 |
+
"loss": 2.7913,
|
| 10031 |
+
"step": 827000
|
| 10032 |
+
},
|
| 10033 |
+
{
|
| 10034 |
+
"epoch": 11.91,
|
| 10035 |
+
"learning_rate": 1.510808551295832e-05,
|
| 10036 |
+
"loss": 2.7855,
|
| 10037 |
+
"step": 827500
|
| 10038 |
+
},
|
| 10039 |
+
{
|
| 10040 |
+
"epoch": 11.92,
|
| 10041 |
+
"learning_rate": 1.5056158771205584e-05,
|
| 10042 |
+
"loss": 2.7869,
|
| 10043 |
+
"step": 828000
|
| 10044 |
+
},
|
| 10045 |
+
{
|
| 10046 |
+
"epoch": 11.93,
|
| 10047 |
+
"learning_rate": 1.5004335882936355e-05,
|
| 10048 |
+
"loss": 2.7922,
|
| 10049 |
+
"step": 828500
|
| 10050 |
+
},
|
| 10051 |
+
{
|
| 10052 |
+
"epoch": 11.93,
|
| 10053 |
+
"learning_rate": 1.4952409141183618e-05,
|
| 10054 |
+
"loss": 2.7892,
|
| 10055 |
+
"step": 829000
|
| 10056 |
+
},
|
| 10057 |
+
{
|
| 10058 |
+
"epoch": 11.94,
|
| 10059 |
+
"learning_rate": 1.4900482399430885e-05,
|
| 10060 |
+
"loss": 2.7889,
|
| 10061 |
+
"step": 829500
|
| 10062 |
+
},
|
| 10063 |
+
{
|
| 10064 |
+
"epoch": 11.95,
|
| 10065 |
+
"learning_rate": 1.4848555657678148e-05,
|
| 10066 |
+
"loss": 2.7905,
|
| 10067 |
+
"step": 830000
|
| 10068 |
+
},
|
| 10069 |
+
{
|
| 10070 |
+
"epoch": 11.95,
|
| 10071 |
+
"learning_rate": 1.479673276940892e-05,
|
| 10072 |
+
"loss": 2.786,
|
| 10073 |
+
"step": 830500
|
| 10074 |
+
},
|
| 10075 |
+
{
|
| 10076 |
+
"epoch": 11.96,
|
| 10077 |
+
"learning_rate": 1.4744806027656183e-05,
|
| 10078 |
+
"loss": 2.7905,
|
| 10079 |
+
"step": 831000
|
| 10080 |
+
},
|
| 10081 |
+
{
|
| 10082 |
+
"epoch": 11.97,
|
| 10083 |
+
"learning_rate": 1.469287928590345e-05,
|
| 10084 |
+
"loss": 2.7838,
|
| 10085 |
+
"step": 831500
|
| 10086 |
+
},
|
| 10087 |
+
{
|
| 10088 |
+
"epoch": 11.98,
|
| 10089 |
+
"learning_rate": 1.4640952544150712e-05,
|
| 10090 |
+
"loss": 2.786,
|
| 10091 |
+
"step": 832000
|
| 10092 |
+
},
|
| 10093 |
+
{
|
| 10094 |
+
"epoch": 11.98,
|
| 10095 |
+
"learning_rate": 1.4589129655881484e-05,
|
| 10096 |
+
"loss": 2.7855,
|
| 10097 |
+
"step": 832500
|
| 10098 |
+
},
|
| 10099 |
+
{
|
| 10100 |
+
"epoch": 11.99,
|
| 10101 |
+
"learning_rate": 1.4537202914128747e-05,
|
| 10102 |
+
"loss": 2.7836,
|
| 10103 |
+
"step": 833000
|
| 10104 |
+
},
|
| 10105 |
+
{
|
| 10106 |
+
"epoch": 12.0,
|
| 10107 |
+
"learning_rate": 1.4485276172376014e-05,
|
| 10108 |
+
"loss": 2.7894,
|
| 10109 |
+
"step": 833500
|
| 10110 |
+
},
|
| 10111 |
+
{
|
| 10112 |
+
"epoch": 12.0,
|
| 10113 |
+
"eval_accuracy": 0.5055095615843279,
|
| 10114 |
+
"eval_loss": 2.6172478199005127,
|
| 10115 |
+
"eval_runtime": 556.0647,
|
| 10116 |
+
"eval_samples_per_second": 969.198,
|
| 10117 |
+
"eval_steps_per_second": 40.384,
|
| 10118 |
+
"step": 833676
|
| 10119 |
}
|
| 10120 |
],
|
| 10121 |
"max_steps": 972622,
|
| 10122 |
"num_train_epochs": 14,
|
| 10123 |
+
"total_flos": 5.126958960674341e+18,
|
| 10124 |
"trial_name": null,
|
| 10125 |
"trial_params": null
|
| 10126 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118242180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c098f75814bf0d67b7b2990383c442d386c5dedd85129e03b179a63a78cf10e
|
| 3 |
size 118242180
|
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3853a5a36f581057de1289aa6295c5ae41dbb1a1d29e8d6a716b1229b4dd2ec3
|
| 3 |
+
size 273994
|