Training in progress, step 39000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 319352826
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa8d092167263760751be7d64325984a75b7703a340e78d50675e80c3268d124
|
| 3 |
size 319352826
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 900372486
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d61ec51d8d7118e96f0dea2c3a25213ed4312020f0758f383f9360478751ced5
|
| 3 |
size 900372486
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa7750a42bfcb857f2a8e98481152c1819f20fb160a29916eafdca1fe7427f50
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50dcdff9fd2af9b5bad6141fb049dadfa5202e5b29d58dd20b462f210d858734
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21f1502aa78044a36b5816c5c55d8e465c4014bfb315f8f493433e3ca45e7ae7
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a17f91aa15ca7b8a7e9989ef3ca40593acdf2c68462ffb083c0202e8a25a1ee
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa403d369b1ca2463cb498c59fd6da0c0d97bf23ae0774f6dd0f5ad165ffa16e
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -13308,6 +13308,356 @@
|
|
| 13308 |
"learning_rate": 4.907397438075633e-05,
|
| 13309 |
"loss": 22.7662,
|
| 13310 |
"step": 38000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13311 |
}
|
| 13312 |
],
|
| 13313 |
"logging_steps": 20,
|
|
@@ -13327,7 +13677,7 @@
|
|
| 13327 |
"attributes": {}
|
| 13328 |
}
|
| 13329 |
},
|
| 13330 |
-
"total_flos": 7.
|
| 13331 |
"train_batch_size": 48,
|
| 13332 |
"trial_name": null,
|
| 13333 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.057771273160355276,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 39000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 13308 |
"learning_rate": 4.907397438075633e-05,
|
| 13309 |
"loss": 22.7662,
|
| 13310 |
"step": 38000
|
| 13311 |
+
},
|
| 13312 |
+
{
|
| 13313 |
+
"epoch": 0.0563195847578643,
|
| 13314 |
+
"grad_norm": 7.8125,
|
| 13315 |
+
"learning_rate": 4.9073480487255576e-05,
|
| 13316 |
+
"loss": 22.8013,
|
| 13317 |
+
"step": 38020
|
| 13318 |
+
},
|
| 13319 |
+
{
|
| 13320 |
+
"epoch": 0.056349211051792684,
|
| 13321 |
+
"grad_norm": 8.375,
|
| 13322 |
+
"learning_rate": 4.907298659375481e-05,
|
| 13323 |
+
"loss": 22.7253,
|
| 13324 |
+
"step": 38040
|
| 13325 |
+
},
|
| 13326 |
+
{
|
| 13327 |
+
"epoch": 0.05637883734572107,
|
| 13328 |
+
"grad_norm": 8.0,
|
| 13329 |
+
"learning_rate": 4.907249270025406e-05,
|
| 13330 |
+
"loss": 22.8673,
|
| 13331 |
+
"step": 38060
|
| 13332 |
+
},
|
| 13333 |
+
{
|
| 13334 |
+
"epoch": 0.056408463639649464,
|
| 13335 |
+
"grad_norm": 8.5,
|
| 13336 |
+
"learning_rate": 4.907199880675331e-05,
|
| 13337 |
+
"loss": 22.773,
|
| 13338 |
+
"step": 38080
|
| 13339 |
+
},
|
| 13340 |
+
{
|
| 13341 |
+
"epoch": 0.05643808993357785,
|
| 13342 |
+
"grad_norm": 8.3125,
|
| 13343 |
+
"learning_rate": 4.907150491325255e-05,
|
| 13344 |
+
"loss": 22.7193,
|
| 13345 |
+
"step": 38100
|
| 13346 |
+
},
|
| 13347 |
+
{
|
| 13348 |
+
"epoch": 0.05646771622750624,
|
| 13349 |
+
"grad_norm": 8.1875,
|
| 13350 |
+
"learning_rate": 4.907101101975179e-05,
|
| 13351 |
+
"loss": 22.8162,
|
| 13352 |
+
"step": 38120
|
| 13353 |
+
},
|
| 13354 |
+
{
|
| 13355 |
+
"epoch": 0.056497342521434624,
|
| 13356 |
+
"grad_norm": 8.9375,
|
| 13357 |
+
"learning_rate": 4.907051712625104e-05,
|
| 13358 |
+
"loss": 22.7384,
|
| 13359 |
+
"step": 38140
|
| 13360 |
+
},
|
| 13361 |
+
{
|
| 13362 |
+
"epoch": 0.05652696881536301,
|
| 13363 |
+
"grad_norm": 8.6875,
|
| 13364 |
+
"learning_rate": 4.907002323275028e-05,
|
| 13365 |
+
"loss": 22.7555,
|
| 13366 |
+
"step": 38160
|
| 13367 |
+
},
|
| 13368 |
+
{
|
| 13369 |
+
"epoch": 0.0565565951092914,
|
| 13370 |
+
"grad_norm": 7.3125,
|
| 13371 |
+
"learning_rate": 4.906952933924952e-05,
|
| 13372 |
+
"loss": 22.7421,
|
| 13373 |
+
"step": 38180
|
| 13374 |
+
},
|
| 13375 |
+
{
|
| 13376 |
+
"epoch": 0.05658622140321978,
|
| 13377 |
+
"grad_norm": 8.75,
|
| 13378 |
+
"learning_rate": 4.906903544574876e-05,
|
| 13379 |
+
"loss": 22.7838,
|
| 13380 |
+
"step": 38200
|
| 13381 |
+
},
|
| 13382 |
+
{
|
| 13383 |
+
"epoch": 0.056615847697148176,
|
| 13384 |
+
"grad_norm": 8.4375,
|
| 13385 |
+
"learning_rate": 4.906854155224801e-05,
|
| 13386 |
+
"loss": 22.7588,
|
| 13387 |
+
"step": 38220
|
| 13388 |
+
},
|
| 13389 |
+
{
|
| 13390 |
+
"epoch": 0.05664547399107656,
|
| 13391 |
+
"grad_norm": 7.375,
|
| 13392 |
+
"learning_rate": 4.9068047658747254e-05,
|
| 13393 |
+
"loss": 22.7246,
|
| 13394 |
+
"step": 38240
|
| 13395 |
+
},
|
| 13396 |
+
{
|
| 13397 |
+
"epoch": 0.05667510028500495,
|
| 13398 |
+
"grad_norm": 8.3125,
|
| 13399 |
+
"learning_rate": 4.906755376524649e-05,
|
| 13400 |
+
"loss": 22.7522,
|
| 13401 |
+
"step": 38260
|
| 13402 |
+
},
|
| 13403 |
+
{
|
| 13404 |
+
"epoch": 0.056704726578933336,
|
| 13405 |
+
"grad_norm": 8.875,
|
| 13406 |
+
"learning_rate": 4.906705987174574e-05,
|
| 13407 |
+
"loss": 22.7606,
|
| 13408 |
+
"step": 38280
|
| 13409 |
+
},
|
| 13410 |
+
{
|
| 13411 |
+
"epoch": 0.05673435287286172,
|
| 13412 |
+
"grad_norm": 12.25,
|
| 13413 |
+
"learning_rate": 4.9066565978244984e-05,
|
| 13414 |
+
"loss": 22.7347,
|
| 13415 |
+
"step": 38300
|
| 13416 |
+
},
|
| 13417 |
+
{
|
| 13418 |
+
"epoch": 0.05676397916679011,
|
| 13419 |
+
"grad_norm": 8.8125,
|
| 13420 |
+
"learning_rate": 4.906607208474423e-05,
|
| 13421 |
+
"loss": 22.7115,
|
| 13422 |
+
"step": 38320
|
| 13423 |
+
},
|
| 13424 |
+
{
|
| 13425 |
+
"epoch": 0.056793605460718495,
|
| 13426 |
+
"grad_norm": 9.0,
|
| 13427 |
+
"learning_rate": 4.9065578191243465e-05,
|
| 13428 |
+
"loss": 22.6453,
|
| 13429 |
+
"step": 38340
|
| 13430 |
+
},
|
| 13431 |
+
{
|
| 13432 |
+
"epoch": 0.05682323175464688,
|
| 13433 |
+
"grad_norm": 12.875,
|
| 13434 |
+
"learning_rate": 4.9065084297742715e-05,
|
| 13435 |
+
"loss": 22.7791,
|
| 13436 |
+
"step": 38360
|
| 13437 |
+
},
|
| 13438 |
+
{
|
| 13439 |
+
"epoch": 0.056852858048575275,
|
| 13440 |
+
"grad_norm": 9.75,
|
| 13441 |
+
"learning_rate": 4.906459040424196e-05,
|
| 13442 |
+
"loss": 22.6924,
|
| 13443 |
+
"step": 38380
|
| 13444 |
+
},
|
| 13445 |
+
{
|
| 13446 |
+
"epoch": 0.05688248434250366,
|
| 13447 |
+
"grad_norm": 9.1875,
|
| 13448 |
+
"learning_rate": 4.9064096510741195e-05,
|
| 13449 |
+
"loss": 22.6704,
|
| 13450 |
+
"step": 38400
|
| 13451 |
+
},
|
| 13452 |
+
{
|
| 13453 |
+
"epoch": 0.05691211063643205,
|
| 13454 |
+
"grad_norm": 7.5625,
|
| 13455 |
+
"learning_rate": 4.906360261724044e-05,
|
| 13456 |
+
"loss": 22.7041,
|
| 13457 |
+
"step": 38420
|
| 13458 |
+
},
|
| 13459 |
+
{
|
| 13460 |
+
"epoch": 0.056941736930360434,
|
| 13461 |
+
"grad_norm": 7.59375,
|
| 13462 |
+
"learning_rate": 4.906310872373969e-05,
|
| 13463 |
+
"loss": 22.7381,
|
| 13464 |
+
"step": 38440
|
| 13465 |
+
},
|
| 13466 |
+
{
|
| 13467 |
+
"epoch": 0.05697136322428882,
|
| 13468 |
+
"grad_norm": 8.25,
|
| 13469 |
+
"learning_rate": 4.906261483023893e-05,
|
| 13470 |
+
"loss": 22.8227,
|
| 13471 |
+
"step": 38460
|
| 13472 |
+
},
|
| 13473 |
+
{
|
| 13474 |
+
"epoch": 0.05700098951821721,
|
| 13475 |
+
"grad_norm": 8.4375,
|
| 13476 |
+
"learning_rate": 4.906212093673817e-05,
|
| 13477 |
+
"loss": 22.7251,
|
| 13478 |
+
"step": 38480
|
| 13479 |
+
},
|
| 13480 |
+
{
|
| 13481 |
+
"epoch": 0.057030615812145594,
|
| 13482 |
+
"grad_norm": 7.6875,
|
| 13483 |
+
"learning_rate": 4.906162704323741e-05,
|
| 13484 |
+
"loss": 22.6936,
|
| 13485 |
+
"step": 38500
|
| 13486 |
+
},
|
| 13487 |
+
{
|
| 13488 |
+
"epoch": 0.05706024210607398,
|
| 13489 |
+
"grad_norm": 8.8125,
|
| 13490 |
+
"learning_rate": 4.906113314973666e-05,
|
| 13491 |
+
"loss": 22.7815,
|
| 13492 |
+
"step": 38520
|
| 13493 |
+
},
|
| 13494 |
+
{
|
| 13495 |
+
"epoch": 0.05708986840000237,
|
| 13496 |
+
"grad_norm": 8.875,
|
| 13497 |
+
"learning_rate": 4.90606392562359e-05,
|
| 13498 |
+
"loss": 22.7159,
|
| 13499 |
+
"step": 38540
|
| 13500 |
+
},
|
| 13501 |
+
{
|
| 13502 |
+
"epoch": 0.05711949469393076,
|
| 13503 |
+
"grad_norm": 8.875,
|
| 13504 |
+
"learning_rate": 4.906014536273514e-05,
|
| 13505 |
+
"loss": 22.7325,
|
| 13506 |
+
"step": 38560
|
| 13507 |
+
},
|
| 13508 |
+
{
|
| 13509 |
+
"epoch": 0.057149120987859146,
|
| 13510 |
+
"grad_norm": 8.375,
|
| 13511 |
+
"learning_rate": 4.905965146923439e-05,
|
| 13512 |
+
"loss": 22.7178,
|
| 13513 |
+
"step": 38580
|
| 13514 |
+
},
|
| 13515 |
+
{
|
| 13516 |
+
"epoch": 0.05717874728178753,
|
| 13517 |
+
"grad_norm": 8.3125,
|
| 13518 |
+
"learning_rate": 4.9059157575733636e-05,
|
| 13519 |
+
"loss": 22.7357,
|
| 13520 |
+
"step": 38600
|
| 13521 |
+
},
|
| 13522 |
+
{
|
| 13523 |
+
"epoch": 0.05720837357571592,
|
| 13524 |
+
"grad_norm": 8.0,
|
| 13525 |
+
"learning_rate": 4.905866368223287e-05,
|
| 13526 |
+
"loss": 22.7835,
|
| 13527 |
+
"step": 38620
|
| 13528 |
+
},
|
| 13529 |
+
{
|
| 13530 |
+
"epoch": 0.057237999869644306,
|
| 13531 |
+
"grad_norm": 8.0,
|
| 13532 |
+
"learning_rate": 4.9058169788732116e-05,
|
| 13533 |
+
"loss": 22.7622,
|
| 13534 |
+
"step": 38640
|
| 13535 |
+
},
|
| 13536 |
+
{
|
| 13537 |
+
"epoch": 0.05726762616357269,
|
| 13538 |
+
"grad_norm": 11.0,
|
| 13539 |
+
"learning_rate": 4.9057675895231366e-05,
|
| 13540 |
+
"loss": 22.7288,
|
| 13541 |
+
"step": 38660
|
| 13542 |
+
},
|
| 13543 |
+
{
|
| 13544 |
+
"epoch": 0.05729725245750108,
|
| 13545 |
+
"grad_norm": 8.0,
|
| 13546 |
+
"learning_rate": 4.90571820017306e-05,
|
| 13547 |
+
"loss": 22.8271,
|
| 13548 |
+
"step": 38680
|
| 13549 |
+
},
|
| 13550 |
+
{
|
| 13551 |
+
"epoch": 0.05732687875142947,
|
| 13552 |
+
"grad_norm": 8.8125,
|
| 13553 |
+
"learning_rate": 4.9056688108229846e-05,
|
| 13554 |
+
"loss": 22.8034,
|
| 13555 |
+
"step": 38700
|
| 13556 |
+
},
|
| 13557 |
+
{
|
| 13558 |
+
"epoch": 0.05735650504535786,
|
| 13559 |
+
"grad_norm": 7.75,
|
| 13560 |
+
"learning_rate": 4.905619421472909e-05,
|
| 13561 |
+
"loss": 22.7918,
|
| 13562 |
+
"step": 38720
|
| 13563 |
+
},
|
| 13564 |
+
{
|
| 13565 |
+
"epoch": 0.057386131339286245,
|
| 13566 |
+
"grad_norm": 8.375,
|
| 13567 |
+
"learning_rate": 4.905570032122834e-05,
|
| 13568 |
+
"loss": 22.7912,
|
| 13569 |
+
"step": 38740
|
| 13570 |
+
},
|
| 13571 |
+
{
|
| 13572 |
+
"epoch": 0.05741575763321463,
|
| 13573 |
+
"grad_norm": 7.25,
|
| 13574 |
+
"learning_rate": 4.9055206427727576e-05,
|
| 13575 |
+
"loss": 22.6727,
|
| 13576 |
+
"step": 38760
|
| 13577 |
+
},
|
| 13578 |
+
{
|
| 13579 |
+
"epoch": 0.05744538392714302,
|
| 13580 |
+
"grad_norm": 8.0,
|
| 13581 |
+
"learning_rate": 4.905471253422682e-05,
|
| 13582 |
+
"loss": 22.799,
|
| 13583 |
+
"step": 38780
|
| 13584 |
+
},
|
| 13585 |
+
{
|
| 13586 |
+
"epoch": 0.057475010221071404,
|
| 13587 |
+
"grad_norm": 7.9375,
|
| 13588 |
+
"learning_rate": 4.905421864072606e-05,
|
| 13589 |
+
"loss": 22.7086,
|
| 13590 |
+
"step": 38800
|
| 13591 |
+
},
|
| 13592 |
+
{
|
| 13593 |
+
"epoch": 0.05750463651499979,
|
| 13594 |
+
"grad_norm": 8.6875,
|
| 13595 |
+
"learning_rate": 4.9053724747225314e-05,
|
| 13596 |
+
"loss": 22.7619,
|
| 13597 |
+
"step": 38820
|
| 13598 |
+
},
|
| 13599 |
+
{
|
| 13600 |
+
"epoch": 0.05753426280892818,
|
| 13601 |
+
"grad_norm": 9.5,
|
| 13602 |
+
"learning_rate": 4.905323085372455e-05,
|
| 13603 |
+
"loss": 22.7321,
|
| 13604 |
+
"step": 38840
|
| 13605 |
+
},
|
| 13606 |
+
{
|
| 13607 |
+
"epoch": 0.05756388910285657,
|
| 13608 |
+
"grad_norm": 7.03125,
|
| 13609 |
+
"learning_rate": 4.9052736960223794e-05,
|
| 13610 |
+
"loss": 22.7381,
|
| 13611 |
+
"step": 38860
|
| 13612 |
+
},
|
| 13613 |
+
{
|
| 13614 |
+
"epoch": 0.05759351539678496,
|
| 13615 |
+
"grad_norm": 7.84375,
|
| 13616 |
+
"learning_rate": 4.9052243066723044e-05,
|
| 13617 |
+
"loss": 22.6783,
|
| 13618 |
+
"step": 38880
|
| 13619 |
+
},
|
| 13620 |
+
{
|
| 13621 |
+
"epoch": 0.05762314169071334,
|
| 13622 |
+
"grad_norm": 7.75,
|
| 13623 |
+
"learning_rate": 4.905174917322228e-05,
|
| 13624 |
+
"loss": 22.7077,
|
| 13625 |
+
"step": 38900
|
| 13626 |
+
},
|
| 13627 |
+
{
|
| 13628 |
+
"epoch": 0.05765276798464173,
|
| 13629 |
+
"grad_norm": 8.1875,
|
| 13630 |
+
"learning_rate": 4.9051255279721524e-05,
|
| 13631 |
+
"loss": 22.7116,
|
| 13632 |
+
"step": 38920
|
| 13633 |
+
},
|
| 13634 |
+
{
|
| 13635 |
+
"epoch": 0.057682394278570116,
|
| 13636 |
+
"grad_norm": 8.375,
|
| 13637 |
+
"learning_rate": 4.905076138622077e-05,
|
| 13638 |
+
"loss": 22.6759,
|
| 13639 |
+
"step": 38940
|
| 13640 |
+
},
|
| 13641 |
+
{
|
| 13642 |
+
"epoch": 0.0577120205724985,
|
| 13643 |
+
"grad_norm": 9.25,
|
| 13644 |
+
"learning_rate": 4.905026749272002e-05,
|
| 13645 |
+
"loss": 22.7974,
|
| 13646 |
+
"step": 38960
|
| 13647 |
+
},
|
| 13648 |
+
{
|
| 13649 |
+
"epoch": 0.05774164686642689,
|
| 13650 |
+
"grad_norm": 8.625,
|
| 13651 |
+
"learning_rate": 4.9049773599219254e-05,
|
| 13652 |
+
"loss": 22.7558,
|
| 13653 |
+
"step": 38980
|
| 13654 |
+
},
|
| 13655 |
+
{
|
| 13656 |
+
"epoch": 0.057771273160355276,
|
| 13657 |
+
"grad_norm": 10.5625,
|
| 13658 |
+
"learning_rate": 4.90492797057185e-05,
|
| 13659 |
+
"loss": 22.7169,
|
| 13660 |
+
"step": 39000
|
| 13661 |
}
|
| 13662 |
],
|
| 13663 |
"logging_steps": 20,
|
|
|
|
| 13677 |
"attributes": {}
|
| 13678 |
}
|
| 13679 |
},
|
| 13680 |
+
"total_flos": 7.216003991715447e+19,
|
| 13681 |
"train_batch_size": 48,
|
| 13682 |
"trial_name": null,
|
| 13683 |
"trial_params": null
|