Training in progress, step 82000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 304481530
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fdea62ab3807d5c83f086f5151ea04cbf9ee9578a7b8e17883bf2d371b73c59
|
| 3 |
size 304481530
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402029570
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a88a14f277e372edcbef1004c41517572ec49368da6f457c37072a723ef15a5b
|
| 3 |
size 402029570
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e1adfa857ebdc86ec7fd943675fc57102e813288b2aafc927551a884f4b79c6
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57a30f5d80cfa3dad198a9cbb5668d7cb89aba9aa41f1f44032001d4e5f9fed3
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50a1704d8cd0ae1d1da5487260fd6a9d83621f0daf881048aad7559ce485f0af
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:512ff917732395bd5049da89bf880b825c8d71316cd69e7277929f7763966d5b
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13c457672f739f0501828eb93166275a4b9a832449b61ea1951a9a198cb7e4de
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -28358,6 +28358,356 @@
|
|
| 28358 |
"learning_rate": 0.0004801210335412954,
|
| 28359 |
"loss": 16.2376,
|
| 28360 |
"step": 81000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28361 |
}
|
| 28362 |
],
|
| 28363 |
"logging_steps": 20,
|
|
@@ -28377,7 +28727,7 @@
|
|
| 28377 |
"attributes": {}
|
| 28378 |
}
|
| 28379 |
},
|
| 28380 |
-
"total_flos":
|
| 28381 |
"train_batch_size": 48,
|
| 28382 |
"trial_name": null,
|
| 28383 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.12146780510638802,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 82000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 28358 |
"learning_rate": 0.0004801210335412954,
|
| 28359 |
"loss": 16.2376,
|
| 28360 |
"step": 81000
|
| 28361 |
+
},
|
| 28362 |
+
{
|
| 28363 |
+
"epoch": 0.12001611670389704,
|
| 28364 |
+
"grad_norm": 6.78125,
|
| 28365 |
+
"learning_rate": 0.00048011609460628786,
|
| 28366 |
+
"loss": 16.2201,
|
| 28367 |
+
"step": 81020
|
| 28368 |
+
},
|
| 28369 |
+
{
|
| 28370 |
+
"epoch": 0.12004574299782543,
|
| 28371 |
+
"grad_norm": 7.0625,
|
| 28372 |
+
"learning_rate": 0.00048011115567128025,
|
| 28373 |
+
"loss": 16.2081,
|
| 28374 |
+
"step": 81040
|
| 28375 |
+
},
|
| 28376 |
+
{
|
| 28377 |
+
"epoch": 0.12007536929175382,
|
| 28378 |
+
"grad_norm": 6.40625,
|
| 28379 |
+
"learning_rate": 0.00048010621673627276,
|
| 28380 |
+
"loss": 16.1728,
|
| 28381 |
+
"step": 81060
|
| 28382 |
+
},
|
| 28383 |
+
{
|
| 28384 |
+
"epoch": 0.1201049955856822,
|
| 28385 |
+
"grad_norm": 5.96875,
|
| 28386 |
+
"learning_rate": 0.00048010127780126515,
|
| 28387 |
+
"loss": 16.2446,
|
| 28388 |
+
"step": 81080
|
| 28389 |
+
},
|
| 28390 |
+
{
|
| 28391 |
+
"epoch": 0.12013462187961059,
|
| 28392 |
+
"grad_norm": 6.8125,
|
| 28393 |
+
"learning_rate": 0.0004800963388662576,
|
| 28394 |
+
"loss": 16.2341,
|
| 28395 |
+
"step": 81100
|
| 28396 |
+
},
|
| 28397 |
+
{
|
| 28398 |
+
"epoch": 0.12016424817353898,
|
| 28399 |
+
"grad_norm": 7.21875,
|
| 28400 |
+
"learning_rate": 0.00048009139993125004,
|
| 28401 |
+
"loss": 16.2402,
|
| 28402 |
+
"step": 81120
|
| 28403 |
+
},
|
| 28404 |
+
{
|
| 28405 |
+
"epoch": 0.12019387446746736,
|
| 28406 |
+
"grad_norm": 6.3125,
|
| 28407 |
+
"learning_rate": 0.0004800864609962425,
|
| 28408 |
+
"loss": 16.2117,
|
| 28409 |
+
"step": 81140
|
| 28410 |
+
},
|
| 28411 |
+
{
|
| 28412 |
+
"epoch": 0.12022350076139575,
|
| 28413 |
+
"grad_norm": 6.09375,
|
| 28414 |
+
"learning_rate": 0.0004800815220612349,
|
| 28415 |
+
"loss": 16.2817,
|
| 28416 |
+
"step": 81160
|
| 28417 |
+
},
|
| 28418 |
+
{
|
| 28419 |
+
"epoch": 0.12025312705532414,
|
| 28420 |
+
"grad_norm": 6.59375,
|
| 28421 |
+
"learning_rate": 0.00048007658312622733,
|
| 28422 |
+
"loss": 16.2479,
|
| 28423 |
+
"step": 81180
|
| 28424 |
+
},
|
| 28425 |
+
{
|
| 28426 |
+
"epoch": 0.12028275334925252,
|
| 28427 |
+
"grad_norm": 7.03125,
|
| 28428 |
+
"learning_rate": 0.0004800716441912198,
|
| 28429 |
+
"loss": 16.2402,
|
| 28430 |
+
"step": 81200
|
| 28431 |
+
},
|
| 28432 |
+
{
|
| 28433 |
+
"epoch": 0.12031237964318092,
|
| 28434 |
+
"grad_norm": 6.59375,
|
| 28435 |
+
"learning_rate": 0.00048006670525621223,
|
| 28436 |
+
"loss": 16.2495,
|
| 28437 |
+
"step": 81220
|
| 28438 |
+
},
|
| 28439 |
+
{
|
| 28440 |
+
"epoch": 0.12034200593710931,
|
| 28441 |
+
"grad_norm": 6.75,
|
| 28442 |
+
"learning_rate": 0.0004800617663212046,
|
| 28443 |
+
"loss": 16.2373,
|
| 28444 |
+
"step": 81240
|
| 28445 |
+
},
|
| 28446 |
+
{
|
| 28447 |
+
"epoch": 0.1203716322310377,
|
| 28448 |
+
"grad_norm": 7.0625,
|
| 28449 |
+
"learning_rate": 0.00048005682738619707,
|
| 28450 |
+
"loss": 16.1696,
|
| 28451 |
+
"step": 81260
|
| 28452 |
+
},
|
| 28453 |
+
{
|
| 28454 |
+
"epoch": 0.12040125852496608,
|
| 28455 |
+
"grad_norm": 6.375,
|
| 28456 |
+
"learning_rate": 0.0004800518884511895,
|
| 28457 |
+
"loss": 16.1894,
|
| 28458 |
+
"step": 81280
|
| 28459 |
+
},
|
| 28460 |
+
{
|
| 28461 |
+
"epoch": 0.12043088481889447,
|
| 28462 |
+
"grad_norm": 7.0,
|
| 28463 |
+
"learning_rate": 0.00048004694951618197,
|
| 28464 |
+
"loss": 16.214,
|
| 28465 |
+
"step": 81300
|
| 28466 |
+
},
|
| 28467 |
+
{
|
| 28468 |
+
"epoch": 0.12046051111282285,
|
| 28469 |
+
"grad_norm": 7.125,
|
| 28470 |
+
"learning_rate": 0.00048004201058117436,
|
| 28471 |
+
"loss": 16.2855,
|
| 28472 |
+
"step": 81320
|
| 28473 |
+
},
|
| 28474 |
+
{
|
| 28475 |
+
"epoch": 0.12049013740675124,
|
| 28476 |
+
"grad_norm": 6.84375,
|
| 28477 |
+
"learning_rate": 0.00048003707164616675,
|
| 28478 |
+
"loss": 16.2033,
|
| 28479 |
+
"step": 81340
|
| 28480 |
+
},
|
| 28481 |
+
{
|
| 28482 |
+
"epoch": 0.12051976370067963,
|
| 28483 |
+
"grad_norm": 7.25,
|
| 28484 |
+
"learning_rate": 0.00048003213271115926,
|
| 28485 |
+
"loss": 16.1961,
|
| 28486 |
+
"step": 81360
|
| 28487 |
+
},
|
| 28488 |
+
{
|
| 28489 |
+
"epoch": 0.12054938999460801,
|
| 28490 |
+
"grad_norm": 9.8125,
|
| 28491 |
+
"learning_rate": 0.00048002719377615165,
|
| 28492 |
+
"loss": 16.1959,
|
| 28493 |
+
"step": 81380
|
| 28494 |
+
},
|
| 28495 |
+
{
|
| 28496 |
+
"epoch": 0.1205790162885364,
|
| 28497 |
+
"grad_norm": 6.0625,
|
| 28498 |
+
"learning_rate": 0.0004800222548411441,
|
| 28499 |
+
"loss": 16.2231,
|
| 28500 |
+
"step": 81400
|
| 28501 |
+
},
|
| 28502 |
+
{
|
| 28503 |
+
"epoch": 0.12060864258246479,
|
| 28504 |
+
"grad_norm": 6.6875,
|
| 28505 |
+
"learning_rate": 0.00048001731590613655,
|
| 28506 |
+
"loss": 16.206,
|
| 28507 |
+
"step": 81420
|
| 28508 |
+
},
|
| 28509 |
+
{
|
| 28510 |
+
"epoch": 0.12063826887639317,
|
| 28511 |
+
"grad_norm": 6.375,
|
| 28512 |
+
"learning_rate": 0.000480012376971129,
|
| 28513 |
+
"loss": 16.2459,
|
| 28514 |
+
"step": 81440
|
| 28515 |
+
},
|
| 28516 |
+
{
|
| 28517 |
+
"epoch": 0.12066789517032156,
|
| 28518 |
+
"grad_norm": 5.90625,
|
| 28519 |
+
"learning_rate": 0.0004800074380361214,
|
| 28520 |
+
"loss": 16.2108,
|
| 28521 |
+
"step": 81460
|
| 28522 |
+
},
|
| 28523 |
+
{
|
| 28524 |
+
"epoch": 0.12069752146424995,
|
| 28525 |
+
"grad_norm": 6.3125,
|
| 28526 |
+
"learning_rate": 0.00048000249910111383,
|
| 28527 |
+
"loss": 16.2263,
|
| 28528 |
+
"step": 81480
|
| 28529 |
+
},
|
| 28530 |
+
{
|
| 28531 |
+
"epoch": 0.12072714775817833,
|
| 28532 |
+
"grad_norm": 6.65625,
|
| 28533 |
+
"learning_rate": 0.0004799975601661063,
|
| 28534 |
+
"loss": 16.2835,
|
| 28535 |
+
"step": 81500
|
| 28536 |
+
},
|
| 28537 |
+
{
|
| 28538 |
+
"epoch": 0.12075677405210672,
|
| 28539 |
+
"grad_norm": 6.625,
|
| 28540 |
+
"learning_rate": 0.00047999262123109873,
|
| 28541 |
+
"loss": 16.246,
|
| 28542 |
+
"step": 81520
|
| 28543 |
+
},
|
| 28544 |
+
{
|
| 28545 |
+
"epoch": 0.12078640034603512,
|
| 28546 |
+
"grad_norm": 7.71875,
|
| 28547 |
+
"learning_rate": 0.0004799876822960911,
|
| 28548 |
+
"loss": 16.2665,
|
| 28549 |
+
"step": 81540
|
| 28550 |
+
},
|
| 28551 |
+
{
|
| 28552 |
+
"epoch": 0.1208160266399635,
|
| 28553 |
+
"grad_norm": 6.46875,
|
| 28554 |
+
"learning_rate": 0.00047998274336108357,
|
| 28555 |
+
"loss": 16.2341,
|
| 28556 |
+
"step": 81560
|
| 28557 |
+
},
|
| 28558 |
+
{
|
| 28559 |
+
"epoch": 0.12084565293389189,
|
| 28560 |
+
"grad_norm": 6.84375,
|
| 28561 |
+
"learning_rate": 0.000479977804426076,
|
| 28562 |
+
"loss": 16.2343,
|
| 28563 |
+
"step": 81580
|
| 28564 |
+
},
|
| 28565 |
+
{
|
| 28566 |
+
"epoch": 0.12087527922782028,
|
| 28567 |
+
"grad_norm": 6.375,
|
| 28568 |
+
"learning_rate": 0.00047997286549106847,
|
| 28569 |
+
"loss": 16.1953,
|
| 28570 |
+
"step": 81600
|
| 28571 |
+
},
|
| 28572 |
+
{
|
| 28573 |
+
"epoch": 0.12090490552174867,
|
| 28574 |
+
"grad_norm": 8.125,
|
| 28575 |
+
"learning_rate": 0.00047996792655606086,
|
| 28576 |
+
"loss": 16.2518,
|
| 28577 |
+
"step": 81620
|
| 28578 |
+
},
|
| 28579 |
+
{
|
| 28580 |
+
"epoch": 0.12093453181567705,
|
| 28581 |
+
"grad_norm": 8.0625,
|
| 28582 |
+
"learning_rate": 0.0004799629876210533,
|
| 28583 |
+
"loss": 16.1682,
|
| 28584 |
+
"step": 81640
|
| 28585 |
+
},
|
| 28586 |
+
{
|
| 28587 |
+
"epoch": 0.12096415810960544,
|
| 28588 |
+
"grad_norm": 6.59375,
|
| 28589 |
+
"learning_rate": 0.00047995804868604576,
|
| 28590 |
+
"loss": 16.2499,
|
| 28591 |
+
"step": 81660
|
| 28592 |
+
},
|
| 28593 |
+
{
|
| 28594 |
+
"epoch": 0.12099378440353382,
|
| 28595 |
+
"grad_norm": 9.5,
|
| 28596 |
+
"learning_rate": 0.00047995310975103815,
|
| 28597 |
+
"loss": 16.2143,
|
| 28598 |
+
"step": 81680
|
| 28599 |
+
},
|
| 28600 |
+
{
|
| 28601 |
+
"epoch": 0.12102341069746221,
|
| 28602 |
+
"grad_norm": 6.59375,
|
| 28603 |
+
"learning_rate": 0.0004799481708160306,
|
| 28604 |
+
"loss": 16.178,
|
| 28605 |
+
"step": 81700
|
| 28606 |
+
},
|
| 28607 |
+
{
|
| 28608 |
+
"epoch": 0.1210530369913906,
|
| 28609 |
+
"grad_norm": 7.03125,
|
| 28610 |
+
"learning_rate": 0.00047994323188102305,
|
| 28611 |
+
"loss": 16.2018,
|
| 28612 |
+
"step": 81720
|
| 28613 |
+
},
|
| 28614 |
+
{
|
| 28615 |
+
"epoch": 0.12108266328531898,
|
| 28616 |
+
"grad_norm": 6.9375,
|
| 28617 |
+
"learning_rate": 0.0004799382929460155,
|
| 28618 |
+
"loss": 16.2976,
|
| 28619 |
+
"step": 81740
|
| 28620 |
+
},
|
| 28621 |
+
{
|
| 28622 |
+
"epoch": 0.12111228957924737,
|
| 28623 |
+
"grad_norm": 6.4375,
|
| 28624 |
+
"learning_rate": 0.0004799333540110079,
|
| 28625 |
+
"loss": 16.2988,
|
| 28626 |
+
"step": 81760
|
| 28627 |
+
},
|
| 28628 |
+
{
|
| 28629 |
+
"epoch": 0.12114191587317576,
|
| 28630 |
+
"grad_norm": 8.8125,
|
| 28631 |
+
"learning_rate": 0.00047992841507600033,
|
| 28632 |
+
"loss": 16.2317,
|
| 28633 |
+
"step": 81780
|
| 28634 |
+
},
|
| 28635 |
+
{
|
| 28636 |
+
"epoch": 0.12117154216710414,
|
| 28637 |
+
"grad_norm": 7.6875,
|
| 28638 |
+
"learning_rate": 0.0004799234761409928,
|
| 28639 |
+
"loss": 16.1929,
|
| 28640 |
+
"step": 81800
|
| 28641 |
+
},
|
| 28642 |
+
{
|
| 28643 |
+
"epoch": 0.12120116846103253,
|
| 28644 |
+
"grad_norm": 6.1875,
|
| 28645 |
+
"learning_rate": 0.00047991853720598523,
|
| 28646 |
+
"loss": 16.1955,
|
| 28647 |
+
"step": 81820
|
| 28648 |
+
},
|
| 28649 |
+
{
|
| 28650 |
+
"epoch": 0.12123079475496092,
|
| 28651 |
+
"grad_norm": 6.09375,
|
| 28652 |
+
"learning_rate": 0.0004799135982709776,
|
| 28653 |
+
"loss": 16.2418,
|
| 28654 |
+
"step": 81840
|
| 28655 |
+
},
|
| 28656 |
+
{
|
| 28657 |
+
"epoch": 0.12126042104888932,
|
| 28658 |
+
"grad_norm": 6.90625,
|
| 28659 |
+
"learning_rate": 0.00047990865933597007,
|
| 28660 |
+
"loss": 16.218,
|
| 28661 |
+
"step": 81860
|
| 28662 |
+
},
|
| 28663 |
+
{
|
| 28664 |
+
"epoch": 0.1212900473428177,
|
| 28665 |
+
"grad_norm": 9.5625,
|
| 28666 |
+
"learning_rate": 0.0004799037204009625,
|
| 28667 |
+
"loss": 16.265,
|
| 28668 |
+
"step": 81880
|
| 28669 |
+
},
|
| 28670 |
+
{
|
| 28671 |
+
"epoch": 0.12131967363674609,
|
| 28672 |
+
"grad_norm": 6.78125,
|
| 28673 |
+
"learning_rate": 0.00047989878146595497,
|
| 28674 |
+
"loss": 16.2145,
|
| 28675 |
+
"step": 81900
|
| 28676 |
+
},
|
| 28677 |
+
{
|
| 28678 |
+
"epoch": 0.12134929993067448,
|
| 28679 |
+
"grad_norm": 6.59375,
|
| 28680 |
+
"learning_rate": 0.00047989384253094736,
|
| 28681 |
+
"loss": 16.1485,
|
| 28682 |
+
"step": 81920
|
| 28683 |
+
},
|
| 28684 |
+
{
|
| 28685 |
+
"epoch": 0.12137892622460286,
|
| 28686 |
+
"grad_norm": 6.40625,
|
| 28687 |
+
"learning_rate": 0.0004798889035959398,
|
| 28688 |
+
"loss": 16.2334,
|
| 28689 |
+
"step": 81940
|
| 28690 |
+
},
|
| 28691 |
+
{
|
| 28692 |
+
"epoch": 0.12140855251853125,
|
| 28693 |
+
"grad_norm": 7.5625,
|
| 28694 |
+
"learning_rate": 0.00047988396466093226,
|
| 28695 |
+
"loss": 16.1556,
|
| 28696 |
+
"step": 81960
|
| 28697 |
+
},
|
| 28698 |
+
{
|
| 28699 |
+
"epoch": 0.12143817881245964,
|
| 28700 |
+
"grad_norm": 6.9375,
|
| 28701 |
+
"learning_rate": 0.0004798790257259247,
|
| 28702 |
+
"loss": 16.1977,
|
| 28703 |
+
"step": 81980
|
| 28704 |
+
},
|
| 28705 |
+
{
|
| 28706 |
+
"epoch": 0.12146780510638802,
|
| 28707 |
+
"grad_norm": 6.625,
|
| 28708 |
+
"learning_rate": 0.0004798740867909171,
|
| 28709 |
+
"loss": 16.2152,
|
| 28710 |
+
"step": 82000
|
| 28711 |
}
|
| 28712 |
],
|
| 28713 |
"logging_steps": 20,
|
|
|
|
| 28727 |
"attributes": {}
|
| 28728 |
}
|
| 28729 |
},
|
| 28730 |
+
"total_flos": 6.02907206476586e+19,
|
| 28731 |
"train_batch_size": 48,
|
| 28732 |
"trial_name": null,
|
| 28733 |
"trial_params": null
|