Training in progress, step 68000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 304481530
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:855097e18de16f85c46f8b027e1873d375c3a4edc034e8bed8a7f0b58970ad94
|
| 3 |
size 304481530
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402029570
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6210b328c6e30eb767412099efb2004508322ff25c3e6056826eba5d995bc2b
|
| 3 |
size 402029570
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1fe05f5b470f95761cfc3fed3146b8c8e8a912646d05e70e539792b7f745a3f
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49ad9d6f5fe6b13eeb9343f8fae928ab75997e82b569c4a8977d808cdc884b1e
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4711ff133c23ad6d8a7643a31e0e727444cc5280990eabd826bfc8c92e7cdf77
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d66ecdc5ab3f9e8ebc655822c33c54e4023463dd04074044db32f0a8095e3378
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:730a17924aec965fee0684191a1f8a93d017e71268086042298dd7299e09c6f3
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -23458,6 +23458,356 @@
|
|
| 23458 |
"learning_rate": 0.0004835782880465919,
|
| 23459 |
"loss": 16.6314,
|
| 23460 |
"step": 67000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23461 |
}
|
| 23462 |
],
|
| 23463 |
"logging_steps": 20,
|
|
@@ -23477,7 +23827,7 @@
|
|
| 23477 |
"attributes": {}
|
| 23478 |
}
|
| 23479 |
},
|
| 23480 |
-
"total_flos": 4.
|
| 23481 |
"train_batch_size": 48,
|
| 23482 |
"trial_name": null,
|
| 23483 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.1007293993565169,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 68000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 23458 |
"learning_rate": 0.0004835782880465919,
|
| 23459 |
"loss": 16.6314,
|
| 23460 |
"step": 67000
|
| 23461 |
+
},
|
| 23462 |
+
{
|
| 23463 |
+
"epoch": 0.09927771095402592,
|
| 23464 |
+
"grad_norm": 6.46875,
|
| 23465 |
+
"learning_rate": 0.0004835733491115844,
|
| 23466 |
+
"loss": 16.6278,
|
| 23467 |
+
"step": 67020
|
| 23468 |
+
},
|
| 23469 |
+
{
|
| 23470 |
+
"epoch": 0.0993073372479543,
|
| 23471 |
+
"grad_norm": 6.78125,
|
| 23472 |
+
"learning_rate": 0.0004835684101765768,
|
| 23473 |
+
"loss": 16.5956,
|
| 23474 |
+
"step": 67040
|
| 23475 |
+
},
|
| 23476 |
+
{
|
| 23477 |
+
"epoch": 0.09933696354188269,
|
| 23478 |
+
"grad_norm": 6.75,
|
| 23479 |
+
"learning_rate": 0.00048356347124156926,
|
| 23480 |
+
"loss": 16.6496,
|
| 23481 |
+
"step": 67060
|
| 23482 |
+
},
|
| 23483 |
+
{
|
| 23484 |
+
"epoch": 0.09936658983581108,
|
| 23485 |
+
"grad_norm": 6.65625,
|
| 23486 |
+
"learning_rate": 0.00048355853230656166,
|
| 23487 |
+
"loss": 16.6581,
|
| 23488 |
+
"step": 67080
|
| 23489 |
+
},
|
| 23490 |
+
{
|
| 23491 |
+
"epoch": 0.09939621612973946,
|
| 23492 |
+
"grad_norm": 6.4375,
|
| 23493 |
+
"learning_rate": 0.00048355359337155416,
|
| 23494 |
+
"loss": 16.6221,
|
| 23495 |
+
"step": 67100
|
| 23496 |
+
},
|
| 23497 |
+
{
|
| 23498 |
+
"epoch": 0.09942584242366785,
|
| 23499 |
+
"grad_norm": 6.71875,
|
| 23500 |
+
"learning_rate": 0.00048354865443654655,
|
| 23501 |
+
"loss": 16.6772,
|
| 23502 |
+
"step": 67120
|
| 23503 |
+
},
|
| 23504 |
+
{
|
| 23505 |
+
"epoch": 0.09945546871759624,
|
| 23506 |
+
"grad_norm": 7.25,
|
| 23507 |
+
"learning_rate": 0.00048354371550153895,
|
| 23508 |
+
"loss": 16.6104,
|
| 23509 |
+
"step": 67140
|
| 23510 |
+
},
|
| 23511 |
+
{
|
| 23512 |
+
"epoch": 0.09948509501152462,
|
| 23513 |
+
"grad_norm": 6.75,
|
| 23514 |
+
"learning_rate": 0.0004835387765665314,
|
| 23515 |
+
"loss": 16.6313,
|
| 23516 |
+
"step": 67160
|
| 23517 |
+
},
|
| 23518 |
+
{
|
| 23519 |
+
"epoch": 0.09951472130545301,
|
| 23520 |
+
"grad_norm": 6.53125,
|
| 23521 |
+
"learning_rate": 0.00048353383763152384,
|
| 23522 |
+
"loss": 16.6284,
|
| 23523 |
+
"step": 67180
|
| 23524 |
+
},
|
| 23525 |
+
{
|
| 23526 |
+
"epoch": 0.0995443475993814,
|
| 23527 |
+
"grad_norm": 7.6875,
|
| 23528 |
+
"learning_rate": 0.0004835288986965163,
|
| 23529 |
+
"loss": 16.6672,
|
| 23530 |
+
"step": 67200
|
| 23531 |
+
},
|
| 23532 |
+
{
|
| 23533 |
+
"epoch": 0.09957397389330978,
|
| 23534 |
+
"grad_norm": 6.75,
|
| 23535 |
+
"learning_rate": 0.0004835239597615087,
|
| 23536 |
+
"loss": 16.6011,
|
| 23537 |
+
"step": 67220
|
| 23538 |
+
},
|
| 23539 |
+
{
|
| 23540 |
+
"epoch": 0.09960360018723818,
|
| 23541 |
+
"grad_norm": 6.4375,
|
| 23542 |
+
"learning_rate": 0.0004835190208265012,
|
| 23543 |
+
"loss": 16.6609,
|
| 23544 |
+
"step": 67240
|
| 23545 |
+
},
|
| 23546 |
+
{
|
| 23547 |
+
"epoch": 0.09963322648116657,
|
| 23548 |
+
"grad_norm": 7.40625,
|
| 23549 |
+
"learning_rate": 0.0004835140818914936,
|
| 23550 |
+
"loss": 16.6259,
|
| 23551 |
+
"step": 67260
|
| 23552 |
+
},
|
| 23553 |
+
{
|
| 23554 |
+
"epoch": 0.09966285277509496,
|
| 23555 |
+
"grad_norm": 6.8125,
|
| 23556 |
+
"learning_rate": 0.000483509142956486,
|
| 23557 |
+
"loss": 16.6092,
|
| 23558 |
+
"step": 67280
|
| 23559 |
+
},
|
| 23560 |
+
{
|
| 23561 |
+
"epoch": 0.09969247906902334,
|
| 23562 |
+
"grad_norm": 6.3125,
|
| 23563 |
+
"learning_rate": 0.0004835042040214784,
|
| 23564 |
+
"loss": 16.6204,
|
| 23565 |
+
"step": 67300
|
| 23566 |
+
},
|
| 23567 |
+
{
|
| 23568 |
+
"epoch": 0.09972210536295173,
|
| 23569 |
+
"grad_norm": 6.65625,
|
| 23570 |
+
"learning_rate": 0.0004834992650864709,
|
| 23571 |
+
"loss": 16.6558,
|
| 23572 |
+
"step": 67320
|
| 23573 |
+
},
|
| 23574 |
+
{
|
| 23575 |
+
"epoch": 0.09975173165688012,
|
| 23576 |
+
"grad_norm": 6.875,
|
| 23577 |
+
"learning_rate": 0.0004834943261514633,
|
| 23578 |
+
"loss": 16.6027,
|
| 23579 |
+
"step": 67340
|
| 23580 |
+
},
|
| 23581 |
+
{
|
| 23582 |
+
"epoch": 0.0997813579508085,
|
| 23583 |
+
"grad_norm": 6.53125,
|
| 23584 |
+
"learning_rate": 0.00048348938721645576,
|
| 23585 |
+
"loss": 16.5951,
|
| 23586 |
+
"step": 67360
|
| 23587 |
+
},
|
| 23588 |
+
{
|
| 23589 |
+
"epoch": 0.09981098424473689,
|
| 23590 |
+
"grad_norm": 7.1875,
|
| 23591 |
+
"learning_rate": 0.00048348444828144816,
|
| 23592 |
+
"loss": 16.6149,
|
| 23593 |
+
"step": 67380
|
| 23594 |
+
},
|
| 23595 |
+
{
|
| 23596 |
+
"epoch": 0.09984061053866528,
|
| 23597 |
+
"grad_norm": 6.78125,
|
| 23598 |
+
"learning_rate": 0.00048347950934644066,
|
| 23599 |
+
"loss": 16.5772,
|
| 23600 |
+
"step": 67400
|
| 23601 |
+
},
|
| 23602 |
+
{
|
| 23603 |
+
"epoch": 0.09987023683259366,
|
| 23604 |
+
"grad_norm": 6.8125,
|
| 23605 |
+
"learning_rate": 0.00048347457041143305,
|
| 23606 |
+
"loss": 16.6317,
|
| 23607 |
+
"step": 67420
|
| 23608 |
+
},
|
| 23609 |
+
{
|
| 23610 |
+
"epoch": 0.09989986312652205,
|
| 23611 |
+
"grad_norm": 7.0625,
|
| 23612 |
+
"learning_rate": 0.0004834696314764255,
|
| 23613 |
+
"loss": 16.5701,
|
| 23614 |
+
"step": 67440
|
| 23615 |
+
},
|
| 23616 |
+
{
|
| 23617 |
+
"epoch": 0.09992948942045043,
|
| 23618 |
+
"grad_norm": 6.65625,
|
| 23619 |
+
"learning_rate": 0.0004834646925414179,
|
| 23620 |
+
"loss": 16.6727,
|
| 23621 |
+
"step": 67460
|
| 23622 |
+
},
|
| 23623 |
+
{
|
| 23624 |
+
"epoch": 0.09995911571437882,
|
| 23625 |
+
"grad_norm": 6.84375,
|
| 23626 |
+
"learning_rate": 0.00048345975360641034,
|
| 23627 |
+
"loss": 16.6282,
|
| 23628 |
+
"step": 67480
|
| 23629 |
+
},
|
| 23630 |
+
{
|
| 23631 |
+
"epoch": 0.09998874200830721,
|
| 23632 |
+
"grad_norm": 7.59375,
|
| 23633 |
+
"learning_rate": 0.0004834548146714028,
|
| 23634 |
+
"loss": 16.58,
|
| 23635 |
+
"step": 67500
|
| 23636 |
+
},
|
| 23637 |
+
{
|
| 23638 |
+
"epoch": 0.1000183683022356,
|
| 23639 |
+
"grad_norm": 8.375,
|
| 23640 |
+
"learning_rate": 0.0004834498757363952,
|
| 23641 |
+
"loss": 16.657,
|
| 23642 |
+
"step": 67520
|
| 23643 |
+
},
|
| 23644 |
+
{
|
| 23645 |
+
"epoch": 0.100047994596164,
|
| 23646 |
+
"grad_norm": 6.4375,
|
| 23647 |
+
"learning_rate": 0.0004834449368013877,
|
| 23648 |
+
"loss": 16.6031,
|
| 23649 |
+
"step": 67540
|
| 23650 |
+
},
|
| 23651 |
+
{
|
| 23652 |
+
"epoch": 0.10007762089009238,
|
| 23653 |
+
"grad_norm": 6.46875,
|
| 23654 |
+
"learning_rate": 0.0004834399978663801,
|
| 23655 |
+
"loss": 16.5908,
|
| 23656 |
+
"step": 67560
|
| 23657 |
+
},
|
| 23658 |
+
{
|
| 23659 |
+
"epoch": 0.10010724718402077,
|
| 23660 |
+
"grad_norm": 6.875,
|
| 23661 |
+
"learning_rate": 0.0004834350589313725,
|
| 23662 |
+
"loss": 16.5555,
|
| 23663 |
+
"step": 67580
|
| 23664 |
+
},
|
| 23665 |
+
{
|
| 23666 |
+
"epoch": 0.10013687347794915,
|
| 23667 |
+
"grad_norm": 8.375,
|
| 23668 |
+
"learning_rate": 0.0004834301199963649,
|
| 23669 |
+
"loss": 16.6778,
|
| 23670 |
+
"step": 67600
|
| 23671 |
+
},
|
| 23672 |
+
{
|
| 23673 |
+
"epoch": 0.10016649977187754,
|
| 23674 |
+
"grad_norm": 6.90625,
|
| 23675 |
+
"learning_rate": 0.0004834251810613574,
|
| 23676 |
+
"loss": 16.6049,
|
| 23677 |
+
"step": 67620
|
| 23678 |
+
},
|
| 23679 |
+
{
|
| 23680 |
+
"epoch": 0.10019612606580593,
|
| 23681 |
+
"grad_norm": 6.96875,
|
| 23682 |
+
"learning_rate": 0.0004834202421263498,
|
| 23683 |
+
"loss": 16.6337,
|
| 23684 |
+
"step": 67640
|
| 23685 |
+
},
|
| 23686 |
+
{
|
| 23687 |
+
"epoch": 0.10022575235973431,
|
| 23688 |
+
"grad_norm": 7.09375,
|
| 23689 |
+
"learning_rate": 0.00048341530319134226,
|
| 23690 |
+
"loss": 16.6476,
|
| 23691 |
+
"step": 67660
|
| 23692 |
+
},
|
| 23693 |
+
{
|
| 23694 |
+
"epoch": 0.1002553786536627,
|
| 23695 |
+
"grad_norm": 6.03125,
|
| 23696 |
+
"learning_rate": 0.00048341036425633466,
|
| 23697 |
+
"loss": 16.6637,
|
| 23698 |
+
"step": 67680
|
| 23699 |
+
},
|
| 23700 |
+
{
|
| 23701 |
+
"epoch": 0.10028500494759109,
|
| 23702 |
+
"grad_norm": 6.25,
|
| 23703 |
+
"learning_rate": 0.00048340542532132716,
|
| 23704 |
+
"loss": 16.6126,
|
| 23705 |
+
"step": 67700
|
| 23706 |
+
},
|
| 23707 |
+
{
|
| 23708 |
+
"epoch": 0.10031463124151947,
|
| 23709 |
+
"grad_norm": 7.0,
|
| 23710 |
+
"learning_rate": 0.00048340048638631955,
|
| 23711 |
+
"loss": 16.6287,
|
| 23712 |
+
"step": 67720
|
| 23713 |
+
},
|
| 23714 |
+
{
|
| 23715 |
+
"epoch": 0.10034425753544786,
|
| 23716 |
+
"grad_norm": 7.71875,
|
| 23717 |
+
"learning_rate": 0.000483395547451312,
|
| 23718 |
+
"loss": 16.6074,
|
| 23719 |
+
"step": 67740
|
| 23720 |
+
},
|
| 23721 |
+
{
|
| 23722 |
+
"epoch": 0.10037388382937625,
|
| 23723 |
+
"grad_norm": 6.46875,
|
| 23724 |
+
"learning_rate": 0.0004833906085163044,
|
| 23725 |
+
"loss": 16.5914,
|
| 23726 |
+
"step": 67760
|
| 23727 |
+
},
|
| 23728 |
+
{
|
| 23729 |
+
"epoch": 0.10040351012330463,
|
| 23730 |
+
"grad_norm": 6.71875,
|
| 23731 |
+
"learning_rate": 0.0004833856695812969,
|
| 23732 |
+
"loss": 16.6092,
|
| 23733 |
+
"step": 67780
|
| 23734 |
+
},
|
| 23735 |
+
{
|
| 23736 |
+
"epoch": 0.10043313641723302,
|
| 23737 |
+
"grad_norm": 6.5625,
|
| 23738 |
+
"learning_rate": 0.0004833807306462893,
|
| 23739 |
+
"loss": 16.6073,
|
| 23740 |
+
"step": 67800
|
| 23741 |
+
},
|
| 23742 |
+
{
|
| 23743 |
+
"epoch": 0.1004627627111614,
|
| 23744 |
+
"grad_norm": 6.40625,
|
| 23745 |
+
"learning_rate": 0.0004833757917112817,
|
| 23746 |
+
"loss": 16.5553,
|
| 23747 |
+
"step": 67820
|
| 23748 |
+
},
|
| 23749 |
+
{
|
| 23750 |
+
"epoch": 0.10049238900508979,
|
| 23751 |
+
"grad_norm": 6.65625,
|
| 23752 |
+
"learning_rate": 0.0004833708527762742,
|
| 23753 |
+
"loss": 16.6536,
|
| 23754 |
+
"step": 67840
|
| 23755 |
+
},
|
| 23756 |
+
{
|
| 23757 |
+
"epoch": 0.10052201529901819,
|
| 23758 |
+
"grad_norm": 6.5625,
|
| 23759 |
+
"learning_rate": 0.0004833659138412666,
|
| 23760 |
+
"loss": 16.6156,
|
| 23761 |
+
"step": 67860
|
| 23762 |
+
},
|
| 23763 |
+
{
|
| 23764 |
+
"epoch": 0.10055164159294658,
|
| 23765 |
+
"grad_norm": 6.8125,
|
| 23766 |
+
"learning_rate": 0.00048336097490625903,
|
| 23767 |
+
"loss": 16.527,
|
| 23768 |
+
"step": 67880
|
| 23769 |
+
},
|
| 23770 |
+
{
|
| 23771 |
+
"epoch": 0.10058126788687496,
|
| 23772 |
+
"grad_norm": 6.8125,
|
| 23773 |
+
"learning_rate": 0.0004833560359712514,
|
| 23774 |
+
"loss": 16.5762,
|
| 23775 |
+
"step": 67900
|
| 23776 |
+
},
|
| 23777 |
+
{
|
| 23778 |
+
"epoch": 0.10061089418080335,
|
| 23779 |
+
"grad_norm": 6.96875,
|
| 23780 |
+
"learning_rate": 0.0004833510970362439,
|
| 23781 |
+
"loss": 16.5745,
|
| 23782 |
+
"step": 67920
|
| 23783 |
+
},
|
| 23784 |
+
{
|
| 23785 |
+
"epoch": 0.10064052047473174,
|
| 23786 |
+
"grad_norm": 6.84375,
|
| 23787 |
+
"learning_rate": 0.0004833461581012363,
|
| 23788 |
+
"loss": 16.6078,
|
| 23789 |
+
"step": 67940
|
| 23790 |
+
},
|
| 23791 |
+
{
|
| 23792 |
+
"epoch": 0.10067014676866012,
|
| 23793 |
+
"grad_norm": 6.4375,
|
| 23794 |
+
"learning_rate": 0.00048334121916622876,
|
| 23795 |
+
"loss": 16.581,
|
| 23796 |
+
"step": 67960
|
| 23797 |
+
},
|
| 23798 |
+
{
|
| 23799 |
+
"epoch": 0.10069977306258851,
|
| 23800 |
+
"grad_norm": 6.53125,
|
| 23801 |
+
"learning_rate": 0.00048333628023122116,
|
| 23802 |
+
"loss": 16.6142,
|
| 23803 |
+
"step": 67980
|
| 23804 |
+
},
|
| 23805 |
+
{
|
| 23806 |
+
"epoch": 0.1007293993565169,
|
| 23807 |
+
"grad_norm": 6.40625,
|
| 23808 |
+
"learning_rate": 0.00048333134129621366,
|
| 23809 |
+
"loss": 16.5557,
|
| 23810 |
+
"step": 68000
|
| 23811 |
}
|
| 23812 |
],
|
| 23813 |
"logging_steps": 20,
|
|
|
|
| 23827 |
"attributes": {}
|
| 23828 |
}
|
| 23829 |
},
|
| 23830 |
+
"total_flos": 4.999562170735998e+19,
|
| 23831 |
"train_batch_size": 48,
|
| 23832 |
"trial_name": null,
|
| 23833 |
"trial_params": null
|