Training in progress, step 77000, checkpoint

Files changed:
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:456f25e6949b3d7d11abfc7016f20c519ec2224b5939a593f25e94ab538895d0
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:583ce6afe4697cbc9bee02b6ce9c574f4dc14c85f97be8a32cab3b7f02347cff
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9d61a2372056f2b32f6ae2b2c7745d9d5c6ac967a32622f315e73c700a55b59c
 size 14960

last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0305d043d3c89d1352e924dd8f0e87b43a3b6eaaaf9859b3bc689a1146bd169b
 size 14960

last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:974b9922d6267aa5fa0a64e6a68535833054f08cce85f66dc5aaf99a834c1951
 size 14960

last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0cc97b8ced844bab48e2e6688594701ea9aba44b688ce227a136172614ec21f5
 size 14960

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fd8038700a783d3dbbe90c65ffd9f9176aad3cbeda38c8a7508ae0b5dcd99468
 size 1064

last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.11406123162429119,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 77000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -26608,6 +26608,356 @@
       "learning_rate": 0.00048135576729318704,
       "loss": 16.3442,
       "step": 76000
+    },
+    {
+      "epoch": 0.11260954322180021,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004813508283581795,
+      "loss": 16.3857,
+      "step": 76020
+    },
+    {
+      "epoch": 0.1126391695157286,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048134588942317193,
+      "loss": 16.4228,
+      "step": 76040
+    },
+    {
+      "epoch": 0.11266879580965698,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004813409504881643,
+      "loss": 16.4156,
+      "step": 76060
+    },
+    {
+      "epoch": 0.11269842210358537,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004813360115531568,
+      "loss": 16.3843,
+      "step": 76080
+    },
+    {
+      "epoch": 0.11272804839751376,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004813310726181492,
+      "loss": 16.3459,
+      "step": 76100
+    },
+    {
+      "epoch": 0.11275767469144214,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00048132613368314167,
+      "loss": 16.3973,
+      "step": 76120
+    },
+    {
+      "epoch": 0.11278730098537054,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.00048132119474813406,
+      "loss": 16.3245,
+      "step": 76140
+    },
+    {
+      "epoch": 0.11281692727929893,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004813162558131265,
+      "loss": 16.3705,
+      "step": 76160
+    },
+    {
+      "epoch": 0.11284655357322732,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048131131687811896,
+      "loss": 16.3244,
+      "step": 76180
+    },
+    {
+      "epoch": 0.1128761798671557,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004813063779431114,
+      "loss": 16.3466,
+      "step": 76200
+    },
+    {
+      "epoch": 0.11290580616108409,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004813014390081038,
+      "loss": 16.345,
+      "step": 76220
+    },
+    {
+      "epoch": 0.11293543245501247,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00048129650007309625,
+      "loss": 16.3821,
+      "step": 76240
+    },
+    {
+      "epoch": 0.11296505874894086,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004812915611380887,
+      "loss": 16.3541,
+      "step": 76260
+    },
+    {
+      "epoch": 0.11299468504286925,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004812866222030811,
+      "loss": 16.2916,
+      "step": 76280
+    },
+    {
+      "epoch": 0.11302431133679763,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048128168326807354,
+      "loss": 16.2938,
+      "step": 76300
+    },
+    {
+      "epoch": 0.11305393763072602,
+      "grad_norm": 6.5,
+      "learning_rate": 0.000481276744333066,
+      "loss": 16.4026,
+      "step": 76320
+    },
+    {
+      "epoch": 0.1130835639246544,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.00048127180539805843,
+      "loss": 16.3102,
+      "step": 76340
+    },
+    {
+      "epoch": 0.1131131902185828,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004812668664630508,
+      "loss": 16.338,
+      "step": 76360
+    },
+    {
+      "epoch": 0.11314281651251118,
+      "grad_norm": 6.0,
+      "learning_rate": 0.0004812619275280433,
+      "loss": 16.3304,
+      "step": 76380
+    },
+    {
+      "epoch": 0.11317244280643957,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004812569885930357,
+      "loss": 16.353,
+      "step": 76400
+    },
+    {
+      "epoch": 0.11320206910036795,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.00048125204965802817,
+      "loss": 16.3146,
+      "step": 76420
+    },
+    {
+      "epoch": 0.11323169539429635,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048124711072302056,
+      "loss": 16.3556,
+      "step": 76440
+    },
+    {
+      "epoch": 0.11326132168822474,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.000481242171788013,
+      "loss": 16.4044,
+      "step": 76460
+    },
+    {
+      "epoch": 0.11329094798215313,
+      "grad_norm": 6.125,
+      "learning_rate": 0.00048123723285300546,
+      "loss": 16.2708,
+      "step": 76480
+    },
+    {
+      "epoch": 0.11332057427608151,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004812322939179979,
+      "loss": 16.3705,
+      "step": 76500
+    },
+    {
+      "epoch": 0.1133502005700099,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004812273549829903,
+      "loss": 16.3579,
+      "step": 76520
+    },
+    {
+      "epoch": 0.11337982686393829,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.00048122241604798275,
+      "loss": 16.3504,
+      "step": 76540
+    },
+    {
+      "epoch": 0.11340945315786667,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004812174771129752,
+      "loss": 16.3338,
+      "step": 76560
+    },
+    {
+      "epoch": 0.11343907945179506,
+      "grad_norm": 7.75,
+      "learning_rate": 0.00048121253817796764,
+      "loss": 16.337,
+      "step": 76580
+    },
+    {
+      "epoch": 0.11346870574572344,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048120759924296004,
+      "loss": 16.3507,
+      "step": 76600
+    },
+    {
+      "epoch": 0.11349833203965183,
+      "grad_norm": 6.0,
+      "learning_rate": 0.0004812026603079525,
+      "loss": 16.311,
+      "step": 76620
+    },
+    {
+      "epoch": 0.11352795833358022,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.00048119772137294493,
+      "loss": 16.3014,
+      "step": 76640
+    },
+    {
+      "epoch": 0.1135575846275086,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048119278243793733,
+      "loss": 16.3813,
+      "step": 76660
+    },
+    {
+      "epoch": 0.11358721092143699,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004811878435029298,
+      "loss": 16.3528,
+      "step": 76680
+    },
+    {
+      "epoch": 0.11361683721536538,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004811829045679222,
+      "loss": 16.308,
+      "step": 76700
+    },
+    {
+      "epoch": 0.11364646350929376,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.00048117796563291467,
+      "loss": 16.3711,
+      "step": 76720
+    },
+    {
+      "epoch": 0.11367608980322215,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048117302669790706,
+      "loss": 16.2906,
+      "step": 76740
+    },
+    {
+      "epoch": 0.11370571609715055,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004811680877628995,
+      "loss": 16.3626,
+      "step": 76760
+    },
+    {
+      "epoch": 0.11373534239107894,
+      "grad_norm": 5.75,
+      "learning_rate": 0.00048116314882789196,
+      "loss": 16.2853,
+      "step": 76780
+    },
+    {
+      "epoch": 0.11376496868500732,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004811582098928844,
+      "loss": 16.3697,
+      "step": 76800
+    },
+    {
+      "epoch": 0.11379459497893571,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004811532709578768,
+      "loss": 16.3249,
+      "step": 76820
+    },
+    {
+      "epoch": 0.1138242212728641,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.00048114833202286925,
+      "loss": 16.3022,
+      "step": 76840
+    },
+    {
+      "epoch": 0.11385384756679248,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004811433930878617,
+      "loss": 16.3372,
+      "step": 76860
+    },
+    {
+      "epoch": 0.11388347386072087,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048113845415285414,
+      "loss": 16.3621,
+      "step": 76880
+    },
+    {
+      "epoch": 0.11391310015464925,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048113351521784654,
+      "loss": 16.3544,
+      "step": 76900
+    },
+    {
+      "epoch": 0.11394272644857764,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048112857628283904,
+      "loss": 16.3275,
+      "step": 76920
+    },
+    {
+      "epoch": 0.11397235274250603,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048112363734783143,
+      "loss": 16.359,
+      "step": 76940
+    },
+    {
+      "epoch": 0.11400197903643441,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048111869841282383,
+      "loss": 16.3528,
+      "step": 76960
+    },
+    {
+      "epoch": 0.1140316053303628,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004811137594778163,
+      "loss": 16.3342,
+      "step": 76980
+    },
+    {
+      "epoch": 0.11406123162429119,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004811088205428087,
+      "loss": 16.3042,
+      "step": 77000
     }
   ],
   "logging_steps": 20,
@@ -26627,7 +26977,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.
+  "total_flos": 5.661389989152712e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null