Training in progress, step 17000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 304481530
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e1f6084c2fd12874836176a807971d304a89f7ecfc63e2081a9bd54f224b13b
|
| 3 |
size 304481530
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402029570
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:729c0d767d06adf4295f1acf80d3c9a43aee84e3de6cc9a899725bd2d9ba998b
|
| 3 |
size 402029570
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6775411b7c96ce112db0ff86dbc4c7f4f5876ba69512e78981d49611b5ed959e
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c2e00f40f2b965358ee58725a6039af41eeb8a8f4527ae152ec5dad618307fd
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37ee15f1c9ceef9e456d1af53da3ed0fd0ec244051b974379f15c285ed42f8b7
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e79d7f1dfea25dc4809dc0e5c220d70f3b690693b546131b59ad7f9ed9b129c
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84d957adbd57639a95ced1440a685d29db26c75001a9b3061d2f7af9b9a721b1
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5608,6 +5608,356 @@
|
|
| 5608 |
"learning_rate": 0.0004949612511467957,
|
| 5609 |
"loss": 20.3333,
|
| 5610 |
"step": 16000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5611 |
}
|
| 5612 |
],
|
| 5613 |
"logging_steps": 20,
|
|
@@ -5627,7 +5977,7 @@
|
|
| 5627 |
"attributes": {}
|
| 5628 |
}
|
| 5629 |
},
|
| 5630 |
-
"total_flos": 1.
|
| 5631 |
"train_batch_size": 48,
|
| 5632 |
"trial_name": null,
|
| 5633 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.033149548337403904,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 17000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5608 |
"learning_rate": 0.0004949612511467957,
|
| 5609 |
"loss": 20.3333,
|
| 5610 |
"step": 16000
|
| 5611 |
+
},
|
| 5612 |
+
{
|
| 5613 |
+
"epoch": 0.03123857437442415,
|
| 5614 |
+
"grad_norm": 11.4375,
|
| 5615 |
+
"learning_rate": 0.0004949547491158863,
|
| 5616 |
+
"loss": 20.2811,
|
| 5617 |
+
"step": 16020
|
| 5618 |
+
},
|
| 5619 |
+
{
|
| 5620 |
+
"epoch": 0.03127757384305639,
|
| 5621 |
+
"grad_norm": 11.1875,
|
| 5622 |
+
"learning_rate": 0.000494948247084977,
|
| 5623 |
+
"loss": 20.367,
|
| 5624 |
+
"step": 16040
|
| 5625 |
+
},
|
| 5626 |
+
{
|
| 5627 |
+
"epoch": 0.03131657331168863,
|
| 5628 |
+
"grad_norm": 11.75,
|
| 5629 |
+
"learning_rate": 0.0004949417450540676,
|
| 5630 |
+
"loss": 20.3134,
|
| 5631 |
+
"step": 16060
|
| 5632 |
+
},
|
| 5633 |
+
{
|
| 5634 |
+
"epoch": 0.03135557278032087,
|
| 5635 |
+
"grad_norm": 10.25,
|
| 5636 |
+
"learning_rate": 0.0004949352430231583,
|
| 5637 |
+
"loss": 20.3922,
|
| 5638 |
+
"step": 16080
|
| 5639 |
+
},
|
| 5640 |
+
{
|
| 5641 |
+
"epoch": 0.03139457224895311,
|
| 5642 |
+
"grad_norm": 10.375,
|
| 5643 |
+
"learning_rate": 0.000494928740992249,
|
| 5644 |
+
"loss": 20.3097,
|
| 5645 |
+
"step": 16100
|
| 5646 |
+
},
|
| 5647 |
+
{
|
| 5648 |
+
"epoch": 0.031433571717585346,
|
| 5649 |
+
"grad_norm": 11.375,
|
| 5650 |
+
"learning_rate": 0.0004949222389613396,
|
| 5651 |
+
"loss": 20.3737,
|
| 5652 |
+
"step": 16120
|
| 5653 |
+
},
|
| 5654 |
+
{
|
| 5655 |
+
"epoch": 0.03147257118621759,
|
| 5656 |
+
"grad_norm": 10.1875,
|
| 5657 |
+
"learning_rate": 0.0004949157369304303,
|
| 5658 |
+
"loss": 20.3886,
|
| 5659 |
+
"step": 16140
|
| 5660 |
+
},
|
| 5661 |
+
{
|
| 5662 |
+
"epoch": 0.03151157065484983,
|
| 5663 |
+
"grad_norm": 11.1875,
|
| 5664 |
+
"learning_rate": 0.0004949092348995209,
|
| 5665 |
+
"loss": 20.2403,
|
| 5666 |
+
"step": 16160
|
| 5667 |
+
},
|
| 5668 |
+
{
|
| 5669 |
+
"epoch": 0.031550570123482065,
|
| 5670 |
+
"grad_norm": 11.625,
|
| 5671 |
+
"learning_rate": 0.0004949027328686116,
|
| 5672 |
+
"loss": 20.3402,
|
| 5673 |
+
"step": 16180
|
| 5674 |
+
},
|
| 5675 |
+
{
|
| 5676 |
+
"epoch": 0.03158956959211431,
|
| 5677 |
+
"grad_norm": 14.6875,
|
| 5678 |
+
"learning_rate": 0.0004948962308377022,
|
| 5679 |
+
"loss": 20.3529,
|
| 5680 |
+
"step": 16200
|
| 5681 |
+
},
|
| 5682 |
+
{
|
| 5683 |
+
"epoch": 0.03162856906074655,
|
| 5684 |
+
"grad_norm": 11.5,
|
| 5685 |
+
"learning_rate": 0.0004948897288067928,
|
| 5686 |
+
"loss": 20.2767,
|
| 5687 |
+
"step": 16220
|
| 5688 |
+
},
|
| 5689 |
+
{
|
| 5690 |
+
"epoch": 0.031667568529378784,
|
| 5691 |
+
"grad_norm": 9.6875,
|
| 5692 |
+
"learning_rate": 0.0004948832267758834,
|
| 5693 |
+
"loss": 20.2271,
|
| 5694 |
+
"step": 16240
|
| 5695 |
+
},
|
| 5696 |
+
{
|
| 5697 |
+
"epoch": 0.031706567998011026,
|
| 5698 |
+
"grad_norm": 11.25,
|
| 5699 |
+
"learning_rate": 0.0004948767247449741,
|
| 5700 |
+
"loss": 20.3672,
|
| 5701 |
+
"step": 16260
|
| 5702 |
+
},
|
| 5703 |
+
{
|
| 5704 |
+
"epoch": 0.03174556746664327,
|
| 5705 |
+
"grad_norm": 11.8125,
|
| 5706 |
+
"learning_rate": 0.0004948702227140648,
|
| 5707 |
+
"loss": 20.3693,
|
| 5708 |
+
"step": 16280
|
| 5709 |
+
},
|
| 5710 |
+
{
|
| 5711 |
+
"epoch": 0.03178456693527551,
|
| 5712 |
+
"grad_norm": 13.6875,
|
| 5713 |
+
"learning_rate": 0.0004948637206831554,
|
| 5714 |
+
"loss": 20.2767,
|
| 5715 |
+
"step": 16300
|
| 5716 |
+
},
|
| 5717 |
+
{
|
| 5718 |
+
"epoch": 0.031823566403907745,
|
| 5719 |
+
"grad_norm": 11.25,
|
| 5720 |
+
"learning_rate": 0.0004948572186522461,
|
| 5721 |
+
"loss": 20.2559,
|
| 5722 |
+
"step": 16320
|
| 5723 |
+
},
|
| 5724 |
+
{
|
| 5725 |
+
"epoch": 0.03186256587253999,
|
| 5726 |
+
"grad_norm": 12.1875,
|
| 5727 |
+
"learning_rate": 0.0004948507166213367,
|
| 5728 |
+
"loss": 20.2962,
|
| 5729 |
+
"step": 16340
|
| 5730 |
+
},
|
| 5731 |
+
{
|
| 5732 |
+
"epoch": 0.03190156534117223,
|
| 5733 |
+
"grad_norm": 11.4375,
|
| 5734 |
+
"learning_rate": 0.0004948442145904274,
|
| 5735 |
+
"loss": 20.2648,
|
| 5736 |
+
"step": 16360
|
| 5737 |
+
},
|
| 5738 |
+
{
|
| 5739 |
+
"epoch": 0.031940564809804464,
|
| 5740 |
+
"grad_norm": 10.9375,
|
| 5741 |
+
"learning_rate": 0.0004948377125595179,
|
| 5742 |
+
"loss": 20.2703,
|
| 5743 |
+
"step": 16380
|
| 5744 |
+
},
|
| 5745 |
+
{
|
| 5746 |
+
"epoch": 0.031979564278436706,
|
| 5747 |
+
"grad_norm": 12.1875,
|
| 5748 |
+
"learning_rate": 0.0004948312105286086,
|
| 5749 |
+
"loss": 20.3281,
|
| 5750 |
+
"step": 16400
|
| 5751 |
+
},
|
| 5752 |
+
{
|
| 5753 |
+
"epoch": 0.03201856374706895,
|
| 5754 |
+
"grad_norm": 12.1875,
|
| 5755 |
+
"learning_rate": 0.0004948247084976992,
|
| 5756 |
+
"loss": 20.2317,
|
| 5757 |
+
"step": 16420
|
| 5758 |
+
},
|
| 5759 |
+
{
|
| 5760 |
+
"epoch": 0.03205756321570118,
|
| 5761 |
+
"grad_norm": 11.375,
|
| 5762 |
+
"learning_rate": 0.0004948182064667899,
|
| 5763 |
+
"loss": 20.2883,
|
| 5764 |
+
"step": 16440
|
| 5765 |
+
},
|
| 5766 |
+
{
|
| 5767 |
+
"epoch": 0.032096562684333425,
|
| 5768 |
+
"grad_norm": 12.8125,
|
| 5769 |
+
"learning_rate": 0.0004948117044358806,
|
| 5770 |
+
"loss": 20.2294,
|
| 5771 |
+
"step": 16460
|
| 5772 |
+
},
|
| 5773 |
+
{
|
| 5774 |
+
"epoch": 0.03213556215296567,
|
| 5775 |
+
"grad_norm": 12.5625,
|
| 5776 |
+
"learning_rate": 0.0004948052024049712,
|
| 5777 |
+
"loss": 20.1226,
|
| 5778 |
+
"step": 16480
|
| 5779 |
+
},
|
| 5780 |
+
{
|
| 5781 |
+
"epoch": 0.0321745616215979,
|
| 5782 |
+
"grad_norm": 11.375,
|
| 5783 |
+
"learning_rate": 0.0004947987003740619,
|
| 5784 |
+
"loss": 20.2422,
|
| 5785 |
+
"step": 16500
|
| 5786 |
+
},
|
| 5787 |
+
{
|
| 5788 |
+
"epoch": 0.032213561090230144,
|
| 5789 |
+
"grad_norm": 11.375,
|
| 5790 |
+
"learning_rate": 0.0004947921983431524,
|
| 5791 |
+
"loss": 20.2142,
|
| 5792 |
+
"step": 16520
|
| 5793 |
+
},
|
| 5794 |
+
{
|
| 5795 |
+
"epoch": 0.032252560558862386,
|
| 5796 |
+
"grad_norm": 12.5,
|
| 5797 |
+
"learning_rate": 0.0004947856963122431,
|
| 5798 |
+
"loss": 20.2658,
|
| 5799 |
+
"step": 16540
|
| 5800 |
+
},
|
| 5801 |
+
{
|
| 5802 |
+
"epoch": 0.03229156002749463,
|
| 5803 |
+
"grad_norm": 9.8125,
|
| 5804 |
+
"learning_rate": 0.0004947791942813337,
|
| 5805 |
+
"loss": 20.1552,
|
| 5806 |
+
"step": 16560
|
| 5807 |
+
},
|
| 5808 |
+
{
|
| 5809 |
+
"epoch": 0.032330559496126864,
|
| 5810 |
+
"grad_norm": 10.0625,
|
| 5811 |
+
"learning_rate": 0.0004947726922504244,
|
| 5812 |
+
"loss": 20.1369,
|
| 5813 |
+
"step": 16580
|
| 5814 |
+
},
|
| 5815 |
+
{
|
| 5816 |
+
"epoch": 0.032369558964759106,
|
| 5817 |
+
"grad_norm": 11.375,
|
| 5818 |
+
"learning_rate": 0.000494766190219515,
|
| 5819 |
+
"loss": 20.1965,
|
| 5820 |
+
"step": 16600
|
| 5821 |
+
},
|
| 5822 |
+
{
|
| 5823 |
+
"epoch": 0.03240855843339135,
|
| 5824 |
+
"grad_norm": 10.875,
|
| 5825 |
+
"learning_rate": 0.0004947596881886057,
|
| 5826 |
+
"loss": 20.2377,
|
| 5827 |
+
"step": 16620
|
| 5828 |
+
},
|
| 5829 |
+
{
|
| 5830 |
+
"epoch": 0.03244755790202358,
|
| 5831 |
+
"grad_norm": 10.375,
|
| 5832 |
+
"learning_rate": 0.0004947531861576964,
|
| 5833 |
+
"loss": 20.2204,
|
| 5834 |
+
"step": 16640
|
| 5835 |
+
},
|
| 5836 |
+
{
|
| 5837 |
+
"epoch": 0.032486557370655825,
|
| 5838 |
+
"grad_norm": 11.375,
|
| 5839 |
+
"learning_rate": 0.000494746684126787,
|
| 5840 |
+
"loss": 20.1081,
|
| 5841 |
+
"step": 16660
|
| 5842 |
+
},
|
| 5843 |
+
{
|
| 5844 |
+
"epoch": 0.03252555683928807,
|
| 5845 |
+
"grad_norm": 11.1875,
|
| 5846 |
+
"learning_rate": 0.0004947401820958777,
|
| 5847 |
+
"loss": 20.3024,
|
| 5848 |
+
"step": 16680
|
| 5849 |
+
},
|
| 5850 |
+
{
|
| 5851 |
+
"epoch": 0.0325645563079203,
|
| 5852 |
+
"grad_norm": 11.3125,
|
| 5853 |
+
"learning_rate": 0.0004947336800649683,
|
| 5854 |
+
"loss": 20.1351,
|
| 5855 |
+
"step": 16700
|
| 5856 |
+
},
|
| 5857 |
+
{
|
| 5858 |
+
"epoch": 0.032603555776552544,
|
| 5859 |
+
"grad_norm": 11.1875,
|
| 5860 |
+
"learning_rate": 0.0004947271780340589,
|
| 5861 |
+
"loss": 20.1989,
|
| 5862 |
+
"step": 16720
|
| 5863 |
+
},
|
| 5864 |
+
{
|
| 5865 |
+
"epoch": 0.032642555245184786,
|
| 5866 |
+
"grad_norm": 9.6875,
|
| 5867 |
+
"learning_rate": 0.0004947206760031495,
|
| 5868 |
+
"loss": 20.1502,
|
| 5869 |
+
"step": 16740
|
| 5870 |
+
},
|
| 5871 |
+
{
|
| 5872 |
+
"epoch": 0.03268155471381702,
|
| 5873 |
+
"grad_norm": 11.125,
|
| 5874 |
+
"learning_rate": 0.0004947141739722402,
|
| 5875 |
+
"loss": 20.0948,
|
| 5876 |
+
"step": 16760
|
| 5877 |
+
},
|
| 5878 |
+
{
|
| 5879 |
+
"epoch": 0.03272055418244926,
|
| 5880 |
+
"grad_norm": 11.4375,
|
| 5881 |
+
"learning_rate": 0.0004947076719413309,
|
| 5882 |
+
"loss": 20.1084,
|
| 5883 |
+
"step": 16780
|
| 5884 |
+
},
|
| 5885 |
+
{
|
| 5886 |
+
"epoch": 0.032759553651081505,
|
| 5887 |
+
"grad_norm": 10.3125,
|
| 5888 |
+
"learning_rate": 0.0004947011699104215,
|
| 5889 |
+
"loss": 20.1207,
|
| 5890 |
+
"step": 16800
|
| 5891 |
+
},
|
| 5892 |
+
{
|
| 5893 |
+
"epoch": 0.03279855311971375,
|
| 5894 |
+
"grad_norm": 11.1875,
|
| 5895 |
+
"learning_rate": 0.0004946946678795122,
|
| 5896 |
+
"loss": 20.0984,
|
| 5897 |
+
"step": 16820
|
| 5898 |
+
},
|
| 5899 |
+
{
|
| 5900 |
+
"epoch": 0.03283755258834598,
|
| 5901 |
+
"grad_norm": 10.8125,
|
| 5902 |
+
"learning_rate": 0.0004946881658486028,
|
| 5903 |
+
"loss": 20.1778,
|
| 5904 |
+
"step": 16840
|
| 5905 |
+
},
|
| 5906 |
+
{
|
| 5907 |
+
"epoch": 0.032876552056978224,
|
| 5908 |
+
"grad_norm": 10.8125,
|
| 5909 |
+
"learning_rate": 0.0004946816638176935,
|
| 5910 |
+
"loss": 20.2415,
|
| 5911 |
+
"step": 16860
|
| 5912 |
+
},
|
| 5913 |
+
{
|
| 5914 |
+
"epoch": 0.032915551525610466,
|
| 5915 |
+
"grad_norm": 10.25,
|
| 5916 |
+
"learning_rate": 0.0004946751617867841,
|
| 5917 |
+
"loss": 20.1135,
|
| 5918 |
+
"step": 16880
|
| 5919 |
+
},
|
| 5920 |
+
{
|
| 5921 |
+
"epoch": 0.0329545509942427,
|
| 5922 |
+
"grad_norm": 10.875,
|
| 5923 |
+
"learning_rate": 0.0004946686597558748,
|
| 5924 |
+
"loss": 20.1361,
|
| 5925 |
+
"step": 16900
|
| 5926 |
+
},
|
| 5927 |
+
{
|
| 5928 |
+
"epoch": 0.03299355046287494,
|
| 5929 |
+
"grad_norm": 11.4375,
|
| 5930 |
+
"learning_rate": 0.0004946621577249655,
|
| 5931 |
+
"loss": 20.0907,
|
| 5932 |
+
"step": 16920
|
| 5933 |
+
},
|
| 5934 |
+
{
|
| 5935 |
+
"epoch": 0.033032549931507185,
|
| 5936 |
+
"grad_norm": 14.5,
|
| 5937 |
+
"learning_rate": 0.0004946556556940561,
|
| 5938 |
+
"loss": 20.1267,
|
| 5939 |
+
"step": 16940
|
| 5940 |
+
},
|
| 5941 |
+
{
|
| 5942 |
+
"epoch": 0.03307154940013942,
|
| 5943 |
+
"grad_norm": 12.875,
|
| 5944 |
+
"learning_rate": 0.0004946491536631467,
|
| 5945 |
+
"loss": 20.0818,
|
| 5946 |
+
"step": 16960
|
| 5947 |
+
},
|
| 5948 |
+
{
|
| 5949 |
+
"epoch": 0.03311054886877166,
|
| 5950 |
+
"grad_norm": 10.875,
|
| 5951 |
+
"learning_rate": 0.0004946426516322373,
|
| 5952 |
+
"loss": 20.1085,
|
| 5953 |
+
"step": 16980
|
| 5954 |
+
},
|
| 5955 |
+
{
|
| 5956 |
+
"epoch": 0.033149548337403904,
|
| 5957 |
+
"grad_norm": 10.5625,
|
| 5958 |
+
"learning_rate": 0.000494636149601328,
|
| 5959 |
+
"loss": 20.0712,
|
| 5960 |
+
"step": 17000
|
| 5961 |
}
|
| 5962 |
],
|
| 5963 |
"logging_steps": 20,
|
|
|
|
| 5977 |
"attributes": {}
|
| 5978 |
}
|
| 5979 |
},
|
| 5980 |
+
"total_flos": 1.2497927616331776e+19,
|
| 5981 |
"train_batch_size": 48,
|
| 5982 |
"trial_name": null,
|
| 5983 |
"trial_params": null
|