Training in progress, step 80000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 304481530
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25eece902c4fc10f4ee2062692a9aedbe51bd7b7d97a5b7d579b674f96892276
|
| 3 |
size 304481530
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402029570
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d7f1d706e6cfbd7062b526c5f96351aba28490563e89cc3572dbd70ff071d52
|
| 3 |
size 402029570
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3762fb83fd702043dec9c363ac412c392bf99ebaba36635b7ce08abde68594fe
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62c6e6a2cde44218a43149d5222369dc44b7c914b2ad856e2e09dfb4dca020fb
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb4157f68b08406d6bc17d2638ab784f508ffb332e537043a8486d779d68898e
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70fc5f5dac53b26b2e075af1f8abf3943ab8de6a2ae6129d92b62d3aa9705082
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d73d6a55f40d828827c6493d8d4e36859284046429b1cc4d61ff3be96f72f5ef
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -27658,6 +27658,356 @@
|
|
| 27658 |
"learning_rate": 0.00048061492704205204,
|
| 27659 |
"loss": 16.3266,
|
| 27660 |
"step": 79000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27661 |
}
|
| 27662 |
],
|
| 27663 |
"logging_steps": 20,
|
|
@@ -27677,7 +28027,7 @@
|
|
| 27677 |
"attributes": {}
|
| 27678 |
}
|
| 27679 |
},
|
| 27680 |
-
"total_flos": 5.
|
| 27681 |
"train_batch_size": 48,
|
| 27682 |
"trial_name": null,
|
| 27683 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.1185051757135493,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 80000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 27658 |
"learning_rate": 0.00048061492704205204,
|
| 27659 |
"loss": 16.3266,
|
| 27660 |
"step": 79000
|
| 27661 |
+
},
|
| 27662 |
+
{
|
| 27663 |
+
"epoch": 0.11705348731105832,
|
| 27664 |
+
"grad_norm": 6.46875,
|
| 27665 |
+
"learning_rate": 0.00048060998810704454,
|
| 27666 |
+
"loss": 16.2829,
|
| 27667 |
+
"step": 79020
|
| 27668 |
+
},
|
| 27669 |
+
{
|
| 27670 |
+
"epoch": 0.1170831136049867,
|
| 27671 |
+
"grad_norm": 6.3125,
|
| 27672 |
+
"learning_rate": 0.00048060504917203694,
|
| 27673 |
+
"loss": 16.2504,
|
| 27674 |
+
"step": 79040
|
| 27675 |
+
},
|
| 27676 |
+
{
|
| 27677 |
+
"epoch": 0.11711273989891509,
|
| 27678 |
+
"grad_norm": 6.59375,
|
| 27679 |
+
"learning_rate": 0.0004806001102370294,
|
| 27680 |
+
"loss": 16.2487,
|
| 27681 |
+
"step": 79060
|
| 27682 |
+
},
|
| 27683 |
+
{
|
| 27684 |
+
"epoch": 0.11714236619284348,
|
| 27685 |
+
"grad_norm": 6.375,
|
| 27686 |
+
"learning_rate": 0.0004805951713020218,
|
| 27687 |
+
"loss": 16.3028,
|
| 27688 |
+
"step": 79080
|
| 27689 |
+
},
|
| 27690 |
+
{
|
| 27691 |
+
"epoch": 0.11717199248677186,
|
| 27692 |
+
"grad_norm": 7.59375,
|
| 27693 |
+
"learning_rate": 0.0004805902323670143,
|
| 27694 |
+
"loss": 16.2891,
|
| 27695 |
+
"step": 79100
|
| 27696 |
+
},
|
| 27697 |
+
{
|
| 27698 |
+
"epoch": 0.11720161878070025,
|
| 27699 |
+
"grad_norm": 6.15625,
|
| 27700 |
+
"learning_rate": 0.0004805852934320067,
|
| 27701 |
+
"loss": 16.3023,
|
| 27702 |
+
"step": 79120
|
| 27703 |
+
},
|
| 27704 |
+
{
|
| 27705 |
+
"epoch": 0.11723124507462863,
|
| 27706 |
+
"grad_norm": 8.875,
|
| 27707 |
+
"learning_rate": 0.0004805803544969991,
|
| 27708 |
+
"loss": 16.3369,
|
| 27709 |
+
"step": 79140
|
| 27710 |
+
},
|
| 27711 |
+
{
|
| 27712 |
+
"epoch": 0.11726087136855702,
|
| 27713 |
+
"grad_norm": 6.875,
|
| 27714 |
+
"learning_rate": 0.0004805754155619915,
|
| 27715 |
+
"loss": 16.2611,
|
| 27716 |
+
"step": 79160
|
| 27717 |
+
},
|
| 27718 |
+
{
|
| 27719 |
+
"epoch": 0.11729049766248541,
|
| 27720 |
+
"grad_norm": 6.84375,
|
| 27721 |
+
"learning_rate": 0.000480570476626984,
|
| 27722 |
+
"loss": 16.3287,
|
| 27723 |
+
"step": 79180
|
| 27724 |
+
},
|
| 27725 |
+
{
|
| 27726 |
+
"epoch": 0.1173201239564138,
|
| 27727 |
+
"grad_norm": 6.8125,
|
| 27728 |
+
"learning_rate": 0.0004805655376919764,
|
| 27729 |
+
"loss": 16.322,
|
| 27730 |
+
"step": 79200
|
| 27731 |
+
},
|
| 27732 |
+
{
|
| 27733 |
+
"epoch": 0.11734975025034218,
|
| 27734 |
+
"grad_norm": 6.34375,
|
| 27735 |
+
"learning_rate": 0.00048056059875696886,
|
| 27736 |
+
"loss": 16.2184,
|
| 27737 |
+
"step": 79220
|
| 27738 |
+
},
|
| 27739 |
+
{
|
| 27740 |
+
"epoch": 0.11737937654427057,
|
| 27741 |
+
"grad_norm": 7.3125,
|
| 27742 |
+
"learning_rate": 0.00048055565982196125,
|
| 27743 |
+
"loss": 16.3263,
|
| 27744 |
+
"step": 79240
|
| 27745 |
+
},
|
| 27746 |
+
{
|
| 27747 |
+
"epoch": 0.11740900283819895,
|
| 27748 |
+
"grad_norm": 6.59375,
|
| 27749 |
+
"learning_rate": 0.00048055072088695375,
|
| 27750 |
+
"loss": 16.2757,
|
| 27751 |
+
"step": 79260
|
| 27752 |
+
},
|
| 27753 |
+
{
|
| 27754 |
+
"epoch": 0.11743862913212734,
|
| 27755 |
+
"grad_norm": 6.71875,
|
| 27756 |
+
"learning_rate": 0.00048054578195194615,
|
| 27757 |
+
"loss": 16.2582,
|
| 27758 |
+
"step": 79280
|
| 27759 |
+
},
|
| 27760 |
+
{
|
| 27761 |
+
"epoch": 0.11746825542605574,
|
| 27762 |
+
"grad_norm": 7.0,
|
| 27763 |
+
"learning_rate": 0.00048054084301693854,
|
| 27764 |
+
"loss": 16.2983,
|
| 27765 |
+
"step": 79300
|
| 27766 |
+
},
|
| 27767 |
+
{
|
| 27768 |
+
"epoch": 0.11749788171998413,
|
| 27769 |
+
"grad_norm": 6.3125,
|
| 27770 |
+
"learning_rate": 0.00048053590408193104,
|
| 27771 |
+
"loss": 16.314,
|
| 27772 |
+
"step": 79320
|
| 27773 |
+
},
|
| 27774 |
+
{
|
| 27775 |
+
"epoch": 0.11752750801391251,
|
| 27776 |
+
"grad_norm": 6.375,
|
| 27777 |
+
"learning_rate": 0.00048053096514692344,
|
| 27778 |
+
"loss": 16.2981,
|
| 27779 |
+
"step": 79340
|
| 27780 |
+
},
|
| 27781 |
+
{
|
| 27782 |
+
"epoch": 0.1175571343078409,
|
| 27783 |
+
"grad_norm": 7.15625,
|
| 27784 |
+
"learning_rate": 0.0004805260262119159,
|
| 27785 |
+
"loss": 16.2972,
|
| 27786 |
+
"step": 79360
|
| 27787 |
+
},
|
| 27788 |
+
{
|
| 27789 |
+
"epoch": 0.11758676060176929,
|
| 27790 |
+
"grad_norm": 7.5,
|
| 27791 |
+
"learning_rate": 0.0004805210872769083,
|
| 27792 |
+
"loss": 16.2882,
|
| 27793 |
+
"step": 79380
|
| 27794 |
+
},
|
| 27795 |
+
{
|
| 27796 |
+
"epoch": 0.11761638689569767,
|
| 27797 |
+
"grad_norm": 6.8125,
|
| 27798 |
+
"learning_rate": 0.0004805161483419008,
|
| 27799 |
+
"loss": 16.2462,
|
| 27800 |
+
"step": 79400
|
| 27801 |
+
},
|
| 27802 |
+
{
|
| 27803 |
+
"epoch": 0.11764601318962606,
|
| 27804 |
+
"grad_norm": 6.78125,
|
| 27805 |
+
"learning_rate": 0.0004805112094068932,
|
| 27806 |
+
"loss": 16.2808,
|
| 27807 |
+
"step": 79420
|
| 27808 |
+
},
|
| 27809 |
+
{
|
| 27810 |
+
"epoch": 0.11767563948355445,
|
| 27811 |
+
"grad_norm": 6.09375,
|
| 27812 |
+
"learning_rate": 0.0004805062704718856,
|
| 27813 |
+
"loss": 16.265,
|
| 27814 |
+
"step": 79440
|
| 27815 |
+
},
|
| 27816 |
+
{
|
| 27817 |
+
"epoch": 0.11770526577748283,
|
| 27818 |
+
"grad_norm": 6.78125,
|
| 27819 |
+
"learning_rate": 0.000480501331536878,
|
| 27820 |
+
"loss": 16.2757,
|
| 27821 |
+
"step": 79460
|
| 27822 |
+
},
|
| 27823 |
+
{
|
| 27824 |
+
"epoch": 0.11773489207141122,
|
| 27825 |
+
"grad_norm": 6.09375,
|
| 27826 |
+
"learning_rate": 0.0004804963926018705,
|
| 27827 |
+
"loss": 16.3099,
|
| 27828 |
+
"step": 79480
|
| 27829 |
+
},
|
| 27830 |
+
{
|
| 27831 |
+
"epoch": 0.1177645183653396,
|
| 27832 |
+
"grad_norm": 6.65625,
|
| 27833 |
+
"learning_rate": 0.0004804914536668629,
|
| 27834 |
+
"loss": 16.2713,
|
| 27835 |
+
"step": 79500
|
| 27836 |
+
},
|
| 27837 |
+
{
|
| 27838 |
+
"epoch": 0.11779414465926799,
|
| 27839 |
+
"grad_norm": 6.3125,
|
| 27840 |
+
"learning_rate": 0.00048048651473185536,
|
| 27841 |
+
"loss": 16.2622,
|
| 27842 |
+
"step": 79520
|
| 27843 |
+
},
|
| 27844 |
+
{
|
| 27845 |
+
"epoch": 0.11782377095319638,
|
| 27846 |
+
"grad_norm": 5.875,
|
| 27847 |
+
"learning_rate": 0.00048048157579684775,
|
| 27848 |
+
"loss": 16.2833,
|
| 27849 |
+
"step": 79540
|
| 27850 |
+
},
|
| 27851 |
+
{
|
| 27852 |
+
"epoch": 0.11785339724712476,
|
| 27853 |
+
"grad_norm": 6.1875,
|
| 27854 |
+
"learning_rate": 0.00048047663686184025,
|
| 27855 |
+
"loss": 16.2429,
|
| 27856 |
+
"step": 79560
|
| 27857 |
+
},
|
| 27858 |
+
{
|
| 27859 |
+
"epoch": 0.11788302354105315,
|
| 27860 |
+
"grad_norm": 6.6875,
|
| 27861 |
+
"learning_rate": 0.00048047169792683265,
|
| 27862 |
+
"loss": 16.3167,
|
| 27863 |
+
"step": 79580
|
| 27864 |
+
},
|
| 27865 |
+
{
|
| 27866 |
+
"epoch": 0.11791264983498154,
|
| 27867 |
+
"grad_norm": 6.59375,
|
| 27868 |
+
"learning_rate": 0.0004804667589918251,
|
| 27869 |
+
"loss": 16.2256,
|
| 27870 |
+
"step": 79600
|
| 27871 |
+
},
|
| 27872 |
+
{
|
| 27873 |
+
"epoch": 0.11794227612890994,
|
| 27874 |
+
"grad_norm": 6.71875,
|
| 27875 |
+
"learning_rate": 0.00048046182005681754,
|
| 27876 |
+
"loss": 16.2095,
|
| 27877 |
+
"step": 79620
|
| 27878 |
+
},
|
| 27879 |
+
{
|
| 27880 |
+
"epoch": 0.11797190242283832,
|
| 27881 |
+
"grad_norm": 7.5,
|
| 27882 |
+
"learning_rate": 0.00048045688112180994,
|
| 27883 |
+
"loss": 16.2748,
|
| 27884 |
+
"step": 79640
|
| 27885 |
+
},
|
| 27886 |
+
{
|
| 27887 |
+
"epoch": 0.11800152871676671,
|
| 27888 |
+
"grad_norm": 6.5,
|
| 27889 |
+
"learning_rate": 0.0004804519421868024,
|
| 27890 |
+
"loss": 16.2733,
|
| 27891 |
+
"step": 79660
|
| 27892 |
+
},
|
| 27893 |
+
{
|
| 27894 |
+
"epoch": 0.1180311550106951,
|
| 27895 |
+
"grad_norm": 6.46875,
|
| 27896 |
+
"learning_rate": 0.0004804470032517948,
|
| 27897 |
+
"loss": 16.2384,
|
| 27898 |
+
"step": 79680
|
| 27899 |
+
},
|
| 27900 |
+
{
|
| 27901 |
+
"epoch": 0.11806078130462348,
|
| 27902 |
+
"grad_norm": 6.03125,
|
| 27903 |
+
"learning_rate": 0.0004804420643167873,
|
| 27904 |
+
"loss": 16.2747,
|
| 27905 |
+
"step": 79700
|
| 27906 |
+
},
|
| 27907 |
+
{
|
| 27908 |
+
"epoch": 0.11809040759855187,
|
| 27909 |
+
"grad_norm": 6.59375,
|
| 27910 |
+
"learning_rate": 0.0004804371253817797,
|
| 27911 |
+
"loss": 16.2826,
|
| 27912 |
+
"step": 79720
|
| 27913 |
+
},
|
| 27914 |
+
{
|
| 27915 |
+
"epoch": 0.11812003389248026,
|
| 27916 |
+
"grad_norm": 6.875,
|
| 27917 |
+
"learning_rate": 0.0004804321864467721,
|
| 27918 |
+
"loss": 16.2646,
|
| 27919 |
+
"step": 79740
|
| 27920 |
+
},
|
| 27921 |
+
{
|
| 27922 |
+
"epoch": 0.11814966018640864,
|
| 27923 |
+
"grad_norm": 7.625,
|
| 27924 |
+
"learning_rate": 0.0004804272475117645,
|
| 27925 |
+
"loss": 16.2691,
|
| 27926 |
+
"step": 79760
|
| 27927 |
+
},
|
| 27928 |
+
{
|
| 27929 |
+
"epoch": 0.11817928648033703,
|
| 27930 |
+
"grad_norm": 7.0625,
|
| 27931 |
+
"learning_rate": 0.000480422308576757,
|
| 27932 |
+
"loss": 16.2096,
|
| 27933 |
+
"step": 79780
|
| 27934 |
+
},
|
| 27935 |
+
{
|
| 27936 |
+
"epoch": 0.11820891277426541,
|
| 27937 |
+
"grad_norm": 7.09375,
|
| 27938 |
+
"learning_rate": 0.0004804173696417494,
|
| 27939 |
+
"loss": 16.2607,
|
| 27940 |
+
"step": 79800
|
| 27941 |
+
},
|
| 27942 |
+
{
|
| 27943 |
+
"epoch": 0.1182385390681938,
|
| 27944 |
+
"grad_norm": 6.1875,
|
| 27945 |
+
"learning_rate": 0.00048041243070674186,
|
| 27946 |
+
"loss": 16.2072,
|
| 27947 |
+
"step": 79820
|
| 27948 |
+
},
|
| 27949 |
+
{
|
| 27950 |
+
"epoch": 0.11826816536212219,
|
| 27951 |
+
"grad_norm": 5.96875,
|
| 27952 |
+
"learning_rate": 0.00048040749177173425,
|
| 27953 |
+
"loss": 16.2251,
|
| 27954 |
+
"step": 79840
|
| 27955 |
+
},
|
| 27956 |
+
{
|
| 27957 |
+
"epoch": 0.11829779165605057,
|
| 27958 |
+
"grad_norm": 7.0,
|
| 27959 |
+
"learning_rate": 0.00048040255283672675,
|
| 27960 |
+
"loss": 16.2256,
|
| 27961 |
+
"step": 79860
|
| 27962 |
+
},
|
| 27963 |
+
{
|
| 27964 |
+
"epoch": 0.11832741794997896,
|
| 27965 |
+
"grad_norm": 6.84375,
|
| 27966 |
+
"learning_rate": 0.00048039761390171915,
|
| 27967 |
+
"loss": 16.3018,
|
| 27968 |
+
"step": 79880
|
| 27969 |
+
},
|
| 27970 |
+
{
|
| 27971 |
+
"epoch": 0.11835704424390735,
|
| 27972 |
+
"grad_norm": 6.65625,
|
| 27973 |
+
"learning_rate": 0.0004803926749667116,
|
| 27974 |
+
"loss": 16.273,
|
| 27975 |
+
"step": 79900
|
| 27976 |
+
},
|
| 27977 |
+
{
|
| 27978 |
+
"epoch": 0.11838667053783573,
|
| 27979 |
+
"grad_norm": 7.1875,
|
| 27980 |
+
"learning_rate": 0.00048038773603170404,
|
| 27981 |
+
"loss": 16.2277,
|
| 27982 |
+
"step": 79920
|
| 27983 |
+
},
|
| 27984 |
+
{
|
| 27985 |
+
"epoch": 0.11841629683176413,
|
| 27986 |
+
"grad_norm": 6.34375,
|
| 27987 |
+
"learning_rate": 0.0004803827970966965,
|
| 27988 |
+
"loss": 16.2374,
|
| 27989 |
+
"step": 79940
|
| 27990 |
+
},
|
| 27991 |
+
{
|
| 27992 |
+
"epoch": 0.11844592312569252,
|
| 27993 |
+
"grad_norm": 6.25,
|
| 27994 |
+
"learning_rate": 0.0004803778581616889,
|
| 27995 |
+
"loss": 16.2584,
|
| 27996 |
+
"step": 79960
|
| 27997 |
+
},
|
| 27998 |
+
{
|
| 27999 |
+
"epoch": 0.1184755494196209,
|
| 28000 |
+
"grad_norm": 6.5,
|
| 28001 |
+
"learning_rate": 0.0004803729192266813,
|
| 28002 |
+
"loss": 16.2437,
|
| 28003 |
+
"step": 79980
|
| 28004 |
+
},
|
| 28005 |
+
{
|
| 28006 |
+
"epoch": 0.1185051757135493,
|
| 28007 |
+
"grad_norm": 7.375,
|
| 28008 |
+
"learning_rate": 0.0004803679802916738,
|
| 28009 |
+
"loss": 16.2565,
|
| 28010 |
+
"step": 80000
|
| 28011 |
}
|
| 28012 |
],
|
| 28013 |
"logging_steps": 20,
|
|
|
|
| 28027 |
"attributes": {}
|
| 28028 |
}
|
| 28029 |
},
|
| 28030 |
+
"total_flos": 5.881999294671618e+19,
|
| 28031 |
"train_batch_size": 48,
|
| 28032 |
"trial_name": null,
|
| 28033 |
"trial_params": null
|