| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 68.99953929788998, | |
| "global_step": 46782, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.3331797659633281, | |
| "learning_rate": 4.976190476190477e-05, | |
| "loss": 2.685059944085315, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6663595319266562, | |
| "learning_rate": 4.9523809523809525e-05, | |
| "loss": 2.5215273156630253, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9995392978899843, | |
| "learning_rate": 4.928571428571429e-05, | |
| "loss": 2.449192013360758, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.3331797659633282, | |
| "learning_rate": 4.904761904761905e-05, | |
| "loss": 2.3792454542311945, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.6663595319266562, | |
| "learning_rate": 4.880952380952381e-05, | |
| "loss": 2.3559764760785398, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.9995392978899842, | |
| "learning_rate": 4.8571428571428576e-05, | |
| "loss": 2.328374001832135, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.333179765963328, | |
| "learning_rate": 4.8333333333333334e-05, | |
| "loss": 2.2863990986241705, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 2.6663595319266564, | |
| "learning_rate": 4.80952380952381e-05, | |
| "loss": 2.2656879256256914, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.999539297889984, | |
| "learning_rate": 4.785714285714286e-05, | |
| "loss": 2.245354711481955, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 3.333179765963328, | |
| "learning_rate": 4.761904761904762e-05, | |
| "loss": 2.2154734113575083, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 3.6663595319266564, | |
| "learning_rate": 4.738095238095238e-05, | |
| "loss": 2.19904536694552, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 3.999539297889984, | |
| "learning_rate": 4.714285714285714e-05, | |
| "loss": 2.185555112045423, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 4.333179765963328, | |
| "learning_rate": 4.690476190476191e-05, | |
| "loss": 2.162922816993916, | |
| "step": 2938 | |
| }, | |
| { | |
| "epoch": 4.666359531926656, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 2.1419683135716263, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 4.999539297889984, | |
| "learning_rate": 4.642857142857143e-05, | |
| "loss": 2.1468805633814987, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 5.333179765963328, | |
| "learning_rate": 4.6190476190476194e-05, | |
| "loss": 2.114357872346861, | |
| "step": 3616 | |
| }, | |
| { | |
| "epoch": 5.666359531926656, | |
| "learning_rate": 4.595238095238095e-05, | |
| "loss": 2.1005791858234235, | |
| "step": 3842 | |
| }, | |
| { | |
| "epoch": 5.999539297889984, | |
| "learning_rate": 4.5714285714285716e-05, | |
| "loss": 2.0986059408272264, | |
| "step": 4068 | |
| }, | |
| { | |
| "epoch": 6.333179765963328, | |
| "learning_rate": 4.547619047619048e-05, | |
| "loss": 2.063803073579231, | |
| "step": 4294 | |
| }, | |
| { | |
| "epoch": 6.666359531926656, | |
| "learning_rate": 4.523809523809524e-05, | |
| "loss": 2.0596065014864493, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 6.999539297889984, | |
| "learning_rate": 4.5e-05, | |
| "loss": 2.0634367276081997, | |
| "step": 4746 | |
| }, | |
| { | |
| "epoch": 7.333179765963328, | |
| "learning_rate": 4.476190476190477e-05, | |
| "loss": 2.049277921693515, | |
| "step": 4972 | |
| }, | |
| { | |
| "epoch": 7.666359531926656, | |
| "learning_rate": 4.4523809523809525e-05, | |
| "loss": 2.0317085738730642, | |
| "step": 5198 | |
| }, | |
| { | |
| "epoch": 7.999539297889984, | |
| "learning_rate": 4.428571428571428e-05, | |
| "loss": 2.0282083697023645, | |
| "step": 5424 | |
| }, | |
| { | |
| "epoch": 8.333179765963328, | |
| "learning_rate": 4.404761904761905e-05, | |
| "loss": 1.9894960116496128, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 8.666359531926656, | |
| "learning_rate": 4.380952380952381e-05, | |
| "loss": 1.996843321133504, | |
| "step": 5876 | |
| }, | |
| { | |
| "epoch": 8.999539297889985, | |
| "learning_rate": 4.3571428571428576e-05, | |
| "loss": 2.0045918152395603, | |
| "step": 6102 | |
| }, | |
| { | |
| "epoch": 9.333179765963328, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 1.966206238333103, | |
| "step": 6328 | |
| }, | |
| { | |
| "epoch": 9.666359531926656, | |
| "learning_rate": 4.30952380952381e-05, | |
| "loss": 1.9711824940369191, | |
| "step": 6554 | |
| }, | |
| { | |
| "epoch": 9.999539297889985, | |
| "learning_rate": 4.2857142857142856e-05, | |
| "loss": 1.9805989223243916, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 10.333179765963328, | |
| "learning_rate": 4.261904761904762e-05, | |
| "loss": 1.9500477208500415, | |
| "step": 7006 | |
| }, | |
| { | |
| "epoch": 10.666359531926656, | |
| "learning_rate": 4.2380952380952385e-05, | |
| "loss": 1.9578322689090155, | |
| "step": 7232 | |
| }, | |
| { | |
| "epoch": 10.999539297889985, | |
| "learning_rate": 4.214285714285714e-05, | |
| "loss": 1.9484224572645878, | |
| "step": 7458 | |
| }, | |
| { | |
| "epoch": 11.333179765963328, | |
| "learning_rate": 4.190476190476191e-05, | |
| "loss": 1.9283131287161228, | |
| "step": 7684 | |
| }, | |
| { | |
| "epoch": 11.666359531926656, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 1.9156345603740321, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 11.999539297889985, | |
| "learning_rate": 4.1428571428571437e-05, | |
| "loss": 1.926576732534223, | |
| "step": 8136 | |
| }, | |
| { | |
| "epoch": 12.333179765963328, | |
| "learning_rate": 4.119047619047619e-05, | |
| "loss": 1.907513238687431, | |
| "step": 8362 | |
| }, | |
| { | |
| "epoch": 12.666359531926656, | |
| "learning_rate": 4.095238095238095e-05, | |
| "loss": 1.9078028856125553, | |
| "step": 8588 | |
| }, | |
| { | |
| "epoch": 12.999539297889985, | |
| "learning_rate": 4.0714285714285717e-05, | |
| "loss": 1.90263299182453, | |
| "step": 8814 | |
| }, | |
| { | |
| "epoch": 13.333179765963328, | |
| "learning_rate": 4.047619047619048e-05, | |
| "loss": 1.8844813794161366, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 13.666359531926656, | |
| "learning_rate": 4.023809523809524e-05, | |
| "loss": 1.8840125429946764, | |
| "step": 9266 | |
| }, | |
| { | |
| "epoch": 13.999539297889985, | |
| "learning_rate": 4e-05, | |
| "loss": 1.8717386701465708, | |
| "step": 9492 | |
| }, | |
| { | |
| "epoch": 14.333179765963328, | |
| "learning_rate": 3.976190476190476e-05, | |
| "loss": 1.858954910683421, | |
| "step": 9718 | |
| }, | |
| { | |
| "epoch": 14.666359531926656, | |
| "learning_rate": 3.9523809523809526e-05, | |
| "loss": 1.8676287895810288, | |
| "step": 9944 | |
| }, | |
| { | |
| "epoch": 14.999539297889985, | |
| "learning_rate": 3.928571428571429e-05, | |
| "loss": 1.8678895393304065, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 15.333179765963328, | |
| "learning_rate": 3.904761904761905e-05, | |
| "loss": 1.8486336227011892, | |
| "step": 10396 | |
| }, | |
| { | |
| "epoch": 15.666359531926656, | |
| "learning_rate": 3.880952380952381e-05, | |
| "loss": 1.8404835388723728, | |
| "step": 10622 | |
| }, | |
| { | |
| "epoch": 15.999539297889985, | |
| "learning_rate": 3.857142857142858e-05, | |
| "loss": 1.8481951688243226, | |
| "step": 10848 | |
| }, | |
| { | |
| "epoch": 16.33317976596333, | |
| "learning_rate": 3.8333333333333334e-05, | |
| "loss": 1.8267865476355087, | |
| "step": 11074 | |
| }, | |
| { | |
| "epoch": 16.666359531926656, | |
| "learning_rate": 3.809523809523809e-05, | |
| "loss": 1.818214213953609, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 16.999539297889985, | |
| "learning_rate": 3.785714285714286e-05, | |
| "loss": 1.8284451645032493, | |
| "step": 11526 | |
| }, | |
| { | |
| "epoch": 17.33317976596333, | |
| "learning_rate": 3.761904761904762e-05, | |
| "loss": 1.8039584539632882, | |
| "step": 11752 | |
| }, | |
| { | |
| "epoch": 17.666359531926656, | |
| "learning_rate": 3.7380952380952386e-05, | |
| "loss": 1.8007052261217507, | |
| "step": 11978 | |
| }, | |
| { | |
| "epoch": 17.999539297889985, | |
| "learning_rate": 3.7142857142857143e-05, | |
| "loss": 1.800786381274198, | |
| "step": 12204 | |
| }, | |
| { | |
| "epoch": 18.33317976596333, | |
| "learning_rate": 3.690476190476191e-05, | |
| "loss": 1.7864516266679342, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 18.666359531926656, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 1.790079842626521, | |
| "step": 12656 | |
| }, | |
| { | |
| "epoch": 18.999539297889985, | |
| "learning_rate": 3.642857142857143e-05, | |
| "loss": 1.7918755185287611, | |
| "step": 12882 | |
| }, | |
| { | |
| "epoch": 19.33317976596333, | |
| "learning_rate": 3.619047619047619e-05, | |
| "loss": 1.7703251121318446, | |
| "step": 13108 | |
| }, | |
| { | |
| "epoch": 19.666359531926656, | |
| "learning_rate": 3.595238095238095e-05, | |
| "loss": 1.7819123394721377, | |
| "step": 13334 | |
| }, | |
| { | |
| "epoch": 19.999539297889985, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 1.7857482370022124, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 20.33317976596333, | |
| "learning_rate": 3.547619047619048e-05, | |
| "loss": 1.7561523977634126, | |
| "step": 13786 | |
| }, | |
| { | |
| "epoch": 20.666359531926656, | |
| "learning_rate": 3.523809523809524e-05, | |
| "loss": 1.7562858108925608, | |
| "step": 14012 | |
| }, | |
| { | |
| "epoch": 20.999539297889985, | |
| "learning_rate": 3.5e-05, | |
| "loss": 1.767443496569068, | |
| "step": 14238 | |
| }, | |
| { | |
| "epoch": 21.33317976596333, | |
| "learning_rate": 3.476190476190476e-05, | |
| "loss": 1.7421298406820382, | |
| "step": 14464 | |
| }, | |
| { | |
| "epoch": 21.666359531926656, | |
| "learning_rate": 3.4523809523809526e-05, | |
| "loss": 1.7460298791395878, | |
| "step": 14690 | |
| }, | |
| { | |
| "epoch": 21.999539297889985, | |
| "learning_rate": 3.428571428571429e-05, | |
| "loss": 1.7635893695122373, | |
| "step": 14916 | |
| }, | |
| { | |
| "epoch": 22.33317976596333, | |
| "learning_rate": 3.404761904761905e-05, | |
| "loss": 1.7318757993985066, | |
| "step": 15142 | |
| }, | |
| { | |
| "epoch": 22.666359531926656, | |
| "learning_rate": 3.380952380952381e-05, | |
| "loss": 1.7320329784292035, | |
| "step": 15368 | |
| }, | |
| { | |
| "epoch": 22.999539297889985, | |
| "learning_rate": 3.357142857142857e-05, | |
| "loss": 1.7314567903501799, | |
| "step": 15594 | |
| }, | |
| { | |
| "epoch": 23.33317976596333, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 1.7091308863817063, | |
| "step": 15820 | |
| }, | |
| { | |
| "epoch": 23.666359531926656, | |
| "learning_rate": 3.309523809523809e-05, | |
| "loss": 1.7127776019341123, | |
| "step": 16046 | |
| }, | |
| { | |
| "epoch": 23.999539297889985, | |
| "learning_rate": 3.285714285714286e-05, | |
| "loss": 1.714874267578125, | |
| "step": 16272 | |
| }, | |
| { | |
| "epoch": 24.33317976596333, | |
| "learning_rate": 3.261904761904762e-05, | |
| "loss": 1.7092985980278623, | |
| "step": 16498 | |
| }, | |
| { | |
| "epoch": 24.666359531926656, | |
| "learning_rate": 3.2380952380952386e-05, | |
| "loss": 1.7022116331927544, | |
| "step": 16724 | |
| }, | |
| { | |
| "epoch": 24.999539297889985, | |
| "learning_rate": 3.2142857142857144e-05, | |
| "loss": 1.7083171743207273, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 25.33317976596333, | |
| "learning_rate": 3.19047619047619e-05, | |
| "loss": 1.6961614558127074, | |
| "step": 17176 | |
| }, | |
| { | |
| "epoch": 25.666359531926656, | |
| "learning_rate": 3.1666666666666666e-05, | |
| "loss": 1.6968883413129148, | |
| "step": 17402 | |
| }, | |
| { | |
| "epoch": 25.999539297889985, | |
| "learning_rate": 3.142857142857143e-05, | |
| "loss": 1.6995788844285813, | |
| "step": 17628 | |
| }, | |
| { | |
| "epoch": 26.33317976596333, | |
| "learning_rate": 3.1190476190476195e-05, | |
| "loss": 1.6844244762859513, | |
| "step": 17854 | |
| }, | |
| { | |
| "epoch": 26.666359531926656, | |
| "learning_rate": 3.095238095238095e-05, | |
| "loss": 1.6839947995886337, | |
| "step": 18080 | |
| }, | |
| { | |
| "epoch": 26.999539297889985, | |
| "learning_rate": 3.071428571428572e-05, | |
| "loss": 1.6868854623980227, | |
| "step": 18306 | |
| }, | |
| { | |
| "epoch": 27.33317976596333, | |
| "learning_rate": 3.0476190476190482e-05, | |
| "loss": 1.6642917869365321, | |
| "step": 18532 | |
| }, | |
| { | |
| "epoch": 27.666359531926656, | |
| "learning_rate": 3.0238095238095236e-05, | |
| "loss": 1.6676389981160122, | |
| "step": 18758 | |
| }, | |
| { | |
| "epoch": 27.999539297889985, | |
| "learning_rate": 3e-05, | |
| "loss": 1.6597493939695105, | |
| "step": 18984 | |
| }, | |
| { | |
| "epoch": 28.33317976596333, | |
| "learning_rate": 2.9761904761904762e-05, | |
| "loss": 1.652435842868501, | |
| "step": 19210 | |
| }, | |
| { | |
| "epoch": 28.666359531926656, | |
| "learning_rate": 2.9523809523809526e-05, | |
| "loss": 1.6600899485360205, | |
| "step": 19436 | |
| }, | |
| { | |
| "epoch": 28.999539297889985, | |
| "learning_rate": 2.9285714285714288e-05, | |
| "loss": 1.6490822851130393, | |
| "step": 19662 | |
| }, | |
| { | |
| "epoch": 29.33317976596333, | |
| "learning_rate": 2.9047619047619052e-05, | |
| "loss": 1.6511269628474143, | |
| "step": 19888 | |
| }, | |
| { | |
| "epoch": 29.666359531926656, | |
| "learning_rate": 2.880952380952381e-05, | |
| "loss": 1.6514149893701604, | |
| "step": 20114 | |
| }, | |
| { | |
| "epoch": 29.999539297889985, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 1.6452392308057937, | |
| "step": 20340 | |
| }, | |
| { | |
| "epoch": 30.33317976596333, | |
| "learning_rate": 2.8333333333333335e-05, | |
| "loss": 1.6428464366271434, | |
| "step": 20566 | |
| }, | |
| { | |
| "epoch": 30.666359531926656, | |
| "learning_rate": 2.8095238095238096e-05, | |
| "loss": 1.632520692538371, | |
| "step": 20792 | |
| }, | |
| { | |
| "epoch": 30.999539297889985, | |
| "learning_rate": 2.785714285714286e-05, | |
| "loss": 1.6398790747718472, | |
| "step": 21018 | |
| }, | |
| { | |
| "epoch": 31.33317976596333, | |
| "learning_rate": 2.7619047619047622e-05, | |
| "loss": 1.6239518697282909, | |
| "step": 21244 | |
| }, | |
| { | |
| "epoch": 31.666359531926656, | |
| "learning_rate": 2.7380952380952383e-05, | |
| "loss": 1.6242760852374862, | |
| "step": 21470 | |
| }, | |
| { | |
| "epoch": 31.999539297889985, | |
| "learning_rate": 2.714285714285714e-05, | |
| "loss": 1.627472463962251, | |
| "step": 21696 | |
| }, | |
| { | |
| "epoch": 32.33317976596333, | |
| "learning_rate": 2.6904761904761905e-05, | |
| "loss": 1.6053812482715708, | |
| "step": 21922 | |
| }, | |
| { | |
| "epoch": 32.66635953192666, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 1.610074878793902, | |
| "step": 22148 | |
| }, | |
| { | |
| "epoch": 32.99953929788998, | |
| "learning_rate": 2.642857142857143e-05, | |
| "loss": 1.620673390616358, | |
| "step": 22374 | |
| }, | |
| { | |
| "epoch": 33.33317976596333, | |
| "learning_rate": 2.6190476190476192e-05, | |
| "loss": 1.6081807634471792, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 33.66635953192666, | |
| "learning_rate": 2.5952380952380957e-05, | |
| "loss": 1.6156560847189574, | |
| "step": 22826 | |
| }, | |
| { | |
| "epoch": 33.99953929788998, | |
| "learning_rate": 2.5714285714285714e-05, | |
| "loss": 1.6041984895689299, | |
| "step": 23052 | |
| }, | |
| { | |
| "epoch": 34.33317976596333, | |
| "learning_rate": 2.5476190476190476e-05, | |
| "loss": 1.5947894881256914, | |
| "step": 23278 | |
| }, | |
| { | |
| "epoch": 34.66635953192666, | |
| "learning_rate": 2.523809523809524e-05, | |
| "loss": 1.5988714167502074, | |
| "step": 23504 | |
| }, | |
| { | |
| "epoch": 34.99953929788998, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.5926924173810841, | |
| "step": 23730 | |
| }, | |
| { | |
| "epoch": 35.33317976596333, | |
| "learning_rate": 2.4761904761904762e-05, | |
| "loss": 1.5834472116115874, | |
| "step": 23956 | |
| }, | |
| { | |
| "epoch": 35.66635953192666, | |
| "learning_rate": 2.4523809523809523e-05, | |
| "loss": 1.5848133458500415, | |
| "step": 24182 | |
| }, | |
| { | |
| "epoch": 35.99953929788998, | |
| "learning_rate": 2.4285714285714288e-05, | |
| "loss": 1.602300593283324, | |
| "step": 24408 | |
| }, | |
| { | |
| "epoch": 36.33317976596333, | |
| "learning_rate": 2.404761904761905e-05, | |
| "loss": 1.5835503772296737, | |
| "step": 24634 | |
| }, | |
| { | |
| "epoch": 36.66635953192666, | |
| "learning_rate": 2.380952380952381e-05, | |
| "loss": 1.5831868669628042, | |
| "step": 24860 | |
| }, | |
| { | |
| "epoch": 36.99953929788998, | |
| "learning_rate": 2.357142857142857e-05, | |
| "loss": 1.5691194787489628, | |
| "step": 25086 | |
| }, | |
| { | |
| "epoch": 37.33317976596333, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 1.568113208872027, | |
| "step": 25312 | |
| }, | |
| { | |
| "epoch": 37.66635953192666, | |
| "learning_rate": 2.3095238095238097e-05, | |
| "loss": 1.5696247742239353, | |
| "step": 25538 | |
| }, | |
| { | |
| "epoch": 37.99953929788998, | |
| "learning_rate": 2.2857142857142858e-05, | |
| "loss": 1.5706803313398783, | |
| "step": 25764 | |
| }, | |
| { | |
| "epoch": 38.33317976596333, | |
| "learning_rate": 2.261904761904762e-05, | |
| "loss": 1.5539683114110896, | |
| "step": 25990 | |
| }, | |
| { | |
| "epoch": 38.66635953192666, | |
| "learning_rate": 2.2380952380952384e-05, | |
| "loss": 1.5682053017405282, | |
| "step": 26216 | |
| }, | |
| { | |
| "epoch": 38.99953929788998, | |
| "learning_rate": 2.214285714285714e-05, | |
| "loss": 1.5620073976769913, | |
| "step": 26442 | |
| }, | |
| { | |
| "epoch": 39.33317976596333, | |
| "learning_rate": 2.1904761904761906e-05, | |
| "loss": 1.5591868172704646, | |
| "step": 26668 | |
| }, | |
| { | |
| "epoch": 39.66635953192666, | |
| "learning_rate": 2.1666666666666667e-05, | |
| "loss": 1.5566102424554065, | |
| "step": 26894 | |
| }, | |
| { | |
| "epoch": 39.99953929788998, | |
| "learning_rate": 2.1428571428571428e-05, | |
| "loss": 1.5528145849177268, | |
| "step": 27120 | |
| }, | |
| { | |
| "epoch": 40.33317976596333, | |
| "learning_rate": 2.1190476190476193e-05, | |
| "loss": 1.5426600633469303, | |
| "step": 27346 | |
| }, | |
| { | |
| "epoch": 40.66635953192666, | |
| "learning_rate": 2.0952380952380954e-05, | |
| "loss": 1.541890642284292, | |
| "step": 27572 | |
| }, | |
| { | |
| "epoch": 40.99953929788998, | |
| "learning_rate": 2.0714285714285718e-05, | |
| "loss": 1.5517368823026134, | |
| "step": 27798 | |
| }, | |
| { | |
| "epoch": 41.33317976596333, | |
| "learning_rate": 2.0476190476190476e-05, | |
| "loss": 1.540764057530766, | |
| "step": 28024 | |
| }, | |
| { | |
| "epoch": 41.66635953192666, | |
| "learning_rate": 2.023809523809524e-05, | |
| "loss": 1.5214318469562362, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 41.99953929788998, | |
| "learning_rate": 2e-05, | |
| "loss": 1.5345232128042035, | |
| "step": 28476 | |
| }, | |
| { | |
| "epoch": 42.33317976596333, | |
| "learning_rate": 1.9761904761904763e-05, | |
| "loss": 1.5242220448181691, | |
| "step": 28702 | |
| }, | |
| { | |
| "epoch": 42.66635953192666, | |
| "learning_rate": 1.9523809523809524e-05, | |
| "loss": 1.535507067114906, | |
| "step": 28928 | |
| }, | |
| { | |
| "epoch": 42.99953929788998, | |
| "learning_rate": 1.928571428571429e-05, | |
| "loss": 1.5329083468006774, | |
| "step": 29154 | |
| }, | |
| { | |
| "epoch": 43.33317976596333, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 1.5211832502246958, | |
| "step": 29380 | |
| }, | |
| { | |
| "epoch": 43.66635953192666, | |
| "learning_rate": 1.880952380952381e-05, | |
| "loss": 1.5096026395274476, | |
| "step": 29606 | |
| }, | |
| { | |
| "epoch": 43.99953929788998, | |
| "learning_rate": 1.8571428571428572e-05, | |
| "loss": 1.5239440107767561, | |
| "step": 29832 | |
| }, | |
| { | |
| "epoch": 44.33317976596333, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "loss": 1.518264635474281, | |
| "step": 30058 | |
| }, | |
| { | |
| "epoch": 44.66635953192666, | |
| "learning_rate": 1.8095238095238094e-05, | |
| "loss": 1.5015280394427544, | |
| "step": 30284 | |
| }, | |
| { | |
| "epoch": 44.99953929788998, | |
| "learning_rate": 1.785714285714286e-05, | |
| "loss": 1.5198267033669801, | |
| "step": 30510 | |
| }, | |
| { | |
| "epoch": 45.33317976596333, | |
| "learning_rate": 1.761904761904762e-05, | |
| "loss": 1.5029577744745575, | |
| "step": 30736 | |
| }, | |
| { | |
| "epoch": 45.66635953192666, | |
| "learning_rate": 1.738095238095238e-05, | |
| "loss": 1.5046313800642976, | |
| "step": 30962 | |
| }, | |
| { | |
| "epoch": 45.99953929788998, | |
| "learning_rate": 1.7142857142857145e-05, | |
| "loss": 1.5148508527637583, | |
| "step": 31188 | |
| }, | |
| { | |
| "epoch": 46.33317976596333, | |
| "learning_rate": 1.6904761904761906e-05, | |
| "loss": 1.4938382849229122, | |
| "step": 31414 | |
| }, | |
| { | |
| "epoch": 46.66635953192666, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 1.5032175789892146, | |
| "step": 31640 | |
| }, | |
| { | |
| "epoch": 46.99953929788998, | |
| "learning_rate": 1.642857142857143e-05, | |
| "loss": 1.4958131503214878, | |
| "step": 31866 | |
| }, | |
| { | |
| "epoch": 47.33317976596333, | |
| "learning_rate": 1.6190476190476193e-05, | |
| "loss": 1.5038191533721654, | |
| "step": 32092 | |
| }, | |
| { | |
| "epoch": 47.66635953192666, | |
| "learning_rate": 1.595238095238095e-05, | |
| "loss": 1.4897128755012445, | |
| "step": 32318 | |
| }, | |
| { | |
| "epoch": 47.99953929788998, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 1.4913623100888413, | |
| "step": 32544 | |
| }, | |
| { | |
| "epoch": 48.33317976596333, | |
| "learning_rate": 1.5476190476190476e-05, | |
| "loss": 1.491287096411781, | |
| "step": 32770 | |
| }, | |
| { | |
| "epoch": 48.66635953192666, | |
| "learning_rate": 1.5238095238095241e-05, | |
| "loss": 1.4894442938070382, | |
| "step": 32996 | |
| }, | |
| { | |
| "epoch": 48.99953929788998, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.4941079468853706, | |
| "step": 33222 | |
| }, | |
| { | |
| "epoch": 49.33317976596333, | |
| "learning_rate": 1.4761904761904763e-05, | |
| "loss": 1.494901538950152, | |
| "step": 33448 | |
| }, | |
| { | |
| "epoch": 49.66635953192666, | |
| "learning_rate": 1.4523809523809526e-05, | |
| "loss": 1.4872477033496958, | |
| "step": 33674 | |
| }, | |
| { | |
| "epoch": 49.99953929788998, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 1.4863664745229535, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 50.33317976596333, | |
| "learning_rate": 1.4047619047619048e-05, | |
| "loss": 1.4825258508192753, | |
| "step": 34126 | |
| }, | |
| { | |
| "epoch": 50.66635953192666, | |
| "learning_rate": 1.3809523809523811e-05, | |
| "loss": 1.4887811441337113, | |
| "step": 34352 | |
| }, | |
| { | |
| "epoch": 50.99953929788998, | |
| "learning_rate": 1.357142857142857e-05, | |
| "loss": 1.475349223719234, | |
| "step": 34578 | |
| }, | |
| { | |
| "epoch": 51.33317976596333, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 1.468778492075152, | |
| "step": 34804 | |
| }, | |
| { | |
| "epoch": 51.66635953192666, | |
| "learning_rate": 1.3095238095238096e-05, | |
| "loss": 1.4681135869659154, | |
| "step": 35030 | |
| }, | |
| { | |
| "epoch": 51.99953929788998, | |
| "learning_rate": 1.2857142857142857e-05, | |
| "loss": 1.4756152397763413, | |
| "step": 35256 | |
| }, | |
| { | |
| "epoch": 52.33317976596333, | |
| "learning_rate": 1.261904761904762e-05, | |
| "loss": 1.461721504684043, | |
| "step": 35482 | |
| }, | |
| { | |
| "epoch": 52.66635953192666, | |
| "learning_rate": 1.2380952380952381e-05, | |
| "loss": 1.472177421097207, | |
| "step": 35708 | |
| }, | |
| { | |
| "epoch": 52.99953929788998, | |
| "learning_rate": 1.2142857142857144e-05, | |
| "loss": 1.470105331555932, | |
| "step": 35934 | |
| }, | |
| { | |
| "epoch": 53.33317976596333, | |
| "learning_rate": 1.1904761904761905e-05, | |
| "loss": 1.463019582022608, | |
| "step": 36160 | |
| }, | |
| { | |
| "epoch": 53.66635953192666, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 1.4712693205976908, | |
| "step": 36386 | |
| }, | |
| { | |
| "epoch": 53.99953929788998, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 1.4639480725853844, | |
| "step": 36612 | |
| }, | |
| { | |
| "epoch": 54.33317976596333, | |
| "learning_rate": 1.1190476190476192e-05, | |
| "loss": 1.4621197185685149, | |
| "step": 36838 | |
| }, | |
| { | |
| "epoch": 54.66635953192666, | |
| "learning_rate": 1.0952380952380953e-05, | |
| "loss": 1.4438346930309736, | |
| "step": 37064 | |
| }, | |
| { | |
| "epoch": 54.99953929788998, | |
| "learning_rate": 1.0714285714285714e-05, | |
| "loss": 1.45255799420112, | |
| "step": 37290 | |
| }, | |
| { | |
| "epoch": 55.33317976596333, | |
| "learning_rate": 1.0476190476190477e-05, | |
| "loss": 1.4527645955043555, | |
| "step": 37516 | |
| }, | |
| { | |
| "epoch": 55.66635953192666, | |
| "learning_rate": 1.0238095238095238e-05, | |
| "loss": 1.4556512073077987, | |
| "step": 37742 | |
| }, | |
| { | |
| "epoch": 55.99953929788998, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4523168243138136, | |
| "step": 37968 | |
| }, | |
| { | |
| "epoch": 56.33317976596333, | |
| "learning_rate": 9.761904761904762e-06, | |
| "loss": 1.4451588318411228, | |
| "step": 38194 | |
| }, | |
| { | |
| "epoch": 56.66635953192666, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 1.4351428546736726, | |
| "step": 38420 | |
| }, | |
| { | |
| "epoch": 56.99953929788998, | |
| "learning_rate": 9.285714285714286e-06, | |
| "loss": 1.4480798771951051, | |
| "step": 38646 | |
| }, | |
| { | |
| "epoch": 57.33317976596333, | |
| "learning_rate": 9.047619047619047e-06, | |
| "loss": 1.4419262641299087, | |
| "step": 38872 | |
| }, | |
| { | |
| "epoch": 57.66635953192666, | |
| "learning_rate": 8.80952380952381e-06, | |
| "loss": 1.4400379552250415, | |
| "step": 39098 | |
| }, | |
| { | |
| "epoch": 57.99953929788998, | |
| "learning_rate": 8.571428571428573e-06, | |
| "loss": 1.4458791006982854, | |
| "step": 39324 | |
| }, | |
| { | |
| "epoch": 58.33317976596333, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 1.4246890987970133, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 58.66635953192666, | |
| "learning_rate": 8.095238095238097e-06, | |
| "loss": 1.4372091377730918, | |
| "step": 39776 | |
| }, | |
| { | |
| "epoch": 58.99953929788998, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 1.4388618131654451, | |
| "step": 40002 | |
| }, | |
| { | |
| "epoch": 59.33317976596333, | |
| "learning_rate": 7.6190476190476205e-06, | |
| "loss": 1.437955738168902, | |
| "step": 40228 | |
| }, | |
| { | |
| "epoch": 59.66635953192666, | |
| "learning_rate": 7.380952380952382e-06, | |
| "loss": 1.4384045896276962, | |
| "step": 40454 | |
| }, | |
| { | |
| "epoch": 59.99953929788998, | |
| "learning_rate": 7.142857142857143e-06, | |
| "loss": 1.4317560786694552, | |
| "step": 40680 | |
| }, | |
| { | |
| "epoch": 60.33317976596333, | |
| "learning_rate": 6.9047619047619055e-06, | |
| "loss": 1.4312950741928236, | |
| "step": 40906 | |
| }, | |
| { | |
| "epoch": 60.66635953192666, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 1.4349378737728153, | |
| "step": 41132 | |
| }, | |
| { | |
| "epoch": 60.99953929788998, | |
| "learning_rate": 6.428571428571429e-06, | |
| "loss": 1.4232025146484375, | |
| "step": 41358 | |
| }, | |
| { | |
| "epoch": 61.33317976596333, | |
| "learning_rate": 6.190476190476191e-06, | |
| "loss": 1.4273036180344303, | |
| "step": 41584 | |
| }, | |
| { | |
| "epoch": 61.66635953192666, | |
| "learning_rate": 5.9523809523809525e-06, | |
| "loss": 1.437505671408324, | |
| "step": 41810 | |
| }, | |
| { | |
| "epoch": 61.99953929788998, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 1.4295697507605087, | |
| "step": 42036 | |
| }, | |
| { | |
| "epoch": 62.33317976596333, | |
| "learning_rate": 5.4761904761904765e-06, | |
| "loss": 1.4301999522521434, | |
| "step": 42262 | |
| }, | |
| { | |
| "epoch": 62.66635953192666, | |
| "learning_rate": 5.2380952380952384e-06, | |
| "loss": 1.4240087998651825, | |
| "step": 42488 | |
| }, | |
| { | |
| "epoch": 62.99953929788998, | |
| "learning_rate": 5e-06, | |
| "loss": 1.418294991012168, | |
| "step": 42714 | |
| }, | |
| { | |
| "epoch": 63.33317976596333, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 1.4275296641662059, | |
| "step": 42940 | |
| }, | |
| { | |
| "epoch": 63.66635953192666, | |
| "learning_rate": 4.5238095238095235e-06, | |
| "loss": 1.4242185069396434, | |
| "step": 43166 | |
| }, | |
| { | |
| "epoch": 63.99953929788998, | |
| "learning_rate": 4.285714285714286e-06, | |
| "loss": 1.4242925053149198, | |
| "step": 43392 | |
| }, | |
| { | |
| "epoch": 64.33317976596332, | |
| "learning_rate": 4.047619047619048e-06, | |
| "loss": 1.4117900206979397, | |
| "step": 43618 | |
| }, | |
| { | |
| "epoch": 64.66635953192666, | |
| "learning_rate": 3.8095238095238102e-06, | |
| "loss": 1.4196827306156665, | |
| "step": 43844 | |
| }, | |
| { | |
| "epoch": 64.99953929788998, | |
| "learning_rate": 3.5714285714285714e-06, | |
| "loss": 1.4127752253439574, | |
| "step": 44070 | |
| }, | |
| { | |
| "epoch": 65.33317976596332, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.415105228930448, | |
| "step": 44296 | |
| }, | |
| { | |
| "epoch": 65.66635953192666, | |
| "learning_rate": 3.0952380952380953e-06, | |
| "loss": 1.4221684278640072, | |
| "step": 44522 | |
| }, | |
| { | |
| "epoch": 65.99953929788998, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 1.4126794865701051, | |
| "step": 44748 | |
| }, | |
| { | |
| "epoch": 66.33317976596332, | |
| "learning_rate": 2.6190476190476192e-06, | |
| "loss": 1.412820731644082, | |
| "step": 44974 | |
| }, | |
| { | |
| "epoch": 66.66635953192666, | |
| "learning_rate": 2.3809523809523808e-06, | |
| "loss": 1.4091276995903623, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 66.99953929788998, | |
| "learning_rate": 2.142857142857143e-06, | |
| "loss": 1.413559094994469, | |
| "step": 45426 | |
| }, | |
| { | |
| "epoch": 67.33317976596332, | |
| "learning_rate": 1.9047619047619051e-06, | |
| "loss": 1.4078961937828403, | |
| "step": 45652 | |
| }, | |
| { | |
| "epoch": 67.66635953192666, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 1.4104151092799364, | |
| "step": 45878 | |
| }, | |
| { | |
| "epoch": 67.99953929788998, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 1.4099604513792865, | |
| "step": 46104 | |
| }, | |
| { | |
| "epoch": 68.33317976596332, | |
| "learning_rate": 1.1904761904761904e-06, | |
| "loss": 1.406501297402171, | |
| "step": 46330 | |
| }, | |
| { | |
| "epoch": 68.66635953192666, | |
| "learning_rate": 9.523809523809526e-07, | |
| "loss": 1.4021470061445658, | |
| "step": 46556 | |
| }, | |
| { | |
| "epoch": 68.99953929788998, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 1.4063275092470962, | |
| "step": 46782 | |
| } | |
| ], | |
| "max_steps": 47460, | |
| "num_train_epochs": 70, | |
| "total_flos": 531450425497308624, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |