| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 273, |
| "global_step": 273, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003663003663003663, |
| "grad_norm": 0.291015625, |
| "learning_rate": 1e-05, |
| "loss": 2.4491, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.007326007326007326, |
| "grad_norm": 0.302734375, |
| "learning_rate": 9.963369963369965e-06, |
| "loss": 2.1231, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01098901098901099, |
| "grad_norm": 0.263671875, |
| "learning_rate": 9.926739926739928e-06, |
| "loss": 2.1594, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.014652014652014652, |
| "grad_norm": 0.341796875, |
| "learning_rate": 9.890109890109892e-06, |
| "loss": 2.3652, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.018315018315018316, |
| "grad_norm": 0.50390625, |
| "learning_rate": 9.853479853479855e-06, |
| "loss": 2.0435, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02197802197802198, |
| "grad_norm": 0.26953125, |
| "learning_rate": 9.816849816849817e-06, |
| "loss": 2.0461, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02564102564102564, |
| "grad_norm": 0.2578125, |
| "learning_rate": 9.780219780219781e-06, |
| "loss": 2.2132, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.029304029304029304, |
| "grad_norm": 0.259765625, |
| "learning_rate": 9.743589743589744e-06, |
| "loss": 2.1265, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03296703296703297, |
| "grad_norm": 0.29296875, |
| "learning_rate": 9.706959706959708e-06, |
| "loss": 1.9773, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.03663003663003663, |
| "grad_norm": 0.294921875, |
| "learning_rate": 9.670329670329671e-06, |
| "loss": 2.2541, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.040293040293040296, |
| "grad_norm": 0.26953125, |
| "learning_rate": 9.633699633699635e-06, |
| "loss": 2.0383, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.04395604395604396, |
| "grad_norm": 0.265625, |
| "learning_rate": 9.597069597069598e-06, |
| "loss": 2.0413, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.047619047619047616, |
| "grad_norm": 0.337890625, |
| "learning_rate": 9.560439560439562e-06, |
| "loss": 2.0745, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.05128205128205128, |
| "grad_norm": 0.29296875, |
| "learning_rate": 9.523809523809525e-06, |
| "loss": 2.2327, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.054945054945054944, |
| "grad_norm": 0.37890625, |
| "learning_rate": 9.487179487179487e-06, |
| "loss": 1.9576, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05860805860805861, |
| "grad_norm": 0.423828125, |
| "learning_rate": 9.450549450549452e-06, |
| "loss": 2.035, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.06227106227106227, |
| "grad_norm": 0.251953125, |
| "learning_rate": 9.413919413919414e-06, |
| "loss": 2.0768, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.06593406593406594, |
| "grad_norm": 0.2431640625, |
| "learning_rate": 9.377289377289379e-06, |
| "loss": 2.1331, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0695970695970696, |
| "grad_norm": 0.24609375, |
| "learning_rate": 9.340659340659341e-06, |
| "loss": 2.0382, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.07326007326007326, |
| "grad_norm": 0.275390625, |
| "learning_rate": 9.304029304029305e-06, |
| "loss": 2.0348, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07692307692307693, |
| "grad_norm": 0.1875, |
| "learning_rate": 9.267399267399268e-06, |
| "loss": 2.1407, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.08058608058608059, |
| "grad_norm": 0.162109375, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 2.1144, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.08424908424908426, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 9.194139194139195e-06, |
| "loss": 1.9681, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.08791208791208792, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 9.157509157509158e-06, |
| "loss": 2.1024, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.09157509157509157, |
| "grad_norm": 0.17578125, |
| "learning_rate": 9.120879120879122e-06, |
| "loss": 2.1393, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.09523809523809523, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 9.084249084249084e-06, |
| "loss": 2.1336, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0989010989010989, |
| "grad_norm": 1.140625, |
| "learning_rate": 9.047619047619049e-06, |
| "loss": 2.0976, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.10256410256410256, |
| "grad_norm": 0.20703125, |
| "learning_rate": 9.010989010989011e-06, |
| "loss": 2.1487, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.10622710622710622, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 8.974358974358976e-06, |
| "loss": 1.9983, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.10989010989010989, |
| "grad_norm": 0.19140625, |
| "learning_rate": 8.937728937728938e-06, |
| "loss": 2.1838, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.11355311355311355, |
| "grad_norm": 0.181640625, |
| "learning_rate": 8.9010989010989e-06, |
| "loss": 1.8104, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.11721611721611722, |
| "grad_norm": 0.146484375, |
| "learning_rate": 8.864468864468865e-06, |
| "loss": 2.216, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.12087912087912088, |
| "grad_norm": 0.185546875, |
| "learning_rate": 8.827838827838828e-06, |
| "loss": 1.9466, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.12454212454212454, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 8.791208791208792e-06, |
| "loss": 2.007, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.1282051282051282, |
| "grad_norm": 0.13671875, |
| "learning_rate": 8.754578754578755e-06, |
| "loss": 2.0568, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.13186813186813187, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 8.717948717948719e-06, |
| "loss": 2.0231, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.13553113553113552, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 8.681318681318681e-06, |
| "loss": 2.1427, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.1391941391941392, |
| "grad_norm": 0.3515625, |
| "learning_rate": 8.644688644688646e-06, |
| "loss": 2.0347, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 8.60805860805861e-06, |
| "loss": 2.0072, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.14652014652014653, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 1.8946, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.15018315018315018, |
| "grad_norm": 0.15234375, |
| "learning_rate": 8.534798534798535e-06, |
| "loss": 2.106, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 8.498168498168498e-06, |
| "loss": 1.9073, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1575091575091575, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 8.461538461538462e-06, |
| "loss": 2.1076, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.16117216117216118, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 8.424908424908426e-06, |
| "loss": 2.0488, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.16483516483516483, |
| "grad_norm": 0.119140625, |
| "learning_rate": 8.388278388278389e-06, |
| "loss": 2.1218, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1684981684981685, |
| "grad_norm": 0.15234375, |
| "learning_rate": 8.351648351648353e-06, |
| "loss": 2.1201, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.17216117216117216, |
| "grad_norm": 0.14453125, |
| "learning_rate": 8.315018315018316e-06, |
| "loss": 1.9976, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.17582417582417584, |
| "grad_norm": 0.1875, |
| "learning_rate": 8.278388278388278e-06, |
| "loss": 1.7992, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1794871794871795, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 8.241758241758243e-06, |
| "loss": 2.1759, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.18315018315018314, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 8.205128205128205e-06, |
| "loss": 2.1589, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.18681318681318682, |
| "grad_norm": 0.140625, |
| "learning_rate": 8.16849816849817e-06, |
| "loss": 2.0749, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.19047619047619047, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 8.131868131868132e-06, |
| "loss": 2.1892, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.19413919413919414, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 8.095238095238097e-06, |
| "loss": 1.9497, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1978021978021978, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 8.058608058608059e-06, |
| "loss": 1.8706, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.20146520146520147, |
| "grad_norm": 0.11328125, |
| "learning_rate": 8.021978021978023e-06, |
| "loss": 2.0449, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.20512820512820512, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 7.985347985347986e-06, |
| "loss": 2.0302, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.2087912087912088, |
| "grad_norm": 0.12890625, |
| "learning_rate": 7.948717948717949e-06, |
| "loss": 1.9157, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.21245421245421245, |
| "grad_norm": 0.234375, |
| "learning_rate": 7.912087912087913e-06, |
| "loss": 2.1313, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.21611721611721613, |
| "grad_norm": 0.1162109375, |
| "learning_rate": 7.875457875457876e-06, |
| "loss": 2.1958, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.21978021978021978, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 7.83882783882784e-06, |
| "loss": 2.0664, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.22344322344322345, |
| "grad_norm": 0.134765625, |
| "learning_rate": 7.802197802197802e-06, |
| "loss": 1.8385, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.2271062271062271, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 7.765567765567767e-06, |
| "loss": 1.9514, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 0.11865234375, |
| "learning_rate": 7.72893772893773e-06, |
| "loss": 2.0861, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.23443223443223443, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 1.8578, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.23809523809523808, |
| "grad_norm": 0.15625, |
| "learning_rate": 7.655677655677656e-06, |
| "loss": 2.0476, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.24175824175824176, |
| "grad_norm": 0.138671875, |
| "learning_rate": 7.61904761904762e-06, |
| "loss": 2.0206, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.2454212454212454, |
| "grad_norm": 0.166015625, |
| "learning_rate": 7.582417582417583e-06, |
| "loss": 2.0828, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2490842490842491, |
| "grad_norm": 0.16015625, |
| "learning_rate": 7.5457875457875465e-06, |
| "loss": 2.0207, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.25274725274725274, |
| "grad_norm": 0.138671875, |
| "learning_rate": 7.50915750915751e-06, |
| "loss": 1.875, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 7.472527472527473e-06, |
| "loss": 1.928, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2600732600732601, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 7.435897435897437e-06, |
| "loss": 2.2046, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.26373626373626374, |
| "grad_norm": 0.16796875, |
| "learning_rate": 7.3992673992674e-06, |
| "loss": 1.7548, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.2673992673992674, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 7.362637362637364e-06, |
| "loss": 1.9705, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.27106227106227104, |
| "grad_norm": 0.1220703125, |
| "learning_rate": 7.326007326007326e-06, |
| "loss": 2.1324, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.27472527472527475, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 7.28937728937729e-06, |
| "loss": 2.0416, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2783882783882784, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 7.252747252747253e-06, |
| "loss": 2.0204, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.28205128205128205, |
| "grad_norm": 0.138671875, |
| "learning_rate": 7.216117216117217e-06, |
| "loss": 2.0576, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.11328125, |
| "learning_rate": 7.17948717948718e-06, |
| "loss": 1.9715, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2893772893772894, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 2.0947, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.29304029304029305, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 7.106227106227107e-06, |
| "loss": 2.1038, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2967032967032967, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 7.0695970695970705e-06, |
| "loss": 2.0939, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.30036630036630035, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 7.032967032967034e-06, |
| "loss": 1.961, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.304029304029304, |
| "grad_norm": 0.16796875, |
| "learning_rate": 6.9963369963369965e-06, |
| "loss": 2.099, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 6.95970695970696e-06, |
| "loss": 2.0525, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.31135531135531136, |
| "grad_norm": 0.134765625, |
| "learning_rate": 6.923076923076923e-06, |
| "loss": 2.0044, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.315018315018315, |
| "grad_norm": 0.154296875, |
| "learning_rate": 6.886446886446887e-06, |
| "loss": 1.9307, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.31868131868131866, |
| "grad_norm": 0.12109375, |
| "learning_rate": 6.84981684981685e-06, |
| "loss": 2.0896, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.32234432234432236, |
| "grad_norm": 0.162109375, |
| "learning_rate": 6.813186813186814e-06, |
| "loss": 2.0983, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.326007326007326, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 6.776556776556777e-06, |
| "loss": 1.9151, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.32967032967032966, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 6.739926739926741e-06, |
| "loss": 2.0818, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.1240234375, |
| "learning_rate": 6.703296703296703e-06, |
| "loss": 1.9896, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.336996336996337, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 2.1386, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.34065934065934067, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 6.63003663003663e-06, |
| "loss": 1.9473, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3443223443223443, |
| "grad_norm": 0.14453125, |
| "learning_rate": 6.5934065934065935e-06, |
| "loss": 2.1156, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.34798534798534797, |
| "grad_norm": 0.119140625, |
| "learning_rate": 6.556776556776557e-06, |
| "loss": 1.985, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3516483516483517, |
| "grad_norm": 0.140625, |
| "learning_rate": 6.5201465201465204e-06, |
| "loss": 2.0145, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.3553113553113553, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 6.483516483516485e-06, |
| "loss": 2.0167, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.358974358974359, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 6.446886446886448e-06, |
| "loss": 1.8292, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3626373626373626, |
| "grad_norm": 0.125, |
| "learning_rate": 6.410256410256412e-06, |
| "loss": 1.9866, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.3663003663003663, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 6.373626373626373e-06, |
| "loss": 1.7332, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.36996336996337, |
| "grad_norm": 0.125, |
| "learning_rate": 6.336996336996337e-06, |
| "loss": 1.9392, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.37362637362637363, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 6.300366300366301e-06, |
| "loss": 1.9127, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3772893772893773, |
| "grad_norm": 0.142578125, |
| "learning_rate": 6.2637362637362645e-06, |
| "loss": 2.1041, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.38095238095238093, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 6.227106227106228e-06, |
| "loss": 2.0903, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 6.1904761904761914e-06, |
| "loss": 1.8948, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3882783882783883, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 1.9763, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.39194139194139194, |
| "grad_norm": 0.10205078125, |
| "learning_rate": 6.117216117216118e-06, |
| "loss": 2.0464, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3956043956043956, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 6.080586080586081e-06, |
| "loss": 2.1199, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3992673992673993, |
| "grad_norm": 0.1484375, |
| "learning_rate": 6.043956043956044e-06, |
| "loss": 1.8225, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.40293040293040294, |
| "grad_norm": 0.12109375, |
| "learning_rate": 6.007326007326008e-06, |
| "loss": 2.0006, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4065934065934066, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 5.970695970695971e-06, |
| "loss": 2.0466, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.41025641025641024, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 5.934065934065935e-06, |
| "loss": 1.9684, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.4139194139194139, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 5.897435897435898e-06, |
| "loss": 1.9289, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.4175824175824176, |
| "grad_norm": 0.1162109375, |
| "learning_rate": 5.860805860805862e-06, |
| "loss": 2.035, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.42124542124542125, |
| "grad_norm": 0.1162109375, |
| "learning_rate": 5.824175824175825e-06, |
| "loss": 2.2397, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.4249084249084249, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 5.7875457875457885e-06, |
| "loss": 1.8636, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 5.750915750915751e-06, |
| "loss": 2.0132, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.43223443223443225, |
| "grad_norm": 0.16015625, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 2.0484, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.4358974358974359, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 5.677655677655678e-06, |
| "loss": 2.0593, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.43956043956043955, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 5.641025641025641e-06, |
| "loss": 2.0788, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4432234432234432, |
| "grad_norm": 0.14453125, |
| "learning_rate": 5.604395604395605e-06, |
| "loss": 1.8239, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.4468864468864469, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 5.567765567765568e-06, |
| "loss": 2.1902, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.45054945054945056, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 5.531135531135532e-06, |
| "loss": 1.9744, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.4542124542124542, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 5.494505494505495e-06, |
| "loss": 2.0978, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.45787545787545786, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 5.457875457875458e-06, |
| "loss": 1.9445, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 5.421245421245421e-06, |
| "loss": 1.92, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.4652014652014652, |
| "grad_norm": 0.15234375, |
| "learning_rate": 5.384615384615385e-06, |
| "loss": 2.0029, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.46886446886446886, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 5.347985347985348e-06, |
| "loss": 2.0166, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.4725274725274725, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 5.3113553113553116e-06, |
| "loss": 1.852, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.12353515625, |
| "learning_rate": 5.274725274725275e-06, |
| "loss": 2.1644, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.47985347985347987, |
| "grad_norm": 0.11083984375, |
| "learning_rate": 5.2380952380952384e-06, |
| "loss": 2.1177, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.4835164835164835, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 5.201465201465202e-06, |
| "loss": 2.0386, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.48717948717948717, |
| "grad_norm": 0.10986328125, |
| "learning_rate": 5.164835164835166e-06, |
| "loss": 2.058, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.4908424908424908, |
| "grad_norm": 0.138671875, |
| "learning_rate": 5.128205128205128e-06, |
| "loss": 2.0366, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.4945054945054945, |
| "grad_norm": 0.107421875, |
| "learning_rate": 5.091575091575091e-06, |
| "loss": 2.0057, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4981684981684982, |
| "grad_norm": 0.126953125, |
| "learning_rate": 5.054945054945055e-06, |
| "loss": 2.0425, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.5018315018315018, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 5.018315018315018e-06, |
| "loss": 1.9883, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.5054945054945055, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 4.9816849816849826e-06, |
| "loss": 2.0651, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.5091575091575091, |
| "grad_norm": 0.12353515625, |
| "learning_rate": 4.945054945054946e-06, |
| "loss": 2.047, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 0.1328125, |
| "learning_rate": 4.908424908424909e-06, |
| "loss": 2.0373, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5164835164835165, |
| "grad_norm": 0.16015625, |
| "learning_rate": 4.871794871794872e-06, |
| "loss": 1.7688, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.5201465201465202, |
| "grad_norm": 0.138671875, |
| "learning_rate": 4.8351648351648355e-06, |
| "loss": 2.1753, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.5238095238095238, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 4.798534798534799e-06, |
| "loss": 2.086, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.5274725274725275, |
| "grad_norm": 0.115234375, |
| "learning_rate": 4.761904761904762e-06, |
| "loss": 2.0105, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.5311355311355311, |
| "grad_norm": 0.1142578125, |
| "learning_rate": 4.725274725274726e-06, |
| "loss": 2.0646, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5347985347985348, |
| "grad_norm": 0.109375, |
| "learning_rate": 4.688644688644689e-06, |
| "loss": 1.9313, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.5384615384615384, |
| "grad_norm": 0.15234375, |
| "learning_rate": 4.652014652014653e-06, |
| "loss": 2.0475, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.5421245421245421, |
| "grad_norm": 0.125, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 2.0117, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.5457875457875457, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 4.578754578754579e-06, |
| "loss": 2.0851, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.5494505494505495, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 4.542124542124542e-06, |
| "loss": 2.1002, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5531135531135531, |
| "grad_norm": 0.142578125, |
| "learning_rate": 4.505494505494506e-06, |
| "loss": 2.017, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.5567765567765568, |
| "grad_norm": 0.130859375, |
| "learning_rate": 4.468864468864469e-06, |
| "loss": 1.9418, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.5604395604395604, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 4.4322344322344325e-06, |
| "loss": 1.8559, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.5641025641025641, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 4.395604395604396e-06, |
| "loss": 1.8791, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.5677655677655677, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 4.358974358974359e-06, |
| "loss": 1.9635, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 4.322344322344323e-06, |
| "loss": 1.9865, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.575091575091575, |
| "grad_norm": 0.111328125, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": 2.1464, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5787545787545788, |
| "grad_norm": 0.11328125, |
| "learning_rate": 4.249084249084249e-06, |
| "loss": 2.0168, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5824175824175825, |
| "grad_norm": 0.12353515625, |
| "learning_rate": 4.212454212454213e-06, |
| "loss": 2.0145, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.5860805860805861, |
| "grad_norm": 0.212890625, |
| "learning_rate": 4.175824175824177e-06, |
| "loss": 1.9969, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5897435897435898, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 4.139194139194139e-06, |
| "loss": 2.038, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.5934065934065934, |
| "grad_norm": 0.11474609375, |
| "learning_rate": 4.102564102564103e-06, |
| "loss": 2.1425, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5970695970695971, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 4.065934065934066e-06, |
| "loss": 2.0305, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.6007326007326007, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 4.0293040293040296e-06, |
| "loss": 2.0674, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.6043956043956044, |
| "grad_norm": 0.130859375, |
| "learning_rate": 3.992673992673993e-06, |
| "loss": 1.8832, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.608058608058608, |
| "grad_norm": 0.130859375, |
| "learning_rate": 3.9560439560439565e-06, |
| "loss": 1.9492, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.6117216117216118, |
| "grad_norm": 0.138671875, |
| "learning_rate": 3.91941391941392e-06, |
| "loss": 1.9543, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.1171875, |
| "learning_rate": 3.882783882783883e-06, |
| "loss": 2.106, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.6190476190476191, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 1.9179, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.6227106227106227, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 3.80952380952381e-06, |
| "loss": 1.9645, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6263736263736264, |
| "grad_norm": 0.142578125, |
| "learning_rate": 3.7728937728937733e-06, |
| "loss": 1.8276, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.63003663003663, |
| "grad_norm": 0.142578125, |
| "learning_rate": 3.7362637362637367e-06, |
| "loss": 2.0342, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.6336996336996337, |
| "grad_norm": 0.123046875, |
| "learning_rate": 3.6996336996337e-06, |
| "loss": 1.9801, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.6373626373626373, |
| "grad_norm": 0.12353515625, |
| "learning_rate": 3.663003663003663e-06, |
| "loss": 2.0429, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.6410256410256411, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 3.6263736263736266e-06, |
| "loss": 2.1585, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6446886446886447, |
| "grad_norm": 0.138671875, |
| "learning_rate": 3.58974358974359e-06, |
| "loss": 1.6646, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.6483516483516484, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 3.5531135531135535e-06, |
| "loss": 2.1162, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.652014652014652, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 3.516483516483517e-06, |
| "loss": 1.919, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.6556776556776557, |
| "grad_norm": 0.126953125, |
| "learning_rate": 3.47985347985348e-06, |
| "loss": 2.0356, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.6593406593406593, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 3.4432234432234434e-06, |
| "loss": 1.904, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.663003663003663, |
| "grad_norm": 0.1123046875, |
| "learning_rate": 3.406593406593407e-06, |
| "loss": 2.1578, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.123046875, |
| "learning_rate": 3.3699633699633703e-06, |
| "loss": 2.0865, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.6703296703296703, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.9698, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.673992673992674, |
| "grad_norm": 0.126953125, |
| "learning_rate": 3.2967032967032968e-06, |
| "loss": 1.926, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.6776556776556777, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 3.2600732600732602e-06, |
| "loss": 2.0217, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6813186813186813, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 3.223443223443224e-06, |
| "loss": 2.0438, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.684981684981685, |
| "grad_norm": 0.1171875, |
| "learning_rate": 3.1868131868131867e-06, |
| "loss": 2.0881, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.6886446886446886, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 3.1501831501831505e-06, |
| "loss": 1.9943, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 0.140625, |
| "learning_rate": 3.113553113553114e-06, |
| "loss": 1.9431, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.6959706959706959, |
| "grad_norm": 0.1484375, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 2.1479, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6996336996336996, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 3.0402930402930405e-06, |
| "loss": 2.1206, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.7032967032967034, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 3.003663003663004e-06, |
| "loss": 1.9557, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.706959706959707, |
| "grad_norm": 0.11865234375, |
| "learning_rate": 2.9670329670329673e-06, |
| "loss": 2.0169, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.7106227106227107, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 2.930402930402931e-06, |
| "loss": 2.08, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.1328125, |
| "learning_rate": 2.8937728937728942e-06, |
| "loss": 1.9894, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.717948717948718, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 1.9888, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.7216117216117216, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 2.8205128205128207e-06, |
| "loss": 2.0276, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.7252747252747253, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 2.783882783882784e-06, |
| "loss": 2.0774, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.7289377289377289, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 2.7472527472527476e-06, |
| "loss": 1.9885, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.7326007326007326, |
| "grad_norm": 0.11474609375, |
| "learning_rate": 2.7106227106227106e-06, |
| "loss": 2.0725, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7362637362637363, |
| "grad_norm": 0.140625, |
| "learning_rate": 2.673992673992674e-06, |
| "loss": 2.1302, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.73992673992674, |
| "grad_norm": 0.83984375, |
| "learning_rate": 2.6373626373626375e-06, |
| "loss": 2.0621, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.7435897435897436, |
| "grad_norm": 0.134765625, |
| "learning_rate": 2.600732600732601e-06, |
| "loss": 1.8663, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.7472527472527473, |
| "grad_norm": 0.11767578125, |
| "learning_rate": 2.564102564102564e-06, |
| "loss": 2.0237, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.7509157509157509, |
| "grad_norm": 0.12890625, |
| "learning_rate": 2.5274725274725274e-06, |
| "loss": 2.0746, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.7545787545787546, |
| "grad_norm": 0.14453125, |
| "learning_rate": 2.4908424908424913e-06, |
| "loss": 2.0825, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.7582417582417582, |
| "grad_norm": 0.12109375, |
| "learning_rate": 2.4542124542124543e-06, |
| "loss": 1.9998, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.7619047619047619, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 2.4175824175824177e-06, |
| "loss": 1.8544, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.7655677655677655, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 2.380952380952381e-06, |
| "loss": 1.9558, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 2.3443223443223446e-06, |
| "loss": 1.954, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7728937728937729, |
| "grad_norm": 0.1171875, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 1.8657, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.7765567765567766, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 2.271062271062271e-06, |
| "loss": 1.9396, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.7802197802197802, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 2.2344322344322345e-06, |
| "loss": 2.1369, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.7838827838827839, |
| "grad_norm": 0.11376953125, |
| "learning_rate": 2.197802197802198e-06, |
| "loss": 2.1242, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.7875457875457875, |
| "grad_norm": 0.125, |
| "learning_rate": 2.1611721611721614e-06, |
| "loss": 1.9287, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.7912087912087912, |
| "grad_norm": 0.12353515625, |
| "learning_rate": 2.1245421245421245e-06, |
| "loss": 1.8351, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.7948717948717948, |
| "grad_norm": 0.1181640625, |
| "learning_rate": 2.0879120879120883e-06, |
| "loss": 2.0038, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.7985347985347986, |
| "grad_norm": 0.134765625, |
| "learning_rate": 2.0512820512820513e-06, |
| "loss": 1.9222, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.8021978021978022, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 2.0146520146520148e-06, |
| "loss": 2.0722, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.8058608058608059, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 1.9780219780219782e-06, |
| "loss": 2.1235, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8095238095238095, |
| "grad_norm": 0.1181640625, |
| "learning_rate": 1.9413919413919417e-06, |
| "loss": 2.0746, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.8131868131868132, |
| "grad_norm": 0.1240234375, |
| "learning_rate": 1.904761904761905e-06, |
| "loss": 2.1134, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.8168498168498168, |
| "grad_norm": 0.13671875, |
| "learning_rate": 1.8681318681318684e-06, |
| "loss": 1.8974, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.8205128205128205, |
| "grad_norm": 0.123046875, |
| "learning_rate": 1.8315018315018316e-06, |
| "loss": 2.0427, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.8241758241758241, |
| "grad_norm": 0.146484375, |
| "learning_rate": 1.794871794871795e-06, |
| "loss": 2.0288, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.8278388278388278, |
| "grad_norm": 0.2431640625, |
| "learning_rate": 1.7582417582417585e-06, |
| "loss": 1.9075, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.8315018315018315, |
| "grad_norm": 0.11865234375, |
| "learning_rate": 1.7216117216117217e-06, |
| "loss": 1.9775, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.8351648351648352, |
| "grad_norm": 0.11865234375, |
| "learning_rate": 1.6849816849816852e-06, |
| "loss": 2.1007, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.8388278388278388, |
| "grad_norm": 0.10791015625, |
| "learning_rate": 1.6483516483516484e-06, |
| "loss": 1.9674, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.8424908424908425, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 1.611721611721612e-06, |
| "loss": 1.9472, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8461538461538461, |
| "grad_norm": 0.138671875, |
| "learning_rate": 1.5750915750915753e-06, |
| "loss": 1.9424, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.8498168498168498, |
| "grad_norm": 0.154296875, |
| "learning_rate": 1.5384615384615387e-06, |
| "loss": 1.8776, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.8534798534798534, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 1.501831501831502e-06, |
| "loss": 1.9734, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 1.4652014652014654e-06, |
| "loss": 2.1264, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.8608058608058609, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 1.9834, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.8644688644688645, |
| "grad_norm": 0.125, |
| "learning_rate": 1.391941391941392e-06, |
| "loss": 2.0317, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.8681318681318682, |
| "grad_norm": 0.10888671875, |
| "learning_rate": 1.3553113553113553e-06, |
| "loss": 2.1196, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.8717948717948718, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 1.3186813186813187e-06, |
| "loss": 2.0782, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.8754578754578755, |
| "grad_norm": 0.1171875, |
| "learning_rate": 1.282051282051282e-06, |
| "loss": 2.1013, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.8791208791208791, |
| "grad_norm": 0.11279296875, |
| "learning_rate": 1.2454212454212456e-06, |
| "loss": 2.0802, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8827838827838828, |
| "grad_norm": 0.115234375, |
| "learning_rate": 1.2087912087912089e-06, |
| "loss": 2.0424, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.8864468864468864, |
| "grad_norm": 0.1162109375, |
| "learning_rate": 1.1721611721611723e-06, |
| "loss": 2.0878, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.8901098901098901, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 1.1355311355311355e-06, |
| "loss": 2.0501, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.8937728937728938, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 1.098901098901099e-06, |
| "loss": 1.8274, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.8974358974358975, |
| "grad_norm": 0.1025390625, |
| "learning_rate": 1.0622710622710622e-06, |
| "loss": 2.0722, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.9010989010989011, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 1.0256410256410257e-06, |
| "loss": 1.9583, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.9047619047619048, |
| "grad_norm": 0.1181640625, |
| "learning_rate": 9.890109890109891e-07, |
| "loss": 2.0437, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.9084249084249084, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 9.523809523809525e-07, |
| "loss": 1.9011, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.9120879120879121, |
| "grad_norm": 0.13671875, |
| "learning_rate": 9.157509157509158e-07, |
| "loss": 1.9985, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.9157509157509157, |
| "grad_norm": 0.1328125, |
| "learning_rate": 8.791208791208792e-07, |
| "loss": 2.1273, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9194139194139194, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 8.424908424908426e-07, |
| "loss": 2.1925, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 8.05860805860806e-07, |
| "loss": 2.0017, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.9267399267399268, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 7.692307692307694e-07, |
| "loss": 2.1022, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.9304029304029304, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 7.326007326007327e-07, |
| "loss": 2.2094, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.9340659340659341, |
| "grad_norm": 0.13671875, |
| "learning_rate": 6.95970695970696e-07, |
| "loss": 1.667, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.9377289377289377, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 6.593406593406594e-07, |
| "loss": 2.0756, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.9413919413919414, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 6.227106227106228e-07, |
| "loss": 1.9019, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.945054945054945, |
| "grad_norm": 0.1083984375, |
| "learning_rate": 5.860805860805862e-07, |
| "loss": 2.1588, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.9487179487179487, |
| "grad_norm": 0.142578125, |
| "learning_rate": 5.494505494505495e-07, |
| "loss": 1.9677, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 5.128205128205128e-07, |
| "loss": 1.8556, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9560439560439561, |
| "grad_norm": 0.11474609375, |
| "learning_rate": 4.7619047619047623e-07, |
| "loss": 2.0772, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.9597069597069597, |
| "grad_norm": 0.12353515625, |
| "learning_rate": 4.395604395604396e-07, |
| "loss": 2.0482, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.9633699633699634, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 4.02930402930403e-07, |
| "loss": 2.0276, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.967032967032967, |
| "grad_norm": 0.130859375, |
| "learning_rate": 3.6630036630036635e-07, |
| "loss": 2.037, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.9706959706959707, |
| "grad_norm": 0.115234375, |
| "learning_rate": 3.296703296703297e-07, |
| "loss": 2.1114, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.9743589743589743, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 2.930402930402931e-07, |
| "loss": 2.0338, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.978021978021978, |
| "grad_norm": 0.107421875, |
| "learning_rate": 2.564102564102564e-07, |
| "loss": 2.0252, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.9816849816849816, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 2.197802197802198e-07, |
| "loss": 1.9975, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.9853479853479854, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 1.8315018315018317e-07, |
| "loss": 2.1205, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.989010989010989, |
| "grad_norm": 0.11767578125, |
| "learning_rate": 1.4652014652014654e-07, |
| "loss": 1.9853, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9926739926739927, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 1.098901098901099e-07, |
| "loss": 2.0503, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.9963369963369964, |
| "grad_norm": 0.12353515625, |
| "learning_rate": 7.326007326007327e-08, |
| "loss": 1.9472, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 3.6630036630036635e-08, |
| "loss": 2.2143, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 2.0474956035614014, |
| "eval_runtime": 10.4605, |
| "eval_samples_per_second": 2.868, |
| "eval_steps_per_second": 0.382, |
| "step": 273 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 273, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.435856337025106e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|