| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 273, |
| "global_step": 273, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003663003663003663, |
| "grad_norm": 0.28515625, |
| "learning_rate": 1e-05, |
| "loss": 2.4491, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.007326007326007326, |
| "grad_norm": 0.29296875, |
| "learning_rate": 9.963369963369965e-06, |
| "loss": 2.1232, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01098901098901099, |
| "grad_norm": 0.25390625, |
| "learning_rate": 9.926739926739928e-06, |
| "loss": 2.1596, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.014652014652014652, |
| "grad_norm": 0.33203125, |
| "learning_rate": 9.890109890109892e-06, |
| "loss": 2.3652, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.018315018315018316, |
| "grad_norm": 0.48828125, |
| "learning_rate": 9.853479853479855e-06, |
| "loss": 2.0435, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02197802197802198, |
| "grad_norm": 0.263671875, |
| "learning_rate": 9.816849816849817e-06, |
| "loss": 2.0464, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02564102564102564, |
| "grad_norm": 0.25390625, |
| "learning_rate": 9.780219780219781e-06, |
| "loss": 2.2135, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.029304029304029304, |
| "grad_norm": 0.2373046875, |
| "learning_rate": 9.743589743589744e-06, |
| "loss": 2.1267, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03296703296703297, |
| "grad_norm": 0.283203125, |
| "learning_rate": 9.706959706959708e-06, |
| "loss": 1.9772, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.03663003663003663, |
| "grad_norm": 0.287109375, |
| "learning_rate": 9.670329670329671e-06, |
| "loss": 2.2541, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.040293040293040296, |
| "grad_norm": 0.259765625, |
| "learning_rate": 9.633699633699635e-06, |
| "loss": 2.0382, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.04395604395604396, |
| "grad_norm": 0.26171875, |
| "learning_rate": 9.597069597069598e-06, |
| "loss": 2.0414, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.047619047619047616, |
| "grad_norm": 0.3203125, |
| "learning_rate": 9.560439560439562e-06, |
| "loss": 2.0746, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.05128205128205128, |
| "grad_norm": 0.267578125, |
| "learning_rate": 9.523809523809525e-06, |
| "loss": 2.2324, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.054945054945054944, |
| "grad_norm": 0.361328125, |
| "learning_rate": 9.487179487179487e-06, |
| "loss": 1.9576, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05860805860805861, |
| "grad_norm": 0.39453125, |
| "learning_rate": 9.450549450549452e-06, |
| "loss": 2.0349, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.06227106227106227, |
| "grad_norm": 0.240234375, |
| "learning_rate": 9.413919413919414e-06, |
| "loss": 2.0768, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.06593406593406594, |
| "grad_norm": 0.232421875, |
| "learning_rate": 9.377289377289379e-06, |
| "loss": 2.133, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0695970695970696, |
| "grad_norm": 0.2392578125, |
| "learning_rate": 9.340659340659341e-06, |
| "loss": 2.0385, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.07326007326007326, |
| "grad_norm": 0.267578125, |
| "learning_rate": 9.304029304029305e-06, |
| "loss": 2.0351, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07692307692307693, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 9.267399267399268e-06, |
| "loss": 2.1407, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.08058608058608059, |
| "grad_norm": 0.16015625, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 2.1146, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.08424908424908426, |
| "grad_norm": 0.16796875, |
| "learning_rate": 9.194139194139195e-06, |
| "loss": 1.968, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.08791208791208792, |
| "grad_norm": 0.1875, |
| "learning_rate": 9.157509157509158e-06, |
| "loss": 2.1025, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.09157509157509157, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 9.120879120879122e-06, |
| "loss": 2.1392, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.09523809523809523, |
| "grad_norm": 0.140625, |
| "learning_rate": 9.084249084249084e-06, |
| "loss": 2.1334, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0989010989010989, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 9.047619047619049e-06, |
| "loss": 2.0976, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.10256410256410256, |
| "grad_norm": 0.23046875, |
| "learning_rate": 9.010989010989011e-06, |
| "loss": 2.1485, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.10622710622710622, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 8.974358974358976e-06, |
| "loss": 1.9981, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.10989010989010989, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 8.937728937728938e-06, |
| "loss": 2.1841, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.11355311355311355, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 8.9010989010989e-06, |
| "loss": 1.8101, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.11721611721611722, |
| "grad_norm": 0.15234375, |
| "learning_rate": 8.864468864468865e-06, |
| "loss": 2.2158, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.12087912087912088, |
| "grad_norm": 0.17578125, |
| "learning_rate": 8.827838827838828e-06, |
| "loss": 1.946, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.12454212454212454, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 8.791208791208792e-06, |
| "loss": 2.0068, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.1282051282051282, |
| "grad_norm": 0.134765625, |
| "learning_rate": 8.754578754578755e-06, |
| "loss": 2.0567, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.13186813186813187, |
| "grad_norm": 0.146484375, |
| "learning_rate": 8.717948717948719e-06, |
| "loss": 2.0229, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.13553113553113552, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 8.681318681318681e-06, |
| "loss": 2.1425, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.1391941391941392, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 8.644688644688646e-06, |
| "loss": 2.0344, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 8.60805860805861e-06, |
| "loss": 2.0068, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.14652014652014653, |
| "grad_norm": 0.224609375, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 1.8943, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.15018315018315018, |
| "grad_norm": 0.14453125, |
| "learning_rate": 8.534798534798535e-06, |
| "loss": 2.1059, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 8.498168498168498e-06, |
| "loss": 1.907, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1575091575091575, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 8.461538461538462e-06, |
| "loss": 2.1064, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.16117216117216118, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 8.424908424908426e-06, |
| "loss": 2.0486, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.16483516483516483, |
| "grad_norm": 0.115234375, |
| "learning_rate": 8.388278388278389e-06, |
| "loss": 2.1213, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1684981684981685, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 8.351648351648353e-06, |
| "loss": 2.1197, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.17216117216117216, |
| "grad_norm": 0.140625, |
| "learning_rate": 8.315018315018316e-06, |
| "loss": 1.9972, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.17582417582417584, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 8.278388278388278e-06, |
| "loss": 1.7985, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1794871794871795, |
| "grad_norm": 0.150390625, |
| "learning_rate": 8.241758241758243e-06, |
| "loss": 2.1753, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.18315018315018314, |
| "grad_norm": 0.17578125, |
| "learning_rate": 8.205128205128205e-06, |
| "loss": 2.1589, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.18681318681318682, |
| "grad_norm": 0.130859375, |
| "learning_rate": 8.16849816849817e-06, |
| "loss": 2.0744, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.19047619047619047, |
| "grad_norm": 0.134765625, |
| "learning_rate": 8.131868131868132e-06, |
| "loss": 2.1888, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.19413919413919414, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 8.095238095238097e-06, |
| "loss": 1.9494, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1978021978021978, |
| "grad_norm": 0.140625, |
| "learning_rate": 8.058608058608059e-06, |
| "loss": 1.8701, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.20146520146520147, |
| "grad_norm": 0.1123046875, |
| "learning_rate": 8.021978021978023e-06, |
| "loss": 2.0446, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.20512820512820512, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 7.985347985347986e-06, |
| "loss": 2.03, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.2087912087912088, |
| "grad_norm": 0.12890625, |
| "learning_rate": 7.948717948717949e-06, |
| "loss": 1.9154, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.21245421245421245, |
| "grad_norm": 0.236328125, |
| "learning_rate": 7.912087912087913e-06, |
| "loss": 2.1308, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.21611721611721613, |
| "grad_norm": 0.11279296875, |
| "learning_rate": 7.875457875457876e-06, |
| "loss": 2.1953, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.21978021978021978, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 7.83882783882784e-06, |
| "loss": 2.0663, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.22344322344322345, |
| "grad_norm": 0.130859375, |
| "learning_rate": 7.802197802197802e-06, |
| "loss": 1.838, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.2271062271062271, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 7.765567765567767e-06, |
| "loss": 1.9511, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 0.115234375, |
| "learning_rate": 7.72893772893773e-06, |
| "loss": 2.0857, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.23443223443223443, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 1.8574, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.23809523809523808, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 7.655677655677656e-06, |
| "loss": 2.047, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.24175824175824176, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 7.61904761904762e-06, |
| "loss": 2.0204, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.2454212454212454, |
| "grad_norm": 0.16796875, |
| "learning_rate": 7.582417582417583e-06, |
| "loss": 2.0828, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2490842490842491, |
| "grad_norm": 0.15625, |
| "learning_rate": 7.5457875457875465e-06, |
| "loss": 2.02, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.25274725274725274, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 7.50915750915751e-06, |
| "loss": 1.8749, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 0.154296875, |
| "learning_rate": 7.472527472527473e-06, |
| "loss": 1.9276, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2600732600732601, |
| "grad_norm": 0.12109375, |
| "learning_rate": 7.435897435897437e-06, |
| "loss": 2.2043, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.26373626373626374, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 7.3992673992674e-06, |
| "loss": 1.7546, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.2673992673992674, |
| "grad_norm": 0.14453125, |
| "learning_rate": 7.362637362637364e-06, |
| "loss": 1.9699, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.27106227106227104, |
| "grad_norm": 0.119140625, |
| "learning_rate": 7.326007326007326e-06, |
| "loss": 2.1321, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.27472527472527475, |
| "grad_norm": 0.12109375, |
| "learning_rate": 7.28937728937729e-06, |
| "loss": 2.0409, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2783882783882784, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 7.252747252747253e-06, |
| "loss": 2.0202, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.28205128205128205, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 7.216117216117217e-06, |
| "loss": 2.0567, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.111328125, |
| "learning_rate": 7.17948717948718e-06, |
| "loss": 1.9711, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2893772893772894, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 2.0943, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.29304029304029305, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 7.106227106227107e-06, |
| "loss": 2.1033, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2967032967032967, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 7.0695970695970705e-06, |
| "loss": 2.0933, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.30036630036630035, |
| "grad_norm": 0.123046875, |
| "learning_rate": 7.032967032967034e-06, |
| "loss": 1.9606, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.304029304029304, |
| "grad_norm": 0.287109375, |
| "learning_rate": 6.9963369963369965e-06, |
| "loss": 2.0986, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 6.95970695970696e-06, |
| "loss": 2.0522, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.31135531135531136, |
| "grad_norm": 0.14453125, |
| "learning_rate": 6.923076923076923e-06, |
| "loss": 2.0038, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.315018315018315, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 6.886446886446887e-06, |
| "loss": 1.93, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.31868131868131866, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 6.84981684981685e-06, |
| "loss": 2.0896, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.32234432234432236, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 6.813186813186814e-06, |
| "loss": 2.098, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.326007326007326, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 6.776556776556777e-06, |
| "loss": 1.9145, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.32967032967032966, |
| "grad_norm": 0.10400390625, |
| "learning_rate": 6.739926739926741e-06, |
| "loss": 2.0812, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.115234375, |
| "learning_rate": 6.703296703296703e-06, |
| "loss": 1.9889, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.336996336996337, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 2.1382, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.34065934065934067, |
| "grad_norm": 0.1181640625, |
| "learning_rate": 6.63003663003663e-06, |
| "loss": 1.9468, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3443223443223443, |
| "grad_norm": 0.1953125, |
| "learning_rate": 6.5934065934065935e-06, |
| "loss": 2.1151, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.34798534798534797, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 6.556776556776557e-06, |
| "loss": 1.9844, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3516483516483517, |
| "grad_norm": 0.138671875, |
| "learning_rate": 6.5201465201465204e-06, |
| "loss": 2.0139, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.3553113553113553, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 6.483516483516485e-06, |
| "loss": 2.0162, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.358974358974359, |
| "grad_norm": 0.1328125, |
| "learning_rate": 6.446886446886448e-06, |
| "loss": 1.8283, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3626373626373626, |
| "grad_norm": 0.1181640625, |
| "learning_rate": 6.410256410256412e-06, |
| "loss": 1.986, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.3663003663003663, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 6.373626373626373e-06, |
| "loss": 1.7326, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.36996336996337, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 6.336996336996337e-06, |
| "loss": 1.9385, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.37362637362637363, |
| "grad_norm": 0.12890625, |
| "learning_rate": 6.300366300366301e-06, |
| "loss": 1.912, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3772893772893773, |
| "grad_norm": 0.138671875, |
| "learning_rate": 6.2637362637362645e-06, |
| "loss": 2.1039, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.38095238095238093, |
| "grad_norm": 0.11083984375, |
| "learning_rate": 6.227106227106228e-06, |
| "loss": 2.0899, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 6.1904761904761914e-06, |
| "loss": 1.8942, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3882783882783883, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 1.9758, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.39194139194139194, |
| "grad_norm": 0.10498046875, |
| "learning_rate": 6.117216117216118e-06, |
| "loss": 2.0461, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3956043956043956, |
| "grad_norm": 0.138671875, |
| "learning_rate": 6.080586080586081e-06, |
| "loss": 2.1192, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3992673992673993, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 6.043956043956044e-06, |
| "loss": 1.8223, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.40293040293040294, |
| "grad_norm": 0.1220703125, |
| "learning_rate": 6.007326007326008e-06, |
| "loss": 1.9999, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4065934065934066, |
| "grad_norm": 0.11865234375, |
| "learning_rate": 5.970695970695971e-06, |
| "loss": 2.0462, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.41025641025641024, |
| "grad_norm": 0.1220703125, |
| "learning_rate": 5.934065934065935e-06, |
| "loss": 1.968, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.4139194139194139, |
| "grad_norm": 0.125, |
| "learning_rate": 5.897435897435898e-06, |
| "loss": 1.9285, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.4175824175824176, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 5.860805860805862e-06, |
| "loss": 2.0344, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.42124542124542125, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 5.824175824175825e-06, |
| "loss": 2.2391, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.4249084249084249, |
| "grad_norm": 0.12890625, |
| "learning_rate": 5.7875457875457885e-06, |
| "loss": 1.8632, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.11767578125, |
| "learning_rate": 5.750915750915751e-06, |
| "loss": 2.0129, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.43223443223443225, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 2.048, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.4358974358974359, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 5.677655677655678e-06, |
| "loss": 2.0589, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.43956043956043955, |
| "grad_norm": 0.1220703125, |
| "learning_rate": 5.641025641025641e-06, |
| "loss": 2.0783, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4432234432234432, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 5.604395604395605e-06, |
| "loss": 1.8234, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.4468864468864469, |
| "grad_norm": 0.126953125, |
| "learning_rate": 5.567765567765568e-06, |
| "loss": 2.1897, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.45054945054945056, |
| "grad_norm": 0.1328125, |
| "learning_rate": 5.531135531135532e-06, |
| "loss": 1.9739, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.4542124542124542, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 5.494505494505495e-06, |
| "loss": 2.0972, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.45787545787545786, |
| "grad_norm": 0.140625, |
| "learning_rate": 5.457875457875458e-06, |
| "loss": 1.944, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 0.1484375, |
| "learning_rate": 5.421245421245421e-06, |
| "loss": 1.9197, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.4652014652014652, |
| "grad_norm": 0.126953125, |
| "learning_rate": 5.384615384615385e-06, |
| "loss": 2.0027, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.46886446886446886, |
| "grad_norm": 0.1484375, |
| "learning_rate": 5.347985347985348e-06, |
| "loss": 2.0159, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.4725274725274725, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 5.3113553113553116e-06, |
| "loss": 1.8513, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 5.274725274725275e-06, |
| "loss": 2.1638, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.47985347985347987, |
| "grad_norm": 0.10791015625, |
| "learning_rate": 5.2380952380952384e-06, |
| "loss": 2.1176, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.4835164835164835, |
| "grad_norm": 0.236328125, |
| "learning_rate": 5.201465201465202e-06, |
| "loss": 2.0379, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.48717948717948717, |
| "grad_norm": 0.10546875, |
| "learning_rate": 5.164835164835166e-06, |
| "loss": 2.0576, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.4908424908424908, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 5.128205128205128e-06, |
| "loss": 2.0361, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.4945054945054945, |
| "grad_norm": 0.10595703125, |
| "learning_rate": 5.091575091575091e-06, |
| "loss": 2.0052, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4981684981684982, |
| "grad_norm": 0.123046875, |
| "learning_rate": 5.054945054945055e-06, |
| "loss": 2.042, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.5018315018315018, |
| "grad_norm": 0.1171875, |
| "learning_rate": 5.018315018315018e-06, |
| "loss": 1.9879, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.5054945054945055, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 4.9816849816849826e-06, |
| "loss": 2.0647, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.5091575091575091, |
| "grad_norm": 0.12109375, |
| "learning_rate": 4.945054945054946e-06, |
| "loss": 2.0464, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 0.140625, |
| "learning_rate": 4.908424908424909e-06, |
| "loss": 2.037, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5164835164835165, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 4.871794871794872e-06, |
| "loss": 1.7685, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.5201465201465202, |
| "grad_norm": 0.12890625, |
| "learning_rate": 4.8351648351648355e-06, |
| "loss": 2.1747, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.5238095238095238, |
| "grad_norm": 0.126953125, |
| "learning_rate": 4.798534798534799e-06, |
| "loss": 2.0855, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.5274725274725275, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 4.761904761904762e-06, |
| "loss": 2.0099, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.5311355311355311, |
| "grad_norm": 0.1142578125, |
| "learning_rate": 4.725274725274726e-06, |
| "loss": 2.0641, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5347985347985348, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 4.688644688644689e-06, |
| "loss": 1.9309, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.5384615384615384, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 4.652014652014653e-06, |
| "loss": 2.047, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.5421245421245421, |
| "grad_norm": 0.1240234375, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 2.0111, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.5457875457875457, |
| "grad_norm": 0.126953125, |
| "learning_rate": 4.578754578754579e-06, |
| "loss": 2.0847, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.5494505494505495, |
| "grad_norm": 0.119140625, |
| "learning_rate": 4.542124542124542e-06, |
| "loss": 2.0996, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5531135531135531, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 4.505494505494506e-06, |
| "loss": 2.0165, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.5567765567765568, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 4.468864468864469e-06, |
| "loss": 1.9413, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.5604395604395604, |
| "grad_norm": 0.173828125, |
| "learning_rate": 4.4322344322344325e-06, |
| "loss": 1.8553, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.5641025641025641, |
| "grad_norm": 0.10986328125, |
| "learning_rate": 4.395604395604396e-06, |
| "loss": 1.8789, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.5677655677655677, |
| "grad_norm": 0.14453125, |
| "learning_rate": 4.358974358974359e-06, |
| "loss": 1.9633, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 4.322344322344323e-06, |
| "loss": 1.9861, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.575091575091575, |
| "grad_norm": 0.109375, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": 2.146, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5787545787545788, |
| "grad_norm": 0.10986328125, |
| "learning_rate": 4.249084249084249e-06, |
| "loss": 2.0163, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5824175824175825, |
| "grad_norm": 0.119140625, |
| "learning_rate": 4.212454212454213e-06, |
| "loss": 2.0142, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.5860805860805861, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 4.175824175824177e-06, |
| "loss": 1.9965, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5897435897435898, |
| "grad_norm": 0.11572265625, |
| "learning_rate": 4.139194139194139e-06, |
| "loss": 2.0379, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.5934065934065934, |
| "grad_norm": 0.11279296875, |
| "learning_rate": 4.102564102564103e-06, |
| "loss": 2.1424, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5970695970695971, |
| "grad_norm": 0.1328125, |
| "learning_rate": 4.065934065934066e-06, |
| "loss": 2.0301, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.6007326007326007, |
| "grad_norm": 0.134765625, |
| "learning_rate": 4.0293040293040296e-06, |
| "loss": 2.067, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.6043956043956044, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 3.992673992673993e-06, |
| "loss": 1.8831, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.608058608058608, |
| "grad_norm": 0.61328125, |
| "learning_rate": 3.9560439560439565e-06, |
| "loss": 1.9485, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.6117216117216118, |
| "grad_norm": 0.1875, |
| "learning_rate": 3.91941391941392e-06, |
| "loss": 1.9539, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.1181640625, |
| "learning_rate": 3.882783882783883e-06, |
| "loss": 2.1058, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.6190476190476191, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 1.9176, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.6227106227106227, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 3.80952380952381e-06, |
| "loss": 1.964, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6263736263736264, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 3.7728937728937733e-06, |
| "loss": 1.827, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.63003663003663, |
| "grad_norm": 0.138671875, |
| "learning_rate": 3.7362637362637367e-06, |
| "loss": 2.0337, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.6336996336996337, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 3.6996336996337e-06, |
| "loss": 1.9798, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.6373626373626373, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 3.663003663003663e-06, |
| "loss": 2.0426, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.6410256410256411, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 3.6263736263736266e-06, |
| "loss": 2.1581, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6446886446886447, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 3.58974358974359e-06, |
| "loss": 1.6641, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.6483516483516484, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 3.5531135531135535e-06, |
| "loss": 2.1157, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.652014652014652, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 3.516483516483517e-06, |
| "loss": 1.9187, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.6556776556776557, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 3.47985347985348e-06, |
| "loss": 2.0353, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.6593406593406593, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 3.4432234432234434e-06, |
| "loss": 1.9037, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.663003663003663, |
| "grad_norm": 0.11083984375, |
| "learning_rate": 3.406593406593407e-06, |
| "loss": 2.1575, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.12109375, |
| "learning_rate": 3.3699633699633703e-06, |
| "loss": 2.0865, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.6703296703296703, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.9694, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.673992673992674, |
| "grad_norm": 0.125, |
| "learning_rate": 3.2967032967032968e-06, |
| "loss": 1.9256, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.6776556776556777, |
| "grad_norm": 0.109375, |
| "learning_rate": 3.2600732600732602e-06, |
| "loss": 2.0214, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6813186813186813, |
| "grad_norm": 0.13671875, |
| "learning_rate": 3.223443223443224e-06, |
| "loss": 2.0439, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.684981684981685, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 3.1868131868131867e-06, |
| "loss": 2.0877, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.6886446886446886, |
| "grad_norm": 0.12109375, |
| "learning_rate": 3.1501831501831505e-06, |
| "loss": 1.9943, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 3.113553113553114e-06, |
| "loss": 1.9425, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.6959706959706959, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 2.1475, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6996336996336996, |
| "grad_norm": 0.119140625, |
| "learning_rate": 3.0402930402930405e-06, |
| "loss": 2.12, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.7032967032967034, |
| "grad_norm": 0.11474609375, |
| "learning_rate": 3.003663003663004e-06, |
| "loss": 1.9553, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.706959706959707, |
| "grad_norm": 0.11767578125, |
| "learning_rate": 2.9670329670329673e-06, |
| "loss": 2.0165, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.7106227106227107, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 2.930402930402931e-06, |
| "loss": 2.0797, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.10986328125, |
| "learning_rate": 2.8937728937728942e-06, |
| "loss": 1.9891, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.717948717948718, |
| "grad_norm": 0.125, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 1.9883, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.7216117216117216, |
| "grad_norm": 0.115234375, |
| "learning_rate": 2.8205128205128207e-06, |
| "loss": 2.0272, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.7252747252747253, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 2.783882783882784e-06, |
| "loss": 2.0771, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.7289377289377289, |
| "grad_norm": 0.1103515625, |
| "learning_rate": 2.7472527472527476e-06, |
| "loss": 1.9882, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.7326007326007326, |
| "grad_norm": 0.111328125, |
| "learning_rate": 2.7106227106227106e-06, |
| "loss": 2.0723, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7362637362637363, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 2.673992673992674e-06, |
| "loss": 2.13, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.73992673992674, |
| "grad_norm": 0.142578125, |
| "learning_rate": 2.6373626373626375e-06, |
| "loss": 2.0619, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.7435897435897436, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 2.600732600732601e-06, |
| "loss": 1.8662, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.7472527472527473, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 2.564102564102564e-06, |
| "loss": 2.023, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.7509157509157509, |
| "grad_norm": 0.201171875, |
| "learning_rate": 2.5274725274725274e-06, |
| "loss": 2.0744, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.7545787545787546, |
| "grad_norm": 0.142578125, |
| "learning_rate": 2.4908424908424913e-06, |
| "loss": 2.0821, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.7582417582417582, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 2.4542124542124543e-06, |
| "loss": 1.9993, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.7619047619047619, |
| "grad_norm": 0.1240234375, |
| "learning_rate": 2.4175824175824177e-06, |
| "loss": 1.8539, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.7655677655677655, |
| "grad_norm": 0.11376953125, |
| "learning_rate": 2.380952380952381e-06, |
| "loss": 1.9552, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.1142578125, |
| "learning_rate": 2.3443223443223446e-06, |
| "loss": 1.9538, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7728937728937729, |
| "grad_norm": 0.8046875, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 1.8655, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.7765567765567766, |
| "grad_norm": 0.11865234375, |
| "learning_rate": 2.271062271062271e-06, |
| "loss": 1.9395, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.7802197802197802, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 2.2344322344322345e-06, |
| "loss": 2.1366, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.7838827838827839, |
| "grad_norm": 0.111328125, |
| "learning_rate": 2.197802197802198e-06, |
| "loss": 2.1238, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.7875457875457875, |
| "grad_norm": 0.12890625, |
| "learning_rate": 2.1611721611721614e-06, |
| "loss": 1.9283, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.7912087912087912, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 2.1245421245421245e-06, |
| "loss": 1.8349, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.7948717948717948, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 2.0879120879120883e-06, |
| "loss": 2.0036, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.7985347985347986, |
| "grad_norm": 0.16015625, |
| "learning_rate": 2.0512820512820513e-06, |
| "loss": 1.9215, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.8021978021978022, |
| "grad_norm": 0.119140625, |
| "learning_rate": 2.0146520146520148e-06, |
| "loss": 2.0719, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.8058608058608059, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 1.9780219780219782e-06, |
| "loss": 2.1231, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8095238095238095, |
| "grad_norm": 0.1171875, |
| "learning_rate": 1.9413919413919417e-06, |
| "loss": 2.0741, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.8131868131868132, |
| "grad_norm": 0.11865234375, |
| "learning_rate": 1.904761904761905e-06, |
| "loss": 2.1134, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.8168498168498168, |
| "grad_norm": 0.12890625, |
| "learning_rate": 1.8681318681318684e-06, |
| "loss": 1.8974, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.8205128205128205, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 1.8315018315018316e-06, |
| "loss": 2.0421, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.8241758241758241, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 1.794871794871795e-06, |
| "loss": 2.0287, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.8278388278388278, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 1.7582417582417585e-06, |
| "loss": 1.9072, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.8315018315018315, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 1.7216117216117217e-06, |
| "loss": 1.9773, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.8351648351648352, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 1.6849816849816852e-06, |
| "loss": 2.1004, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.8388278388278388, |
| "grad_norm": 0.109375, |
| "learning_rate": 1.6483516483516484e-06, |
| "loss": 1.9671, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.8424908424908425, |
| "grad_norm": 0.14453125, |
| "learning_rate": 1.611721611721612e-06, |
| "loss": 1.9465, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8461538461538461, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 1.5750915750915753e-06, |
| "loss": 1.9421, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.8498168498168498, |
| "grad_norm": 0.15234375, |
| "learning_rate": 1.5384615384615387e-06, |
| "loss": 1.8772, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.8534798534798534, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 1.501831501831502e-06, |
| "loss": 1.9729, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 1.4652014652014654e-06, |
| "loss": 2.1261, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.8608058608058609, |
| "grad_norm": 0.169921875, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 1.9833, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.8644688644688645, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 1.391941391941392e-06, |
| "loss": 2.0312, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.8681318681318682, |
| "grad_norm": 0.10791015625, |
| "learning_rate": 1.3553113553113553e-06, |
| "loss": 2.1189, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.8717948717948718, |
| "grad_norm": 0.13671875, |
| "learning_rate": 1.3186813186813187e-06, |
| "loss": 2.0782, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.8754578754578755, |
| "grad_norm": 0.11962890625, |
| "learning_rate": 1.282051282051282e-06, |
| "loss": 2.101, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.8791208791208791, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 1.2454212454212456e-06, |
| "loss": 2.0797, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8827838827838828, |
| "grad_norm": 0.11474609375, |
| "learning_rate": 1.2087912087912089e-06, |
| "loss": 2.0421, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.8864468864468864, |
| "grad_norm": 0.11376953125, |
| "learning_rate": 1.1721611721611723e-06, |
| "loss": 2.0875, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.8901098901098901, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 1.1355311355311355e-06, |
| "loss": 2.0497, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.8937728937728938, |
| "grad_norm": 0.1640625, |
| "learning_rate": 1.098901098901099e-06, |
| "loss": 1.8271, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.8974358974358975, |
| "grad_norm": 0.09912109375, |
| "learning_rate": 1.0622710622710622e-06, |
| "loss": 2.0719, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.9010989010989011, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.0256410256410257e-06, |
| "loss": 1.9579, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.9047619047619048, |
| "grad_norm": 0.12353515625, |
| "learning_rate": 9.890109890109891e-07, |
| "loss": 2.0435, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.9084249084249084, |
| "grad_norm": 0.1142578125, |
| "learning_rate": 9.523809523809525e-07, |
| "loss": 1.9005, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.9120879120879121, |
| "grad_norm": 0.126953125, |
| "learning_rate": 9.157509157509158e-07, |
| "loss": 1.998, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.9157509157509157, |
| "grad_norm": 0.146484375, |
| "learning_rate": 8.791208791208792e-07, |
| "loss": 2.1269, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9194139194139194, |
| "grad_norm": 0.126953125, |
| "learning_rate": 8.424908424908426e-07, |
| "loss": 2.192, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.443359375, |
| "learning_rate": 8.05860805860806e-07, |
| "loss": 2.0013, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.9267399267399268, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 7.692307692307694e-07, |
| "loss": 2.1018, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.9304029304029304, |
| "grad_norm": 0.150390625, |
| "learning_rate": 7.326007326007327e-07, |
| "loss": 2.2087, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.9340659340659341, |
| "grad_norm": 0.14453125, |
| "learning_rate": 6.95970695970696e-07, |
| "loss": 1.6666, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.9377289377289377, |
| "grad_norm": 0.1123046875, |
| "learning_rate": 6.593406593406594e-07, |
| "loss": 2.0752, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.9413919413919414, |
| "grad_norm": 0.1328125, |
| "learning_rate": 6.227106227106228e-07, |
| "loss": 1.9017, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.945054945054945, |
| "grad_norm": 0.1064453125, |
| "learning_rate": 5.860805860805862e-07, |
| "loss": 2.1586, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.9487179487179487, |
| "grad_norm": 0.12890625, |
| "learning_rate": 5.494505494505495e-07, |
| "loss": 1.9673, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.11572265625, |
| "learning_rate": 5.128205128205128e-07, |
| "loss": 1.8555, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9560439560439561, |
| "grad_norm": 0.10595703125, |
| "learning_rate": 4.7619047619047623e-07, |
| "loss": 2.0766, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.9597069597069597, |
| "grad_norm": 0.12890625, |
| "learning_rate": 4.395604395604396e-07, |
| "loss": 2.0478, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.9633699633699634, |
| "grad_norm": 0.150390625, |
| "learning_rate": 4.02930402930403e-07, |
| "loss": 2.0272, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.967032967032967, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 3.6630036630036635e-07, |
| "loss": 2.0367, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.9706959706959707, |
| "grad_norm": 0.1162109375, |
| "learning_rate": 3.296703296703297e-07, |
| "loss": 2.1111, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.9743589743589743, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 2.930402930402931e-07, |
| "loss": 2.0334, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.978021978021978, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 2.564102564102564e-07, |
| "loss": 2.0251, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.9816849816849816, |
| "grad_norm": 0.119140625, |
| "learning_rate": 2.197802197802198e-07, |
| "loss": 1.9969, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.9853479853479854, |
| "grad_norm": 0.123046875, |
| "learning_rate": 1.8315018315018317e-07, |
| "loss": 2.1203, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.989010989010989, |
| "grad_norm": 0.11474609375, |
| "learning_rate": 1.4652014652014654e-07, |
| "loss": 1.9849, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9926739926739927, |
| "grad_norm": 0.251953125, |
| "learning_rate": 1.098901098901099e-07, |
| "loss": 2.0497, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.9963369963369964, |
| "grad_norm": 0.1220703125, |
| "learning_rate": 7.326007326007327e-08, |
| "loss": 1.9467, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 3.6630036630036635e-08, |
| "loss": 2.2138, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 2.047095537185669, |
| "eval_runtime": 10.3149, |
| "eval_samples_per_second": 2.908, |
| "eval_steps_per_second": 0.388, |
| "step": 273 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 273, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.435856337025106e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|