{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9867256637168142,
  "eval_steps": 500,
  "global_step": 225,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01327433628318584,
      "grad_norm": 7.501493793577306,
      "learning_rate": 4.347826086956522e-07,
      "loss": 1.2083,
      "step": 1
    },
    {
      "epoch": 0.02654867256637168,
      "grad_norm": 7.69893637940224,
      "learning_rate": 8.695652173913044e-07,
      "loss": 1.2329,
      "step": 2
    },
    {
      "epoch": 0.03982300884955752,
      "grad_norm": 7.606118479356691,
      "learning_rate": 1.3043478260869566e-06,
      "loss": 1.2323,
      "step": 3
    },
    {
      "epoch": 0.05309734513274336,
      "grad_norm": 7.175916361320481,
      "learning_rate": 1.7391304347826088e-06,
      "loss": 1.1988,
      "step": 4
    },
    {
      "epoch": 0.06637168141592921,
      "grad_norm": 6.935826297903381,
      "learning_rate": 2.173913043478261e-06,
      "loss": 1.2046,
      "step": 5
    },
    {
      "epoch": 0.07964601769911504,
      "grad_norm": 5.549054007536664,
      "learning_rate": 2.6086956521739132e-06,
      "loss": 1.1353,
      "step": 6
    },
    {
      "epoch": 0.09292035398230089,
      "grad_norm": 5.031745386989603,
      "learning_rate": 3.043478260869566e-06,
      "loss": 1.1348,
      "step": 7
    },
    {
      "epoch": 0.10619469026548672,
      "grad_norm": 2.737042220164658,
      "learning_rate": 3.4782608695652175e-06,
      "loss": 1.0683,
      "step": 8
    },
    {
      "epoch": 0.11946902654867257,
      "grad_norm": 2.3132871718286463,
      "learning_rate": 3.91304347826087e-06,
      "loss": 1.0536,
      "step": 9
    },
    {
      "epoch": 0.13274336283185842,
      "grad_norm": 2.7666200466111754,
      "learning_rate": 4.347826086956522e-06,
      "loss": 1.0506,
      "step": 10
    },
    {
      "epoch": 0.14601769911504425,
      "grad_norm": 4.090979262135771,
      "learning_rate": 4.782608695652174e-06,
      "loss": 1.0238,
      "step": 11
    },
    {
      "epoch": 0.1592920353982301,
      "grad_norm": 4.142166776364045,
      "learning_rate": 5.2173913043478265e-06,
      "loss": 1.0412,
      "step": 12
    },
    {
      "epoch": 0.17256637168141592,
      "grad_norm": 3.79529965630404,
      "learning_rate": 5.652173913043479e-06,
      "loss": 1.0377,
      "step": 13
    },
    {
      "epoch": 0.18584070796460178,
      "grad_norm": 2.6054414904303718,
      "learning_rate": 6.086956521739132e-06,
      "loss": 0.9789,
      "step": 14
    },
    {
      "epoch": 0.19911504424778761,
      "grad_norm": 2.627668862856322,
      "learning_rate": 6.521739130434783e-06,
      "loss": 0.9534,
      "step": 15
    },
    {
      "epoch": 0.21238938053097345,
      "grad_norm": 2.1366957452962083,
      "learning_rate": 6.956521739130435e-06,
      "loss": 0.9418,
      "step": 16
    },
    {
      "epoch": 0.22566371681415928,
      "grad_norm": 1.6354470043380167,
      "learning_rate": 7.391304347826087e-06,
      "loss": 0.951,
      "step": 17
    },
    {
      "epoch": 0.23893805309734514,
      "grad_norm": 1.4598328197110417,
      "learning_rate": 7.82608695652174e-06,
      "loss": 0.9296,
      "step": 18
    },
    {
      "epoch": 0.252212389380531,
      "grad_norm": 1.5519981091962836,
      "learning_rate": 8.260869565217392e-06,
      "loss": 0.9174,
      "step": 19
    },
    {
      "epoch": 0.26548672566371684,
      "grad_norm": 1.4414501159063233,
      "learning_rate": 8.695652173913044e-06,
      "loss": 0.8893,
      "step": 20
    },
    {
      "epoch": 0.27876106194690264,
      "grad_norm": 1.2559447173533318,
      "learning_rate": 9.130434782608697e-06,
      "loss": 0.8833,
      "step": 21
    },
    {
      "epoch": 0.2920353982300885,
      "grad_norm": 1.202388398042244,
      "learning_rate": 9.565217391304349e-06,
      "loss": 0.8773,
      "step": 22
    },
    {
      "epoch": 0.3053097345132743,
      "grad_norm": 1.0994174756781598,
      "learning_rate": 1e-05,
      "loss": 0.8674,
      "step": 23
    },
    {
      "epoch": 0.3185840707964602,
      "grad_norm": 0.9262699978897617,
      "learning_rate": 9.999395316300748e-06,
      "loss": 0.8848,
      "step": 24
    },
    {
      "epoch": 0.33185840707964603,
      "grad_norm": 1.0063568356700823,
      "learning_rate": 9.99758141145994e-06,
      "loss": 0.8717,
      "step": 25
    },
    {
      "epoch": 0.34513274336283184,
      "grad_norm": 0.7838127318380594,
      "learning_rate": 9.994558724213056e-06,
      "loss": 0.8578,
      "step": 26
    },
    {
      "epoch": 0.3584070796460177,
      "grad_norm": 0.7604334069843581,
      "learning_rate": 9.990327985667972e-06,
      "loss": 0.8576,
      "step": 27
    },
    {
      "epoch": 0.37168141592920356,
      "grad_norm": 0.7392944593702723,
      "learning_rate": 9.984890219128148e-06,
      "loss": 0.861,
      "step": 28
    },
    {
      "epoch": 0.38495575221238937,
      "grad_norm": 0.6180669571411701,
      "learning_rate": 9.978246739845095e-06,
      "loss": 0.8376,
      "step": 29
    },
    {
      "epoch": 0.39823008849557523,
      "grad_norm": 0.5653293908697061,
      "learning_rate": 9.970399154700264e-06,
      "loss": 0.8358,
      "step": 30
    },
    {
      "epoch": 0.41150442477876104,
      "grad_norm": 0.6253879615255548,
      "learning_rate": 9.961349361816384e-06,
      "loss": 0.8266,
      "step": 31
    },
    {
      "epoch": 0.4247787610619469,
      "grad_norm": 0.6311146270349652,
      "learning_rate": 9.951099550098349e-06,
      "loss": 0.8271,
      "step": 32
    },
    {
      "epoch": 0.43805309734513276,
      "grad_norm": 0.4522340368594046,
      "learning_rate": 9.939652198703785e-06,
      "loss": 0.8212,
      "step": 33
    },
    {
      "epoch": 0.45132743362831856,
      "grad_norm": 0.46944490857782306,
      "learning_rate": 9.927010076443408e-06,
      "loss": 0.8088,
      "step": 34
    },
    {
      "epoch": 0.4646017699115044,
      "grad_norm": 0.6619178624223411,
      "learning_rate": 9.91317624111132e-06,
      "loss": 0.8248,
      "step": 35
    },
    {
      "epoch": 0.4778761061946903,
      "grad_norm": 0.4793421273100569,
      "learning_rate": 9.898154038745408e-06,
      "loss": 0.8227,
      "step": 36
    },
    {
      "epoch": 0.4911504424778761,
      "grad_norm": 0.5048716018168768,
      "learning_rate": 9.881947102818036e-06,
      "loss": 0.8136,
      "step": 37
    },
    {
      "epoch": 0.504424778761062,
      "grad_norm": 0.5310568438628753,
      "learning_rate": 9.864559353357189e-06,
      "loss": 0.8219,
      "step": 38
    },
    {
      "epoch": 0.5176991150442478,
      "grad_norm": 0.4039868493135018,
      "learning_rate": 9.845994995998332e-06,
      "loss": 0.8121,
      "step": 39
    },
    {
      "epoch": 0.5309734513274337,
      "grad_norm": 0.40613975716581396,
      "learning_rate": 9.826258520967178e-06,
      "loss": 0.8064,
      "step": 40
    },
    {
      "epoch": 0.5442477876106194,
      "grad_norm": 0.4243798130279049,
      "learning_rate": 9.805354701993624e-06,
      "loss": 0.7947,
      "step": 41
    },
    {
      "epoch": 0.5575221238938053,
      "grad_norm": 0.3937395008126602,
      "learning_rate": 9.7832885951571e-06,
      "loss": 0.7997,
      "step": 42
    },
    {
      "epoch": 0.5707964601769911,
      "grad_norm": 0.35927664574355933,
      "learning_rate": 9.76006553766365e-06,
      "loss": 0.8128,
      "step": 43
    },
    {
      "epoch": 0.584070796460177,
      "grad_norm": 0.41494094102543927,
      "learning_rate": 9.735691146555002e-06,
      "loss": 0.7987,
      "step": 44
    },
    {
      "epoch": 0.5973451327433629,
      "grad_norm": 0.39159423176658326,
      "learning_rate": 9.710171317349946e-06,
      "loss": 0.7982,
      "step": 45
    },
    {
      "epoch": 0.6106194690265486,
      "grad_norm": 0.3546396421266,
      "learning_rate": 9.683512222618376e-06,
      "loss": 0.8022,
      "step": 46
    },
    {
      "epoch": 0.6238938053097345,
      "grad_norm": 0.4031443445409564,
      "learning_rate": 9.655720310488298e-06,
      "loss": 0.7927,
      "step": 47
    },
    {
      "epoch": 0.6371681415929203,
      "grad_norm": 0.3739479458029583,
      "learning_rate": 9.62680230308621e-06,
      "loss": 0.7833,
      "step": 48
    },
    {
      "epoch": 0.6504424778761062,
      "grad_norm": 0.36631551044391514,
      "learning_rate": 9.596765194911182e-06,
      "loss": 0.7759,
      "step": 49
    },
    {
      "epoch": 0.6637168141592921,
      "grad_norm": 0.422089187866885,
      "learning_rate": 9.565616251143094e-06,
      "loss": 0.7904,
      "step": 50
    },
    {
      "epoch": 0.6769911504424779,
      "grad_norm": 0.38001471063547193,
      "learning_rate": 9.533363005885362e-06,
      "loss": 0.7966,
      "step": 51
    },
    {
      "epoch": 0.6902654867256637,
      "grad_norm": 0.34994815094524784,
      "learning_rate": 9.50001326034265e-06,
      "loss": 0.7831,
      "step": 52
    },
    {
      "epoch": 0.7035398230088495,
      "grad_norm": 0.4021483166751257,
      "learning_rate": 9.465575080933959e-06,
      "loss": 0.7822,
      "step": 53
    },
    {
      "epoch": 0.7168141592920354,
      "grad_norm": 0.37951200554127446,
      "learning_rate": 9.430056797341574e-06,
      "loss": 0.766,
      "step": 54
    },
    {
      "epoch": 0.7300884955752213,
      "grad_norm": 0.3885651067994481,
      "learning_rate": 9.393467000496345e-06,
      "loss": 0.8123,
      "step": 55
    },
    {
      "epoch": 0.7433628318584071,
      "grad_norm": 0.33788656471657824,
      "learning_rate": 9.355814540499753e-06,
      "loss": 0.792,
      "step": 56
    },
    {
      "epoch": 0.7566371681415929,
      "grad_norm": 0.35257733343859254,
      "learning_rate": 9.317108524483319e-06,
      "loss": 0.7808,
      "step": 57
    },
    {
      "epoch": 0.7699115044247787,
      "grad_norm": 0.4493980702652022,
      "learning_rate": 9.27735831440582e-06,
      "loss": 0.775,
      "step": 58
    },
    {
      "epoch": 0.7831858407079646,
      "grad_norm": 0.31327763449691237,
      "learning_rate": 9.236573524788888e-06,
      "loss": 0.8022,
      "step": 59
    },
    {
      "epoch": 0.7964601769911505,
      "grad_norm": 0.3539933006466989,
      "learning_rate": 9.194764020391507e-06,
      "loss": 0.7881,
      "step": 60
    },
    {
      "epoch": 0.8097345132743363,
      "grad_norm": 0.33723712804528594,
      "learning_rate": 9.151939913823988e-06,
      "loss": 0.7865,
      "step": 61
    },
    {
      "epoch": 0.8230088495575221,
      "grad_norm": 0.32722331239315644,
      "learning_rate": 9.108111563102005e-06,
      "loss": 0.7636,
      "step": 62
    },
    {
      "epoch": 0.8362831858407079,
      "grad_norm": 0.29895062665359806,
      "learning_rate": 9.063289569141251e-06,
      "loss": 0.8098,
      "step": 63
    },
    {
      "epoch": 0.8495575221238938,
      "grad_norm": 0.33490720312849226,
      "learning_rate": 9.01748477319338e-06,
      "loss": 0.7983,
      "step": 64
    },
    {
      "epoch": 0.8628318584070797,
      "grad_norm": 0.3138722396258816,
      "learning_rate": 8.970708254223768e-06,
      "loss": 0.7809,
      "step": 65
    },
    {
      "epoch": 0.8761061946902655,
      "grad_norm": 0.3503834090081562,
      "learning_rate": 8.92297132623183e-06,
      "loss": 0.771,
      "step": 66
    },
    {
      "epoch": 0.8893805309734514,
      "grad_norm": 0.3200718203542079,
      "learning_rate": 8.87428553551445e-06,
      "loss": 0.7774,
      "step": 67
    },
    {
      "epoch": 0.9026548672566371,
      "grad_norm": 0.3076356619481548,
      "learning_rate": 8.82466265787324e-06,
      "loss": 0.7715,
      "step": 68
    },
    {
      "epoch": 0.915929203539823,
      "grad_norm": 0.38503832854879466,
      "learning_rate": 8.774114695766286e-06,
      "loss": 0.7601,
      "step": 69
    },
    {
      "epoch": 0.9292035398230089,
      "grad_norm": 0.3446207105255993,
      "learning_rate": 8.722653875405077e-06,
      "loss": 0.7808,
      "step": 70
    },
    {
      "epoch": 0.9424778761061947,
      "grad_norm": 0.40538454765257886,
      "learning_rate": 8.670292643797302e-06,
      "loss": 0.771,
      "step": 71
    },
    {
      "epoch": 0.9557522123893806,
      "grad_norm": 0.36973676733332295,
      "learning_rate": 8.61704366573625e-06,
      "loss": 0.7665,
      "step": 72
    },
    {
      "epoch": 0.9690265486725663,
      "grad_norm": 0.3591470148947773,
      "learning_rate": 8.562919820737537e-06,
      "loss": 0.7489,
      "step": 73
    },
    {
      "epoch": 0.9823008849557522,
      "grad_norm": 0.40505318586997696,
      "learning_rate": 8.507934199923884e-06,
      "loss": 0.7959,
      "step": 74
    },
    {
      "epoch": 0.995575221238938,
      "grad_norm": 0.43745974627838796,
      "learning_rate": 8.452100102858734e-06,
      "loss": 0.7848,
      "step": 75
    },
    {
      "epoch": 1.008849557522124,
      "grad_norm": 0.34126147919255617,
      "learning_rate": 8.395431034329431e-06,
      "loss": 0.7542,
      "step": 76
    },
    {
      "epoch": 1.0221238938053097,
      "grad_norm": 0.4316984815289514,
      "learning_rate": 8.33794070108077e-06,
      "loss": 0.7461,
      "step": 77
    },
    {
      "epoch": 1.0353982300884956,
      "grad_norm": 0.3615278805808863,
      "learning_rate": 8.2796430084997e-06,
      "loss": 0.7213,
      "step": 78
    },
    {
      "epoch": 1.0486725663716814,
      "grad_norm": 0.35254356847110446,
      "learning_rate": 8.22055205725199e-06,
      "loss": 0.7324,
      "step": 79
    },
    {
      "epoch": 1.0619469026548674,
      "grad_norm": 0.3963258413649147,
      "learning_rate": 8.160682139871634e-06,
      "loss": 0.7468,
      "step": 80
    },
    {
      "epoch": 1.075221238938053,
      "grad_norm": 0.35741415329062726,
      "learning_rate": 8.100047737303877e-06,
      "loss": 0.7501,
      "step": 81
    },
    {
      "epoch": 1.0884955752212389,
      "grad_norm": 0.3471322354436462,
      "learning_rate": 8.038663515402659e-06,
      "loss": 0.7252,
      "step": 82
    },
    {
      "epoch": 1.1017699115044248,
      "grad_norm": 0.42484254049532516,
      "learning_rate": 7.97654432138333e-06,
      "loss": 0.7435,
      "step": 83
    },
    {
      "epoch": 1.1150442477876106,
      "grad_norm": 0.3394032976998188,
      "learning_rate": 7.913705180231505e-06,
      "loss": 0.7416,
      "step": 84
    },
    {
      "epoch": 1.1283185840707965,
      "grad_norm": 0.3419038953247492,
      "learning_rate": 7.850161291068915e-06,
      "loss": 0.7328,
      "step": 85
    },
    {
      "epoch": 1.1415929203539823,
      "grad_norm": 0.3897771775281859,
      "learning_rate": 7.785928023477142e-06,
      "loss": 0.7482,
      "step": 86
    },
    {
      "epoch": 1.154867256637168,
      "grad_norm": 0.3573777190290809,
      "learning_rate": 7.721020913780137e-06,
      "loss": 0.7496,
      "step": 87
    },
    {
      "epoch": 1.168141592920354,
      "grad_norm": 0.3259149835142648,
      "learning_rate": 7.655455661286376e-06,
      "loss": 0.7355,
      "step": 88
    },
    {
      "epoch": 1.1814159292035398,
      "grad_norm": 0.3482317049523075,
      "learning_rate": 7.589248124491627e-06,
      "loss": 0.772,
      "step": 89
    },
    {
      "epoch": 1.1946902654867257,
      "grad_norm": 0.35536003531689786,
      "learning_rate": 7.5224143172432e-06,
      "loss": 0.7308,
      "step": 90
    },
    {
      "epoch": 1.2079646017699115,
      "grad_norm": 0.3512034399410828,
      "learning_rate": 7.454970404866612e-06,
      "loss": 0.7281,
      "step": 91
    },
    {
      "epoch": 1.2212389380530975,
      "grad_norm": 0.3443490381255487,
      "learning_rate": 7.386932700255635e-06,
      "loss": 0.754,
      "step": 92
    },
    {
      "epoch": 1.2345132743362832,
      "grad_norm": 0.3321105735622909,
      "learning_rate": 7.318317659926637e-06,
      "loss": 0.7459,
      "step": 93
    },
    {
      "epoch": 1.247787610619469,
      "grad_norm": 0.30533172570444717,
      "learning_rate": 7.249141880038181e-06,
      "loss": 0.7298,
      "step": 94
    },
    {
      "epoch": 1.261061946902655,
      "grad_norm": 0.3420069157117049,
      "learning_rate": 7.179422092376856e-06,
      "loss": 0.7205,
      "step": 95
    },
    {
      "epoch": 1.2743362831858407,
      "grad_norm": 0.2974664185360883,
      "learning_rate": 7.109175160310312e-06,
      "loss": 0.7237,
      "step": 96
    },
    {
      "epoch": 1.2876106194690267,
      "grad_norm": 0.3354261539877681,
      "learning_rate": 7.038418074708444e-06,
      "loss": 0.7165,
      "step": 97
    },
    {
      "epoch": 1.3008849557522124,
      "grad_norm": 0.30569893584802527,
      "learning_rate": 6.967167949833763e-06,
      "loss": 0.7205,
      "step": 98
    },
    {
      "epoch": 1.3141592920353982,
      "grad_norm": 0.331228561765267,
      "learning_rate": 6.895442019201898e-06,
      "loss": 0.7401,
      "step": 99
    },
    {
      "epoch": 1.3274336283185841,
      "grad_norm": 0.31319049040987623,
      "learning_rate": 6.8232576314132755e-06,
      "loss": 0.7521,
      "step": 100
    },
    {
      "epoch": 1.3407079646017699,
      "grad_norm": 0.3221250361865119,
      "learning_rate": 6.750632245956954e-06,
      "loss": 0.7267,
      "step": 101
    },
    {
      "epoch": 1.3539823008849559,
      "grad_norm": 0.29427759113072766,
      "learning_rate": 6.677583428987625e-06,
      "loss": 0.7391,
      "step": 102
    },
    {
      "epoch": 1.3672566371681416,
      "grad_norm": 0.31137760374687096,
      "learning_rate": 6.6041288490768385e-06,
      "loss": 0.7501,
      "step": 103
    },
    {
      "epoch": 1.3805309734513274,
      "grad_norm": 0.34160988744926724,
      "learning_rate": 6.530286272939438e-06,
      "loss": 0.7364,
      "step": 104
    },
    {
      "epoch": 1.3938053097345133,
      "grad_norm": 0.28973977407554946,
      "learning_rate": 6.456073561136261e-06,
      "loss": 0.7439,
      "step": 105
    },
    {
      "epoch": 1.407079646017699,
      "grad_norm": 0.3568614786870276,
      "learning_rate": 6.381508663754152e-06,
      "loss": 0.7294,
      "step": 106
    },
    {
      "epoch": 1.420353982300885,
      "grad_norm": 0.3029840332798554,
      "learning_rate": 6.306609616064304e-06,
      "loss": 0.7363,
      "step": 107
    },
    {
      "epoch": 1.4336283185840708,
      "grad_norm": 0.3861113557071875,
      "learning_rate": 6.231394534160008e-06,
      "loss": 0.7383,
      "step": 108
    },
    {
      "epoch": 1.4469026548672566,
      "grad_norm": 0.28409536861891677,
      "learning_rate": 6.15588161057485e-06,
      "loss": 0.7411,
      "step": 109
    },
    {
      "epoch": 1.4601769911504425,
      "grad_norm": 0.3530117611821851,
      "learning_rate": 6.080089109882419e-06,
      "loss": 0.7259,
      "step": 110
    },
    {
      "epoch": 1.4734513274336283,
      "grad_norm": 0.33355378626358784,
      "learning_rate": 6.004035364278593e-06,
      "loss": 0.7374,
      "step": 111
    },
    {
      "epoch": 1.4867256637168142,
      "grad_norm": 0.30357771727319294,
      "learning_rate": 5.927738769147467e-06,
      "loss": 0.7325,
      "step": 112
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.3362838667459555,
      "learning_rate": 5.851217778611994e-06,
      "loss": 0.7345,
      "step": 113
    },
    {
      "epoch": 1.5132743362831858,
      "grad_norm": 0.32474587984909936,
      "learning_rate": 5.774490901070424e-06,
      "loss": 0.737,
      "step": 114
    },
    {
      "epoch": 1.5265486725663717,
      "grad_norm": 0.3014879597724264,
      "learning_rate": 5.697576694719616e-06,
      "loss": 0.7272,
      "step": 115
    },
    {
      "epoch": 1.5398230088495575,
      "grad_norm": 0.3040467458628167,
      "learning_rate": 5.6204937630662974e-06,
      "loss": 0.734,
      "step": 116
    },
    {
      "epoch": 1.5530973451327434,
      "grad_norm": 0.31350706920507393,
      "learning_rate": 5.543260750427373e-06,
      "loss": 0.7466,
      "step": 117
    },
    {
      "epoch": 1.5663716814159292,
      "grad_norm": 0.3229512522338443,
      "learning_rate": 5.465896337420359e-06,
      "loss": 0.7141,
      "step": 118
    },
    {
      "epoch": 1.579646017699115,
      "grad_norm": 0.28121228903323864,
      "learning_rate": 5.388419236445033e-06,
      "loss": 0.7285,
      "step": 119
    },
    {
      "epoch": 1.592920353982301,
      "grad_norm": 0.338272529289003,
      "learning_rate": 5.310848187157404e-06,
      "loss": 0.7185,
      "step": 120
    },
    {
      "epoch": 1.606194690265487,
      "grad_norm": 0.29341122571043904,
      "learning_rate": 5.233201951937088e-06,
      "loss": 0.7189,
      "step": 121
    },
    {
      "epoch": 1.6194690265486726,
      "grad_norm": 0.3376683438075859,
      "learning_rate": 5.155499311349185e-06,
      "loss": 0.7169,
      "step": 122
    },
    {
      "epoch": 1.6327433628318584,
      "grad_norm": 0.3007152432888567,
      "learning_rate": 5.077759059601756e-06,
      "loss": 0.7265,
      "step": 123
    },
    {
      "epoch": 1.6460176991150441,
      "grad_norm": 0.26177776789824847,
      "learning_rate": 5e-06,
      "loss": 0.7227,
      "step": 124
    },
    {
      "epoch": 1.6592920353982301,
      "grad_norm": 0.3554074144730279,
      "learning_rate": 4.922240940398246e-06,
      "loss": 0.7129,
      "step": 125
    },
    {
      "epoch": 1.672566371681416,
      "grad_norm": 0.29604443076936837,
      "learning_rate": 4.844500688650817e-06,
      "loss": 0.7199,
      "step": 126
    },
    {
      "epoch": 1.6858407079646018,
      "grad_norm": 0.3045326626118736,
      "learning_rate": 4.766798048062913e-06,
      "loss": 0.7251,
      "step": 127
    },
    {
      "epoch": 1.6991150442477876,
      "grad_norm": 0.2994924584025303,
      "learning_rate": 4.689151812842598e-06,
      "loss": 0.746,
      "step": 128
    },
    {
      "epoch": 1.7123893805309733,
      "grad_norm": 0.26667687455270667,
      "learning_rate": 4.611580763554969e-06,
      "loss": 0.7166,
      "step": 129
    },
    {
      "epoch": 1.7256637168141593,
      "grad_norm": 0.2954822813324762,
      "learning_rate": 4.534103662579643e-06,
      "loss": 0.7078,
      "step": 130
    },
    {
      "epoch": 1.7389380530973453,
      "grad_norm": 0.26995487436514215,
      "learning_rate": 4.456739249572628e-06,
      "loss": 0.7192,
      "step": 131
    },
    {
      "epoch": 1.752212389380531,
      "grad_norm": 0.26773767932102494,
      "learning_rate": 4.379506236933703e-06,
      "loss": 0.7186,
      "step": 132
    },
    {
      "epoch": 1.7654867256637168,
      "grad_norm": 0.257637247498496,
      "learning_rate": 4.3024233052803855e-06,
      "loss": 0.7214,
      "step": 133
    },
    {
      "epoch": 1.7787610619469025,
      "grad_norm": 0.2875621691724459,
      "learning_rate": 4.2255090989295765e-06,
      "loss": 0.7155,
      "step": 134
    },
    {
      "epoch": 1.7920353982300885,
      "grad_norm": 0.29010788059474596,
      "learning_rate": 4.148782221388007e-06,
      "loss": 0.709,
      "step": 135
    },
    {
      "epoch": 1.8053097345132745,
      "grad_norm": 0.2588099571871009,
      "learning_rate": 4.072261230852534e-06,
      "loss": 0.7042,
      "step": 136
    },
    {
      "epoch": 1.8185840707964602,
      "grad_norm": 0.2664579826688701,
      "learning_rate": 3.995964635721409e-06,
      "loss": 0.7182,
      "step": 137
    },
    {
      "epoch": 1.831858407079646,
      "grad_norm": 0.27721920237328196,
      "learning_rate": 3.919910890117584e-06,
      "loss": 0.7082,
      "step": 138
    },
    {
      "epoch": 1.8451327433628317,
      "grad_norm": 0.27627030502639816,
      "learning_rate": 3.844118389425154e-06,
      "loss": 0.7329,
      "step": 139
    },
    {
      "epoch": 1.8584070796460177,
      "grad_norm": 0.2913739464055808,
      "learning_rate": 3.768605465839994e-06,
      "loss": 0.7173,
      "step": 140
    },
    {
      "epoch": 1.8716814159292037,
      "grad_norm": 0.2850484954797806,
      "learning_rate": 3.6933903839356983e-06,
      "loss": 0.7221,
      "step": 141
    },
    {
      "epoch": 1.8849557522123894,
      "grad_norm": 0.25821886946143024,
      "learning_rate": 3.6184913362458497e-06,
      "loss": 0.7315,
      "step": 142
    },
    {
      "epoch": 1.8982300884955752,
      "grad_norm": 0.2569518339216896,
      "learning_rate": 3.5439264388637407e-06,
      "loss": 0.7154,
      "step": 143
    },
    {
      "epoch": 1.911504424778761,
      "grad_norm": 0.2648943046537424,
      "learning_rate": 3.469713727060564e-06,
      "loss": 0.7221,
      "step": 144
    },
    {
      "epoch": 1.924778761061947,
      "grad_norm": 0.32763318064063895,
      "learning_rate": 3.3958711509231627e-06,
      "loss": 0.7675,
      "step": 145
    },
    {
      "epoch": 1.9380530973451329,
      "grad_norm": 0.2653495628703862,
      "learning_rate": 3.322416571012376e-06,
      "loss": 0.7343,
      "step": 146
    },
    {
      "epoch": 1.9513274336283186,
      "grad_norm": 0.2702072967100288,
      "learning_rate": 3.249367754043047e-06,
      "loss": 0.7341,
      "step": 147
    },
    {
      "epoch": 1.9646017699115044,
      "grad_norm": 0.2803919016892307,
      "learning_rate": 3.176742368586725e-06,
      "loss": 0.7222,
      "step": 148
    },
    {
      "epoch": 1.9778761061946901,
      "grad_norm": 0.25493487382341745,
      "learning_rate": 3.104557980798104e-06,
      "loss": 0.7172,
      "step": 149
    },
    {
      "epoch": 1.991150442477876,
      "grad_norm": 0.2570628171188845,
      "learning_rate": 3.032832050166239e-06,
      "loss": 0.7224,
      "step": 150
    },
    {
      "epoch": 2.004424778761062,
      "grad_norm": 0.267565604708211,
      "learning_rate": 2.961581925291557e-06,
      "loss": 0.7017,
      "step": 151
    },
    {
      "epoch": 2.017699115044248,
      "grad_norm": 0.26815503106911354,
      "learning_rate": 2.8908248396896893e-06,
      "loss": 0.7,
      "step": 152
    },
    {
      "epoch": 2.0309734513274336,
      "grad_norm": 0.26249885130766193,
      "learning_rate": 2.820577907623145e-06,
      "loss": 0.7034,
      "step": 153
    },
    {
      "epoch": 2.0442477876106193,
      "grad_norm": 0.2549779907951391,
      "learning_rate": 2.750858119961821e-06,
      "loss": 0.7078,
      "step": 154
    },
    {
      "epoch": 2.0575221238938055,
      "grad_norm": 0.2525346837688464,
      "learning_rate": 2.6816823400733628e-06,
      "loss": 0.6995,
      "step": 155
    },
    {
      "epoch": 2.0707964601769913,
      "grad_norm": 0.2679663133077141,
      "learning_rate": 2.613067299744364e-06,
      "loss": 0.7055,
      "step": 156
    },
    {
      "epoch": 2.084070796460177,
      "grad_norm": 0.2586016464525557,
      "learning_rate": 2.5450295951333896e-06,
      "loss": 0.6959,
      "step": 157
    },
    {
      "epoch": 2.0973451327433628,
      "grad_norm": 0.30308624026957554,
      "learning_rate": 2.4775856827568016e-06,
      "loss": 0.6883,
      "step": 158
    },
    {
      "epoch": 2.1106194690265485,
      "grad_norm": 0.27265295782990395,
      "learning_rate": 2.410751875508373e-06,
      "loss": 0.6695,
      "step": 159
    },
    {
      "epoch": 2.1238938053097347,
      "grad_norm": 0.2700979984703012,
      "learning_rate": 2.3445443387136247e-06,
      "loss": 0.6957,
      "step": 160
    },
    {
      "epoch": 2.1371681415929205,
      "grad_norm": 0.30092750399401313,
      "learning_rate": 2.278979086219863e-06,
      "loss": 0.6828,
      "step": 161
    },
    {
      "epoch": 2.150442477876106,
      "grad_norm": 0.29172927343524324,
      "learning_rate": 2.2140719765228587e-06,
      "loss": 0.7153,
      "step": 162
    },
    {
      "epoch": 2.163716814159292,
      "grad_norm": 0.26968822161138883,
      "learning_rate": 2.149838708931087e-06,
      "loss": 0.703,
      "step": 163
    },
    {
      "epoch": 2.1769911504424777,
      "grad_norm": 0.2572024987696099,
      "learning_rate": 2.086294819768496e-06,
      "loss": 0.6879,
      "step": 164
    },
    {
      "epoch": 2.190265486725664,
      "grad_norm": 0.26928155454929387,
      "learning_rate": 2.0234556786166715e-06,
      "loss": 0.6988,
      "step": 165
    },
    {
      "epoch": 2.2035398230088497,
      "grad_norm": 0.27668720512764094,
      "learning_rate": 1.9613364845973433e-06,
      "loss": 0.7004,
      "step": 166
    },
    {
      "epoch": 2.2168141592920354,
      "grad_norm": 0.2618962828624147,
      "learning_rate": 1.8999522626961254e-06,
      "loss": 0.6835,
      "step": 167
    },
    {
      "epoch": 2.230088495575221,
      "grad_norm": 0.2559019455661947,
      "learning_rate": 1.8393178601283684e-06,
      "loss": 0.6954,
      "step": 168
    },
    {
      "epoch": 2.243362831858407,
      "grad_norm": 0.2586893846210169,
      "learning_rate": 1.7794479427480115e-06,
      "loss": 0.7103,
      "step": 169
    },
    {
      "epoch": 2.256637168141593,
      "grad_norm": 0.27637512548980897,
      "learning_rate": 1.7203569915003005e-06,
      "loss": 0.7001,
      "step": 170
    },
    {
      "epoch": 2.269911504424779,
      "grad_norm": 0.25343974808714076,
      "learning_rate": 1.6620592989192318e-06,
      "loss": 0.7123,
      "step": 171
    },
    {
      "epoch": 2.2831858407079646,
      "grad_norm": 0.24237088389234168,
      "learning_rate": 1.6045689656705715e-06,
      "loss": 0.6823,
      "step": 172
    },
    {
      "epoch": 2.2964601769911503,
      "grad_norm": 0.2711005680287586,
      "learning_rate": 1.5478998971412669e-06,
      "loss": 0.6891,
      "step": 173
    },
    {
      "epoch": 2.309734513274336,
      "grad_norm": 0.27026911575300705,
      "learning_rate": 1.4920658000761172e-06,
      "loss": 0.683,
      "step": 174
    },
    {
      "epoch": 2.3230088495575223,
      "grad_norm": 0.2675921091939595,
      "learning_rate": 1.4370801792624656e-06,
      "loss": 0.7062,
      "step": 175
    },
    {
      "epoch": 2.336283185840708,
      "grad_norm": 0.250664626986251,
      "learning_rate": 1.3829563342637514e-06,
      "loss": 0.6941,
      "step": 176
    },
    {
      "epoch": 2.349557522123894,
      "grad_norm": 0.2318767325573604,
      "learning_rate": 1.3297073562026992e-06,
      "loss": 0.696,
      "step": 177
    },
    {
      "epoch": 2.3628318584070795,
      "grad_norm": 0.24483698072857207,
      "learning_rate": 1.2773461245949249e-06,
      "loss": 0.6904,
      "step": 178
    },
    {
      "epoch": 2.3761061946902653,
      "grad_norm": 0.24702478932419128,
      "learning_rate": 1.225885304233716e-06,
      "loss": 0.6939,
      "step": 179
    },
    {
      "epoch": 2.3893805309734515,
      "grad_norm": 0.24194451250650229,
      "learning_rate": 1.1753373421267622e-06,
      "loss": 0.6797,
      "step": 180
    },
    {
      "epoch": 2.4026548672566372,
      "grad_norm": 0.2645341332285249,
      "learning_rate": 1.125714464485551e-06,
      "loss": 0.6966,
      "step": 181
    },
    {
      "epoch": 2.415929203539823,
      "grad_norm": 0.2534238881143905,
      "learning_rate": 1.0770286737681701e-06,
      "loss": 0.6885,
      "step": 182
    },
    {
      "epoch": 2.4292035398230087,
      "grad_norm": 0.25437197528124134,
      "learning_rate": 1.0292917457762325e-06,
      "loss": 0.7007,
      "step": 183
    },
    {
      "epoch": 2.442477876106195,
      "grad_norm": 0.24736717833800334,
      "learning_rate": 9.825152268066213e-07,
      "loss": 0.6807,
      "step": 184
    },
    {
      "epoch": 2.4557522123893807,
      "grad_norm": 0.22872199162900433,
      "learning_rate": 9.367104308587493e-07,
      "loss": 0.6987,
      "step": 185
    },
    {
      "epoch": 2.4690265486725664,
      "grad_norm": 0.24419690309952968,
      "learning_rate": 8.918884368979969e-07,
      "loss": 0.6749,
      "step": 186
    },
    {
      "epoch": 2.482300884955752,
      "grad_norm": 0.2512421685702932,
      "learning_rate": 8.480600861760124e-07,
      "loss": 0.7019,
      "step": 187
    },
    {
      "epoch": 2.495575221238938,
      "grad_norm": 0.2282816625905513,
      "learning_rate": 8.052359796084952e-07,
      "loss": 0.6816,
      "step": 188
    },
    {
      "epoch": 2.508849557522124,
      "grad_norm": 0.23523586566320434,
      "learning_rate": 7.634264752111131e-07,
      "loss": 0.68,
      "step": 189
    },
    {
      "epoch": 2.52212389380531,
      "grad_norm": 0.2363793146961199,
      "learning_rate": 7.226416855941814e-07,
      "loss": 0.7033,
      "step": 190
    },
    {
      "epoch": 2.5353982300884956,
      "grad_norm": 0.24298969213909608,
      "learning_rate": 6.828914755166826e-07,
      "loss": 0.6979,
      "step": 191
    },
    {
      "epoch": 2.5486725663716814,
      "grad_norm": 0.251085353078741,
      "learning_rate": 6.441854595002478e-07,
      "loss": 0.6809,
      "step": 192
    },
    {
      "epoch": 2.561946902654867,
      "grad_norm": 0.2448680893471181,
      "learning_rate": 6.065329995036573e-07,
      "loss": 0.7131,
      "step": 193
    },
    {
      "epoch": 2.5752212389380533,
      "grad_norm": 0.23725089777367897,
      "learning_rate": 5.699432026584267e-07,
      "loss": 0.6959,
      "step": 194
    },
    {
      "epoch": 2.588495575221239,
      "grad_norm": 0.23157485314336165,
      "learning_rate": 5.344249190660427e-07,
      "loss": 0.7001,
      "step": 195
    },
    {
      "epoch": 2.601769911504425,
      "grad_norm": 0.23391962016161583,
      "learning_rate": 4.999867396573499e-07,
      "loss": 0.6893,
      "step": 196
    },
    {
      "epoch": 2.6150442477876106,
      "grad_norm": 0.236263144354454,
      "learning_rate": 4.666369941146376e-07,
      "loss": 0.6937,
      "step": 197
    },
    {
      "epoch": 2.6283185840707963,
      "grad_norm": 0.23366739049943105,
      "learning_rate": 4.343837488569058e-07,
      "loss": 0.6943,
      "step": 198
    },
    {
      "epoch": 2.6415929203539825,
      "grad_norm": 0.2362436163336678,
      "learning_rate": 4.03234805088818e-07,
      "loss": 0.7204,
      "step": 199
    },
    {
      "epoch": 2.6548672566371683,
      "grad_norm": 0.23416015494584175,
      "learning_rate": 3.7319769691379295e-07,
      "loss": 0.6887,
      "step": 200
    },
    {
      "epoch": 2.668141592920354,
      "grad_norm": 0.23617669670661126,
      "learning_rate": 3.4427968951170287e-07,
      "loss": 0.7037,
      "step": 201
    },
    {
      "epoch": 2.6814159292035398,
      "grad_norm": 0.24035284150274663,
      "learning_rate": 3.1648777738162496e-07,
      "loss": 0.702,
      "step": 202
    },
    {
      "epoch": 2.6946902654867255,
      "grad_norm": 0.22345558795205156,
      "learning_rate": 2.8982868265005457e-07,
      "loss": 0.684,
      "step": 203
    },
    {
      "epoch": 2.7079646017699117,
      "grad_norm": 0.22664491638297707,
      "learning_rate": 2.6430885344499944e-07,
      "loss": 0.7095,
      "step": 204
    },
    {
      "epoch": 2.7212389380530975,
      "grad_norm": 0.23480608139334605,
      "learning_rate": 2.399344623363503e-07,
      "loss": 0.6892,
      "step": 205
    },
    {
      "epoch": 2.734513274336283,
      "grad_norm": 0.2286804361911203,
      "learning_rate": 2.1671140484290144e-07,
      "loss": 0.6904,
      "step": 206
    },
    {
      "epoch": 2.747787610619469,
      "grad_norm": 0.2368767359746762,
      "learning_rate": 1.9464529800637731e-07,
      "loss": 0.6816,
      "step": 207
    },
    {
      "epoch": 2.7610619469026547,
      "grad_norm": 0.22986090669280276,
      "learning_rate": 1.737414790328218e-07,
      "loss": 0.7106,
      "step": 208
    },
    {
      "epoch": 2.774336283185841,
      "grad_norm": 0.22856046175085815,
      "learning_rate": 1.540050040016694e-07,
      "loss": 0.6838,
      "step": 209
    },
    {
      "epoch": 2.7876106194690267,
      "grad_norm": 0.23620045893690103,
      "learning_rate": 1.3544064664281266e-07,
      "loss": 0.6835,
      "step": 210
    },
    {
      "epoch": 2.8008849557522124,
      "grad_norm": 0.22791291798965643,
      "learning_rate": 1.1805289718196499e-07,
      "loss": 0.7034,
      "step": 211
    },
    {
      "epoch": 2.814159292035398,
      "grad_norm": 0.2369430962945639,
      "learning_rate": 1.0184596125459134e-07,
      "loss": 0.6882,
      "step": 212
    },
    {
      "epoch": 2.827433628318584,
      "grad_norm": 0.22918753871731054,
      "learning_rate": 8.682375888868167e-08,
      "loss": 0.6952,
      "step": 213
    },
    {
      "epoch": 2.84070796460177,
      "grad_norm": 0.2370616166930934,
      "learning_rate": 7.29899235565934e-08,
      "loss": 0.6998,
      "step": 214
    },
    {
      "epoch": 2.853982300884956,
      "grad_norm": 0.22596145625094352,
      "learning_rate": 6.034780129621664e-08,
      "loss": 0.6918,
      "step": 215
    },
    {
      "epoch": 2.8672566371681416,
      "grad_norm": 0.23262221148740786,
      "learning_rate": 4.8900449901653214e-08,
      "loss": 0.6887,
      "step": 216
    },
    {
      "epoch": 2.8805309734513274,
      "grad_norm": 0.24233993030152726,
      "learning_rate": 3.8650638183617695e-08,
      "loss": 0.6984,
      "step": 217
    },
    {
      "epoch": 2.893805309734513,
      "grad_norm": 0.22180097024671025,
      "learning_rate": 2.960084529973706e-08,
      "loss": 0.6982,
      "step": 218
    },
    {
      "epoch": 2.9070796460176993,
      "grad_norm": 0.2299320357668445,
      "learning_rate": 2.1753260154906973e-08,
      "loss": 0.6907,
      "step": 219
    },
    {
      "epoch": 2.920353982300885,
      "grad_norm": 0.24963653707358402,
      "learning_rate": 1.5109780871853663e-08,
      "loss": 0.6966,
      "step": 220
    },
    {
      "epoch": 2.933628318584071,
      "grad_norm": 0.23346145464997622,
      "learning_rate": 9.672014332028357e-09,
      "loss": 0.6937,
      "step": 221
    },
    {
      "epoch": 2.9469026548672566,
      "grad_norm": 0.2293849764149983,
      "learning_rate": 5.4412757869459765e-09,
      "loss": 0.7024,
      "step": 222
    },
    {
      "epoch": 2.9601769911504423,
      "grad_norm": 0.23761192295842704,
      "learning_rate": 2.4185885400596076e-09,
      "loss": 0.686,
      "step": 223
    },
    {
      "epoch": 2.9734513274336285,
      "grad_norm": 0.234006094580547,
      "learning_rate": 6.04683699252373e-10,
      "loss": 0.7013,
      "step": 224
    },
    {
      "epoch": 2.9867256637168142,
      "grad_norm": 0.2434351223584646,
      "learning_rate": 0.0,
      "loss": 0.7003,
      "step": 225
    },
    {
      "epoch": 2.9867256637168142,
      "step": 225,
      "total_flos": 7.721927725170033e+17,
      "train_loss": 0.7663452034526401,
      "train_runtime": 7882.5173,
      "train_samples_per_second": 2.745,
      "train_steps_per_second": 0.029
    }
  ],
  "logging_steps": 1,
  "max_steps": 225,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.721927725170033e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}