| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 230, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004347826086956522, |
| "grad_norm": 210.10928344726562, |
| "learning_rate": 0.0, |
| "loss": 5.8188, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.008695652173913044, |
| "grad_norm": 216.5006561279297, |
| "learning_rate": 2.173913043478261e-06, |
| "loss": 5.9259, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.013043478260869565, |
| "grad_norm": 144.48963928222656, |
| "learning_rate": 4.347826086956522e-06, |
| "loss": 5.646, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.017391304347826087, |
| "grad_norm": 45.486934661865234, |
| "learning_rate": 6.521739130434783e-06, |
| "loss": 5.3097, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.021739130434782608, |
| "grad_norm": 83.79264831542969, |
| "learning_rate": 8.695652173913044e-06, |
| "loss": 5.3505, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02608695652173913, |
| "grad_norm": 33.744483947753906, |
| "learning_rate": 1.0869565217391305e-05, |
| "loss": 5.1314, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.030434782608695653, |
| "grad_norm": 22.175418853759766, |
| "learning_rate": 1.3043478260869566e-05, |
| "loss": 4.8346, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.034782608695652174, |
| "grad_norm": 18.40424156188965, |
| "learning_rate": 1.5217391304347828e-05, |
| "loss": 4.7562, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0391304347826087, |
| "grad_norm": 15.772565841674805, |
| "learning_rate": 1.739130434782609e-05, |
| "loss": 4.5057, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.043478260869565216, |
| "grad_norm": 11.410517692565918, |
| "learning_rate": 1.956521739130435e-05, |
| "loss": 4.3231, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04782608695652174, |
| "grad_norm": 14.64340877532959, |
| "learning_rate": 2.173913043478261e-05, |
| "loss": 4.3797, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.05217391304347826, |
| "grad_norm": 7.4696946144104, |
| "learning_rate": 2.391304347826087e-05, |
| "loss": 3.9548, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05652173913043478, |
| "grad_norm": 3.1422557830810547, |
| "learning_rate": 2.608695652173913e-05, |
| "loss": 3.8226, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.06086956521739131, |
| "grad_norm": 2.6594135761260986, |
| "learning_rate": 2.826086956521739e-05, |
| "loss": 3.8783, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06521739130434782, |
| "grad_norm": 2.0335605144500732, |
| "learning_rate": 3.0434782608695656e-05, |
| "loss": 3.626, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.06956521739130435, |
| "grad_norm": 2.045989513397217, |
| "learning_rate": 3.260869565217392e-05, |
| "loss": 3.4734, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.07391304347826087, |
| "grad_norm": 1.797641396522522, |
| "learning_rate": 3.478260869565218e-05, |
| "loss": 3.3667, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0782608695652174, |
| "grad_norm": 1.7289575338363647, |
| "learning_rate": 3.695652173913043e-05, |
| "loss": 3.2171, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08260869565217391, |
| "grad_norm": 1.6280560493469238, |
| "learning_rate": 3.91304347826087e-05, |
| "loss": 3.0697, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.08695652173913043, |
| "grad_norm": 1.5199931859970093, |
| "learning_rate": 4.130434782608696e-05, |
| "loss": 2.9537, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09130434782608696, |
| "grad_norm": 1.4183111190795898, |
| "learning_rate": 4.347826086956522e-05, |
| "loss": 2.8091, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.09565217391304348, |
| "grad_norm": 1.453029990196228, |
| "learning_rate": 4.565217391304348e-05, |
| "loss": 2.6457, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.135553002357483, |
| "learning_rate": 4.782608695652174e-05, |
| "loss": 2.4701, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.10434782608695652, |
| "grad_norm": 0.9866960644721985, |
| "learning_rate": 5e-05, |
| "loss": 2.3948, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.10869565217391304, |
| "grad_norm": 0.8710840344429016, |
| "learning_rate": 5.217391304347826e-05, |
| "loss": 2.3239, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11304347826086956, |
| "grad_norm": 0.8170456886291504, |
| "learning_rate": 5.4347826086956524e-05, |
| "loss": 2.1285, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.11739130434782609, |
| "grad_norm": 0.790302038192749, |
| "learning_rate": 5.652173913043478e-05, |
| "loss": 2.021, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.12173913043478261, |
| "grad_norm": 0.7848089933395386, |
| "learning_rate": 5.869565217391305e-05, |
| "loss": 1.9254, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.12608695652173912, |
| "grad_norm": 0.7707406878471375, |
| "learning_rate": 6.086956521739131e-05, |
| "loss": 1.8048, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.13043478260869565, |
| "grad_norm": 0.7862960696220398, |
| "learning_rate": 6.304347826086957e-05, |
| "loss": 1.6704, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13478260869565217, |
| "grad_norm": 0.8184984922409058, |
| "learning_rate": 6.521739130434783e-05, |
| "loss": 1.5525, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1391304347826087, |
| "grad_norm": 0.751800537109375, |
| "learning_rate": 6.73913043478261e-05, |
| "loss": 1.4305, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.14347826086956522, |
| "grad_norm": 0.6508727073669434, |
| "learning_rate": 6.956521739130436e-05, |
| "loss": 1.3082, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.14782608695652175, |
| "grad_norm": 0.5927818417549133, |
| "learning_rate": 7.17391304347826e-05, |
| "loss": 1.2962, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.15217391304347827, |
| "grad_norm": 0.48864519596099854, |
| "learning_rate": 7.391304347826086e-05, |
| "loss": 1.1943, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1565217391304348, |
| "grad_norm": 0.43812891840934753, |
| "learning_rate": 7.608695652173914e-05, |
| "loss": 1.1367, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1608695652173913, |
| "grad_norm": 0.3985790014266968, |
| "learning_rate": 7.82608695652174e-05, |
| "loss": 1.0961, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.16521739130434782, |
| "grad_norm": 0.3411348760128021, |
| "learning_rate": 8.043478260869566e-05, |
| "loss": 1.0314, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.16956521739130434, |
| "grad_norm": 0.32298171520233154, |
| "learning_rate": 8.260869565217392e-05, |
| "loss": 0.9771, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 0.30958038568496704, |
| "learning_rate": 8.478260869565218e-05, |
| "loss": 0.9268, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1782608695652174, |
| "grad_norm": 0.2889741063117981, |
| "learning_rate": 8.695652173913044e-05, |
| "loss": 0.9256, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.1826086956521739, |
| "grad_norm": 0.24591656029224396, |
| "learning_rate": 8.91304347826087e-05, |
| "loss": 0.883, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.18695652173913044, |
| "grad_norm": 0.23997186124324799, |
| "learning_rate": 9.130434782608696e-05, |
| "loss": 0.8786, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.19130434782608696, |
| "grad_norm": 0.2006598263978958, |
| "learning_rate": 9.347826086956522e-05, |
| "loss": 0.8396, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1956521739130435, |
| "grad_norm": 0.18479709327220917, |
| "learning_rate": 9.565217391304348e-05, |
| "loss": 0.8413, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.17641599476337433, |
| "learning_rate": 9.782608695652174e-05, |
| "loss": 0.8359, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.20434782608695654, |
| "grad_norm": 0.15423867106437683, |
| "learning_rate": 0.0001, |
| "loss": 0.8058, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.20869565217391303, |
| "grad_norm": 0.1461988240480423, |
| "learning_rate": 9.999856041607731e-05, |
| "loss": 0.8029, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.21304347826086956, |
| "grad_norm": 0.12839862704277039, |
| "learning_rate": 9.999424174720531e-05, |
| "loss": 0.7822, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.21739130434782608, |
| "grad_norm": 0.12158359587192535, |
| "learning_rate": 9.998704424206746e-05, |
| "loss": 0.7748, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2217391304347826, |
| "grad_norm": 0.1291743963956833, |
| "learning_rate": 9.997696831512027e-05, |
| "loss": 0.7661, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.22608695652173913, |
| "grad_norm": 0.12144283205270767, |
| "learning_rate": 9.99640145465694e-05, |
| "loss": 0.7869, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.23043478260869565, |
| "grad_norm": 0.1100422814488411, |
| "learning_rate": 9.994818368233639e-05, |
| "loss": 0.7777, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.23478260869565218, |
| "grad_norm": 0.0993693619966507, |
| "learning_rate": 9.992947663401548e-05, |
| "loss": 0.7473, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.2391304347826087, |
| "grad_norm": 0.0941305086016655, |
| "learning_rate": 9.990789447882137e-05, |
| "loss": 0.7516, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.24347826086956523, |
| "grad_norm": 0.09400874376296997, |
| "learning_rate": 9.988343845952697e-05, |
| "loss": 0.7517, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.24782608695652175, |
| "grad_norm": 0.083980493247509, |
| "learning_rate": 9.985610998439197e-05, |
| "loss": 0.749, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.25217391304347825, |
| "grad_norm": 0.08494170755147934, |
| "learning_rate": 9.98259106270817e-05, |
| "loss": 0.7332, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2565217391304348, |
| "grad_norm": 0.08159317076206207, |
| "learning_rate": 9.979284212657657e-05, |
| "loss": 0.7343, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.2608695652173913, |
| "grad_norm": 0.08313615620136261, |
| "learning_rate": 9.97569063870718e-05, |
| "loss": 0.7211, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.26521739130434785, |
| "grad_norm": 0.07852096855640411, |
| "learning_rate": 9.971810547786793e-05, |
| "loss": 0.731, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.26956521739130435, |
| "grad_norm": 0.0774468258023262, |
| "learning_rate": 9.967644163325156e-05, |
| "loss": 0.7198, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.27391304347826084, |
| "grad_norm": 0.07157547771930695, |
| "learning_rate": 9.963191725236672e-05, |
| "loss": 0.6946, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2782608695652174, |
| "grad_norm": 0.07179877161979675, |
| "learning_rate": 9.958453489907673e-05, |
| "loss": 0.6983, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2826086956521739, |
| "grad_norm": 0.13720852136611938, |
| "learning_rate": 9.953429730181653e-05, |
| "loss": 0.7209, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.28695652173913044, |
| "grad_norm": 0.08586138486862183, |
| "learning_rate": 9.948120735343566e-05, |
| "loss": 0.7022, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.29130434782608694, |
| "grad_norm": 0.06595543771982193, |
| "learning_rate": 9.942526811103152e-05, |
| "loss": 0.6857, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2956521739130435, |
| "grad_norm": 0.06423239409923553, |
| "learning_rate": 9.936648279577349e-05, |
| "loss": 0.6924, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.07080892473459244, |
| "learning_rate": 9.930485479271735e-05, |
| "loss": 0.6963, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.30434782608695654, |
| "grad_norm": 0.06481339782476425, |
| "learning_rate": 9.924038765061042e-05, |
| "loss": 0.7055, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.30869565217391304, |
| "grad_norm": 0.07143648713827133, |
| "learning_rate": 9.91730850816871e-05, |
| "loss": 0.6761, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3130434782608696, |
| "grad_norm": 0.06885742396116257, |
| "learning_rate": 9.91029509614553e-05, |
| "loss": 0.7111, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3173913043478261, |
| "grad_norm": 0.06406974792480469, |
| "learning_rate": 9.902998932847307e-05, |
| "loss": 0.6971, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3217391304347826, |
| "grad_norm": 0.06285955011844635, |
| "learning_rate": 9.895420438411616e-05, |
| "loss": 0.681, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.32608695652173914, |
| "grad_norm": 0.07179131358861923, |
| "learning_rate": 9.887560049233605e-05, |
| "loss": 0.7001, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.33043478260869563, |
| "grad_norm": 0.06652161478996277, |
| "learning_rate": 9.879418217940873e-05, |
| "loss": 0.6668, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3347826086956522, |
| "grad_norm": 0.06445639580488205, |
| "learning_rate": 9.870995413367397e-05, |
| "loss": 0.6981, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.3391304347826087, |
| "grad_norm": 0.06834300607442856, |
| "learning_rate": 9.862292120526535e-05, |
| "loss": 0.6484, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.34347826086956523, |
| "grad_norm": 0.06481563299894333, |
| "learning_rate": 9.853308840583109e-05, |
| "loss": 0.6875, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 0.062026482075452805, |
| "learning_rate": 9.844046090824533e-05, |
| "loss": 0.6889, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3521739130434783, |
| "grad_norm": 0.07275456190109253, |
| "learning_rate": 9.834504404631031e-05, |
| "loss": 0.6879, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.3565217391304348, |
| "grad_norm": 0.06591422110795975, |
| "learning_rate": 9.824684331444927e-05, |
| "loss": 0.6554, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.36086956521739133, |
| "grad_norm": 0.06396066397428513, |
| "learning_rate": 9.814586436738998e-05, |
| "loss": 0.6925, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.3652173913043478, |
| "grad_norm": 0.08825157582759857, |
| "learning_rate": 9.804211301983918e-05, |
| "loss": 0.6629, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.3695652173913043, |
| "grad_norm": 0.06731634587049484, |
| "learning_rate": 9.793559524614779e-05, |
| "loss": 0.6745, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3739130434782609, |
| "grad_norm": 0.06455274671316147, |
| "learning_rate": 9.782631717996675e-05, |
| "loss": 0.6851, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.3782608695652174, |
| "grad_norm": 0.07710668444633484, |
| "learning_rate": 9.771428511389395e-05, |
| "loss": 0.6929, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.3826086956521739, |
| "grad_norm": 0.0727052241563797, |
| "learning_rate": 9.759950549911186e-05, |
| "loss": 0.6798, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.3869565217391304, |
| "grad_norm": 0.07156208157539368, |
| "learning_rate": 9.748198494501597e-05, |
| "loss": 0.6807, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.391304347826087, |
| "grad_norm": 0.0921456515789032, |
| "learning_rate": 9.736173021883432e-05, |
| "loss": 0.6435, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.39565217391304347, |
| "grad_norm": 0.09094609320163727, |
| "learning_rate": 9.723874824523771e-05, |
| "loss": 0.6874, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.09006571024656296, |
| "learning_rate": 9.711304610594104e-05, |
| "loss": 0.6778, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4043478260869565, |
| "grad_norm": 0.13732297718524933, |
| "learning_rate": 9.698463103929542e-05, |
| "loss": 0.6561, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.40869565217391307, |
| "grad_norm": 0.09598764777183533, |
| "learning_rate": 9.685351043987151e-05, |
| "loss": 0.6624, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.41304347826086957, |
| "grad_norm": 0.09070798009634018, |
| "learning_rate": 9.671969185803356e-05, |
| "loss": 0.6684, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.41739130434782606, |
| "grad_norm": 0.0911954715847969, |
| "learning_rate": 9.658318299950473e-05, |
| "loss": 0.6568, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.4217391304347826, |
| "grad_norm": 0.08703230321407318, |
| "learning_rate": 9.644399172492336e-05, |
| "loss": 0.6442, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.4260869565217391, |
| "grad_norm": 0.0760849341750145, |
| "learning_rate": 9.630212604939026e-05, |
| "loss": 0.6551, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.43043478260869567, |
| "grad_norm": 0.10621879249811172, |
| "learning_rate": 9.615759414200729e-05, |
| "loss": 0.6665, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 0.08248650282621384, |
| "learning_rate": 9.601040432540684e-05, |
| "loss": 0.6752, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4391304347826087, |
| "grad_norm": 0.10147503018379211, |
| "learning_rate": 9.586056507527266e-05, |
| "loss": 0.6602, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.4434782608695652, |
| "grad_norm": 0.1442282497882843, |
| "learning_rate": 9.570808501985175e-05, |
| "loss": 0.6704, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.44782608695652176, |
| "grad_norm": 0.11339450627565384, |
| "learning_rate": 9.555297293945759e-05, |
| "loss": 0.6631, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.45217391304347826, |
| "grad_norm": 0.15643437206745148, |
| "learning_rate": 9.539523776596445e-05, |
| "loss": 0.668, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.45652173913043476, |
| "grad_norm": 0.1856074035167694, |
| "learning_rate": 9.523488858229313e-05, |
| "loss": 0.6413, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4608695652173913, |
| "grad_norm": 0.12280824780464172, |
| "learning_rate": 9.507193462188791e-05, |
| "loss": 0.6658, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.4652173913043478, |
| "grad_norm": 0.18749414384365082, |
| "learning_rate": 9.49063852681848e-05, |
| "loss": 0.6785, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.46956521739130436, |
| "grad_norm": 0.13954943418502808, |
| "learning_rate": 9.47382500540714e-05, |
| "loss": 0.652, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.47391304347826085, |
| "grad_norm": 0.15025292336940765, |
| "learning_rate": 9.45675386613377e-05, |
| "loss": 0.6622, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.4782608695652174, |
| "grad_norm": 0.11263363063335419, |
| "learning_rate": 9.439426092011875e-05, |
| "loss": 0.6573, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4826086956521739, |
| "grad_norm": 0.12779393792152405, |
| "learning_rate": 9.421842680832861e-05, |
| "loss": 0.6535, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.48695652173913045, |
| "grad_norm": 0.11488567292690277, |
| "learning_rate": 9.404004645108568e-05, |
| "loss": 0.6438, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.49130434782608695, |
| "grad_norm": 0.1706668585538864, |
| "learning_rate": 9.385913012012973e-05, |
| "loss": 0.6427, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.4956521739130435, |
| "grad_norm": 0.13733729720115662, |
| "learning_rate": 9.367568823323039e-05, |
| "loss": 0.6555, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.11061578243970871, |
| "learning_rate": 9.348973135358734e-05, |
| "loss": 0.6672, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.5043478260869565, |
| "grad_norm": 0.18926067650318146, |
| "learning_rate": 9.330127018922194e-05, |
| "loss": 0.6573, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.508695652173913, |
| "grad_norm": 0.15428727865219116, |
| "learning_rate": 9.311031559236067e-05, |
| "loss": 0.6708, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.5130434782608696, |
| "grad_norm": 0.16264328360557556, |
| "learning_rate": 9.291687855881026e-05, |
| "loss": 0.6446, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5173913043478261, |
| "grad_norm": 0.11342114955186844, |
| "learning_rate": 9.272097022732443e-05, |
| "loss": 0.6571, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 0.15034589171409607, |
| "learning_rate": 9.252260187896256e-05, |
| "loss": 0.6408, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5260869565217391, |
| "grad_norm": 0.21747715771198273, |
| "learning_rate": 9.232178493644006e-05, |
| "loss": 0.6346, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5304347826086957, |
| "grad_norm": 0.27781569957733154, |
| "learning_rate": 9.211853096347058e-05, |
| "loss": 0.6541, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5347826086956522, |
| "grad_norm": 0.2587333023548126, |
| "learning_rate": 9.191285166410022e-05, |
| "loss": 0.6516, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5391304347826087, |
| "grad_norm": 0.16397182643413544, |
| "learning_rate": 9.170475888203347e-05, |
| "loss": 0.6716, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5434782608695652, |
| "grad_norm": 0.12862510979175568, |
| "learning_rate": 9.149426459995126e-05, |
| "loss": 0.6596, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5478260869565217, |
| "grad_norm": 0.15427789092063904, |
| "learning_rate": 9.128138093882098e-05, |
| "loss": 0.6588, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5521739130434783, |
| "grad_norm": 0.22064033150672913, |
| "learning_rate": 9.106612015719845e-05, |
| "loss": 0.6314, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5565217391304348, |
| "grad_norm": 0.1941988468170166, |
| "learning_rate": 9.08484946505221e-05, |
| "loss": 0.648, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5608695652173913, |
| "grad_norm": 0.18163767457008362, |
| "learning_rate": 9.062851695039915e-05, |
| "loss": 0.6738, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.5652173913043478, |
| "grad_norm": 0.16294820606708527, |
| "learning_rate": 9.040619972388403e-05, |
| "loss": 0.6534, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5695652173913044, |
| "grad_norm": 0.23330819606781006, |
| "learning_rate": 9.018155577274892e-05, |
| "loss": 0.6478, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.5739130434782609, |
| "grad_norm": 0.3880465030670166, |
| "learning_rate": 8.995459803274664e-05, |
| "loss": 0.6566, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.5782608695652174, |
| "grad_norm": 0.6047540903091431, |
| "learning_rate": 8.972533957286573e-05, |
| "loss": 0.6321, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.5826086956521739, |
| "grad_norm": 0.526760995388031, |
| "learning_rate": 8.949379359457793e-05, |
| "loss": 0.6501, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.5869565217391305, |
| "grad_norm": 0.26121070981025696, |
| "learning_rate": 8.925997343107795e-05, |
| "loss": 0.6462, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.591304347826087, |
| "grad_norm": 0.3640858232975006, |
| "learning_rate": 8.902389254651569e-05, |
| "loss": 0.6378, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.5956521739130435, |
| "grad_norm": 0.3413775861263275, |
| "learning_rate": 8.8785564535221e-05, |
| "loss": 0.6549, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.25738435983657837, |
| "learning_rate": 8.854500312092081e-05, |
| "loss": 0.6266, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.6043478260869565, |
| "grad_norm": 0.3974941670894623, |
| "learning_rate": 8.83022221559489e-05, |
| "loss": 0.6207, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6086956521739131, |
| "grad_norm": 0.3541712462902069, |
| "learning_rate": 8.805723562044824e-05, |
| "loss": 0.6623, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6130434782608696, |
| "grad_norm": 0.29466933012008667, |
| "learning_rate": 8.781005762156593e-05, |
| "loss": 0.6753, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.6173913043478261, |
| "grad_norm": 0.429376482963562, |
| "learning_rate": 8.75607023926409e-05, |
| "loss": 0.6351, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.6217391304347826, |
| "grad_norm": 0.28085529804229736, |
| "learning_rate": 8.730918429238428e-05, |
| "loss": 0.6584, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6260869565217392, |
| "grad_norm": 0.34451988339424133, |
| "learning_rate": 8.705551780405263e-05, |
| "loss": 0.6619, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.6304347826086957, |
| "grad_norm": 0.3307543098926544, |
| "learning_rate": 8.679971753461387e-05, |
| "loss": 0.6448, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6347826086956522, |
| "grad_norm": 0.2655896842479706, |
| "learning_rate": 8.654179821390621e-05, |
| "loss": 0.6442, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6391304347826087, |
| "grad_norm": 0.4360576868057251, |
| "learning_rate": 8.628177469378995e-05, |
| "loss": 0.6487, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.6434782608695652, |
| "grad_norm": 0.35094520449638367, |
| "learning_rate": 8.601966194729227e-05, |
| "loss": 0.6359, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.6478260869565218, |
| "grad_norm": 0.4109646677970886, |
| "learning_rate": 8.575547506774497e-05, |
| "loss": 0.6519, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.6521739130434783, |
| "grad_norm": 0.3401927053928375, |
| "learning_rate": 8.548922926791545e-05, |
| "loss": 0.6375, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6565217391304348, |
| "grad_norm": 0.22073158621788025, |
| "learning_rate": 8.522093987913062e-05, |
| "loss": 0.6462, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.6608695652173913, |
| "grad_norm": 0.43310844898223877, |
| "learning_rate": 8.495062235039411e-05, |
| "loss": 0.6697, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.6652173913043479, |
| "grad_norm": 0.42843684554100037, |
| "learning_rate": 8.467829224749665e-05, |
| "loss": 0.6169, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.6695652173913044, |
| "grad_norm": 0.4057531952857971, |
| "learning_rate": 8.440396525211975e-05, |
| "loss": 0.6625, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.6739130434782609, |
| "grad_norm": 0.23454974591732025, |
| "learning_rate": 8.412765716093272e-05, |
| "loss": 0.616, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.6782608695652174, |
| "grad_norm": 0.3723919987678528, |
| "learning_rate": 8.384938388468296e-05, |
| "loss": 0.6576, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.6826086956521739, |
| "grad_norm": 0.44731444120407104, |
| "learning_rate": 8.356916144727985e-05, |
| "loss": 0.6408, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.6869565217391305, |
| "grad_norm": 0.4032682180404663, |
| "learning_rate": 8.328700598487203e-05, |
| "loss": 0.6541, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.691304347826087, |
| "grad_norm": 0.34927839040756226, |
| "learning_rate": 8.300293374491821e-05, |
| "loss": 0.641, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 0.37337374687194824, |
| "learning_rate": 8.271696108525157e-05, |
| "loss": 0.6409, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.33849138021469116, |
| "learning_rate": 8.24291044731378e-05, |
| "loss": 0.6571, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.7043478260869566, |
| "grad_norm": 0.26402008533477783, |
| "learning_rate": 8.213938048432697e-05, |
| "loss": 0.6467, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.7086956521739131, |
| "grad_norm": 0.31730157136917114, |
| "learning_rate": 8.184780580209892e-05, |
| "loss": 0.6519, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.7130434782608696, |
| "grad_norm": 0.47295334935188293, |
| "learning_rate": 8.155439721630264e-05, |
| "loss": 0.629, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.717391304347826, |
| "grad_norm": 0.3847337067127228, |
| "learning_rate": 8.125917162238945e-05, |
| "loss": 0.6404, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.7217391304347827, |
| "grad_norm": 0.30035194754600525, |
| "learning_rate": 8.09621460204401e-05, |
| "loss": 0.6697, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.7260869565217392, |
| "grad_norm": 0.4391736686229706, |
| "learning_rate": 8.066333751418583e-05, |
| "loss": 0.6399, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.7304347826086957, |
| "grad_norm": 0.6057283878326416, |
| "learning_rate": 8.036276331002348e-05, |
| "loss": 0.6341, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.7347826086956522, |
| "grad_norm": 0.644005537033081, |
| "learning_rate": 8.006044071602477e-05, |
| "loss": 0.662, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.7391304347826086, |
| "grad_norm": 0.6034097671508789, |
| "learning_rate": 7.975638714093949e-05, |
| "loss": 0.638, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7434782608695653, |
| "grad_norm": 0.49918678402900696, |
| "learning_rate": 7.945062009319319e-05, |
| "loss": 0.6322, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.7478260869565218, |
| "grad_norm": 0.4087945520877838, |
| "learning_rate": 7.914315717987892e-05, |
| "loss": 0.6419, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7521739130434782, |
| "grad_norm": 0.3623512089252472, |
| "learning_rate": 7.883401610574336e-05, |
| "loss": 0.6618, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.7565217391304347, |
| "grad_norm": 0.7307239174842834, |
| "learning_rate": 7.85232146721673e-05, |
| "loss": 0.6572, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.7608695652173914, |
| "grad_norm": 0.8763480186462402, |
| "learning_rate": 7.821077077614061e-05, |
| "loss": 0.6434, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7652173913043478, |
| "grad_norm": 0.5741376280784607, |
| "learning_rate": 7.789670240923168e-05, |
| "loss": 0.6539, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.7695652173913043, |
| "grad_norm": 0.4742548167705536, |
| "learning_rate": 7.758102765655137e-05, |
| "loss": 0.6435, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.7739130434782608, |
| "grad_norm": 0.6679338216781616, |
| "learning_rate": 7.726376469571164e-05, |
| "loss": 0.6654, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.7782608695652173, |
| "grad_norm": 0.7236630320549011, |
| "learning_rate": 7.694493179577879e-05, |
| "loss": 0.655, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.782608695652174, |
| "grad_norm": 0.3954794704914093, |
| "learning_rate": 7.662454731622148e-05, |
| "loss": 0.6733, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7869565217391304, |
| "grad_norm": 0.41423317790031433, |
| "learning_rate": 7.630262970585356e-05, |
| "loss": 0.6466, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.7913043478260869, |
| "grad_norm": 0.5248022675514221, |
| "learning_rate": 7.597919750177168e-05, |
| "loss": 0.6343, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.7956521739130434, |
| "grad_norm": 0.4523037075996399, |
| "learning_rate": 7.56542693282879e-05, |
| "loss": 0.6494, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.3984985947608948, |
| "learning_rate": 7.532786389585716e-05, |
| "loss": 0.6511, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.8043478260869565, |
| "grad_norm": 0.3854583501815796, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.6628, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.808695652173913, |
| "grad_norm": 0.43506574630737305, |
| "learning_rate": 7.467069652022016e-05, |
| "loss": 0.6603, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.8130434782608695, |
| "grad_norm": 0.41759249567985535, |
| "learning_rate": 7.433997241891742e-05, |
| "loss": 0.6674, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.8173913043478261, |
| "grad_norm": 0.2531141936779022, |
| "learning_rate": 7.400784674029578e-05, |
| "loss": 0.6395, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.8217391304347826, |
| "grad_norm": 0.4464227259159088, |
| "learning_rate": 7.36743386092667e-05, |
| "loss": 0.6576, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.8260869565217391, |
| "grad_norm": 0.47379711270332336, |
| "learning_rate": 7.333946723034794e-05, |
| "loss": 0.6423, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8304347826086956, |
| "grad_norm": 0.3901284635066986, |
| "learning_rate": 7.300325188655761e-05, |
| "loss": 0.6594, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.8347826086956521, |
| "grad_norm": 0.3132023811340332, |
| "learning_rate": 7.266571193830387e-05, |
| "loss": 0.6611, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.8391304347826087, |
| "grad_norm": 0.4356115460395813, |
| "learning_rate": 7.232686682227001e-05, |
| "loss": 0.6376, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.8434782608695652, |
| "grad_norm": 0.5321224331855774, |
| "learning_rate": 7.198673605029528e-05, |
| "loss": 0.6643, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.8478260869565217, |
| "grad_norm": 0.3640391230583191, |
| "learning_rate": 7.164533920825137e-05, |
| "loss": 0.6476, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.8521739130434782, |
| "grad_norm": 0.2873951494693756, |
| "learning_rate": 7.130269595491443e-05, |
| "loss": 0.649, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.8565217391304348, |
| "grad_norm": 0.45937976241111755, |
| "learning_rate": 7.095882602083322e-05, |
| "loss": 0.648, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.8608695652173913, |
| "grad_norm": 0.5308820009231567, |
| "learning_rate": 7.061374920719288e-05, |
| "loss": 0.6458, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.8652173913043478, |
| "grad_norm": 0.43750235438346863, |
| "learning_rate": 7.026748538467474e-05, |
| "loss": 0.6457, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.27052804827690125, |
| "learning_rate": 6.992005449231208e-05, |
| "loss": 0.6452, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8739130434782608, |
| "grad_norm": 0.37570297718048096, |
| "learning_rate": 6.957147653634198e-05, |
| "loss": 0.6566, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.8782608695652174, |
| "grad_norm": 0.32025307416915894, |
| "learning_rate": 6.922177158905325e-05, |
| "loss": 0.6655, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.8826086956521739, |
| "grad_norm": 0.2932673990726471, |
| "learning_rate": 6.887095978763072e-05, |
| "loss": 0.6749, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.8869565217391304, |
| "grad_norm": 0.23213867843151093, |
| "learning_rate": 6.851906133299557e-05, |
| "loss": 0.6631, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.8913043478260869, |
| "grad_norm": 0.3275505602359772, |
| "learning_rate": 6.816609648864208e-05, |
| "loss": 0.6758, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.8956521739130435, |
| "grad_norm": 0.32032299041748047, |
| "learning_rate": 6.781208557947086e-05, |
| "loss": 0.662, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.26808756589889526, |
| "learning_rate": 6.745704899061843e-05, |
| "loss": 0.6464, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.9043478260869565, |
| "grad_norm": 0.25998106598854065, |
| "learning_rate": 6.710100716628344e-05, |
| "loss": 0.6556, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.908695652173913, |
| "grad_norm": 0.36953797936439514, |
| "learning_rate": 6.674398060854931e-05, |
| "loss": 0.6761, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.9130434782608695, |
| "grad_norm": 0.43774327635765076, |
| "learning_rate": 6.638598987620375e-05, |
| "loss": 0.6481, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9173913043478261, |
| "grad_norm": 0.24901102483272552, |
| "learning_rate": 6.602705558355486e-05, |
| "loss": 0.675, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.9217391304347826, |
| "grad_norm": 0.3668375313282013, |
| "learning_rate": 6.566719839924412e-05, |
| "loss": 0.6619, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.9260869565217391, |
| "grad_norm": 0.5943741202354431, |
| "learning_rate": 6.530643904505621e-05, |
| "loss": 0.6561, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.9304347826086956, |
| "grad_norm": 0.6538096070289612, |
| "learning_rate": 6.49447982947258e-05, |
| "loss": 0.6297, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.9347826086956522, |
| "grad_norm": 0.5622021555900574, |
| "learning_rate": 6.458229697274125e-05, |
| "loss": 0.6602, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.9391304347826087, |
| "grad_norm": 0.45731329917907715, |
| "learning_rate": 6.42189559531456e-05, |
| "loss": 0.6686, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.9434782608695652, |
| "grad_norm": 0.26856303215026855, |
| "learning_rate": 6.385479615833445e-05, |
| "loss": 0.6358, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.9478260869565217, |
| "grad_norm": 0.24898113310337067, |
| "learning_rate": 6.348983855785121e-05, |
| "loss": 0.6579, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.9521739130434783, |
| "grad_norm": 0.3039465844631195, |
| "learning_rate": 6.312410416717968e-05, |
| "loss": 0.6493, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.9565217391304348, |
| "grad_norm": 0.36120837926864624, |
| "learning_rate": 6.27576140465338e-05, |
| "loss": 0.6524, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9608695652173913, |
| "grad_norm": 0.35759392380714417, |
| "learning_rate": 6.2390389299645e-05, |
| "loss": 0.6247, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.9652173913043478, |
| "grad_norm": 0.29583072662353516, |
| "learning_rate": 6.202245107254693e-05, |
| "loss": 0.642, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.9695652173913043, |
| "grad_norm": 0.24323242902755737, |
| "learning_rate": 6.165382055235783e-05, |
| "loss": 0.6683, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.9739130434782609, |
| "grad_norm": 0.4522090256214142, |
| "learning_rate": 6.128451896606053e-05, |
| "loss": 0.639, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.9782608695652174, |
| "grad_norm": 0.6692441701889038, |
| "learning_rate": 6.091456757928008e-05, |
| "loss": 0.6628, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.9826086956521739, |
| "grad_norm": 0.7985122203826904, |
| "learning_rate": 6.054398769505924e-05, |
| "loss": 0.6585, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.9869565217391304, |
| "grad_norm": 0.6464029550552368, |
| "learning_rate": 6.01728006526317e-05, |
| "loss": 0.6563, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.991304347826087, |
| "grad_norm": 0.36494386196136475, |
| "learning_rate": 5.980102782619342e-05, |
| "loss": 0.648, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.9956521739130435, |
| "grad_norm": 0.40735068917274475, |
| "learning_rate": 5.942869062367179e-05, |
| "loss": 0.6502, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.6993163228034973, |
| "learning_rate": 5.905581048549279e-05, |
| "loss": 0.6682, |
| "step": 230 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 460, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.970864260913562e+18, |
| "train_batch_size": 24, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|