| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9966703662597114, |
| "eval_steps": 500, |
| "global_step": 900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0022197558268590455, |
| "grad_norm": 8.399400554535633, |
| "learning_rate": 1.111111111111111e-08, |
| "loss": 0.6208, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004439511653718091, |
| "grad_norm": 9.23527463594977, |
| "learning_rate": 2.222222222222222e-08, |
| "loss": 0.5876, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006659267480577136, |
| "grad_norm": 7.892116076103547, |
| "learning_rate": 3.3333333333333334e-08, |
| "loss": 0.5594, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008879023307436182, |
| "grad_norm": 8.263962816983785, |
| "learning_rate": 4.444444444444444e-08, |
| "loss": 0.5842, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.011098779134295227, |
| "grad_norm": 8.12452966304493, |
| "learning_rate": 5.555555555555555e-08, |
| "loss": 0.5967, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.013318534961154272, |
| "grad_norm": 8.349469406229243, |
| "learning_rate": 6.666666666666667e-08, |
| "loss": 0.5914, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01553829078801332, |
| "grad_norm": 8.367872537902208, |
| "learning_rate": 7.777777777777778e-08, |
| "loss": 0.5972, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.017758046614872364, |
| "grad_norm": 8.248807052651621, |
| "learning_rate": 8.888888888888888e-08, |
| "loss": 0.5833, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01997780244173141, |
| "grad_norm": 8.458007397509917, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 0.6075, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.022197558268590455, |
| "grad_norm": 8.368446611829034, |
| "learning_rate": 1.111111111111111e-07, |
| "loss": 0.606, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0244173140954495, |
| "grad_norm": 8.308128238411795, |
| "learning_rate": 1.2222222222222222e-07, |
| "loss": 0.6244, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.026637069922308545, |
| "grad_norm": 8.537780094365171, |
| "learning_rate": 1.3333333333333334e-07, |
| "loss": 0.6135, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02885682574916759, |
| "grad_norm": 8.158296304061713, |
| "learning_rate": 1.4444444444444442e-07, |
| "loss": 0.6152, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03107658157602664, |
| "grad_norm": 8.532386684065743, |
| "learning_rate": 1.5555555555555556e-07, |
| "loss": 0.5913, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.033296337402885685, |
| "grad_norm": 8.436502712386789, |
| "learning_rate": 1.6666666666666665e-07, |
| "loss": 0.5808, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03551609322974473, |
| "grad_norm": 8.030910487309407, |
| "learning_rate": 1.7777777777777776e-07, |
| "loss": 0.592, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03773584905660377, |
| "grad_norm": 7.479947998376637, |
| "learning_rate": 1.8888888888888888e-07, |
| "loss": 0.6097, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03995560488346282, |
| "grad_norm": 8.75074608782379, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 0.5939, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.042175360710321866, |
| "grad_norm": 7.880536846496551, |
| "learning_rate": 2.1111111111111108e-07, |
| "loss": 0.5618, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04439511653718091, |
| "grad_norm": 7.603741618669525, |
| "learning_rate": 2.222222222222222e-07, |
| "loss": 0.592, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04661487236403995, |
| "grad_norm": 7.427486297252882, |
| "learning_rate": 2.3333333333333333e-07, |
| "loss": 0.5807, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.048834628190899, |
| "grad_norm": 7.28835072640129, |
| "learning_rate": 2.4444444444444445e-07, |
| "loss": 0.6103, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.051054384017758046, |
| "grad_norm": 7.234395660436924, |
| "learning_rate": 2.5555555555555553e-07, |
| "loss": 0.603, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05327413984461709, |
| "grad_norm": 7.508445944227732, |
| "learning_rate": 2.6666666666666667e-07, |
| "loss": 0.6008, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05549389567147614, |
| "grad_norm": 7.099644123190661, |
| "learning_rate": 2.7777777777777776e-07, |
| "loss": 0.5769, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05771365149833518, |
| "grad_norm": 6.932344388923125, |
| "learning_rate": 2.8888888888888885e-07, |
| "loss": 0.627, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05993340732519423, |
| "grad_norm": 6.645121649801645, |
| "learning_rate": 3e-07, |
| "loss": 0.632, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.06215316315205328, |
| "grad_norm": 7.192778252181474, |
| "learning_rate": 3.111111111111111e-07, |
| "loss": 0.5832, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06437291897891231, |
| "grad_norm": 6.6876109677067355, |
| "learning_rate": 3.222222222222222e-07, |
| "loss": 0.5623, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06659267480577137, |
| "grad_norm": 6.2825192558471015, |
| "learning_rate": 3.333333333333333e-07, |
| "loss": 0.5788, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06881243063263041, |
| "grad_norm": 7.481092834682601, |
| "learning_rate": 3.4444444444444444e-07, |
| "loss": 0.571, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07103218645948946, |
| "grad_norm": 6.473626658016538, |
| "learning_rate": 3.5555555555555553e-07, |
| "loss": 0.5928, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0732519422863485, |
| "grad_norm": 6.2535869677634635, |
| "learning_rate": 3.666666666666666e-07, |
| "loss": 0.5691, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07547169811320754, |
| "grad_norm": 6.185332269346511, |
| "learning_rate": 3.7777777777777775e-07, |
| "loss": 0.5601, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07769145394006659, |
| "grad_norm": 6.288484433565551, |
| "learning_rate": 3.8888888888888884e-07, |
| "loss": 0.564, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07991120976692564, |
| "grad_norm": 5.985590937835178, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 0.5568, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08213096559378469, |
| "grad_norm": 6.103172138710466, |
| "learning_rate": 4.1111111111111107e-07, |
| "loss": 0.5643, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08435072142064373, |
| "grad_norm": 6.55206929232527, |
| "learning_rate": 4.2222222222222216e-07, |
| "loss": 0.5953, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08657047724750278, |
| "grad_norm": 5.715926102273091, |
| "learning_rate": 4.3333333333333335e-07, |
| "loss": 0.5619, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08879023307436182, |
| "grad_norm": 5.450445663024957, |
| "learning_rate": 4.444444444444444e-07, |
| "loss": 0.5631, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09100998890122086, |
| "grad_norm": 5.542981405394689, |
| "learning_rate": 4.555555555555556e-07, |
| "loss": 0.5577, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0932297447280799, |
| "grad_norm": 5.725678282306514, |
| "learning_rate": 4.6666666666666666e-07, |
| "loss": 0.5848, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09544950055493896, |
| "grad_norm": 5.382657479056415, |
| "learning_rate": 4.777777777777777e-07, |
| "loss": 0.5737, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.097669256381798, |
| "grad_norm": 5.0680029115322744, |
| "learning_rate": 4.888888888888889e-07, |
| "loss": 0.5716, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.09988901220865705, |
| "grad_norm": 5.37896827418845, |
| "learning_rate": 5e-07, |
| "loss": 0.5814, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.10210876803551609, |
| "grad_norm": 5.140988943990483, |
| "learning_rate": 5.111111111111111e-07, |
| "loss": 0.5563, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10432852386237514, |
| "grad_norm": 5.167606311709604, |
| "learning_rate": 5.222222222222222e-07, |
| "loss": 0.5789, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10654827968923418, |
| "grad_norm": 4.910299969476651, |
| "learning_rate": 5.333333333333333e-07, |
| "loss": 0.5374, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.10876803551609324, |
| "grad_norm": 4.518136573407695, |
| "learning_rate": 5.444444444444444e-07, |
| "loss": 0.5456, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11098779134295228, |
| "grad_norm": 4.654972052593856, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 0.5756, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11320754716981132, |
| "grad_norm": 4.4424350535018755, |
| "learning_rate": 5.666666666666667e-07, |
| "loss": 0.5694, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.11542730299667037, |
| "grad_norm": 4.446642718416529, |
| "learning_rate": 5.777777777777777e-07, |
| "loss": 0.5397, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 4.139602412755836, |
| "learning_rate": 5.888888888888889e-07, |
| "loss": 0.5493, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.11986681465038845, |
| "grad_norm": 3.8012053176011866, |
| "learning_rate": 6e-07, |
| "loss": 0.5818, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1220865704772475, |
| "grad_norm": 3.674488655760073, |
| "learning_rate": 6.111111111111111e-07, |
| "loss": 0.5515, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.12430632630410655, |
| "grad_norm": 3.773334442965557, |
| "learning_rate": 6.222222222222223e-07, |
| "loss": 0.5285, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.12652608213096558, |
| "grad_norm": 3.3471615813958353, |
| "learning_rate": 6.333333333333333e-07, |
| "loss": 0.5283, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.12874583795782463, |
| "grad_norm": 3.7680138998687176, |
| "learning_rate": 6.444444444444444e-07, |
| "loss": 0.5468, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1309655937846837, |
| "grad_norm": 3.1334055249642336, |
| "learning_rate": 6.555555555555555e-07, |
| "loss": 0.5356, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.13318534961154274, |
| "grad_norm": 2.942044115739925, |
| "learning_rate": 6.666666666666666e-07, |
| "loss": 0.5272, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13540510543840178, |
| "grad_norm": 3.182126804198039, |
| "learning_rate": 6.777777777777778e-07, |
| "loss": 0.5288, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.13762486126526083, |
| "grad_norm": 2.6901086101723535, |
| "learning_rate": 6.888888888888889e-07, |
| "loss": 0.5167, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.13984461709211987, |
| "grad_norm": 2.586223950116098, |
| "learning_rate": 7e-07, |
| "loss": 0.5499, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.14206437291897892, |
| "grad_norm": 2.5700536482288086, |
| "learning_rate": 7.111111111111111e-07, |
| "loss": 0.5063, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.14428412874583796, |
| "grad_norm": 2.4661921220618512, |
| "learning_rate": 7.222222222222221e-07, |
| "loss": 0.5564, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.146503884572697, |
| "grad_norm": 2.330950891833989, |
| "learning_rate": 7.333333333333332e-07, |
| "loss": 0.5182, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.14872364039955605, |
| "grad_norm": 2.5180905748914895, |
| "learning_rate": 7.444444444444444e-07, |
| "loss": 0.532, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1509433962264151, |
| "grad_norm": 2.6095616302569384, |
| "learning_rate": 7.555555555555555e-07, |
| "loss": 0.5358, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.15316315205327413, |
| "grad_norm": 2.6685044499493844, |
| "learning_rate": 7.666666666666666e-07, |
| "loss": 0.5094, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.15538290788013318, |
| "grad_norm": 2.961669120901193, |
| "learning_rate": 7.777777777777777e-07, |
| "loss": 0.5259, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.15760266370699222, |
| "grad_norm": 2.4860738166604515, |
| "learning_rate": 7.888888888888888e-07, |
| "loss": 0.4868, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1598224195338513, |
| "grad_norm": 2.2865818119799464, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 0.5076, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.16204217536071033, |
| "grad_norm": 1.8849597000054163, |
| "learning_rate": 8.11111111111111e-07, |
| "loss": 0.5298, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.16426193118756938, |
| "grad_norm": 2.10197468284917, |
| "learning_rate": 8.222222222222221e-07, |
| "loss": 0.5174, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.16648168701442842, |
| "grad_norm": 2.134411200325002, |
| "learning_rate": 8.333333333333332e-07, |
| "loss": 0.5101, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.16870144284128746, |
| "grad_norm": 2.0548396664198694, |
| "learning_rate": 8.444444444444443e-07, |
| "loss": 0.512, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1709211986681465, |
| "grad_norm": 2.190003103991552, |
| "learning_rate": 8.555555555555556e-07, |
| "loss": 0.5322, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.17314095449500555, |
| "grad_norm": 2.3012756353361232, |
| "learning_rate": 8.666666666666667e-07, |
| "loss": 0.5209, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1753607103218646, |
| "grad_norm": 2.767046453765713, |
| "learning_rate": 8.777777777777777e-07, |
| "loss": 0.5438, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.17758046614872364, |
| "grad_norm": 1.9909116361275931, |
| "learning_rate": 8.888888888888888e-07, |
| "loss": 0.4869, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.17980022197558268, |
| "grad_norm": 2.0137646431651195, |
| "learning_rate": 8.999999999999999e-07, |
| "loss": 0.4994, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.18201997780244172, |
| "grad_norm": 1.6363334375526886, |
| "learning_rate": 9.111111111111112e-07, |
| "loss": 0.4962, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.18423973362930077, |
| "grad_norm": 1.6903094131455099, |
| "learning_rate": 9.222222222222222e-07, |
| "loss": 0.5035, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1864594894561598, |
| "grad_norm": 1.6665441809105392, |
| "learning_rate": 9.333333333333333e-07, |
| "loss": 0.5172, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.18867924528301888, |
| "grad_norm": 1.9115942542365676, |
| "learning_rate": 9.444444444444444e-07, |
| "loss": 0.5245, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.19089900110987792, |
| "grad_norm": 1.8171555565938122, |
| "learning_rate": 9.555555555555554e-07, |
| "loss": 0.5199, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.19311875693673697, |
| "grad_norm": 1.5495672352550462, |
| "learning_rate": 9.666666666666666e-07, |
| "loss": 0.5064, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.195338512763596, |
| "grad_norm": 1.6942828602627467, |
| "learning_rate": 9.777777777777778e-07, |
| "loss": 0.4985, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.19755826859045506, |
| "grad_norm": 1.4110662925081685, |
| "learning_rate": 9.888888888888888e-07, |
| "loss": 0.5058, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.1997780244173141, |
| "grad_norm": 1.3896068677559, |
| "learning_rate": 1e-06, |
| "loss": 0.5138, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.20199778024417314, |
| "grad_norm": 1.8634594942686744, |
| "learning_rate": 9.9987640588308e-07, |
| "loss": 0.5005, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.20421753607103219, |
| "grad_norm": 1.601950407868211, |
| "learning_rate": 9.997525365008662e-07, |
| "loss": 0.5351, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.20643729189789123, |
| "grad_norm": 1.7619605783577625, |
| "learning_rate": 9.996283909327387e-07, |
| "loss": 0.4846, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.20865704772475027, |
| "grad_norm": 1.738504362351875, |
| "learning_rate": 9.995039682539681e-07, |
| "loss": 0.5152, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.21087680355160932, |
| "grad_norm": 1.9023064418592266, |
| "learning_rate": 9.99379267535692e-07, |
| "loss": 0.4943, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.21309655937846836, |
| "grad_norm": 1.5940614863386777, |
| "learning_rate": 9.992542878448919e-07, |
| "loss": 0.4928, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2153163152053274, |
| "grad_norm": 1.388955486975375, |
| "learning_rate": 9.991290282443698e-07, |
| "loss": 0.5046, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.21753607103218647, |
| "grad_norm": 1.341456992180103, |
| "learning_rate": 9.990034877927254e-07, |
| "loss": 0.53, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.21975582685904552, |
| "grad_norm": 1.522858356142826, |
| "learning_rate": 9.988776655443322e-07, |
| "loss": 0.4929, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.22197558268590456, |
| "grad_norm": 1.4541516733946238, |
| "learning_rate": 9.987515605493133e-07, |
| "loss": 0.5394, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2241953385127636, |
| "grad_norm": 1.376705603294956, |
| "learning_rate": 9.986251718535183e-07, |
| "loss": 0.5118, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.22641509433962265, |
| "grad_norm": 1.3823606190937259, |
| "learning_rate": 9.984984984984985e-07, |
| "loss": 0.5047, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2286348501664817, |
| "grad_norm": 1.363673309466558, |
| "learning_rate": 9.98371539521483e-07, |
| "loss": 0.4903, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.23085460599334073, |
| "grad_norm": 1.416401669190688, |
| "learning_rate": 9.982442939553548e-07, |
| "loss": 0.514, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.23307436182019978, |
| "grad_norm": 1.3605729764609202, |
| "learning_rate": 9.981167608286253e-07, |
| "loss": 0.4843, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 1.2651391854034004, |
| "learning_rate": 9.979889391654097e-07, |
| "loss": 0.4934, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.23751387347391786, |
| "grad_norm": 1.3876541204990487, |
| "learning_rate": 9.978608279854033e-07, |
| "loss": 0.5184, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.2397336293007769, |
| "grad_norm": 1.3019101475882795, |
| "learning_rate": 9.977324263038547e-07, |
| "loss": 0.5168, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.24195338512763595, |
| "grad_norm": 1.234950346880673, |
| "learning_rate": 9.976037331315424e-07, |
| "loss": 0.512, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.244173140954495, |
| "grad_norm": 1.2715031826930232, |
| "learning_rate": 9.974747474747475e-07, |
| "loss": 0.5051, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.24639289678135406, |
| "grad_norm": 1.4815309620586665, |
| "learning_rate": 9.973454683352293e-07, |
| "loss": 0.5585, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2486126526082131, |
| "grad_norm": 1.4584424491170997, |
| "learning_rate": 9.972158947101999e-07, |
| "loss": 0.5183, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.25083240843507215, |
| "grad_norm": 1.4855761461506833, |
| "learning_rate": 9.970860255922969e-07, |
| "loss": 0.4973, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.25305216426193117, |
| "grad_norm": 1.3279548120498825, |
| "learning_rate": 9.969558599695586e-07, |
| "loss": 0.4962, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.25527192008879024, |
| "grad_norm": 1.3134752725185352, |
| "learning_rate": 9.968253968253967e-07, |
| "loss": 0.4947, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.25749167591564925, |
| "grad_norm": 2.592615996953234, |
| "learning_rate": 9.96694635138571e-07, |
| "loss": 0.4884, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.2597114317425083, |
| "grad_norm": 1.4087868752655734, |
| "learning_rate": 9.965635738831615e-07, |
| "loss": 0.4791, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2619311875693674, |
| "grad_norm": 1.3013116867327663, |
| "learning_rate": 9.964322120285423e-07, |
| "loss": 0.5039, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2641509433962264, |
| "grad_norm": 1.2841064842046501, |
| "learning_rate": 9.963005485393543e-07, |
| "loss": 0.4973, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2663706992230855, |
| "grad_norm": 1.2439124542464444, |
| "learning_rate": 9.96168582375479e-07, |
| "loss": 0.4913, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2685904550499445, |
| "grad_norm": 1.280079305801681, |
| "learning_rate": 9.960363124920087e-07, |
| "loss": 0.5116, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.27081021087680357, |
| "grad_norm": 1.2624652099591023, |
| "learning_rate": 9.959037378392216e-07, |
| "loss": 0.5099, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.2730299667036626, |
| "grad_norm": 1.345328014236658, |
| "learning_rate": 9.957708573625527e-07, |
| "loss": 0.473, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.27524972253052166, |
| "grad_norm": 1.3398634155494387, |
| "learning_rate": 9.95637670002566e-07, |
| "loss": 0.5058, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.27746947835738067, |
| "grad_norm": 1.4620545182624154, |
| "learning_rate": 9.95504174694926e-07, |
| "loss": 0.5056, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.27968923418423974, |
| "grad_norm": 1.3816636005003708, |
| "learning_rate": 9.953703703703704e-07, |
| "loss": 0.4986, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.28190899001109876, |
| "grad_norm": 1.4382772137460398, |
| "learning_rate": 9.952362559546802e-07, |
| "loss": 0.5216, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.28412874583795783, |
| "grad_norm": 1.28382749112946, |
| "learning_rate": 9.951018303686517e-07, |
| "loss": 0.5172, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.28634850166481685, |
| "grad_norm": 1.2599739929931204, |
| "learning_rate": 9.949670925280681e-07, |
| "loss": 0.5084, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2885682574916759, |
| "grad_norm": 1.4005233619233985, |
| "learning_rate": 9.94832041343669e-07, |
| "loss": 0.4911, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.290788013318535, |
| "grad_norm": 1.2931159398960264, |
| "learning_rate": 9.946966757211227e-07, |
| "loss": 0.5065, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.293007769145394, |
| "grad_norm": 1.272184000110546, |
| "learning_rate": 9.945609945609945e-07, |
| "loss": 0.4924, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2952275249722531, |
| "grad_norm": 1.2437113070167456, |
| "learning_rate": 9.94424996758719e-07, |
| "loss": 0.4599, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2974472807991121, |
| "grad_norm": 1.4773247625940102, |
| "learning_rate": 9.942886812045691e-07, |
| "loss": 0.4857, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.29966703662597116, |
| "grad_norm": 1.2008656849842716, |
| "learning_rate": 9.941520467836258e-07, |
| "loss": 0.4963, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.3018867924528302, |
| "grad_norm": 1.3929619707536258, |
| "learning_rate": 9.94015092375748e-07, |
| "loss": 0.4684, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.30410654827968925, |
| "grad_norm": 1.2767970377058495, |
| "learning_rate": 9.938778168555424e-07, |
| "loss": 0.4851, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.30632630410654826, |
| "grad_norm": 1.2651068865999724, |
| "learning_rate": 9.937402190923317e-07, |
| "loss": 0.4991, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.30854605993340734, |
| "grad_norm": 1.1342128631392274, |
| "learning_rate": 9.93602297950124e-07, |
| "loss": 0.5015, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.31076581576026635, |
| "grad_norm": 1.3536175243314477, |
| "learning_rate": 9.934640522875816e-07, |
| "loss": 0.4759, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3129855715871254, |
| "grad_norm": 1.2186849663505919, |
| "learning_rate": 9.933254809579898e-07, |
| "loss": 0.4948, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.31520532741398444, |
| "grad_norm": 1.2320342626044847, |
| "learning_rate": 9.931865828092243e-07, |
| "loss": 0.4887, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.3174250832408435, |
| "grad_norm": 1.417534749096284, |
| "learning_rate": 9.930473566837202e-07, |
| "loss": 0.4714, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3196448390677026, |
| "grad_norm": 1.2396165037232065, |
| "learning_rate": 9.929078014184397e-07, |
| "loss": 0.4834, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3218645948945616, |
| "grad_norm": 1.494912541496174, |
| "learning_rate": 9.92767915844839e-07, |
| "loss": 0.4882, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.32408435072142067, |
| "grad_norm": 4.4612294268387975, |
| "learning_rate": 9.926276987888362e-07, |
| "loss": 0.4855, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.3263041065482797, |
| "grad_norm": 1.2043063588616731, |
| "learning_rate": 9.924871490707788e-07, |
| "loss": 0.4936, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.32852386237513875, |
| "grad_norm": 1.287740870218377, |
| "learning_rate": 9.923462655054104e-07, |
| "loss": 0.4837, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.33074361820199777, |
| "grad_norm": 1.5935225239592739, |
| "learning_rate": 9.922050469018363e-07, |
| "loss": 0.4947, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.33296337402885684, |
| "grad_norm": 1.2023766619786924, |
| "learning_rate": 9.92063492063492e-07, |
| "loss": 0.4334, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.33518312985571586, |
| "grad_norm": 1.5038127059764161, |
| "learning_rate": 9.919215997881075e-07, |
| "loss": 0.4745, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3374028856825749, |
| "grad_norm": 1.322847323172384, |
| "learning_rate": 9.917793688676744e-07, |
| "loss": 0.4842, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.33962264150943394, |
| "grad_norm": 1.3698735083886853, |
| "learning_rate": 9.91636798088411e-07, |
| "loss": 0.4627, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.341842397336293, |
| "grad_norm": 1.1751076647142753, |
| "learning_rate": 9.914938862307282e-07, |
| "loss": 0.4954, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.34406215316315203, |
| "grad_norm": 1.252282484942035, |
| "learning_rate": 9.91350632069195e-07, |
| "loss": 0.515, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3462819089900111, |
| "grad_norm": 1.1925027486739732, |
| "learning_rate": 9.91207034372502e-07, |
| "loss": 0.5076, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.34850166481687017, |
| "grad_norm": 1.156886995816501, |
| "learning_rate": 9.91063091903428e-07, |
| "loss": 0.4588, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3507214206437292, |
| "grad_norm": 1.254079717620731, |
| "learning_rate": 9.909188034188032e-07, |
| "loss": 0.4387, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 1.3476069702564326, |
| "learning_rate": 9.907741676694746e-07, |
| "loss": 0.5, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3551609322974473, |
| "grad_norm": 1.1852846148649574, |
| "learning_rate": 9.906291834002677e-07, |
| "loss": 0.5027, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.35738068812430634, |
| "grad_norm": 1.1877456827279553, |
| "learning_rate": 9.90483849349953e-07, |
| "loss": 0.4803, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.35960044395116536, |
| "grad_norm": 1.2427891755370146, |
| "learning_rate": 9.903381642512078e-07, |
| "loss": 0.4789, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.36182019977802443, |
| "grad_norm": 1.098532077739215, |
| "learning_rate": 9.90192126830579e-07, |
| "loss": 0.4777, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.36403995560488345, |
| "grad_norm": 1.168399201796612, |
| "learning_rate": 9.900457358084477e-07, |
| "loss": 0.5102, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.3662597114317425, |
| "grad_norm": 2.213806937146682, |
| "learning_rate": 9.898989898989898e-07, |
| "loss": 0.5044, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.36847946725860153, |
| "grad_norm": 1.2440142016900406, |
| "learning_rate": 9.8975188781014e-07, |
| "loss": 0.4744, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.3706992230854606, |
| "grad_norm": 1.3052497044223825, |
| "learning_rate": 9.896044282435533e-07, |
| "loss": 0.4971, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3729189789123196, |
| "grad_norm": 1.2043761038679413, |
| "learning_rate": 9.89456609894566e-07, |
| "loss": 0.4406, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3751387347391787, |
| "grad_norm": 1.2022759347548375, |
| "learning_rate": 9.893084314521587e-07, |
| "loss": 0.486, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.37735849056603776, |
| "grad_norm": 1.5411131311153146, |
| "learning_rate": 9.89159891598916e-07, |
| "loss": 0.4745, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3795782463928968, |
| "grad_norm": 1.5173133638369167, |
| "learning_rate": 9.89010989010989e-07, |
| "loss": 0.4847, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.38179800221975585, |
| "grad_norm": 1.3709113146054337, |
| "learning_rate": 9.888617223580548e-07, |
| "loss": 0.4859, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.38401775804661487, |
| "grad_norm": 1.1941263065851728, |
| "learning_rate": 9.887120903032776e-07, |
| "loss": 0.499, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.38623751387347394, |
| "grad_norm": 1.2043015575494198, |
| "learning_rate": 9.88562091503268e-07, |
| "loss": 0.4902, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.38845726970033295, |
| "grad_norm": 1.5205275361868167, |
| "learning_rate": 9.884117246080436e-07, |
| "loss": 0.4861, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.390677025527192, |
| "grad_norm": 1.1068775140013685, |
| "learning_rate": 9.882609882609883e-07, |
| "loss": 0.487, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.39289678135405104, |
| "grad_norm": 1.3479560920415647, |
| "learning_rate": 9.88109881098811e-07, |
| "loss": 0.549, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3951165371809101, |
| "grad_norm": 1.1681890050238768, |
| "learning_rate": 9.87958401751505e-07, |
| "loss": 0.4529, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.3973362930077691, |
| "grad_norm": 1.1510714354592815, |
| "learning_rate": 9.87806548842307e-07, |
| "loss": 0.4776, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.3995560488346282, |
| "grad_norm": 1.183102322925365, |
| "learning_rate": 9.876543209876544e-07, |
| "loss": 0.4897, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4017758046614872, |
| "grad_norm": 3.10446224930389, |
| "learning_rate": 9.875017167971433e-07, |
| "loss": 0.4818, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.4039955604883463, |
| "grad_norm": 1.1580041989577758, |
| "learning_rate": 9.873487348734873e-07, |
| "loss": 0.456, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.40621531631520535, |
| "grad_norm": 1.182486697276762, |
| "learning_rate": 9.87195373812474e-07, |
| "loss": 0.479, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.40843507214206437, |
| "grad_norm": 1.1946276510033909, |
| "learning_rate": 9.870416322029225e-07, |
| "loss": 0.4603, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.41065482796892344, |
| "grad_norm": 1.2789687454969172, |
| "learning_rate": 9.86887508626639e-07, |
| "loss": 0.4801, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.41287458379578246, |
| "grad_norm": 1.2477786049263804, |
| "learning_rate": 9.867330016583748e-07, |
| "loss": 0.4859, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.41509433962264153, |
| "grad_norm": 1.2098018904070043, |
| "learning_rate": 9.86578109865781e-07, |
| "loss": 0.4795, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.41731409544950054, |
| "grad_norm": 1.1843208583676361, |
| "learning_rate": 9.864228318093655e-07, |
| "loss": 0.4777, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.4195338512763596, |
| "grad_norm": 1.4793512914013776, |
| "learning_rate": 9.862671660424468e-07, |
| "loss": 0.4822, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.42175360710321863, |
| "grad_norm": 1.4592257280425385, |
| "learning_rate": 9.861111111111112e-07, |
| "loss": 0.4575, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4239733629300777, |
| "grad_norm": 1.2669454524463584, |
| "learning_rate": 9.859546655541649e-07, |
| "loss": 0.4555, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.4261931187569367, |
| "grad_norm": 1.1138120648848997, |
| "learning_rate": 9.857978279030909e-07, |
| "loss": 0.4837, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.4284128745837958, |
| "grad_norm": 1.1842746998711116, |
| "learning_rate": 9.85640596682002e-07, |
| "loss": 0.4705, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.4306326304106548, |
| "grad_norm": 1.8958140258222622, |
| "learning_rate": 9.854829704075935e-07, |
| "loss": 0.4773, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.4328523862375139, |
| "grad_norm": 1.9842874728486837, |
| "learning_rate": 9.853249475890984e-07, |
| "loss": 0.514, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.43507214206437295, |
| "grad_norm": 1.2411453520509366, |
| "learning_rate": 9.851665267282396e-07, |
| "loss": 0.4689, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.43729189789123196, |
| "grad_norm": 2.6834809304915552, |
| "learning_rate": 9.850077063191818e-07, |
| "loss": 0.4913, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.43951165371809103, |
| "grad_norm": 2.617374737158509, |
| "learning_rate": 9.848484848484847e-07, |
| "loss": 0.4686, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.44173140954495005, |
| "grad_norm": 1.3023938843230873, |
| "learning_rate": 9.846888607950555e-07, |
| "loss": 0.4708, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.4439511653718091, |
| "grad_norm": 1.1559453889413471, |
| "learning_rate": 9.845288326300983e-07, |
| "loss": 0.4847, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.44617092119866814, |
| "grad_norm": 1.932913560002246, |
| "learning_rate": 9.84368398817068e-07, |
| "loss": 0.505, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4483906770255272, |
| "grad_norm": 1.2322652967881342, |
| "learning_rate": 9.842075578116187e-07, |
| "loss": 0.4844, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4506104328523862, |
| "grad_norm": 1.1577427278107666, |
| "learning_rate": 9.840463080615557e-07, |
| "loss": 0.48, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4528301886792453, |
| "grad_norm": 1.1732950223965302, |
| "learning_rate": 9.838846480067854e-07, |
| "loss": 0.4937, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4550499445061043, |
| "grad_norm": 1.2128135155508084, |
| "learning_rate": 9.83722576079264e-07, |
| "loss": 0.4481, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.4572697003329634, |
| "grad_norm": 1.3789926682637172, |
| "learning_rate": 9.835600907029478e-07, |
| "loss": 0.4694, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.4594894561598224, |
| "grad_norm": 1.3013032938491416, |
| "learning_rate": 9.833971902937419e-07, |
| "loss": 0.4582, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.46170921198668147, |
| "grad_norm": 1.2874771776462224, |
| "learning_rate": 9.832338732594486e-07, |
| "loss": 0.4971, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.46392896781354054, |
| "grad_norm": 1.1720459802557406, |
| "learning_rate": 9.830701379997154e-07, |
| "loss": 0.4612, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.46614872364039955, |
| "grad_norm": 1.3336018286890268, |
| "learning_rate": 9.829059829059829e-07, |
| "loss": 0.4905, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4683684794672586, |
| "grad_norm": 1.163695673111041, |
| "learning_rate": 9.827414063614321e-07, |
| "loss": 0.4661, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 1.1345772287223108, |
| "learning_rate": 9.825764067409312e-07, |
| "loss": 0.4809, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4728079911209767, |
| "grad_norm": 1.1200862265686184, |
| "learning_rate": 9.824109824109824e-07, |
| "loss": 0.4442, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.4750277469478357, |
| "grad_norm": 1.0965562558931972, |
| "learning_rate": 9.822451317296677e-07, |
| "loss": 0.48, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.4772475027746948, |
| "grad_norm": 1.1933430382619212, |
| "learning_rate": 9.82078853046595e-07, |
| "loss": 0.4563, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.4794672586015538, |
| "grad_norm": 1.1998968490880912, |
| "learning_rate": 9.819121447028424e-07, |
| "loss": 0.4756, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.4816870144284129, |
| "grad_norm": 1.1502035962314217, |
| "learning_rate": 9.817450050309042e-07, |
| "loss": 0.4703, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4839067702552719, |
| "grad_norm": 1.270076075579517, |
| "learning_rate": 9.815774323546344e-07, |
| "loss": 0.5022, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.48612652608213097, |
| "grad_norm": 1.18116171953247, |
| "learning_rate": 9.814094249891915e-07, |
| "loss": 0.4889, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.48834628190899, |
| "grad_norm": 1.1529220976426569, |
| "learning_rate": 9.812409812409812e-07, |
| "loss": 0.4591, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.49056603773584906, |
| "grad_norm": 1.3482489121076722, |
| "learning_rate": 9.810720994076e-07, |
| "loss": 0.5174, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.49278579356270813, |
| "grad_norm": 1.1388000036876147, |
| "learning_rate": 9.809027777777776e-07, |
| "loss": 0.488, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.49500554938956715, |
| "grad_norm": 1.3402730462866097, |
| "learning_rate": 9.807330146313196e-07, |
| "loss": 0.4707, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.4972253052164262, |
| "grad_norm": 1.1624876644697852, |
| "learning_rate": 9.805628082390483e-07, |
| "loss": 0.4868, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.49944506104328523, |
| "grad_norm": 1.2049149784394284, |
| "learning_rate": 9.80392156862745e-07, |
| "loss": 0.4875, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5016648168701443, |
| "grad_norm": 1.1421640775244137, |
| "learning_rate": 9.802210587550902e-07, |
| "loss": 0.4601, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5038845726970034, |
| "grad_norm": 1.184925206570989, |
| "learning_rate": 9.800495121596038e-07, |
| "loss": 0.4606, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5061043285238623, |
| "grad_norm": 1.1520289725248647, |
| "learning_rate": 9.79877515310586e-07, |
| "loss": 0.4674, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5083240843507214, |
| "grad_norm": 1.252272773720146, |
| "learning_rate": 9.79705066433056e-07, |
| "loss": 0.4836, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5105438401775805, |
| "grad_norm": 1.1392993281880255, |
| "learning_rate": 9.7953216374269e-07, |
| "loss": 0.4582, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5127635960044395, |
| "grad_norm": 1.2813095854917667, |
| "learning_rate": 9.79358805445762e-07, |
| "loss": 0.4837, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5149833518312985, |
| "grad_norm": 1.2741773233692082, |
| "learning_rate": 9.791849897390794e-07, |
| "loss": 0.4822, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5172031076581576, |
| "grad_norm": 1.3581663168459062, |
| "learning_rate": 9.79010714809922e-07, |
| "loss": 0.4656, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5194228634850167, |
| "grad_norm": 2.104111037313451, |
| "learning_rate": 9.788359788359789e-07, |
| "loss": 0.5158, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5216426193118757, |
| "grad_norm": 1.2100715298455516, |
| "learning_rate": 9.78660779985283e-07, |
| "loss": 0.4593, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5238623751387348, |
| "grad_norm": 1.513760002478016, |
| "learning_rate": 9.78485116416151e-07, |
| "loss": 0.4717, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5260821309655938, |
| "grad_norm": 1.1016577801281828, |
| "learning_rate": 9.783089862771138e-07, |
| "loss": 0.4695, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5283018867924528, |
| "grad_norm": 1.9018294843725223, |
| "learning_rate": 9.781323877068556e-07, |
| "loss": 0.4757, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5305216426193119, |
| "grad_norm": 1.1210699506672195, |
| "learning_rate": 9.779553188341472e-07, |
| "loss": 0.475, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.532741398446171, |
| "grad_norm": 1.6761590116014484, |
| "learning_rate": 9.777777777777778e-07, |
| "loss": 0.4586, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5349611542730299, |
| "grad_norm": 1.2531925677261875, |
| "learning_rate": 9.775997626464915e-07, |
| "loss": 0.4936, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.537180910099889, |
| "grad_norm": 1.2820596623624074, |
| "learning_rate": 9.774212715389185e-07, |
| "loss": 0.4765, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5394006659267481, |
| "grad_norm": 1.2797631555506939, |
| "learning_rate": 9.772423025435074e-07, |
| "loss": 0.4621, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5416204217536071, |
| "grad_norm": 1.1547969793343313, |
| "learning_rate": 9.77062853738457e-07, |
| "loss": 0.4926, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5438401775804661, |
| "grad_norm": 1.2176966207623454, |
| "learning_rate": 9.76882923191648e-07, |
| "loss": 0.4658, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5460599334073252, |
| "grad_norm": 1.2205016744946482, |
| "learning_rate": 9.767025089605736e-07, |
| "loss": 0.4734, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5482796892341842, |
| "grad_norm": 1.3715021678576031, |
| "learning_rate": 9.765216090922686e-07, |
| "loss": 0.4634, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5504994450610433, |
| "grad_norm": 1.1265979634435113, |
| "learning_rate": 9.763402216232405e-07, |
| "loss": 0.4831, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5527192008879024, |
| "grad_norm": 1.2027088078575965, |
| "learning_rate": 9.761583445793972e-07, |
| "loss": 0.4913, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5549389567147613, |
| "grad_norm": 1.2098376963577495, |
| "learning_rate": 9.75975975975976e-07, |
| "loss": 0.4867, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5571587125416204, |
| "grad_norm": 1.229754850841517, |
| "learning_rate": 9.75793113817471e-07, |
| "loss": 0.4387, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5593784683684795, |
| "grad_norm": 1.1234804098250084, |
| "learning_rate": 9.756097560975609e-07, |
| "loss": 0.5042, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5615982241953386, |
| "grad_norm": 1.9036530663015045, |
| "learning_rate": 9.75425900799035e-07, |
| "loss": 0.4775, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5638179800221975, |
| "grad_norm": 1.3497568353885407, |
| "learning_rate": 9.752415458937197e-07, |
| "loss": 0.4589, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5660377358490566, |
| "grad_norm": 1.3514286330767507, |
| "learning_rate": 9.750566893424036e-07, |
| "loss": 0.4751, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5682574916759157, |
| "grad_norm": 1.190120995376288, |
| "learning_rate": 9.748713290947623e-07, |
| "loss": 0.4762, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5704772475027747, |
| "grad_norm": 1.2646510711529224, |
| "learning_rate": 9.74685463089283e-07, |
| "loss": 0.4544, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5726970033296337, |
| "grad_norm": 1.3453068295168764, |
| "learning_rate": 9.744990892531876e-07, |
| "loss": 0.4677, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5749167591564928, |
| "grad_norm": 3.4979722875360952, |
| "learning_rate": 9.74312205502356e-07, |
| "loss": 0.4896, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5771365149833518, |
| "grad_norm": 1.2532683261068736, |
| "learning_rate": 9.74124809741248e-07, |
| "loss": 0.4452, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5793562708102109, |
| "grad_norm": 1.4072099932257984, |
| "learning_rate": 9.739368998628257e-07, |
| "loss": 0.4677, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.58157602663707, |
| "grad_norm": 1.2844489474254415, |
| "learning_rate": 9.737484737484737e-07, |
| "loss": 0.496, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5837957824639289, |
| "grad_norm": 1.1205502247017423, |
| "learning_rate": 9.735595292679198e-07, |
| "loss": 0.4944, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.586015538290788, |
| "grad_norm": 1.1335936128840318, |
| "learning_rate": 9.733700642791553e-07, |
| "loss": 0.4588, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 1.1909628036709445, |
| "learning_rate": 9.731800766283525e-07, |
| "loss": 0.4591, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5904550499445061, |
| "grad_norm": 1.0798410289093272, |
| "learning_rate": 9.729895641497852e-07, |
| "loss": 0.4733, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5926748057713651, |
| "grad_norm": 1.1518991537178038, |
| "learning_rate": 9.727985246657446e-07, |
| "loss": 0.4894, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5948945615982242, |
| "grad_norm": 1.2167928052629051, |
| "learning_rate": 9.726069559864573e-07, |
| "loss": 0.4632, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5971143174250833, |
| "grad_norm": 1.0891996396945058, |
| "learning_rate": 9.724148559100016e-07, |
| "loss": 0.4648, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5993340732519423, |
| "grad_norm": 1.202740165554673, |
| "learning_rate": 9.722222222222222e-07, |
| "loss": 0.4835, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6015538290788013, |
| "grad_norm": 1.1271713441331268, |
| "learning_rate": 9.720290526966466e-07, |
| "loss": 0.4757, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.6037735849056604, |
| "grad_norm": 1.268219289831493, |
| "learning_rate": 9.71835345094398e-07, |
| "loss": 0.4518, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6059933407325194, |
| "grad_norm": 1.3048586285945205, |
| "learning_rate": 9.716410971641097e-07, |
| "loss": 0.4686, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6082130965593785, |
| "grad_norm": 1.2646393518299217, |
| "learning_rate": 9.714463066418373e-07, |
| "loss": 0.4544, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.6104328523862376, |
| "grad_norm": 1.246098492059234, |
| "learning_rate": 9.712509712509713e-07, |
| "loss": 0.418, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6126526082130965, |
| "grad_norm": 1.2773926202045507, |
| "learning_rate": 9.710550887021476e-07, |
| "loss": 0.4728, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.6148723640399556, |
| "grad_norm": 1.1731853445553053, |
| "learning_rate": 9.708586566931587e-07, |
| "loss": 0.4567, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6170921198668147, |
| "grad_norm": 1.3328475152898882, |
| "learning_rate": 9.70661672908864e-07, |
| "loss": 0.4713, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6193118756936737, |
| "grad_norm": 1.1482955010668634, |
| "learning_rate": 9.70464135021097e-07, |
| "loss": 0.4895, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.6215316315205327, |
| "grad_norm": 1.164481369160247, |
| "learning_rate": 9.702660406885758e-07, |
| "loss": 0.4527, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6237513873473918, |
| "grad_norm": 1.1898936972633753, |
| "learning_rate": 9.700673875568092e-07, |
| "loss": 0.4636, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6259711431742508, |
| "grad_norm": 1.2690117073282012, |
| "learning_rate": 9.698681732580038e-07, |
| "loss": 0.4924, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6281908990011099, |
| "grad_norm": 1.1267647194841144, |
| "learning_rate": 9.696683954109696e-07, |
| "loss": 0.4444, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6304106548279689, |
| "grad_norm": 1.1860501989885999, |
| "learning_rate": 9.694680516210262e-07, |
| "loss": 0.4841, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.632630410654828, |
| "grad_norm": 1.4878295849057805, |
| "learning_rate": 9.692671394799055e-07, |
| "loss": 0.4591, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.634850166481687, |
| "grad_norm": 1.1868310362481995, |
| "learning_rate": 9.690656565656565e-07, |
| "loss": 0.4654, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6370699223085461, |
| "grad_norm": 1.2316539504907182, |
| "learning_rate": 9.688636004425479e-07, |
| "loss": 0.4518, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6392896781354052, |
| "grad_norm": 1.2704968380561188, |
| "learning_rate": 9.686609686609686e-07, |
| "loss": 0.4798, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6415094339622641, |
| "grad_norm": 1.2632953724431832, |
| "learning_rate": 9.684577587573309e-07, |
| "loss": 0.4471, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6437291897891232, |
| "grad_norm": 1.0780974022320988, |
| "learning_rate": 9.682539682539682e-07, |
| "loss": 0.4555, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6459489456159823, |
| "grad_norm": 1.207590913191836, |
| "learning_rate": 9.680495946590367e-07, |
| "loss": 0.4549, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6481687014428413, |
| "grad_norm": 1.3017930886634022, |
| "learning_rate": 9.67844635466412e-07, |
| "loss": 0.4721, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6503884572697003, |
| "grad_norm": 1.2987311419798997, |
| "learning_rate": 9.676390881555874e-07, |
| "loss": 0.4751, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.6526082130965594, |
| "grad_norm": 1.1063543285266058, |
| "learning_rate": 9.674329501915708e-07, |
| "loss": 0.465, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.6548279689234184, |
| "grad_norm": 1.2064062875188606, |
| "learning_rate": 9.672262190247801e-07, |
| "loss": 0.4509, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6570477247502775, |
| "grad_norm": 2.002682048611068, |
| "learning_rate": 9.670188920909382e-07, |
| "loss": 0.4593, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6592674805771365, |
| "grad_norm": 1.1171063749595782, |
| "learning_rate": 9.668109668109667e-07, |
| "loss": 0.4584, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6614872364039955, |
| "grad_norm": 1.3329915028296324, |
| "learning_rate": 9.666024405908798e-07, |
| "loss": 0.4809, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6637069922308546, |
| "grad_norm": 1.1190182828582709, |
| "learning_rate": 9.663933108216756e-07, |
| "loss": 0.4596, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6659267480577137, |
| "grad_norm": 1.230691071202789, |
| "learning_rate": 9.66183574879227e-07, |
| "loss": 0.4773, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6681465038845728, |
| "grad_norm": 1.0969420350931496, |
| "learning_rate": 9.659732301241734e-07, |
| "loss": 0.4709, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.6703662597114317, |
| "grad_norm": 1.1935278681188108, |
| "learning_rate": 9.657622739018086e-07, |
| "loss": 0.4713, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6725860155382908, |
| "grad_norm": 1.1497579674788543, |
| "learning_rate": 9.6555070354197e-07, |
| "loss": 0.4495, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6748057713651499, |
| "grad_norm": 1.5074031109473398, |
| "learning_rate": 9.653385163589246e-07, |
| "loss": 0.4696, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6770255271920089, |
| "grad_norm": 1.310465827597004, |
| "learning_rate": 9.65125709651257e-07, |
| "loss": 0.4803, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6792452830188679, |
| "grad_norm": 1.1816909676159393, |
| "learning_rate": 9.649122807017545e-07, |
| "loss": 0.4911, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.681465038845727, |
| "grad_norm": 1.3977695099320595, |
| "learning_rate": 9.646982267772897e-07, |
| "loss": 0.4322, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.683684794672586, |
| "grad_norm": 1.0483630262556494, |
| "learning_rate": 9.644835451287064e-07, |
| "loss": 0.4604, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6859045504994451, |
| "grad_norm": 1.143328369080368, |
| "learning_rate": 9.642682329906999e-07, |
| "loss": 0.4711, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6881243063263041, |
| "grad_norm": 1.0876189100922673, |
| "learning_rate": 9.640522875816993e-07, |
| "loss": 0.4589, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6903440621531631, |
| "grad_norm": 1.2496179510548522, |
| "learning_rate": 9.638357061037473e-07, |
| "loss": 0.4442, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.6925638179800222, |
| "grad_norm": 1.334399245591051, |
| "learning_rate": 9.636184857423795e-07, |
| "loss": 0.4698, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6947835738068813, |
| "grad_norm": 1.374440791039879, |
| "learning_rate": 9.634006236665025e-07, |
| "loss": 0.461, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.6970033296337403, |
| "grad_norm": 1.3126306926020817, |
| "learning_rate": 9.63182117028271e-07, |
| "loss": 0.4677, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.6992230854605993, |
| "grad_norm": 1.30568674164611, |
| "learning_rate": 9.629629629629628e-07, |
| "loss": 0.472, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.7014428412874584, |
| "grad_norm": 1.2946732132074095, |
| "learning_rate": 9.627431585888558e-07, |
| "loss": 0.4471, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.7036625971143174, |
| "grad_norm": 1.1052745863913855, |
| "learning_rate": 9.625227010070992e-07, |
| "loss": 0.4593, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 1.2464058744248228, |
| "learning_rate": 9.623015873015874e-07, |
| "loss": 0.4499, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.7081021087680355, |
| "grad_norm": 1.1991454487689286, |
| "learning_rate": 9.620798145388308e-07, |
| "loss": 0.4929, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.7103218645948945, |
| "grad_norm": 1.117849019068085, |
| "learning_rate": 9.618573797678275e-07, |
| "loss": 0.4651, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7125416204217536, |
| "grad_norm": 1.8116588599393262, |
| "learning_rate": 9.616342800199301e-07, |
| "loss": 0.4644, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.7147613762486127, |
| "grad_norm": 1.3083582936161693, |
| "learning_rate": 9.614105123087158e-07, |
| "loss": 0.4822, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.7169811320754716, |
| "grad_norm": 1.213650512640914, |
| "learning_rate": 9.611860736298516e-07, |
| "loss": 0.4695, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.7192008879023307, |
| "grad_norm": 1.1404257325984666, |
| "learning_rate": 9.60960960960961e-07, |
| "loss": 0.4642, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7214206437291898, |
| "grad_norm": 1.2553157610647856, |
| "learning_rate": 9.60735171261487e-07, |
| "loss": 0.4627, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7236403995560489, |
| "grad_norm": 1.0750989884958981, |
| "learning_rate": 9.605087014725568e-07, |
| "loss": 0.4664, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.7258601553829079, |
| "grad_norm": 1.146407946651267, |
| "learning_rate": 9.602815485168427e-07, |
| "loss": 0.4704, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7280799112097669, |
| "grad_norm": 1.2052851100604456, |
| "learning_rate": 9.600537092984222e-07, |
| "loss": 0.4482, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.730299667036626, |
| "grad_norm": 1.1376530340216333, |
| "learning_rate": 9.59825180702639e-07, |
| "loss": 0.4371, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.732519422863485, |
| "grad_norm": 1.2633210965308679, |
| "learning_rate": 9.595959595959596e-07, |
| "loss": 0.4647, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7347391786903441, |
| "grad_norm": 1.130890031131311, |
| "learning_rate": 9.593660428258304e-07, |
| "loss": 0.4417, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7369589345172031, |
| "grad_norm": 1.6554313379519792, |
| "learning_rate": 9.591354272205336e-07, |
| "loss": 0.4493, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7391786903440621, |
| "grad_norm": 1.1068174319261177, |
| "learning_rate": 9.58904109589041e-07, |
| "loss": 0.472, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7413984461709212, |
| "grad_norm": 1.5125525973713272, |
| "learning_rate": 9.586720867208672e-07, |
| "loss": 0.4929, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7436182019977803, |
| "grad_norm": 1.3329378728556205, |
| "learning_rate": 9.584393553859202e-07, |
| "loss": 0.4932, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7458379578246392, |
| "grad_norm": 1.067151392962331, |
| "learning_rate": 9.582059123343528e-07, |
| "loss": 0.4669, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7480577136514983, |
| "grad_norm": 1.221315492579946, |
| "learning_rate": 9.579717542964097e-07, |
| "loss": 0.4955, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7502774694783574, |
| "grad_norm": 1.1576509365414092, |
| "learning_rate": 9.577368779822768e-07, |
| "loss": 0.4518, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7524972253052165, |
| "grad_norm": 1.1450956197088513, |
| "learning_rate": 9.575012800819252e-07, |
| "loss": 0.4387, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7547169811320755, |
| "grad_norm": 1.1595835131376107, |
| "learning_rate": 9.572649572649572e-07, |
| "loss": 0.448, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7569367369589345, |
| "grad_norm": 1.1123527161829898, |
| "learning_rate": 9.570279061804486e-07, |
| "loss": 0.4603, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7591564927857936, |
| "grad_norm": 1.1853821634183275, |
| "learning_rate": 9.567901234567902e-07, |
| "loss": 0.4705, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7613762486126526, |
| "grad_norm": 1.1103107481328276, |
| "learning_rate": 9.565516057015283e-07, |
| "loss": 0.4596, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.7635960044395117, |
| "grad_norm": 1.4055764520185114, |
| "learning_rate": 9.563123495012039e-07, |
| "loss": 0.4938, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7658157602663707, |
| "grad_norm": 1.1750825777251128, |
| "learning_rate": 9.560723514211886e-07, |
| "loss": 0.466, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7680355160932297, |
| "grad_norm": 1.5998973140767496, |
| "learning_rate": 9.55831608005521e-07, |
| "loss": 0.4445, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7702552719200888, |
| "grad_norm": 1.2332564173521476, |
| "learning_rate": 9.55590115776741e-07, |
| "loss": 0.4557, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7724750277469479, |
| "grad_norm": 1.178215276166538, |
| "learning_rate": 9.553478712357217e-07, |
| "loss": 0.4689, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7746947835738068, |
| "grad_norm": 1.7533561679314174, |
| "learning_rate": 9.551048708615012e-07, |
| "loss": 0.4562, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7769145394006659, |
| "grad_norm": 1.2467282871005076, |
| "learning_rate": 9.54861111111111e-07, |
| "loss": 0.4965, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.779134295227525, |
| "grad_norm": 1.501873430859811, |
| "learning_rate": 9.546165884194052e-07, |
| "loss": 0.4655, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.781354051054384, |
| "grad_norm": 1.1753902585686589, |
| "learning_rate": 9.543712991988853e-07, |
| "loss": 0.4631, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7835738068812431, |
| "grad_norm": 1.2254133258446995, |
| "learning_rate": 9.541252398395256e-07, |
| "loss": 0.4804, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.7857935627081021, |
| "grad_norm": 1.2034719888364724, |
| "learning_rate": 9.538784067085953e-07, |
| "loss": 0.5163, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7880133185349611, |
| "grad_norm": 1.1137953498058453, |
| "learning_rate": 9.536307961504813e-07, |
| "loss": 0.465, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.7902330743618202, |
| "grad_norm": 1.2049505304206072, |
| "learning_rate": 9.533824044865054e-07, |
| "loss": 0.4426, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.7924528301886793, |
| "grad_norm": 1.146694810534997, |
| "learning_rate": 9.531332280147445e-07, |
| "loss": 0.4595, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.7946725860155383, |
| "grad_norm": 1.285324115763439, |
| "learning_rate": 9.528832630098452e-07, |
| "loss": 0.4404, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.7968923418423973, |
| "grad_norm": 1.1746063498596073, |
| "learning_rate": 9.526325057228385e-07, |
| "loss": 0.4556, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.7991120976692564, |
| "grad_norm": 1.2397369281527393, |
| "learning_rate": 9.523809523809523e-07, |
| "loss": 0.4552, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8013318534961155, |
| "grad_norm": 1.2890444853501573, |
| "learning_rate": 9.521285991874226e-07, |
| "loss": 0.4895, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.8035516093229744, |
| "grad_norm": 1.249790528516529, |
| "learning_rate": 9.518754423213023e-07, |
| "loss": 0.4611, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.8057713651498335, |
| "grad_norm": 1.1607609355409652, |
| "learning_rate": 9.516214779372675e-07, |
| "loss": 0.4634, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.8079911209766926, |
| "grad_norm": 1.1944680113287869, |
| "learning_rate": 9.513667021654242e-07, |
| "loss": 0.4658, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.8102108768035516, |
| "grad_norm": 1.212730380100326, |
| "learning_rate": 9.51111111111111e-07, |
| "loss": 0.4652, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.8124306326304107, |
| "grad_norm": 1.350952435196083, |
| "learning_rate": 9.508547008547009e-07, |
| "loss": 0.4821, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.8146503884572697, |
| "grad_norm": 1.4170872636632865, |
| "learning_rate": 9.505974674514e-07, |
| "loss": 0.4584, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.8168701442841287, |
| "grad_norm": 1.1374959977745926, |
| "learning_rate": 9.503394069310468e-07, |
| "loss": 0.4529, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.8190899001109878, |
| "grad_norm": 1.1079720865904779, |
| "learning_rate": 9.500805152979066e-07, |
| "loss": 0.4569, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.8213096559378469, |
| "grad_norm": 1.3283932417196296, |
| "learning_rate": 9.498207885304659e-07, |
| "loss": 0.457, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 2.167492019217474, |
| "learning_rate": 9.495602225812242e-07, |
| "loss": 0.4861, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.8257491675915649, |
| "grad_norm": 1.1806082837203582, |
| "learning_rate": 9.492988133764832e-07, |
| "loss": 0.4635, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.827968923418424, |
| "grad_norm": 1.2232597162793415, |
| "learning_rate": 9.490365568161355e-07, |
| "loss": 0.4739, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.8301886792452831, |
| "grad_norm": 1.2478222292556507, |
| "learning_rate": 9.487734487734488e-07, |
| "loss": 0.435, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.832408435072142, |
| "grad_norm": 1.9264518586562565, |
| "learning_rate": 9.485094850948509e-07, |
| "loss": 0.4612, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8346281908990011, |
| "grad_norm": 1.1851589787911743, |
| "learning_rate": 9.482446615997105e-07, |
| "loss": 0.4741, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8368479467258602, |
| "grad_norm": 1.1482240087848128, |
| "learning_rate": 9.47978974080116e-07, |
| "loss": 0.4677, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.8390677025527192, |
| "grad_norm": 1.1666675897500949, |
| "learning_rate": 9.477124183006535e-07, |
| "loss": 0.439, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.8412874583795783, |
| "grad_norm": 1.2967015189495499, |
| "learning_rate": 9.474449899981814e-07, |
| "loss": 0.4389, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8435072142064373, |
| "grad_norm": 1.2240339325997547, |
| "learning_rate": 9.471766848816029e-07, |
| "loss": 0.4967, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8457269700332963, |
| "grad_norm": 1.165209891992943, |
| "learning_rate": 9.469074986316366e-07, |
| "loss": 0.4385, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.8479467258601554, |
| "grad_norm": 1.1944098330708015, |
| "learning_rate": 9.466374269005847e-07, |
| "loss": 0.4526, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8501664816870145, |
| "grad_norm": 1.3220747050818542, |
| "learning_rate": 9.463664653120996e-07, |
| "loss": 0.4554, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.8523862375138734, |
| "grad_norm": 1.156857474774302, |
| "learning_rate": 9.460946094609461e-07, |
| "loss": 0.5017, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8546059933407325, |
| "grad_norm": 2.5207247056310527, |
| "learning_rate": 9.45821854912764e-07, |
| "loss": 0.4877, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8568257491675916, |
| "grad_norm": 1.278904776025417, |
| "learning_rate": 9.455481972038263e-07, |
| "loss": 0.424, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8590455049944506, |
| "grad_norm": 1.3104661256143586, |
| "learning_rate": 9.45273631840796e-07, |
| "loss": 0.4836, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8612652608213096, |
| "grad_norm": 1.1577542337096038, |
| "learning_rate": 9.449981543004798e-07, |
| "loss": 0.4501, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8634850166481687, |
| "grad_norm": 1.2698206722641987, |
| "learning_rate": 9.447217600295803e-07, |
| "loss": 0.4708, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8657047724750278, |
| "grad_norm": 1.26815456990031, |
| "learning_rate": 9.444444444444445e-07, |
| "loss": 0.4801, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8679245283018868, |
| "grad_norm": 1.4800730180210795, |
| "learning_rate": 9.441662029308107e-07, |
| "loss": 0.4446, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8701442841287459, |
| "grad_norm": 1.433745516973746, |
| "learning_rate": 9.438870308435526e-07, |
| "loss": 0.4395, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8723640399556049, |
| "grad_norm": 1.2405606709329904, |
| "learning_rate": 9.436069235064209e-07, |
| "loss": 0.4719, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8745837957824639, |
| "grad_norm": 1.3222046837847288, |
| "learning_rate": 9.433258762117823e-07, |
| "loss": 0.48, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.876803551609323, |
| "grad_norm": 1.0972747969843053, |
| "learning_rate": 9.430438842203547e-07, |
| "loss": 0.4567, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8790233074361821, |
| "grad_norm": 1.231282503350091, |
| "learning_rate": 9.427609427609426e-07, |
| "loss": 0.4253, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.881243063263041, |
| "grad_norm": 1.1148776290337907, |
| "learning_rate": 9.424770470301668e-07, |
| "loss": 0.4665, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.8834628190899001, |
| "grad_norm": 1.5167092733459486, |
| "learning_rate": 9.421921921921921e-07, |
| "loss": 0.488, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.8856825749167592, |
| "grad_norm": 1.1200324548701535, |
| "learning_rate": 9.419063733784546e-07, |
| "loss": 0.4573, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.8879023307436182, |
| "grad_norm": 1.1941442835979987, |
| "learning_rate": 9.416195856873822e-07, |
| "loss": 0.474, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8901220865704772, |
| "grad_norm": 1.2378053597101148, |
| "learning_rate": 9.413318241841163e-07, |
| "loss": 0.4275, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.8923418423973363, |
| "grad_norm": 1.1736891938334981, |
| "learning_rate": 9.410430839002267e-07, |
| "loss": 0.4813, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.8945615982241953, |
| "grad_norm": 1.2926588136627257, |
| "learning_rate": 9.407533598334279e-07, |
| "loss": 0.4101, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.8967813540510544, |
| "grad_norm": 1.335812503096899, |
| "learning_rate": 9.404626469472886e-07, |
| "loss": 0.482, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.8990011098779135, |
| "grad_norm": 1.2606281524911447, |
| "learning_rate": 9.401709401709401e-07, |
| "loss": 0.4613, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.9012208657047724, |
| "grad_norm": 1.0757260989123811, |
| "learning_rate": 9.398782343987823e-07, |
| "loss": 0.442, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.9034406215316315, |
| "grad_norm": 1.2487755708412085, |
| "learning_rate": 9.395845244901848e-07, |
| "loss": 0.4849, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.9056603773584906, |
| "grad_norm": 1.125481879252537, |
| "learning_rate": 9.392898052691868e-07, |
| "loss": 0.4811, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.9078801331853497, |
| "grad_norm": 1.2379802179470494, |
| "learning_rate": 9.38994071524192e-07, |
| "loss": 0.454, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.9100998890122086, |
| "grad_norm": 1.4638820996239457, |
| "learning_rate": 9.386973180076627e-07, |
| "loss": 0.4568, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9123196448390677, |
| "grad_norm": 1.164619393855904, |
| "learning_rate": 9.383995394358089e-07, |
| "loss": 0.4572, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.9145394006659268, |
| "grad_norm": 1.1482541560920236, |
| "learning_rate": 9.381007304882738e-07, |
| "loss": 0.4612, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.9167591564927858, |
| "grad_norm": 1.2267789304428984, |
| "learning_rate": 9.378008858078182e-07, |
| "loss": 0.4509, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.9189789123196448, |
| "grad_norm": 1.163057869832221, |
| "learning_rate": 9.374999999999999e-07, |
| "loss": 0.4438, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.9211986681465039, |
| "grad_norm": 1.1962284300244166, |
| "learning_rate": 9.371980676328503e-07, |
| "loss": 0.4847, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.9234184239733629, |
| "grad_norm": 1.4078630335966822, |
| "learning_rate": 9.368950832365467e-07, |
| "loss": 0.4442, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.925638179800222, |
| "grad_norm": 1.2172971912901611, |
| "learning_rate": 9.365910413030831e-07, |
| "loss": 0.4596, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.9278579356270811, |
| "grad_norm": 1.0828193308936622, |
| "learning_rate": 9.362859362859364e-07, |
| "loss": 0.516, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.93007769145394, |
| "grad_norm": 1.1924221228533827, |
| "learning_rate": 9.359797625997276e-07, |
| "loss": 0.4652, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.9322974472807991, |
| "grad_norm": 1.1409012876229394, |
| "learning_rate": 9.35672514619883e-07, |
| "loss": 0.473, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9345172031076582, |
| "grad_norm": 1.2176749478496565, |
| "learning_rate": 9.353641866822886e-07, |
| "loss": 0.4879, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.9367369589345172, |
| "grad_norm": 1.1431631431171365, |
| "learning_rate": 9.350547730829421e-07, |
| "loss": 0.4541, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9389567147613762, |
| "grad_norm": 1.248764858494691, |
| "learning_rate": 9.347442680776014e-07, |
| "loss": 0.4484, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 1.1323458083986906, |
| "learning_rate": 9.344326658814291e-07, |
| "loss": 0.4701, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.9433962264150944, |
| "grad_norm": 1.3325163110508749, |
| "learning_rate": 9.341199606686333e-07, |
| "loss": 0.4863, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9456159822419534, |
| "grad_norm": 1.1125334297727054, |
| "learning_rate": 9.33806146572104e-07, |
| "loss": 0.4399, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9478357380688124, |
| "grad_norm": 1.3670229521071158, |
| "learning_rate": 9.334912176830472e-07, |
| "loss": 0.4747, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.9500554938956715, |
| "grad_norm": 1.198715980258311, |
| "learning_rate": 9.331751680506128e-07, |
| "loss": 0.4563, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9522752497225305, |
| "grad_norm": 1.2305992586404284, |
| "learning_rate": 9.328579916815211e-07, |
| "loss": 0.4533, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9544950055493896, |
| "grad_norm": 1.2010004180709277, |
| "learning_rate": 9.325396825396825e-07, |
| "loss": 0.4237, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9567147613762487, |
| "grad_norm": 1.177319039118549, |
| "learning_rate": 9.322202345458159e-07, |
| "loss": 0.4447, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9589345172031076, |
| "grad_norm": 1.0816083992867438, |
| "learning_rate": 9.318996415770608e-07, |
| "loss": 0.4583, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9611542730299667, |
| "grad_norm": 1.3105321787098454, |
| "learning_rate": 9.315778974665869e-07, |
| "loss": 0.4602, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.9633740288568258, |
| "grad_norm": 1.2708668269742336, |
| "learning_rate": 9.312549960031973e-07, |
| "loss": 0.4674, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9655937846836848, |
| "grad_norm": 1.25271035550266, |
| "learning_rate": 9.309309309309308e-07, |
| "loss": 0.4702, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9678135405105438, |
| "grad_norm": 1.1383606781004785, |
| "learning_rate": 9.306056959486562e-07, |
| "loss": 0.4809, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.9700332963374029, |
| "grad_norm": 1.117206827269427, |
| "learning_rate": 9.302792847096645e-07, |
| "loss": 0.4703, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.9722530521642619, |
| "grad_norm": 3.9310744991947653, |
| "learning_rate": 9.29951690821256e-07, |
| "loss": 0.453, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.974472807991121, |
| "grad_norm": 1.141380677715348, |
| "learning_rate": 9.296229078443233e-07, |
| "loss": 0.4646, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.97669256381798, |
| "grad_norm": 1.2167415105647477, |
| "learning_rate": 9.292929292929292e-07, |
| "loss": 0.4739, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.978912319644839, |
| "grad_norm": 1.161523722412979, |
| "learning_rate": 9.289617486338798e-07, |
| "loss": 0.4446, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.9811320754716981, |
| "grad_norm": 1.2805336942642866, |
| "learning_rate": 9.286293592862935e-07, |
| "loss": 0.4888, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9833518312985572, |
| "grad_norm": 1.311736822354556, |
| "learning_rate": 9.282957546211659e-07, |
| "loss": 0.4404, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.9855715871254163, |
| "grad_norm": 1.102716937173303, |
| "learning_rate": 9.279609279609279e-07, |
| "loss": 0.472, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.9877913429522752, |
| "grad_norm": 1.1845457089510798, |
| "learning_rate": 9.27624872579001e-07, |
| "loss": 0.4773, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.9900110987791343, |
| "grad_norm": 1.348555373283787, |
| "learning_rate": 9.272875816993462e-07, |
| "loss": 0.4512, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.9922308546059934, |
| "grad_norm": 1.2463685196593113, |
| "learning_rate": 9.269490484960099e-07, |
| "loss": 0.4632, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.9944506104328524, |
| "grad_norm": 1.304607151889411, |
| "learning_rate": 9.266092660926608e-07, |
| "loss": 0.4482, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.9966703662597114, |
| "grad_norm": 1.2795731918669933, |
| "learning_rate": 9.262682275621277e-07, |
| "loss": 0.4793, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.9988901220865705, |
| "grad_norm": 1.2183029212205303, |
| "learning_rate": 9.25925925925926e-07, |
| "loss": 0.4554, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.5581011378096141, |
| "learning_rate": 9.255823541537827e-07, |
| "loss": 0.4665, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.002219755826859, |
| "grad_norm": 1.30008543587019, |
| "learning_rate": 9.252375051631556e-07, |
| "loss": 0.4768, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.0044395116537181, |
| "grad_norm": 1.1541545328399352, |
| "learning_rate": 9.24891371818746e-07, |
| "loss": 0.4466, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.0066592674805772, |
| "grad_norm": 1.1080167970739996, |
| "learning_rate": 9.245439469320066e-07, |
| "loss": 0.4324, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.0088790233074363, |
| "grad_norm": 1.205265215466603, |
| "learning_rate": 9.241952232606438e-07, |
| "loss": 0.451, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.0110987791342951, |
| "grad_norm": 1.1951346456937733, |
| "learning_rate": 9.238451935081147e-07, |
| "loss": 0.4575, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.0133185349611542, |
| "grad_norm": 1.1381714006582448, |
| "learning_rate": 9.234938503231185e-07, |
| "loss": 0.4649, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.0155382907880133, |
| "grad_norm": 1.2265651801615038, |
| "learning_rate": 9.23141186299081e-07, |
| "loss": 0.4546, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.0177580466148723, |
| "grad_norm": 1.2250114443086708, |
| "learning_rate": 9.227871939736346e-07, |
| "loss": 0.4529, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.0199778024417314, |
| "grad_norm": 1.1144643265198617, |
| "learning_rate": 9.22431865828092e-07, |
| "loss": 0.4721, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.0221975582685905, |
| "grad_norm": 1.2857875509373777, |
| "learning_rate": 9.220751942869145e-07, |
| "loss": 0.4742, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.0244173140954496, |
| "grad_norm": 1.4558104967510546, |
| "learning_rate": 9.217171717171717e-07, |
| "loss": 0.4828, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.0266370699223086, |
| "grad_norm": 1.393341081291035, |
| "learning_rate": 9.213577904279991e-07, |
| "loss": 0.475, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.0288568257491675, |
| "grad_norm": 1.2720063428740336, |
| "learning_rate": 9.209970426700463e-07, |
| "loss": 0.4764, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.0310765815760266, |
| "grad_norm": 1.1643324541727542, |
| "learning_rate": 9.206349206349206e-07, |
| "loss": 0.4483, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.0332963374028856, |
| "grad_norm": 1.5806078292788657, |
| "learning_rate": 9.202714164546225e-07, |
| "loss": 0.4405, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.0355160932297447, |
| "grad_norm": 1.0571713177484243, |
| "learning_rate": 9.199065222009772e-07, |
| "loss": 0.4582, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.0377358490566038, |
| "grad_norm": 1.254164694817346, |
| "learning_rate": 9.195402298850575e-07, |
| "loss": 0.484, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.0399556048834628, |
| "grad_norm": 1.2093916690336683, |
| "learning_rate": 9.191725314566005e-07, |
| "loss": 0.4524, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.042175360710322, |
| "grad_norm": 1.2484369194361347, |
| "learning_rate": 9.188034188034187e-07, |
| "loss": 0.4281, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.044395116537181, |
| "grad_norm": 1.23432208173896, |
| "learning_rate": 9.184328837508027e-07, |
| "loss": 0.4592, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.04661487236404, |
| "grad_norm": 1.1814190867646936, |
| "learning_rate": 9.18060918060918e-07, |
| "loss": 0.4501, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.048834628190899, |
| "grad_norm": 1.2396682446950464, |
| "learning_rate": 9.176875134321942e-07, |
| "loss": 0.4796, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.051054384017758, |
| "grad_norm": 1.194735634290447, |
| "learning_rate": 9.173126614987079e-07, |
| "loss": 0.4689, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.053274139844617, |
| "grad_norm": 1.5032349073655937, |
| "learning_rate": 9.169363538295577e-07, |
| "loss": 0.4756, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.0554938956714761, |
| "grad_norm": 1.3604468620288677, |
| "learning_rate": 9.165585819282317e-07, |
| "loss": 0.4516, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.0577136514983352, |
| "grad_norm": 1.235119564536718, |
| "learning_rate": 9.161793372319687e-07, |
| "loss": 0.4954, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.0599334073251943, |
| "grad_norm": 1.2064481172474952, |
| "learning_rate": 9.157986111111111e-07, |
| "loss": 0.4947, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.0621531631520533, |
| "grad_norm": 1.3341949744876858, |
| "learning_rate": 9.154163948684497e-07, |
| "loss": 0.4471, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.0643729189789124, |
| "grad_norm": 1.0774430784246407, |
| "learning_rate": 9.15032679738562e-07, |
| "loss": 0.4367, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.0665926748057715, |
| "grad_norm": 1.171666586826098, |
| "learning_rate": 9.146474568871424e-07, |
| "loss": 0.4458, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.0688124306326303, |
| "grad_norm": 1.143814272450129, |
| "learning_rate": 9.142607174103237e-07, |
| "loss": 0.4509, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.0710321864594894, |
| "grad_norm": 1.1649593431642897, |
| "learning_rate": 9.138724523339908e-07, |
| "loss": 0.4691, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.0732519422863485, |
| "grad_norm": 1.5096818227684794, |
| "learning_rate": 9.134826526130873e-07, |
| "loss": 0.4331, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.0754716981132075, |
| "grad_norm": 1.1214569676579733, |
| "learning_rate": 9.130913091309129e-07, |
| "loss": 0.4198, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.0776914539400666, |
| "grad_norm": 1.2255039253369904, |
| "learning_rate": 9.126984126984127e-07, |
| "loss": 0.4359, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.0799112097669257, |
| "grad_norm": 1.2922111222788437, |
| "learning_rate": 9.12303954053457e-07, |
| "loss": 0.4382, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.0821309655937847, |
| "grad_norm": 1.1584633311684494, |
| "learning_rate": 9.119079238601149e-07, |
| "loss": 0.4483, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.0843507214206438, |
| "grad_norm": 1.3615298699495637, |
| "learning_rate": 9.115103127079175e-07, |
| "loss": 0.4785, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.0865704772475029, |
| "grad_norm": 1.291303318155004, |
| "learning_rate": 9.11111111111111e-07, |
| "loss": 0.4471, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.0887902330743617, |
| "grad_norm": 1.1619939851316075, |
| "learning_rate": 9.107103095079046e-07, |
| "loss": 0.4465, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.0910099889012208, |
| "grad_norm": 1.1890455479283029, |
| "learning_rate": 9.103078982597053e-07, |
| "loss": 0.4335, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.0932297447280799, |
| "grad_norm": 1.1706446925482716, |
| "learning_rate": 9.099038676503465e-07, |
| "loss": 0.4727, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.095449500554939, |
| "grad_norm": 1.5095826242654946, |
| "learning_rate": 9.094982078853046e-07, |
| "loss": 0.4665, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.097669256381798, |
| "grad_norm": 1.255552038855652, |
| "learning_rate": 9.09090909090909e-07, |
| "loss": 0.4653, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.099889012208657, |
| "grad_norm": 1.358267392724399, |
| "learning_rate": 9.086819613135402e-07, |
| "loss": 0.4586, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.1021087680355162, |
| "grad_norm": 1.1441452405187045, |
| "learning_rate": 9.082713545188189e-07, |
| "loss": 0.4505, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.1043285238623752, |
| "grad_norm": 1.2397036751106139, |
| "learning_rate": 9.078590785907859e-07, |
| "loss": 0.4654, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.106548279689234, |
| "grad_norm": 1.1197301651396006, |
| "learning_rate": 9.074451233310703e-07, |
| "loss": 0.4366, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.1087680355160932, |
| "grad_norm": 1.7953863737465532, |
| "learning_rate": 9.070294784580499e-07, |
| "loss": 0.444, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1109877913429522, |
| "grad_norm": 1.2859250505007516, |
| "learning_rate": 9.066121336059985e-07, |
| "loss": 0.4709, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.1132075471698113, |
| "grad_norm": 1.2037158956032694, |
| "learning_rate": 9.061930783242258e-07, |
| "loss": 0.4703, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.1154273029966704, |
| "grad_norm": 1.172260705011405, |
| "learning_rate": 9.057723020762036e-07, |
| "loss": 0.4427, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.1176470588235294, |
| "grad_norm": 1.3027716139586167, |
| "learning_rate": 9.053497942386829e-07, |
| "loss": 0.4536, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.1198668146503885, |
| "grad_norm": 1.3774370449571356, |
| "learning_rate": 9.049255441008018e-07, |
| "loss": 0.4909, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.1220865704772476, |
| "grad_norm": 1.447300148943117, |
| "learning_rate": 9.044995408631772e-07, |
| "loss": 0.4549, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.1243063263041067, |
| "grad_norm": 1.2807259644533868, |
| "learning_rate": 9.040717736369911e-07, |
| "loss": 0.4341, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.1265260821309655, |
| "grad_norm": 1.1295120428776313, |
| "learning_rate": 9.036422314430613e-07, |
| "loss": 0.444, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.1287458379578246, |
| "grad_norm": 3.7694331207240075, |
| "learning_rate": 9.032109032109031e-07, |
| "loss": 0.4533, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.1309655937846836, |
| "grad_norm": 1.2673358154101324, |
| "learning_rate": 9.027777777777778e-07, |
| "loss": 0.4547, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.1331853496115427, |
| "grad_norm": 1.0849428645078705, |
| "learning_rate": 9.02342843887729e-07, |
| "loss": 0.448, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.1354051054384018, |
| "grad_norm": 1.2751616613231416, |
| "learning_rate": 9.019060901906091e-07, |
| "loss": 0.4482, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.1376248612652609, |
| "grad_norm": 1.5515951785162783, |
| "learning_rate": 9.014675052410901e-07, |
| "loss": 0.4327, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.13984461709212, |
| "grad_norm": 1.2001179078592417, |
| "learning_rate": 9.010270774976657e-07, |
| "loss": 0.4714, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.142064372918979, |
| "grad_norm": 1.2832027894003408, |
| "learning_rate": 9.005847953216374e-07, |
| "loss": 0.4276, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.1442841287458378, |
| "grad_norm": 1.390415895978781, |
| "learning_rate": 9.0014064697609e-07, |
| "loss": 0.474, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.146503884572697, |
| "grad_norm": 1.1340741814152506, |
| "learning_rate": 8.996946206248531e-07, |
| "loss": 0.4533, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.148723640399556, |
| "grad_norm": 1.1940733998984663, |
| "learning_rate": 8.9924670433145e-07, |
| "loss": 0.453, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.150943396226415, |
| "grad_norm": 1.2386850801704261, |
| "learning_rate": 8.987968860580326e-07, |
| "loss": 0.4656, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.1531631520532741, |
| "grad_norm": 1.2055848872454982, |
| "learning_rate": 8.983451536643025e-07, |
| "loss": 0.4472, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.1553829078801332, |
| "grad_norm": 1.238704680658842, |
| "learning_rate": 8.978914949064202e-07, |
| "loss": 0.4566, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.1576026637069923, |
| "grad_norm": 1.66335973863715, |
| "learning_rate": 8.974358974358974e-07, |
| "loss": 0.4146, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.1598224195338513, |
| "grad_norm": 1.86204877500857, |
| "learning_rate": 8.969783487984772e-07, |
| "loss": 0.4367, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.1620421753607104, |
| "grad_norm": 1.3492062703447547, |
| "learning_rate": 8.965188364329994e-07, |
| "loss": 0.4767, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.1642619311875695, |
| "grad_norm": 1.4884364595297717, |
| "learning_rate": 8.960573476702508e-07, |
| "loss": 0.4576, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.1664816870144283, |
| "grad_norm": 1.0881816727421652, |
| "learning_rate": 8.955938697318007e-07, |
| "loss": 0.4477, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.1687014428412874, |
| "grad_norm": 1.1883566141669124, |
| "learning_rate": 8.951283897288215e-07, |
| "loss": 0.4428, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.1709211986681465, |
| "grad_norm": 1.2612670034718356, |
| "learning_rate": 8.946608946608947e-07, |
| "loss": 0.4731, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.1731409544950056, |
| "grad_norm": 1.3074342572577153, |
| "learning_rate": 8.941913714147987e-07, |
| "loss": 0.4576, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.1753607103218646, |
| "grad_norm": 1.251759051462499, |
| "learning_rate": 8.93719806763285e-07, |
| "loss": 0.485, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.1775804661487237, |
| "grad_norm": 1.3767626029872162, |
| "learning_rate": 8.932461873638343e-07, |
| "loss": 0.4251, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.1798002219755828, |
| "grad_norm": 1.3350789369730232, |
| "learning_rate": 8.927704997573992e-07, |
| "loss": 0.4418, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.1820199778024416, |
| "grad_norm": 1.4193673394872068, |
| "learning_rate": 8.922927303671287e-07, |
| "loss": 0.441, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.1842397336293007, |
| "grad_norm": 1.1272492205345337, |
| "learning_rate": 8.918128654970759e-07, |
| "loss": 0.4512, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.1864594894561598, |
| "grad_norm": 1.3459032882268571, |
| "learning_rate": 8.913308913308914e-07, |
| "loss": 0.4599, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.1886792452830188, |
| "grad_norm": 1.5931812342777447, |
| "learning_rate": 8.908467939304943e-07, |
| "loss": 0.4704, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.190899001109878, |
| "grad_norm": 1.2904912837118774, |
| "learning_rate": 8.903605592347314e-07, |
| "loss": 0.4597, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.193118756936737, |
| "grad_norm": 1.2366314987596871, |
| "learning_rate": 8.898721730580136e-07, |
| "loss": 0.4479, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.195338512763596, |
| "grad_norm": 1.2669676274338157, |
| "learning_rate": 8.89381621088938e-07, |
| "loss": 0.4406, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.197558268590455, |
| "grad_norm": 1.5122726418580126, |
| "learning_rate": 8.888888888888888e-07, |
| "loss": 0.4588, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.1997780244173142, |
| "grad_norm": 1.6734165075786887, |
| "learning_rate": 8.88393961890621e-07, |
| "loss": 0.4644, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.2019977802441733, |
| "grad_norm": 1.1915259000682752, |
| "learning_rate": 8.878968253968254e-07, |
| "loss": 0.4432, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.204217536071032, |
| "grad_norm": 1.1243580352921179, |
| "learning_rate": 8.873974645786726e-07, |
| "loss": 0.4858, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.2064372918978912, |
| "grad_norm": 1.4122350275641105, |
| "learning_rate": 8.868958644743398e-07, |
| "loss": 0.4413, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.2086570477247502, |
| "grad_norm": 1.1853078309680358, |
| "learning_rate": 8.863920099875155e-07, |
| "loss": 0.4656, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.2108768035516093, |
| "grad_norm": 1.2556536339152258, |
| "learning_rate": 8.858858858858857e-07, |
| "loss": 0.4379, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.2130965593784684, |
| "grad_norm": 1.2397897495493284, |
| "learning_rate": 8.853774767995986e-07, |
| "loss": 0.4414, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.2153163152053275, |
| "grad_norm": 1.1971508516589355, |
| "learning_rate": 8.848667672197082e-07, |
| "loss": 0.4527, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.2175360710321865, |
| "grad_norm": 1.2293665933088456, |
| "learning_rate": 8.843537414965987e-07, |
| "loss": 0.4831, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.2197558268590456, |
| "grad_norm": 1.2843242155663375, |
| "learning_rate": 8.838383838383837e-07, |
| "loss": 0.4388, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2219755826859044, |
| "grad_norm": 1.2647261857821326, |
| "learning_rate": 8.833206783092888e-07, |
| "loss": 0.4831, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.2241953385127635, |
| "grad_norm": 1.2640883445235913, |
| "learning_rate": 8.82800608828006e-07, |
| "loss": 0.4592, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.2264150943396226, |
| "grad_norm": 1.226695277140237, |
| "learning_rate": 8.822781591660309e-07, |
| "loss": 0.4496, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.2286348501664817, |
| "grad_norm": 1.3665638844069508, |
| "learning_rate": 8.817533129459734e-07, |
| "loss": 0.4377, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.2308546059933407, |
| "grad_norm": 1.1377261082753805, |
| "learning_rate": 8.812260536398466e-07, |
| "loss": 0.4619, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.2330743618201998, |
| "grad_norm": 1.3404047561588681, |
| "learning_rate": 8.806963645673323e-07, |
| "loss": 0.4296, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.2352941176470589, |
| "grad_norm": 1.239224322237675, |
| "learning_rate": 8.801642288940208e-07, |
| "loss": 0.4483, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.237513873473918, |
| "grad_norm": 1.41317135673617, |
| "learning_rate": 8.796296296296296e-07, |
| "loss": 0.4726, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.239733629300777, |
| "grad_norm": 1.2246570480421979, |
| "learning_rate": 8.790925496261922e-07, |
| "loss": 0.4657, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.2419533851276359, |
| "grad_norm": 1.145084471090032, |
| "learning_rate": 8.785529715762273e-07, |
| "loss": 0.4681, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.244173140954495, |
| "grad_norm": 1.1950811185929102, |
| "learning_rate": 8.780108780108779e-07, |
| "loss": 0.4571, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.246392896781354, |
| "grad_norm": 1.6802303305526853, |
| "learning_rate": 8.774662512980269e-07, |
| "loss": 0.5134, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.248612652608213, |
| "grad_norm": 1.399146976432964, |
| "learning_rate": 8.76919073640385e-07, |
| "loss": 0.4767, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.2508324084350722, |
| "grad_norm": 1.3028199076735263, |
| "learning_rate": 8.763693270735523e-07, |
| "loss": 0.4544, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.2530521642619312, |
| "grad_norm": 1.219614197871295, |
| "learning_rate": 8.758169934640523e-07, |
| "loss": 0.4519, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.2552719200887903, |
| "grad_norm": 1.5682463354398537, |
| "learning_rate": 8.752620545073375e-07, |
| "loss": 0.4523, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.2574916759156491, |
| "grad_norm": 1.3495608997045074, |
| "learning_rate": 8.747044917257683e-07, |
| "loss": 0.4495, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.2597114317425082, |
| "grad_norm": 1.1434776149739643, |
| "learning_rate": 8.741442864665613e-07, |
| "loss": 0.4335, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.2619311875693673, |
| "grad_norm": 1.2041213920739398, |
| "learning_rate": 8.735814198997095e-07, |
| "loss": 0.4563, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.2641509433962264, |
| "grad_norm": 1.5799823438116973, |
| "learning_rate": 8.730158730158729e-07, |
| "loss": 0.4615, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.2663706992230854, |
| "grad_norm": 1.1717065129212314, |
| "learning_rate": 8.724476266242374e-07, |
| "loss": 0.4475, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.2685904550499445, |
| "grad_norm": 1.4247152366950546, |
| "learning_rate": 8.718766613503455e-07, |
| "loss": 0.4716, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.2708102108768036, |
| "grad_norm": 1.1615063544388773, |
| "learning_rate": 8.713029576338928e-07, |
| "loss": 0.4679, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.2730299667036626, |
| "grad_norm": 1.5053455438727286, |
| "learning_rate": 8.707264957264957e-07, |
| "loss": 0.4325, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.2752497225305217, |
| "grad_norm": 1.3431594762650616, |
| "learning_rate": 8.701472556894243e-07, |
| "loss": 0.4643, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.2774694783573808, |
| "grad_norm": 1.1121465911199522, |
| "learning_rate": 8.695652173913042e-07, |
| "loss": 0.4644, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.2796892341842399, |
| "grad_norm": 1.3638372065146298, |
| "learning_rate": 8.689803605057842e-07, |
| "loss": 0.4508, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.2819089900110987, |
| "grad_norm": 1.5304826167436472, |
| "learning_rate": 8.683926645091693e-07, |
| "loss": 0.4841, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.2841287458379578, |
| "grad_norm": 1.3770541470358233, |
| "learning_rate": 8.678021086780211e-07, |
| "loss": 0.4744, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.2863485016648168, |
| "grad_norm": 1.1966392163007855, |
| "learning_rate": 8.672086720867207e-07, |
| "loss": 0.464, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.288568257491676, |
| "grad_norm": 1.575072361063041, |
| "learning_rate": 8.666123336049986e-07, |
| "loss": 0.447, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.290788013318535, |
| "grad_norm": 1.4678136512766098, |
| "learning_rate": 8.660130718954247e-07, |
| "loss": 0.4647, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.293007769145394, |
| "grad_norm": 1.2838206658949551, |
| "learning_rate": 8.654108654108654e-07, |
| "loss": 0.4491, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.2952275249722531, |
| "grad_norm": 1.3185270999095682, |
| "learning_rate": 8.648056923918991e-07, |
| "loss": 0.4223, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.297447280799112, |
| "grad_norm": 1.1670301771260112, |
| "learning_rate": 8.641975308641973e-07, |
| "loss": 0.4498, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.299667036625971, |
| "grad_norm": 1.1945169622452185, |
| "learning_rate": 8.635863586358635e-07, |
| "loss": 0.4578, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.3018867924528301, |
| "grad_norm": 1.3353650702342224, |
| "learning_rate": 8.629721532947337e-07, |
| "loss": 0.4327, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.3041065482796892, |
| "grad_norm": 1.2887658691742558, |
| "learning_rate": 8.623548922056385e-07, |
| "loss": 0.4506, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.3063263041065483, |
| "grad_norm": 1.3153941558260245, |
| "learning_rate": 8.617345525076198e-07, |
| "loss": 0.4619, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.3085460599334073, |
| "grad_norm": 1.084957644832423, |
| "learning_rate": 8.611111111111111e-07, |
| "loss": 0.4689, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.3107658157602664, |
| "grad_norm": 1.2958884686931098, |
| "learning_rate": 8.604845446950709e-07, |
| "loss": 0.4393, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.3129855715871255, |
| "grad_norm": 1.2704036344255825, |
| "learning_rate": 8.598548297040758e-07, |
| "loss": 0.4621, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.3152053274139845, |
| "grad_norm": 1.181804955291271, |
| "learning_rate": 8.592219423453679e-07, |
| "loss": 0.4512, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.3174250832408436, |
| "grad_norm": 1.213923802037942, |
| "learning_rate": 8.585858585858585e-07, |
| "loss": 0.4351, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.3196448390677027, |
| "grad_norm": 1.4427504658682893, |
| "learning_rate": 8.579465541490858e-07, |
| "loss": 0.4479, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.3218645948945615, |
| "grad_norm": 1.2101667184282197, |
| "learning_rate": 8.573040045121262e-07, |
| "loss": 0.4531, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.3240843507214206, |
| "grad_norm": 1.3091204432988863, |
| "learning_rate": 8.566581849024596e-07, |
| "loss": 0.4551, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.3263041065482797, |
| "grad_norm": 1.148635010119325, |
| "learning_rate": 8.560090702947845e-07, |
| "loss": 0.4581, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.3285238623751388, |
| "grad_norm": 1.3555133948699525, |
| "learning_rate": 8.553566354077861e-07, |
| "loss": 0.4502, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.3307436182019978, |
| "grad_norm": 1.206500098872861, |
| "learning_rate": 8.547008547008546e-07, |
| "loss": 0.4621, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.332963374028857, |
| "grad_norm": 1.4696355238727896, |
| "learning_rate": 8.540417023707511e-07, |
| "loss": 0.3981, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.3351831298557157, |
| "grad_norm": 1.2754461892433568, |
| "learning_rate": 8.533791523482246e-07, |
| "loss": 0.433, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.3374028856825748, |
| "grad_norm": 1.1181521102343912, |
| "learning_rate": 8.527131782945737e-07, |
| "loss": 0.446, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.3396226415094339, |
| "grad_norm": 1.2949604135230177, |
| "learning_rate": 8.520437535981577e-07, |
| "loss": 0.4226, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.341842397336293, |
| "grad_norm": 2.189494969676846, |
| "learning_rate": 8.513708513708513e-07, |
| "loss": 0.4601, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.344062153163152, |
| "grad_norm": 1.3067648560060685, |
| "learning_rate": 8.506944444444443e-07, |
| "loss": 0.484, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.346281908990011, |
| "grad_norm": 1.3900814826076342, |
| "learning_rate": 8.500145053669857e-07, |
| "loss": 0.4759, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.3485016648168702, |
| "grad_norm": 1.1602214205697887, |
| "learning_rate": 8.493310063990691e-07, |
| "loss": 0.4253, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.3507214206437292, |
| "grad_norm": 1.152221622632636, |
| "learning_rate": 8.486439195100612e-07, |
| "loss": 0.4041, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.3529411764705883, |
| "grad_norm": 2.0055544359824835, |
| "learning_rate": 8.479532163742691e-07, |
| "loss": 0.4666, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.3551609322974474, |
| "grad_norm": 1.1841127652388874, |
| "learning_rate": 8.472588683670478e-07, |
| "loss": 0.4699, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.3573806881243065, |
| "grad_norm": 1.2389603354426622, |
| "learning_rate": 8.465608465608465e-07, |
| "loss": 0.443, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.3596004439511653, |
| "grad_norm": 1.1564359890832918, |
| "learning_rate": 8.458591217211907e-07, |
| "loss": 0.4429, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.3618201997780244, |
| "grad_norm": 1.41227328222579, |
| "learning_rate": 8.451536643026004e-07, |
| "loss": 0.4454, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.3640399556048834, |
| "grad_norm": 1.1079876664577466, |
| "learning_rate": 8.444444444444443e-07, |
| "loss": 0.4752, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.3662597114317425, |
| "grad_norm": 1.1899499269279072, |
| "learning_rate": 8.437314319667262e-07, |
| "loss": 0.4702, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.3684794672586016, |
| "grad_norm": 1.3226199727516845, |
| "learning_rate": 8.430145963658029e-07, |
| "loss": 0.4412, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.3706992230854607, |
| "grad_norm": 1.3142527110965414, |
| "learning_rate": 8.422939068100359e-07, |
| "loss": 0.4631, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.3729189789123195, |
| "grad_norm": 1.2703215823532485, |
| "learning_rate": 8.415693321353698e-07, |
| "loss": 0.4096, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.3751387347391786, |
| "grad_norm": 1.1283519440796594, |
| "learning_rate": 8.408408408408408e-07, |
| "loss": 0.4582, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.3773584905660377, |
| "grad_norm": 1.1637932666394057, |
| "learning_rate": 8.401084010840107e-07, |
| "loss": 0.4424, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.3795782463928967, |
| "grad_norm": 1.3654727451004818, |
| "learning_rate": 8.393719806763283e-07, |
| "loss": 0.4493, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.3817980022197558, |
| "grad_norm": 1.6030535704205855, |
| "learning_rate": 8.386315470784134e-07, |
| "loss": 0.4474, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.3840177580466149, |
| "grad_norm": 11.646261061824008, |
| "learning_rate": 8.378870673952642e-07, |
| "loss": 0.4641, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.386237513873474, |
| "grad_norm": 1.6285236345285772, |
| "learning_rate": 8.371385083713851e-07, |
| "loss": 0.4587, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.388457269700333, |
| "grad_norm": 1.258299361572604, |
| "learning_rate": 8.363858363858362e-07, |
| "loss": 0.4583, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.390677025527192, |
| "grad_norm": 1.382346104349446, |
| "learning_rate": 8.356290174471993e-07, |
| "loss": 0.4608, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.3928967813540512, |
| "grad_norm": 1.6092979440429054, |
| "learning_rate": 8.348680171884591e-07, |
| "loss": 0.517, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.3951165371809102, |
| "grad_norm": 1.1511935463486243, |
| "learning_rate": 8.341028008618035e-07, |
| "loss": 0.4187, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.397336293007769, |
| "grad_norm": 1.4893180915287862, |
| "learning_rate": 8.333333333333332e-07, |
| "loss": 0.4465, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.3995560488346281, |
| "grad_norm": 1.1654172290058302, |
| "learning_rate": 8.325595790776849e-07, |
| "loss": 0.4592, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.4017758046614872, |
| "grad_norm": 1.2468188833825287, |
| "learning_rate": 8.317815021725636e-07, |
| "loss": 0.4525, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.4039955604883463, |
| "grad_norm": 1.1758197640195867, |
| "learning_rate": 8.309990662931839e-07, |
| "loss": 0.4227, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.4062153163152054, |
| "grad_norm": 1.302048112682148, |
| "learning_rate": 8.302122347066167e-07, |
| "loss": 0.4494, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.4084350721420644, |
| "grad_norm": 1.3410878474631271, |
| "learning_rate": 8.294209702660407e-07, |
| "loss": 0.429, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.4106548279689235, |
| "grad_norm": 1.5784172471454891, |
| "learning_rate": 8.286252354048963e-07, |
| "loss": 0.4518, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.4128745837957823, |
| "grad_norm": 1.3266078326407231, |
| "learning_rate": 8.27824992130941e-07, |
| "loss": 0.4561, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.4150943396226414, |
| "grad_norm": 1.1730794847154962, |
| "learning_rate": 8.27020202020202e-07, |
| "loss": 0.4498, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.4173140954495005, |
| "grad_norm": 1.4070270520482746, |
| "learning_rate": 8.262108262108262e-07, |
| "loss": 0.4474, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.4195338512763596, |
| "grad_norm": 1.213825849940393, |
| "learning_rate": 8.253968253968254e-07, |
| "loss": 0.4538, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.4217536071032186, |
| "grad_norm": 1.1622227713378128, |
| "learning_rate": 8.245781598217128e-07, |
| "loss": 0.4265, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.4239733629300777, |
| "grad_norm": 1.3722638535641742, |
| "learning_rate": 8.237547892720306e-07, |
| "loss": 0.4232, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.4261931187569368, |
| "grad_norm": 2.166606954999253, |
| "learning_rate": 8.229266730707652e-07, |
| "loss": 0.4542, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.4284128745837958, |
| "grad_norm": 1.1966806088779693, |
| "learning_rate": 8.220937700706485e-07, |
| "loss": 0.4427, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.430632630410655, |
| "grad_norm": 1.2876269724582017, |
| "learning_rate": 8.212560386473431e-07, |
| "loss": 0.451, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.432852386237514, |
| "grad_norm": 1.2565208495830507, |
| "learning_rate": 8.204134366925064e-07, |
| "loss": 0.4819, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.435072142064373, |
| "grad_norm": 1.1280465608470205, |
| "learning_rate": 8.19565921606738e-07, |
| "loss": 0.4408, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.437291897891232, |
| "grad_norm": 1.1454315806648652, |
| "learning_rate": 8.187134502923975e-07, |
| "loss": 0.4647, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.439511653718091, |
| "grad_norm": 1.1995949528833032, |
| "learning_rate": 8.178559791463017e-07, |
| "loss": 0.4449, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.44173140954495, |
| "grad_norm": 1.213107151128081, |
| "learning_rate": 8.169934640522875e-07, |
| "loss": 0.4423, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.4439511653718091, |
| "grad_norm": 1.1669562141188647, |
| "learning_rate": 8.161258603736478e-07, |
| "loss": 0.4564, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.4461709211986682, |
| "grad_norm": 1.2542020168248378, |
| "learning_rate": 8.152531229454307e-07, |
| "loss": 0.4807, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.4483906770255273, |
| "grad_norm": 1.2337597702311685, |
| "learning_rate": 8.143752060666006e-07, |
| "loss": 0.4578, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.4506104328523861, |
| "grad_norm": 1.1163506747553094, |
| "learning_rate": 8.134920634920636e-07, |
| "loss": 0.4507, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.4528301886792452, |
| "grad_norm": 1.2061276153808267, |
| "learning_rate": 8.126036484245439e-07, |
| "loss": 0.4692, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.4550499445061043, |
| "grad_norm": 1.1005759181356636, |
| "learning_rate": 8.117099135063206e-07, |
| "loss": 0.4205, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.4572697003329633, |
| "grad_norm": 1.1848316729798107, |
| "learning_rate": 8.108108108108107e-07, |
| "loss": 0.441, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.4594894561598224, |
| "grad_norm": 1.19691331615668, |
| "learning_rate": 8.099062918340026e-07, |
| "loss": 0.4295, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.4617092119866815, |
| "grad_norm": 1.469767362231428, |
| "learning_rate": 8.089963074857335e-07, |
| "loss": 0.4715, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.4639289678135405, |
| "grad_norm": 1.3533624301127642, |
| "learning_rate": 8.080808080808079e-07, |
| "loss": 0.4351, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.4661487236403996, |
| "grad_norm": 1.2855778063785157, |
| "learning_rate": 8.071597433299561e-07, |
| "loss": 0.4668, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.4683684794672587, |
| "grad_norm": 1.2831187882443813, |
| "learning_rate": 8.062330623306233e-07, |
| "loss": 0.4395, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 1.2080799785694776, |
| "learning_rate": 8.053007135575944e-07, |
| "loss": 0.4555, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.4728079911209768, |
| "grad_norm": 1.5842263462761013, |
| "learning_rate": 8.043626448534423e-07, |
| "loss": 0.4186, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.4750277469478357, |
| "grad_norm": 1.5084192437587065, |
| "learning_rate": 8.034188034188033e-07, |
| "loss": 0.4542, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.4772475027746947, |
| "grad_norm": 1.2075109855494262, |
| "learning_rate": 8.02469135802469e-07, |
| "loss": 0.4295, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.4794672586015538, |
| "grad_norm": 1.3944924708355404, |
| "learning_rate": 8.015135878912968e-07, |
| "loss": 0.4511, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.4816870144284129, |
| "grad_norm": 1.1322175488542188, |
| "learning_rate": 8.005521048999311e-07, |
| "loss": 0.4442, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.483906770255272, |
| "grad_norm": 1.196489191309896, |
| "learning_rate": 7.995846313603322e-07, |
| "loss": 0.4798, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.486126526082131, |
| "grad_norm": 1.2141015327602915, |
| "learning_rate": 7.986111111111112e-07, |
| "loss": 0.4623, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.4883462819089899, |
| "grad_norm": 1.1570484813036512, |
| "learning_rate": 7.976314872866596e-07, |
| "loss": 0.4358, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.490566037735849, |
| "grad_norm": 1.1307507968914452, |
| "learning_rate": 7.966457023060795e-07, |
| "loss": 0.4929, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.492785793562708, |
| "grad_norm": 1.3352988179015075, |
| "learning_rate": 7.956536978618998e-07, |
| "loss": 0.4622, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.495005549389567, |
| "grad_norm": 1.2820634824407855, |
| "learning_rate": 7.946554149085794e-07, |
| "loss": 0.4454, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.4972253052164262, |
| "grad_norm": 1.1826692037429019, |
| "learning_rate": 7.936507936507937e-07, |
| "loss": 0.4604, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.4994450610432852, |
| "grad_norm": 1.3318875370925376, |
| "learning_rate": 7.926397735314932e-07, |
| "loss": 0.4625, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.5016648168701443, |
| "grad_norm": 1.2519631673745286, |
| "learning_rate": 7.916222932197372e-07, |
| "loss": 0.4385, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.5038845726970034, |
| "grad_norm": 1.2914098984220168, |
| "learning_rate": 7.905982905982905e-07, |
| "loss": 0.4353, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.5061043285238624, |
| "grad_norm": 1.1899729635623018, |
| "learning_rate": 7.895677027509823e-07, |
| "loss": 0.443, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.5083240843507215, |
| "grad_norm": 1.190444535694286, |
| "learning_rate": 7.885304659498207e-07, |
| "loss": 0.4614, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.5105438401775806, |
| "grad_norm": 1.1706400844430933, |
| "learning_rate": 7.874865156418553e-07, |
| "loss": 0.4331, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.5127635960044397, |
| "grad_norm": 1.4812029130894295, |
| "learning_rate": 7.864357864357864e-07, |
| "loss": 0.458, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.5149833518312985, |
| "grad_norm": 1.5221542759514808, |
| "learning_rate": 7.853782120883096e-07, |
| "loss": 0.4573, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.5172031076581576, |
| "grad_norm": 1.178963451844953, |
| "learning_rate": 7.843137254901962e-07, |
| "loss": 0.4401, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.5194228634850167, |
| "grad_norm": 1.1351996419308124, |
| "learning_rate": 7.832422586520947e-07, |
| "loss": 0.4916, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.5216426193118757, |
| "grad_norm": 1.09628101612322, |
| "learning_rate": 7.821637426900585e-07, |
| "loss": 0.4332, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.5238623751387348, |
| "grad_norm": 1.213021644918351, |
| "learning_rate": 7.81078107810781e-07, |
| "loss": 0.4498, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.5260821309655936, |
| "grad_norm": 1.1545827521479424, |
| "learning_rate": 7.799852832965415e-07, |
| "loss": 0.4458, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.5283018867924527, |
| "grad_norm": 1.4669602766610934, |
| "learning_rate": 7.788851974898486e-07, |
| "loss": 0.453, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.5305216426193118, |
| "grad_norm": 1.2805205310095251, |
| "learning_rate": 7.777777777777777e-07, |
| "loss": 0.4475, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.5327413984461709, |
| "grad_norm": 1.356427768134019, |
| "learning_rate": 7.76662950575994e-07, |
| "loss": 0.4363, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.53496115427303, |
| "grad_norm": 2.519051772383195, |
| "learning_rate": 7.755406413124534e-07, |
| "loss": 0.4676, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.537180910099889, |
| "grad_norm": 1.4450670338822147, |
| "learning_rate": 7.744107744107744e-07, |
| "loss": 0.452, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.539400665926748, |
| "grad_norm": 1.2099248717480078, |
| "learning_rate": 7.732732732732732e-07, |
| "loss": 0.4321, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.5416204217536071, |
| "grad_norm": 1.090590842333066, |
| "learning_rate": 7.721280602636534e-07, |
| "loss": 0.4705, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.5438401775804662, |
| "grad_norm": 1.3443127837215874, |
| "learning_rate": 7.709750566893423e-07, |
| "loss": 0.4407, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.5460599334073253, |
| "grad_norm": 1.1806195813187879, |
| "learning_rate": 7.698141827834659e-07, |
| "loss": 0.4495, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.5482796892341844, |
| "grad_norm": 1.3811319584265567, |
| "learning_rate": 7.686453576864536e-07, |
| "loss": 0.4396, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.5504994450610434, |
| "grad_norm": 1.6274469192909502, |
| "learning_rate": 7.674684994272622e-07, |
| "loss": 0.4607, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.5527192008879025, |
| "grad_norm": 1.1151313287955884, |
| "learning_rate": 7.662835249042146e-07, |
| "loss": 0.4654, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.5549389567147613, |
| "grad_norm": 1.107422673857088, |
| "learning_rate": 7.650903498654364e-07, |
| "loss": 0.4654, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.5571587125416204, |
| "grad_norm": 3.3935262710809413, |
| "learning_rate": 7.638888888888888e-07, |
| "loss": 0.4136, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.5593784683684795, |
| "grad_norm": 1.3044030455695086, |
| "learning_rate": 7.626790553619821e-07, |
| "loss": 0.4806, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.5615982241953386, |
| "grad_norm": 1.6105121644457865, |
| "learning_rate": 7.614607614607614e-07, |
| "loss": 0.458, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.5638179800221974, |
| "grad_norm": 1.1574126639573554, |
| "learning_rate": 7.602339181286549e-07, |
| "loss": 0.4326, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.5660377358490565, |
| "grad_norm": 2.166517215956068, |
| "learning_rate": 7.589984350547731e-07, |
| "loss": 0.4513, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.5682574916759155, |
| "grad_norm": 1.264720713924714, |
| "learning_rate": 7.577542206517472e-07, |
| "loss": 0.4527, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.5704772475027746, |
| "grad_norm": 1.1937319067279388, |
| "learning_rate": 7.565011820330969e-07, |
| "loss": 0.4309, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.5726970033296337, |
| "grad_norm": 1.3157411175887248, |
| "learning_rate": 7.552392249901146e-07, |
| "loss": 0.4419, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.5749167591564928, |
| "grad_norm": 1.6021534077017254, |
| "learning_rate": 7.539682539682539e-07, |
| "loss": 0.4693, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.5771365149833518, |
| "grad_norm": 1.3815775452863337, |
| "learning_rate": 7.526881720430106e-07, |
| "loss": 0.4225, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.579356270810211, |
| "grad_norm": 1.1669990156242764, |
| "learning_rate": 7.513988808952837e-07, |
| "loss": 0.4427, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.58157602663707, |
| "grad_norm": 1.336716126017333, |
| "learning_rate": 7.501002807862012e-07, |
| "loss": 0.4764, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.583795782463929, |
| "grad_norm": 1.1491190746227358, |
| "learning_rate": 7.48792270531401e-07, |
| "loss": 0.4724, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.5860155382907881, |
| "grad_norm": 1.4714988469601522, |
| "learning_rate": 7.474747474747474e-07, |
| "loss": 0.4381, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.5882352941176472, |
| "grad_norm": 1.330675570974309, |
| "learning_rate": 7.46147607461476e-07, |
| "loss": 0.4319, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.5904550499445063, |
| "grad_norm": 1.1325025765131167, |
| "learning_rate": 7.448107448107446e-07, |
| "loss": 0.4531, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.592674805771365, |
| "grad_norm": 1.324548351381287, |
| "learning_rate": 7.434640522875816e-07, |
| "loss": 0.4659, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.5948945615982242, |
| "grad_norm": 1.3011201860764343, |
| "learning_rate": 7.421074210742108e-07, |
| "loss": 0.4439, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.5971143174250833, |
| "grad_norm": 1.6130819582835183, |
| "learning_rate": 7.407407407407406e-07, |
| "loss": 0.4421, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.5993340732519423, |
| "grad_norm": 1.5662724419530687, |
| "learning_rate": 7.393638992152003e-07, |
| "loss": 0.4602, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.6015538290788012, |
| "grad_norm": 1.1942408444514543, |
| "learning_rate": 7.37976782752902e-07, |
| "loss": 0.4527, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.6037735849056602, |
| "grad_norm": 1.2859091953871118, |
| "learning_rate": 7.365792759051186e-07, |
| "loss": 0.4335, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.6059933407325193, |
| "grad_norm": 1.3567371753220179, |
| "learning_rate": 7.351712614870509e-07, |
| "loss": 0.4482, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.6082130965593784, |
| "grad_norm": 1.227995279262155, |
| "learning_rate": 7.337526205450733e-07, |
| "loss": 0.435, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.6104328523862375, |
| "grad_norm": 1.894681494953077, |
| "learning_rate": 7.323232323232324e-07, |
| "loss": 0.4, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.6126526082130965, |
| "grad_norm": 1.7711751997207135, |
| "learning_rate": 7.308829742289818e-07, |
| "loss": 0.4532, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.6148723640399556, |
| "grad_norm": 1.2189223180486914, |
| "learning_rate": 7.294317217981341e-07, |
| "loss": 0.4361, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.6170921198668147, |
| "grad_norm": 1.221687708753864, |
| "learning_rate": 7.279693486590037e-07, |
| "loss": 0.4503, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.6193118756936737, |
| "grad_norm": 1.4298000334341903, |
| "learning_rate": 7.264957264957265e-07, |
| "loss": 0.4704, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.6215316315205328, |
| "grad_norm": 1.435956678641849, |
| "learning_rate": 7.250107250107249e-07, |
| "loss": 0.4317, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.6237513873473919, |
| "grad_norm": 1.1874120488254374, |
| "learning_rate": 7.235142118863048e-07, |
| "loss": 0.4452, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.625971143174251, |
| "grad_norm": 1.2271468200041469, |
| "learning_rate": 7.220060527453523e-07, |
| "loss": 0.4732, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.62819089900111, |
| "grad_norm": 1.256323602386584, |
| "learning_rate": 7.204861111111112e-07, |
| "loss": 0.4229, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.6304106548279689, |
| "grad_norm": 1.2589209693187742, |
| "learning_rate": 7.189542483660131e-07, |
| "loss": 0.4629, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.632630410654828, |
| "grad_norm": 1.1321916007978945, |
| "learning_rate": 7.174103237095362e-07, |
| "loss": 0.4413, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.634850166481687, |
| "grad_norm": 1.2308758075551893, |
| "learning_rate": 7.158541941150637e-07, |
| "loss": 0.4484, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.637069922308546, |
| "grad_norm": 1.4080143965250038, |
| "learning_rate": 7.142857142857143e-07, |
| "loss": 0.4315, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.6392896781354052, |
| "grad_norm": 1.1972733836836713, |
| "learning_rate": 7.12704736609119e-07, |
| "loss": 0.4617, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.641509433962264, |
| "grad_norm": 1.444178694974046, |
| "learning_rate": 7.11111111111111e-07, |
| "loss": 0.4308, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.643729189789123, |
| "grad_norm": 1.9630648369634558, |
| "learning_rate": 7.095046854082998e-07, |
| "loss": 0.4358, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.6459489456159822, |
| "grad_norm": 1.0478295513396192, |
| "learning_rate": 7.078853046594981e-07, |
| "loss": 0.4354, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.6481687014428412, |
| "grad_norm": 1.5259200054970437, |
| "learning_rate": 7.062528115159693e-07, |
| "loss": 0.4542, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.6503884572697003, |
| "grad_norm": 1.3033791571298337, |
| "learning_rate": 7.046070460704607e-07, |
| "loss": 0.4604, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.6526082130965594, |
| "grad_norm": 1.2686542276794928, |
| "learning_rate": 7.029478458049886e-07, |
| "loss": 0.4468, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.6548279689234184, |
| "grad_norm": 1.389909299627517, |
| "learning_rate": 7.012750455373405e-07, |
| "loss": 0.4311, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.6570477247502775, |
| "grad_norm": 1.2863040122952574, |
| "learning_rate": 6.99588477366255e-07, |
| "loss": 0.4397, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.6592674805771366, |
| "grad_norm": 1.5830202579995283, |
| "learning_rate": 6.978879706152433e-07, |
| "loss": 0.4405, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.6614872364039956, |
| "grad_norm": 1.5295124154011601, |
| "learning_rate": 6.961733517750114e-07, |
| "loss": 0.461, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.6637069922308547, |
| "grad_norm": 1.2908712828825855, |
| "learning_rate": 6.944444444444444e-07, |
| "loss": 0.4427, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.6659267480577138, |
| "grad_norm": 1.3248070106181145, |
| "learning_rate": 6.92701069270107e-07, |
| "loss": 0.4571, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.6681465038845729, |
| "grad_norm": 1.9717448900035122, |
| "learning_rate": 6.909430438842202e-07, |
| "loss": 0.454, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.6703662597114317, |
| "grad_norm": 1.1474725389983733, |
| "learning_rate": 6.891701828410688e-07, |
| "loss": 0.4518, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.6725860155382908, |
| "grad_norm": 1.1422547185949583, |
| "learning_rate": 6.87382297551789e-07, |
| "loss": 0.4319, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.6748057713651499, |
| "grad_norm": 1.3008377268645959, |
| "learning_rate": 6.855791962174942e-07, |
| "loss": 0.4496, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.677025527192009, |
| "grad_norm": 2.5374443382266274, |
| "learning_rate": 6.837606837606838e-07, |
| "loss": 0.4627, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.6792452830188678, |
| "grad_norm": 1.3126351528616036, |
| "learning_rate": 6.819265617548878e-07, |
| "loss": 0.474, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.6814650388457268, |
| "grad_norm": 1.1532261673764852, |
| "learning_rate": 6.800766283524904e-07, |
| "loss": 0.4146, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.683684794672586, |
| "grad_norm": 1.2578942626083018, |
| "learning_rate": 6.782106782106782e-07, |
| "loss": 0.4427, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.685904550499445, |
| "grad_norm": 1.171921369101573, |
| "learning_rate": 6.763285024154589e-07, |
| "loss": 0.4545, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.688124306326304, |
| "grad_norm": 1.336852590496218, |
| "learning_rate": 6.744298884036874e-07, |
| "loss": 0.4391, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.6903440621531631, |
| "grad_norm": 1.567111800099075, |
| "learning_rate": 6.725146198830411e-07, |
| "loss": 0.4275, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.6925638179800222, |
| "grad_norm": 1.232121475002925, |
| "learning_rate": 6.705824767498777e-07, |
| "loss": 0.451, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.6947835738068813, |
| "grad_norm": 1.588473784999118, |
| "learning_rate": 6.686332350049164e-07, |
| "loss": 0.4423, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.6970033296337403, |
| "grad_norm": 1.1992763714793029, |
| "learning_rate": 6.666666666666666e-07, |
| "loss": 0.4518, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.6992230854605994, |
| "grad_norm": 1.2848849550190784, |
| "learning_rate": 6.646825396825396e-07, |
| "loss": 0.4525, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.7014428412874585, |
| "grad_norm": 1.1951992270325973, |
| "learning_rate": 6.626806178375684e-07, |
| "loss": 0.4299, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.7036625971143176, |
| "grad_norm": 1.376140764505536, |
| "learning_rate": 6.606606606606606e-07, |
| "loss": 0.4398, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.7058823529411766, |
| "grad_norm": 1.2201459082778296, |
| "learning_rate": 6.586224233283057e-07, |
| "loss": 0.4319, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.7081021087680355, |
| "grad_norm": 1.3935864054018974, |
| "learning_rate": 6.565656565656566e-07, |
| "loss": 0.4729, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.7103218645948945, |
| "grad_norm": 1.5233429855618519, |
| "learning_rate": 6.54490106544901e-07, |
| "loss": 0.4468, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.7125416204217536, |
| "grad_norm": 1.2950847008110937, |
| "learning_rate": 6.523955147808359e-07, |
| "loss": 0.4488, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.7147613762486127, |
| "grad_norm": 1.1452116937420966, |
| "learning_rate": 6.502816180235534e-07, |
| "loss": 0.4654, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.7169811320754715, |
| "grad_norm": 1.595146736998358, |
| "learning_rate": 6.481481481481481e-07, |
| "loss": 0.4497, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.7192008879023306, |
| "grad_norm": 1.3087928114755394, |
| "learning_rate": 6.459948320413435e-07, |
| "loss": 0.4484, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.7214206437291897, |
| "grad_norm": 1.8195299066925699, |
| "learning_rate": 6.438213914849428e-07, |
| "loss": 0.4467, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.7236403995560488, |
| "grad_norm": 1.0905833294557823, |
| "learning_rate": 6.416275430359938e-07, |
| "loss": 0.4479, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.7258601553829078, |
| "grad_norm": 1.3285142739698166, |
| "learning_rate": 6.394129979035639e-07, |
| "loss": 0.4532, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.728079911209767, |
| "grad_norm": 1.263445398115666, |
| "learning_rate": 6.371774618220115e-07, |
| "loss": 0.4311, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.730299667036626, |
| "grad_norm": 1.2264280529869596, |
| "learning_rate": 6.349206349206349e-07, |
| "loss": 0.4214, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.732519422863485, |
| "grad_norm": 1.369934602758291, |
| "learning_rate": 6.326422115895799e-07, |
| "loss": 0.4458, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.734739178690344, |
| "grad_norm": 1.5901424052124737, |
| "learning_rate": 6.303418803418803e-07, |
| "loss": 0.4239, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.7369589345172032, |
| "grad_norm": 1.580914386725684, |
| "learning_rate": 6.280193236714975e-07, |
| "loss": 0.433, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.7391786903440623, |
| "grad_norm": 1.9007391003117418, |
| "learning_rate": 6.256742179072276e-07, |
| "loss": 0.4532, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.7413984461709213, |
| "grad_norm": 1.307226977166083, |
| "learning_rate": 6.233062330623306e-07, |
| "loss": 0.4782, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.7436182019977804, |
| "grad_norm": 1.2581634341891148, |
| "learning_rate": 6.209150326797385e-07, |
| "loss": 0.4772, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.7458379578246392, |
| "grad_norm": 1.3123852294397897, |
| "learning_rate": 6.185002736726874e-07, |
| "loss": 0.4502, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.7480577136514983, |
| "grad_norm": 1.315432695618896, |
| "learning_rate": 6.16061606160616e-07, |
| "loss": 0.4793, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.7502774694783574, |
| "grad_norm": 1.3331739427739435, |
| "learning_rate": 6.135986733001658e-07, |
| "loss": 0.4351, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.7524972253052165, |
| "grad_norm": 1.1750674279086586, |
| "learning_rate": 6.111111111111112e-07, |
| "loss": 0.4221, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.7547169811320755, |
| "grad_norm": 1.154791123197388, |
| "learning_rate": 6.085985482970407e-07, |
| "loss": 0.4308, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.7569367369589344, |
| "grad_norm": 1.3127678091741015, |
| "learning_rate": 6.060606060606061e-07, |
| "loss": 0.4453, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.7591564927857934, |
| "grad_norm": 1.2866260542006003, |
| "learning_rate": 6.034968979131415e-07, |
| "loss": 0.4532, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.7613762486126525, |
| "grad_norm": 1.2476359497788672, |
| "learning_rate": 6.009070294784579e-07, |
| "loss": 0.4429, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.7635960044395116, |
| "grad_norm": 1.1932146525046583, |
| "learning_rate": 5.982905982905982e-07, |
| "loss": 0.4782, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.7658157602663707, |
| "grad_norm": 1.2447276146507429, |
| "learning_rate": 5.956471935853378e-07, |
| "loss": 0.4509, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.7680355160932297, |
| "grad_norm": 1.154358093852454, |
| "learning_rate": 5.929763960852043e-07, |
| "loss": 0.4278, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.7702552719200888, |
| "grad_norm": 1.1339930293132137, |
| "learning_rate": 5.902777777777778e-07, |
| "loss": 0.4396, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.7724750277469479, |
| "grad_norm": 1.353145494940799, |
| "learning_rate": 5.875509016870273e-07, |
| "loss": 0.4506, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.774694783573807, |
| "grad_norm": 1.3269883420055035, |
| "learning_rate": 5.847953216374269e-07, |
| "loss": 0.4377, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.776914539400666, |
| "grad_norm": 1.7657131677608475, |
| "learning_rate": 5.820105820105819e-07, |
| "loss": 0.4807, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.779134295227525, |
| "grad_norm": 1.2762956654212174, |
| "learning_rate": 5.791962174940897e-07, |
| "loss": 0.4496, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.7813540510543842, |
| "grad_norm": 1.447030988390941, |
| "learning_rate": 5.763517528223411e-07, |
| "loss": 0.4488, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.7835738068812432, |
| "grad_norm": 1.4386048928316586, |
| "learning_rate": 5.734767025089605e-07, |
| "loss": 0.4639, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.785793562708102, |
| "grad_norm": 1.2290691126902766, |
| "learning_rate": 5.705705705705706e-07, |
| "loss": 0.501, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.7880133185349611, |
| "grad_norm": 1.1130486023477824, |
| "learning_rate": 5.67632850241546e-07, |
| "loss": 0.4492, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.7902330743618202, |
| "grad_norm": 1.27395889325988, |
| "learning_rate": 5.646630236794171e-07, |
| "loss": 0.4267, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.7924528301886793, |
| "grad_norm": 1.081240362636203, |
| "learning_rate": 5.616605616605615e-07, |
| "loss": 0.4457, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.7946725860155381, |
| "grad_norm": 1.2001786727599388, |
| "learning_rate": 5.586249232658072e-07, |
| "loss": 0.4244, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.7968923418423972, |
| "grad_norm": 1.2942776238812326, |
| "learning_rate": 5.555555555555554e-07, |
| "loss": 0.4396, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.7991120976692563, |
| "grad_norm": 1.3095924522448517, |
| "learning_rate": 5.524518932340161e-07, |
| "loss": 0.4394, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.8013318534961154, |
| "grad_norm": 1.1837983669154903, |
| "learning_rate": 5.493133583021223e-07, |
| "loss": 0.4739, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.8035516093229744, |
| "grad_norm": 1.3161140920438619, |
| "learning_rate": 5.461393596986818e-07, |
| "loss": 0.4466, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.8057713651498335, |
| "grad_norm": 1.4519854795085234, |
| "learning_rate": 5.42929292929293e-07, |
| "loss": 0.4476, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.8079911209766926, |
| "grad_norm": 1.1806490538563992, |
| "learning_rate": 5.396825396825396e-07, |
| "loss": 0.4523, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.8102108768035516, |
| "grad_norm": 1.2972477037538859, |
| "learning_rate": 5.363984674329501e-07, |
| "loss": 0.4516, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.8124306326304107, |
| "grad_norm": 1.2156981658718196, |
| "learning_rate": 5.330764290301862e-07, |
| "loss": 0.4682, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.8146503884572698, |
| "grad_norm": 1.3383256730414803, |
| "learning_rate": 5.297157622739017e-07, |
| "loss": 0.4441, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.8168701442841289, |
| "grad_norm": 1.1561855543216444, |
| "learning_rate": 5.263157894736842e-07, |
| "loss": 0.4385, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.819089900110988, |
| "grad_norm": 1.3166365086952017, |
| "learning_rate": 5.22875816993464e-07, |
| "loss": 0.4412, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.821309655937847, |
| "grad_norm": 1.3947882182298343, |
| "learning_rate": 5.193951347797501e-07, |
| "loss": 0.4431, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.8235294117647058, |
| "grad_norm": 1.2940515335047955, |
| "learning_rate": 5.158730158730157e-07, |
| "loss": 0.4718, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.825749167591565, |
| "grad_norm": 1.6597858983745644, |
| "learning_rate": 5.123087159015301e-07, |
| "loss": 0.4494, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.827968923418424, |
| "grad_norm": 1.3713070534311818, |
| "learning_rate": 5.087014725568942e-07, |
| "loss": 0.458, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.830188679245283, |
| "grad_norm": 1.3214602635726282, |
| "learning_rate": 5.050505050505049e-07, |
| "loss": 0.4208, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.832408435072142, |
| "grad_norm": 1.19733028360872, |
| "learning_rate": 5.013550135501355e-07, |
| "loss": 0.4495, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.834628190899001, |
| "grad_norm": 1.1839632614546902, |
| "learning_rate": 4.976141785957736e-07, |
| "loss": 0.4593, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.83684794672586, |
| "grad_norm": 1.3407995957608345, |
| "learning_rate": 4.938271604938272e-07, |
| "loss": 0.4534, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.8390677025527191, |
| "grad_norm": 1.1447226640936632, |
| "learning_rate": 4.899930986887508e-07, |
| "loss": 0.4243, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.8412874583795782, |
| "grad_norm": 1.1716186165798743, |
| "learning_rate": 4.86111111111111e-07, |
| "loss": 0.4254, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.8435072142064373, |
| "grad_norm": 1.4155964443562337, |
| "learning_rate": 4.821802935010482e-07, |
| "loss": 0.4827, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.8457269700332963, |
| "grad_norm": 1.0602142782272528, |
| "learning_rate": 4.781997187060478e-07, |
| "loss": 0.4235, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.8479467258601554, |
| "grad_norm": 2.6338388785846347, |
| "learning_rate": 4.741684359518754e-07, |
| "loss": 0.4381, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.8501664816870145, |
| "grad_norm": 1.2197211361897908, |
| "learning_rate": 4.7008547008547005e-07, |
| "loss": 0.4388, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.8523862375138735, |
| "grad_norm": 1.7745318375917518, |
| "learning_rate": 4.659498207885304e-07, |
| "loss": 0.4871, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.8546059933407326, |
| "grad_norm": 1.721485464100693, |
| "learning_rate": 4.6176046176046174e-07, |
| "loss": 0.4741, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.8568257491675917, |
| "grad_norm": 1.1767003327204806, |
| "learning_rate": 4.57516339869281e-07, |
| "loss": 0.4103, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.8590455049944508, |
| "grad_norm": 1.6104783471864577, |
| "learning_rate": 4.532163742690058e-07, |
| "loss": 0.4706, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.8612652608213096, |
| "grad_norm": 1.237482652598919, |
| "learning_rate": 4.48859455481972e-07, |
| "loss": 0.4352, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.8634850166481687, |
| "grad_norm": 1.5155898726016581, |
| "learning_rate": 4.4444444444444433e-07, |
| "loss": 0.4588, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.8657047724750278, |
| "grad_norm": 2.617740589380866, |
| "learning_rate": 4.399701715137956e-07, |
| "loss": 0.4668, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.8679245283018868, |
| "grad_norm": 1.1020239968511378, |
| "learning_rate": 4.3543543543543544e-07, |
| "loss": 0.4349, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.870144284128746, |
| "grad_norm": 1.35587074648194, |
| "learning_rate": 4.308390022675737e-07, |
| "loss": 0.4259, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.8723640399556047, |
| "grad_norm": 1.1609067209173856, |
| "learning_rate": 4.26179604261796e-07, |
| "loss": 0.4586, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.8745837957824638, |
| "grad_norm": 1.1568537405414596, |
| "learning_rate": 4.214559386973179e-07, |
| "loss": 0.4667, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.8768035516093229, |
| "grad_norm": 1.1053999396633238, |
| "learning_rate": 4.166666666666666e-07, |
| "loss": 0.4447, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.879023307436182, |
| "grad_norm": 1.1469720416952864, |
| "learning_rate": 4.118104118104118e-07, |
| "loss": 0.4105, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.881243063263041, |
| "grad_norm": 1.4950241181155033, |
| "learning_rate": 4.0688575899843503e-07, |
| "loss": 0.4523, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.8834628190899, |
| "grad_norm": 1.783385866887097, |
| "learning_rate": 4.0189125295508264e-07, |
| "loss": 0.473, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.8856825749167592, |
| "grad_norm": 1.3445672475572865, |
| "learning_rate": 3.968253968253968e-07, |
| "loss": 0.4438, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.8879023307436182, |
| "grad_norm": 1.2800024731322417, |
| "learning_rate": 3.9168665067945643e-07, |
| "loss": 0.4613, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.8901220865704773, |
| "grad_norm": 1.1359156205032364, |
| "learning_rate": 3.864734299516908e-07, |
| "loss": 0.4135, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.8923418423973364, |
| "grad_norm": 1.1663061656581661, |
| "learning_rate": 3.81184103811841e-07, |
| "loss": 0.4661, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.8945615982241955, |
| "grad_norm": 1.079911454850862, |
| "learning_rate": 3.758169934640523e-07, |
| "loss": 0.3968, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.8967813540510545, |
| "grad_norm": 1.1354316719905342, |
| "learning_rate": 3.703703703703703e-07, |
| "loss": 0.4685, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.8990011098779136, |
| "grad_norm": 1.2597607085859803, |
| "learning_rate": 3.6484245439469314e-07, |
| "loss": 0.4482, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.9012208657047724, |
| "grad_norm": 1.2607408697644482, |
| "learning_rate": 3.592314118629908e-07, |
| "loss": 0.4292, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.9034406215316315, |
| "grad_norm": 1.3646381579934892, |
| "learning_rate": 3.535353535353535e-07, |
| "loss": 0.4717, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.9056603773584906, |
| "grad_norm": 1.1311629124430989, |
| "learning_rate": 3.477523324851569e-07, |
| "loss": 0.4686, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.9078801331853497, |
| "grad_norm": 1.3891775849805408, |
| "learning_rate": 3.4188034188034184e-07, |
| "loss": 0.4417, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.9100998890122085, |
| "grad_norm": 1.2686903993236105, |
| "learning_rate": 3.359173126614987e-07, |
| "loss": 0.4435, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.9123196448390676, |
| "grad_norm": 1.23697755590465, |
| "learning_rate": 3.298611111111111e-07, |
| "loss": 0.444, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.9145394006659266, |
| "grad_norm": 1.1646889799724214, |
| "learning_rate": 3.237095363079614e-07, |
| "loss": 0.4461, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.9167591564927857, |
| "grad_norm": 1.1690789125536099, |
| "learning_rate": 3.1746031746031743e-07, |
| "loss": 0.4381, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.9189789123196448, |
| "grad_norm": 1.856131712657273, |
| "learning_rate": 3.11111111111111e-07, |
| "loss": 0.4303, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.9211986681465039, |
| "grad_norm": 1.2861751372732355, |
| "learning_rate": 3.046594982078853e-07, |
| "loss": 0.4721, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.923418423973363, |
| "grad_norm": 1.1448253628790064, |
| "learning_rate": 2.9810298102981023e-07, |
| "loss": 0.4303, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.925638179800222, |
| "grad_norm": 1.5061396832602665, |
| "learning_rate": 2.914389799635701e-07, |
| "loss": 0.4459, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.927857935627081, |
| "grad_norm": 1.6049126455575795, |
| "learning_rate": 2.846648301193756e-07, |
| "loss": 0.5042, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.9300776914539401, |
| "grad_norm": 1.3466952658104057, |
| "learning_rate": 2.7777777777777776e-07, |
| "loss": 0.4514, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.9322974472807992, |
| "grad_norm": 1.2359889482809692, |
| "learning_rate": 2.707749766573296e-07, |
| "loss": 0.4615, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.9345172031076583, |
| "grad_norm": 1.3803667899720569, |
| "learning_rate": 2.63653483992467e-07, |
| "loss": 0.4753, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.9367369589345174, |
| "grad_norm": 1.3418543813232986, |
| "learning_rate": 2.5641025641025636e-07, |
| "loss": 0.4426, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.9389567147613762, |
| "grad_norm": 1.1541425203967928, |
| "learning_rate": 2.4904214559386974e-07, |
| "loss": 0.436, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.9411764705882353, |
| "grad_norm": 1.2229762566685807, |
| "learning_rate": 2.4154589371980677e-07, |
| "loss": 0.4561, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.9433962264150944, |
| "grad_norm": 1.365624157409681, |
| "learning_rate": 2.3391812865497075e-07, |
| "loss": 0.4738, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.9456159822419534, |
| "grad_norm": 1.0957773012671013, |
| "learning_rate": 2.2615535889872173e-07, |
| "loss": 0.4272, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.9478357380688123, |
| "grad_norm": 1.2975570955631437, |
| "learning_rate": 2.1825396825396822e-07, |
| "loss": 0.4617, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.9500554938956713, |
| "grad_norm": 1.1855387460284055, |
| "learning_rate": 2.1021021021021017e-07, |
| "loss": 0.4445, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.9522752497225304, |
| "grad_norm": 1.2084720652589793, |
| "learning_rate": 2.0202020202020197e-07, |
| "loss": 0.4423, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.9544950055493895, |
| "grad_norm": 1.1657717383702717, |
| "learning_rate": 1.9367991845056064e-07, |
| "loss": 0.4102, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.9567147613762486, |
| "grad_norm": 1.1672852942067808, |
| "learning_rate": 1.8518518518518516e-07, |
| "loss": 0.4333, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.9589345172031076, |
| "grad_norm": 1.2025436670810457, |
| "learning_rate": 1.7653167185877466e-07, |
| "loss": 0.4474, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.9611542730299667, |
| "grad_norm": 1.1252788533397284, |
| "learning_rate": 1.6771488469601673e-07, |
| "loss": 0.4477, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.9633740288568258, |
| "grad_norm": 1.1170346454418365, |
| "learning_rate": 1.5873015873015872e-07, |
| "loss": 0.4553, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.9655937846836848, |
| "grad_norm": 1.2907160443119068, |
| "learning_rate": 1.4957264957264952e-07, |
| "loss": 0.4592, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.967813540510544, |
| "grad_norm": 1.2266346238748143, |
| "learning_rate": 1.4023732470334413e-07, |
| "loss": 0.4694, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.970033296337403, |
| "grad_norm": 1.4351882414602743, |
| "learning_rate": 1.30718954248366e-07, |
| "loss": 0.4587, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.972253052164262, |
| "grad_norm": 1.1407373000182206, |
| "learning_rate": 1.2101210121012102e-07, |
| "loss": 0.443, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.9744728079911211, |
| "grad_norm": 1.0938698878218327, |
| "learning_rate": 1.1111111111111108e-07, |
| "loss": 0.4529, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.97669256381798, |
| "grad_norm": 1.8232355279940384, |
| "learning_rate": 1.01010101010101e-07, |
| "loss": 0.463, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.978912319644839, |
| "grad_norm": 1.2544295442701097, |
| "learning_rate": 9.070294784580498e-08, |
| "loss": 0.4321, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.9811320754716981, |
| "grad_norm": 1.4748170150022004, |
| "learning_rate": 8.018327605956471e-08, |
| "loss": 0.4776, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.9833518312985572, |
| "grad_norm": 1.278573261536441, |
| "learning_rate": 6.944444444444444e-08, |
| "loss": 0.4288, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.9855715871254163, |
| "grad_norm": 1.2558621483454577, |
| "learning_rate": 5.8479532163742687e-08, |
| "loss": 0.46, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.987791342952275, |
| "grad_norm": 1.3026759598014417, |
| "learning_rate": 4.7281323877068556e-08, |
| "loss": 0.4658, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.9900110987791342, |
| "grad_norm": 1.5203732374448793, |
| "learning_rate": 3.584229390681003e-08, |
| "loss": 0.4388, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.9922308546059933, |
| "grad_norm": 1.4656249651411055, |
| "learning_rate": 2.4154589371980675e-08, |
| "loss": 0.4527, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.9944506104328523, |
| "grad_norm": 4.406801594675205, |
| "learning_rate": 1.221001221001221e-08, |
| "loss": 0.4363, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.9966703662597114, |
| "grad_norm": 1.2228266856157815, |
| "learning_rate": 0, |
| "loss": 0.4676, |
| "step": 900 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 900, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 225, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.993206625754153e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |