| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0011111111111111111, | |
| "grad_norm": 5.2032976150512695, | |
| "learning_rate": 4.999984769144476e-05, | |
| "loss": 5.3058, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0022222222222222222, | |
| "grad_norm": 6.879838466644287, | |
| "learning_rate": 4.999939076763487e-05, | |
| "loss": 5.0848, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0033333333333333335, | |
| "grad_norm": 6.035982131958008, | |
| "learning_rate": 4.999862923413781e-05, | |
| "loss": 5.5976, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0044444444444444444, | |
| "grad_norm": 7.264829635620117, | |
| "learning_rate": 4.999756310023261e-05, | |
| "loss": 5.007, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.005555555555555556, | |
| "grad_norm": 4.736705303192139, | |
| "learning_rate": 4.9996192378909786e-05, | |
| "loss": 4.6123, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.006666666666666667, | |
| "grad_norm": 6.610605239868164, | |
| "learning_rate": 4.999451708687114e-05, | |
| "loss": 4.7884, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0077777777777777776, | |
| "grad_norm": 6.082452774047852, | |
| "learning_rate": 4.999253724452958e-05, | |
| "loss": 4.913, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.008888888888888889, | |
| "grad_norm": 4.39306116104126, | |
| "learning_rate": 4.999025287600886e-05, | |
| "loss": 5.2733, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 4.614330291748047, | |
| "learning_rate": 4.998766400914329e-05, | |
| "loss": 4.6074, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.011111111111111112, | |
| "grad_norm": 5.944769859313965, | |
| "learning_rate": 4.99847706754774e-05, | |
| "loss": 4.9337, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.012222222222222223, | |
| "grad_norm": 7.276998519897461, | |
| "learning_rate": 4.998157291026553e-05, | |
| "loss": 5.0143, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.013333333333333334, | |
| "grad_norm": 5.569228172302246, | |
| "learning_rate": 4.997807075247146e-05, | |
| "loss": 5.1253, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.014444444444444444, | |
| "grad_norm": 5.123626232147217, | |
| "learning_rate": 4.997426424476787e-05, | |
| "loss": 4.4759, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.015555555555555555, | |
| "grad_norm": 4.314916133880615, | |
| "learning_rate": 4.997015343353585e-05, | |
| "loss": 5.193, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.016666666666666666, | |
| "grad_norm": 4.61911153793335, | |
| "learning_rate": 4.996573836886435e-05, | |
| "loss": 4.3899, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.017777777777777778, | |
| "grad_norm": 6.143403053283691, | |
| "learning_rate": 4.996101910454953e-05, | |
| "loss": 4.2176, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.01888888888888889, | |
| "grad_norm": 6.195430278778076, | |
| "learning_rate": 4.995599569809414e-05, | |
| "loss": 4.1796, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 5.779390335083008, | |
| "learning_rate": 4.995066821070679e-05, | |
| "loss": 5.0214, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.021111111111111112, | |
| "grad_norm": 5.847035884857178, | |
| "learning_rate": 4.994503670730125e-05, | |
| "loss": 4.5121, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.022222222222222223, | |
| "grad_norm": 5.528200626373291, | |
| "learning_rate": 4.993910125649561e-05, | |
| "loss": 4.2415, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.023333333333333334, | |
| "grad_norm": 5.237406253814697, | |
| "learning_rate": 4.9932861930611454e-05, | |
| "loss": 5.0282, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.024444444444444446, | |
| "grad_norm": 5.065497875213623, | |
| "learning_rate": 4.992631880567301e-05, | |
| "loss": 4.525, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.025555555555555557, | |
| "grad_norm": 5.5612688064575195, | |
| "learning_rate": 4.991947196140618e-05, | |
| "loss": 4.9982, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.02666666666666667, | |
| "grad_norm": 5.090909481048584, | |
| "learning_rate": 4.991232148123761e-05, | |
| "loss": 4.5534, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.027777777777777776, | |
| "grad_norm": 5.165072441101074, | |
| "learning_rate": 4.990486745229364e-05, | |
| "loss": 4.6862, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.028888888888888888, | |
| "grad_norm": 4.630911827087402, | |
| "learning_rate": 4.989710996539926e-05, | |
| "loss": 4.8492, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 3.68540358543396, | |
| "learning_rate": 4.9889049115077005e-05, | |
| "loss": 5.1254, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.03111111111111111, | |
| "grad_norm": 5.599917888641357, | |
| "learning_rate": 4.988068499954578e-05, | |
| "loss": 4.9527, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03222222222222222, | |
| "grad_norm": 5.534451007843018, | |
| "learning_rate": 4.987201772071971e-05, | |
| "loss": 4.912, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.03333333333333333, | |
| "grad_norm": 4.299800395965576, | |
| "learning_rate": 4.9863047384206835e-05, | |
| "loss": 5.1243, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.034444444444444444, | |
| "grad_norm": 3.687239646911621, | |
| "learning_rate": 4.985377409930789e-05, | |
| "loss": 4.5257, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.035555555555555556, | |
| "grad_norm": 5.489537239074707, | |
| "learning_rate": 4.984419797901491e-05, | |
| "loss": 4.9116, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03666666666666667, | |
| "grad_norm": 4.619030475616455, | |
| "learning_rate": 4.983431914000991e-05, | |
| "loss": 4.718, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.03777777777777778, | |
| "grad_norm": 5.1001200675964355, | |
| "learning_rate": 4.982413770266342e-05, | |
| "loss": 5.0285, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.03888888888888889, | |
| "grad_norm": 4.231574058532715, | |
| "learning_rate": 4.9813653791033057e-05, | |
| "loss": 4.7938, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 3.560554027557373, | |
| "learning_rate": 4.980286753286195e-05, | |
| "loss": 4.962, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.04111111111111111, | |
| "grad_norm": 3.8664653301239014, | |
| "learning_rate": 4.979177905957726e-05, | |
| "loss": 4.9856, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.042222222222222223, | |
| "grad_norm": 4.1073784828186035, | |
| "learning_rate": 4.978038850628854e-05, | |
| "loss": 5.2019, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.043333333333333335, | |
| "grad_norm": 4.941130638122559, | |
| "learning_rate": 4.976869601178609e-05, | |
| "loss": 4.6499, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.044444444444444446, | |
| "grad_norm": 5.749270915985107, | |
| "learning_rate": 4.975670171853926e-05, | |
| "loss": 4.1511, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04555555555555556, | |
| "grad_norm": 3.7464685440063477, | |
| "learning_rate": 4.9744405772694725e-05, | |
| "loss": 4.9937, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.04666666666666667, | |
| "grad_norm": 4.391846656799316, | |
| "learning_rate": 4.9731808324074717e-05, | |
| "loss": 4.9573, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.04777777777777778, | |
| "grad_norm": 4.163111209869385, | |
| "learning_rate": 4.971890952617515e-05, | |
| "loss": 4.8546, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.04888888888888889, | |
| "grad_norm": 3.859717607498169, | |
| "learning_rate": 4.9705709536163824e-05, | |
| "loss": 4.8448, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 4.045307636260986, | |
| "learning_rate": 4.9692208514878444e-05, | |
| "loss": 4.8979, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.051111111111111114, | |
| "grad_norm": 3.083608627319336, | |
| "learning_rate": 4.96784066268247e-05, | |
| "loss": 4.6191, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.052222222222222225, | |
| "grad_norm": 3.6996843814849854, | |
| "learning_rate": 4.966430404017424e-05, | |
| "loss": 4.1142, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.05333333333333334, | |
| "grad_norm": 5.001142501831055, | |
| "learning_rate": 4.964990092676263e-05, | |
| "loss": 4.673, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05444444444444444, | |
| "grad_norm": 4.148028373718262, | |
| "learning_rate": 4.963519746208726e-05, | |
| "loss": 4.4178, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": 3.529871940612793, | |
| "learning_rate": 4.962019382530521e-05, | |
| "loss": 5.134, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.056666666666666664, | |
| "grad_norm": 3.791576385498047, | |
| "learning_rate": 4.960489019923105e-05, | |
| "loss": 4.5824, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.057777777777777775, | |
| "grad_norm": 3.236461877822876, | |
| "learning_rate": 4.9589286770334654e-05, | |
| "loss": 4.9126, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.058888888888888886, | |
| "grad_norm": 3.4092698097229004, | |
| "learning_rate": 4.957338372873886e-05, | |
| "loss": 4.9913, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 4.24392557144165, | |
| "learning_rate": 4.9557181268217227e-05, | |
| "loss": 4.3492, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06111111111111111, | |
| "grad_norm": 3.5253679752349854, | |
| "learning_rate": 4.9540679586191605e-05, | |
| "loss": 4.5665, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06222222222222222, | |
| "grad_norm": 4.3137688636779785, | |
| "learning_rate": 4.952387888372979e-05, | |
| "loss": 4.3782, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06333333333333334, | |
| "grad_norm": 3.4922027587890625, | |
| "learning_rate": 4.9506779365543046e-05, | |
| "loss": 4.4069, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.06444444444444444, | |
| "grad_norm": 3.7192225456237793, | |
| "learning_rate": 4.94893812399836e-05, | |
| "loss": 4.6152, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06555555555555556, | |
| "grad_norm": 3.398974895477295, | |
| "learning_rate": 4.947168471904213e-05, | |
| "loss": 4.8951, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.06666666666666667, | |
| "grad_norm": 2.9628076553344727, | |
| "learning_rate": 4.9453690018345144e-05, | |
| "loss": 4.5419, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06777777777777778, | |
| "grad_norm": 2.703320026397705, | |
| "learning_rate": 4.94353973571524e-05, | |
| "loss": 4.9154, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.06888888888888889, | |
| "grad_norm": 2.9003796577453613, | |
| "learning_rate": 4.94168069583542e-05, | |
| "loss": 4.8565, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 2.6896684169769287, | |
| "learning_rate": 4.939791904846869e-05, | |
| "loss": 4.6401, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.07111111111111111, | |
| "grad_norm": 3.679429292678833, | |
| "learning_rate": 4.937873385763908e-05, | |
| "loss": 4.5216, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.07222222222222222, | |
| "grad_norm": 3.837848424911499, | |
| "learning_rate": 4.9359251619630886e-05, | |
| "loss": 4.7913, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.07333333333333333, | |
| "grad_norm": 4.7550368309021, | |
| "learning_rate": 4.933947257182901e-05, | |
| "loss": 4.6804, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.07444444444444444, | |
| "grad_norm": 3.387397289276123, | |
| "learning_rate": 4.931939695523492e-05, | |
| "loss": 5.1575, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.07555555555555556, | |
| "grad_norm": 2.715179204940796, | |
| "learning_rate": 4.929902501446366e-05, | |
| "loss": 4.8116, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07666666666666666, | |
| "grad_norm": 3.598045587539673, | |
| "learning_rate": 4.9278356997740904e-05, | |
| "loss": 4.8033, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.07777777777777778, | |
| "grad_norm": 3.2445831298828125, | |
| "learning_rate": 4.925739315689991e-05, | |
| "loss": 5.0033, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07888888888888888, | |
| "grad_norm": 3.411445379257202, | |
| "learning_rate": 4.9236133747378475e-05, | |
| "loss": 4.7147, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 7.331087589263916, | |
| "learning_rate": 4.9214579028215776e-05, | |
| "loss": 4.3199, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.0811111111111111, | |
| "grad_norm": 5.33408784866333, | |
| "learning_rate": 4.919272926204929e-05, | |
| "loss": 4.882, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.08222222222222222, | |
| "grad_norm": 2.8994922637939453, | |
| "learning_rate": 4.917058471511149e-05, | |
| "loss": 4.678, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.08333333333333333, | |
| "grad_norm": 2.394202709197998, | |
| "learning_rate": 4.914814565722671e-05, | |
| "loss": 4.7618, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.08444444444444445, | |
| "grad_norm": 3.3278257846832275, | |
| "learning_rate": 4.912541236180779e-05, | |
| "loss": 4.5066, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.08555555555555555, | |
| "grad_norm": 5.1034836769104, | |
| "learning_rate": 4.910238510585276e-05, | |
| "loss": 4.9339, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.08666666666666667, | |
| "grad_norm": 3.776923179626465, | |
| "learning_rate": 4.907906416994146e-05, | |
| "loss": 4.264, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.08777777777777777, | |
| "grad_norm": 3.5819032192230225, | |
| "learning_rate": 4.905544983823214e-05, | |
| "loss": 4.6317, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.08888888888888889, | |
| "grad_norm": 4.019664764404297, | |
| "learning_rate": 4.9031542398457974e-05, | |
| "loss": 4.2868, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 3.2063353061676025, | |
| "learning_rate": 4.900734214192358e-05, | |
| "loss": 4.7617, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.09111111111111111, | |
| "grad_norm": 3.4615073204040527, | |
| "learning_rate": 4.898284936350144e-05, | |
| "loss": 4.6781, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.09222222222222222, | |
| "grad_norm": 3.8503334522247314, | |
| "learning_rate": 4.895806436162833e-05, | |
| "loss": 5.0211, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.09333333333333334, | |
| "grad_norm": 3.9291231632232666, | |
| "learning_rate": 4.893298743830168e-05, | |
| "loss": 4.8865, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.09444444444444444, | |
| "grad_norm": 3.537541389465332, | |
| "learning_rate": 4.890761889907589e-05, | |
| "loss": 4.5888, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.09555555555555556, | |
| "grad_norm": 2.426281690597534, | |
| "learning_rate": 4.888195905305859e-05, | |
| "loss": 4.3387, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.09666666666666666, | |
| "grad_norm": 3.3084747791290283, | |
| "learning_rate": 4.8856008212906925e-05, | |
| "loss": 4.9159, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.09777777777777778, | |
| "grad_norm": 4.331256866455078, | |
| "learning_rate": 4.882976669482367e-05, | |
| "loss": 4.7531, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.09888888888888889, | |
| "grad_norm": 3.6446895599365234, | |
| "learning_rate": 4.880323481855347e-05, | |
| "loss": 4.2317, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 4.512236595153809, | |
| "learning_rate": 4.877641290737884e-05, | |
| "loss": 4.5889, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.10111111111111111, | |
| "grad_norm": 4.778031349182129, | |
| "learning_rate": 4.874930128811631e-05, | |
| "loss": 4.7279, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.10222222222222223, | |
| "grad_norm": 2.602832794189453, | |
| "learning_rate": 4.8721900291112415e-05, | |
| "loss": 4.9481, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.10333333333333333, | |
| "grad_norm": 2.8278868198394775, | |
| "learning_rate": 4.869421025023965e-05, | |
| "loss": 4.5763, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.10444444444444445, | |
| "grad_norm": 3.5263729095458984, | |
| "learning_rate": 4.8666231502892415e-05, | |
| "loss": 4.3702, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.10555555555555556, | |
| "grad_norm": 3.6424851417541504, | |
| "learning_rate": 4.8637964389982926e-05, | |
| "loss": 4.0502, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.10666666666666667, | |
| "grad_norm": 3.5338454246520996, | |
| "learning_rate": 4.860940925593703e-05, | |
| "loss": 4.7823, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.10777777777777778, | |
| "grad_norm": 3.6265504360198975, | |
| "learning_rate": 4.858056644869002e-05, | |
| "loss": 4.7303, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.10888888888888888, | |
| "grad_norm": 2.4503519535064697, | |
| "learning_rate": 4.855143631968242e-05, | |
| "loss": 4.4291, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 3.9208950996398926, | |
| "learning_rate": 4.852201922385564e-05, | |
| "loss": 4.876, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 2.9791765213012695, | |
| "learning_rate": 4.849231551964771e-05, | |
| "loss": 4.8165, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11222222222222222, | |
| "grad_norm": 3.589217185974121, | |
| "learning_rate": 4.84623255689889e-05, | |
| "loss": 4.7452, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.11333333333333333, | |
| "grad_norm": 3.037071943283081, | |
| "learning_rate": 4.843204973729729e-05, | |
| "loss": 4.7103, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.11444444444444445, | |
| "grad_norm": 2.5937793254852295, | |
| "learning_rate": 4.840148839347434e-05, | |
| "loss": 4.4314, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.11555555555555555, | |
| "grad_norm": 2.879254102706909, | |
| "learning_rate": 4.837064190990036e-05, | |
| "loss": 4.4885, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.11666666666666667, | |
| "grad_norm": 3.404500722885132, | |
| "learning_rate": 4.8339510662430046e-05, | |
| "loss": 4.4227, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.11777777777777777, | |
| "grad_norm": 3.4791483879089355, | |
| "learning_rate": 4.830809503038781e-05, | |
| "loss": 4.8543, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.11888888888888889, | |
| "grad_norm": 3.072810649871826, | |
| "learning_rate": 4.827639539656321e-05, | |
| "loss": 4.7271, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 3.365445375442505, | |
| "learning_rate": 4.8244412147206284e-05, | |
| "loss": 4.8491, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.12111111111111111, | |
| "grad_norm": 3.6025092601776123, | |
| "learning_rate": 4.8212145672022844e-05, | |
| "loss": 4.7209, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.12222222222222222, | |
| "grad_norm": 4.458660125732422, | |
| "learning_rate": 4.817959636416969e-05, | |
| "loss": 4.1645, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12333333333333334, | |
| "grad_norm": 3.9988503456115723, | |
| "learning_rate": 4.814676462024988e-05, | |
| "loss": 4.6813, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.12444444444444444, | |
| "grad_norm": 2.8452532291412354, | |
| "learning_rate": 4.8113650840307834e-05, | |
| "loss": 5.0675, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.12555555555555556, | |
| "grad_norm": 2.9468061923980713, | |
| "learning_rate": 4.808025542782453e-05, | |
| "loss": 4.8562, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.12666666666666668, | |
| "grad_norm": 3.0511226654052734, | |
| "learning_rate": 4.8046578789712515e-05, | |
| "loss": 4.5727, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.12777777777777777, | |
| "grad_norm": 2.0922510623931885, | |
| "learning_rate": 4.8012621336311016e-05, | |
| "loss": 4.7914, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1288888888888889, | |
| "grad_norm": 2.8942031860351562, | |
| "learning_rate": 4.797838348138086e-05, | |
| "loss": 4.6763, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 3.84708571434021, | |
| "learning_rate": 4.794386564209953e-05, | |
| "loss": 4.2561, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.13111111111111112, | |
| "grad_norm": 2.471663236618042, | |
| "learning_rate": 4.790906823905599e-05, | |
| "loss": 4.4677, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1322222222222222, | |
| "grad_norm": 2.5082037448883057, | |
| "learning_rate": 4.7873991696245624e-05, | |
| "loss": 4.56, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 2.900052309036255, | |
| "learning_rate": 4.783863644106502e-05, | |
| "loss": 4.8909, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13444444444444445, | |
| "grad_norm": 3.5951879024505615, | |
| "learning_rate": 4.780300290430682e-05, | |
| "loss": 4.6476, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.13555555555555557, | |
| "grad_norm": 4.468568325042725, | |
| "learning_rate": 4.776709152015443e-05, | |
| "loss": 4.3256, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.13666666666666666, | |
| "grad_norm": 3.0081839561462402, | |
| "learning_rate": 4.773090272617672e-05, | |
| "loss": 4.7223, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.13777777777777778, | |
| "grad_norm": 3.8555331230163574, | |
| "learning_rate": 4.769443696332272e-05, | |
| "loss": 4.4773, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.1388888888888889, | |
| "grad_norm": 3.729095697402954, | |
| "learning_rate": 4.765769467591625e-05, | |
| "loss": 4.7924, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 2.2823543548583984, | |
| "learning_rate": 4.762067631165049e-05, | |
| "loss": 4.5892, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1411111111111111, | |
| "grad_norm": 3.335906982421875, | |
| "learning_rate": 4.758338232158252e-05, | |
| "loss": 4.8221, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.14222222222222222, | |
| "grad_norm": 5.226222038269043, | |
| "learning_rate": 4.754581316012785e-05, | |
| "loss": 4.5129, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.14333333333333334, | |
| "grad_norm": 3.1001462936401367, | |
| "learning_rate": 4.7507969285054845e-05, | |
| "loss": 4.4719, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.14444444444444443, | |
| "grad_norm": 3.3555104732513428, | |
| "learning_rate": 4.7469851157479177e-05, | |
| "loss": 4.5403, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.14555555555555555, | |
| "grad_norm": 2.935755968093872, | |
| "learning_rate": 4.743145924185821e-05, | |
| "loss": 4.8928, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.14666666666666667, | |
| "grad_norm": 2.488250970840454, | |
| "learning_rate": 4.7392794005985326e-05, | |
| "loss": 4.2008, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.14777777777777779, | |
| "grad_norm": 3.4012887477874756, | |
| "learning_rate": 4.73538559209842e-05, | |
| "loss": 4.6079, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.14888888888888888, | |
| "grad_norm": 2.7918901443481445, | |
| "learning_rate": 4.731464546130314e-05, | |
| "loss": 4.7116, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 3.9989566802978516, | |
| "learning_rate": 4.72751631047092e-05, | |
| "loss": 4.3616, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.1511111111111111, | |
| "grad_norm": 3.592566967010498, | |
| "learning_rate": 4.723540933228244e-05, | |
| "loss": 4.7092, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.15222222222222223, | |
| "grad_norm": 2.825819730758667, | |
| "learning_rate": 4.719538462841003e-05, | |
| "loss": 4.8076, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.15333333333333332, | |
| "grad_norm": 3.5768320560455322, | |
| "learning_rate": 4.715508948078037e-05, | |
| "loss": 4.2689, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.15444444444444444, | |
| "grad_norm": 2.7928998470306396, | |
| "learning_rate": 4.71145243803771e-05, | |
| "loss": 4.5123, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.15555555555555556, | |
| "grad_norm": 3.065845251083374, | |
| "learning_rate": 4.707368982147318e-05, | |
| "loss": 4.5658, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15666666666666668, | |
| "grad_norm": 3.1111562252044678, | |
| "learning_rate": 4.70325863016248e-05, | |
| "loss": 4.6722, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.15777777777777777, | |
| "grad_norm": 3.132770299911499, | |
| "learning_rate": 4.6991214321665414e-05, | |
| "loss": 4.3566, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.15888888888888889, | |
| "grad_norm": 3.0841097831726074, | |
| "learning_rate": 4.694957438569951e-05, | |
| "loss": 4.9723, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 4.105175018310547, | |
| "learning_rate": 4.690766700109659e-05, | |
| "loss": 4.4099, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.16111111111111112, | |
| "grad_norm": 4.112144470214844, | |
| "learning_rate": 4.6865492678484895e-05, | |
| "loss": 4.2418, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.1622222222222222, | |
| "grad_norm": 2.671475648880005, | |
| "learning_rate": 4.682305193174524e-05, | |
| "loss": 4.823, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.16333333333333333, | |
| "grad_norm": 3.42596697807312, | |
| "learning_rate": 4.678034527800474e-05, | |
| "loss": 4.6529, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.16444444444444445, | |
| "grad_norm": 3.2327771186828613, | |
| "learning_rate": 4.6737373237630476e-05, | |
| "loss": 4.6662, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.16555555555555557, | |
| "grad_norm": 3.2889630794525146, | |
| "learning_rate": 4.669413633422322e-05, | |
| "loss": 4.3048, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 2.366293430328369, | |
| "learning_rate": 4.665063509461097e-05, | |
| "loss": 4.7887, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.16777777777777778, | |
| "grad_norm": 2.6844308376312256, | |
| "learning_rate": 4.6606870048842624e-05, | |
| "loss": 4.954, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.1688888888888889, | |
| "grad_norm": 3.2190423011779785, | |
| "learning_rate": 4.656284173018144e-05, | |
| "loss": 5.189, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 3.640512466430664, | |
| "learning_rate": 4.65185506750986e-05, | |
| "loss": 4.4657, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.1711111111111111, | |
| "grad_norm": 2.7704906463623047, | |
| "learning_rate": 4.6473997423266614e-05, | |
| "loss": 4.6634, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.17222222222222222, | |
| "grad_norm": 2.7830865383148193, | |
| "learning_rate": 4.642918251755281e-05, | |
| "loss": 4.5943, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.17333333333333334, | |
| "grad_norm": 2.327153444290161, | |
| "learning_rate": 4.638410650401267e-05, | |
| "loss": 4.8456, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.17444444444444446, | |
| "grad_norm": 3.3280811309814453, | |
| "learning_rate": 4.6338769931883185e-05, | |
| "loss": 4.6068, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.17555555555555555, | |
| "grad_norm": 3.1970295906066895, | |
| "learning_rate": 4.629317335357619e-05, | |
| "loss": 4.6854, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.17666666666666667, | |
| "grad_norm": 2.481355667114258, | |
| "learning_rate": 4.6247317324671605e-05, | |
| "loss": 4.5796, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.17777777777777778, | |
| "grad_norm": 2.445061683654785, | |
| "learning_rate": 4.620120240391065e-05, | |
| "loss": 4.3907, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17888888888888888, | |
| "grad_norm": 3.381376028060913, | |
| "learning_rate": 4.615482915318911e-05, | |
| "loss": 4.6822, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 2.4204893112182617, | |
| "learning_rate": 4.610819813755038e-05, | |
| "loss": 4.436, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.1811111111111111, | |
| "grad_norm": 2.725168228149414, | |
| "learning_rate": 4.606130992517869e-05, | |
| "loss": 5.0643, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.18222222222222223, | |
| "grad_norm": 3.7455644607543945, | |
| "learning_rate": 4.601416508739211e-05, | |
| "loss": 4.7741, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.18333333333333332, | |
| "grad_norm": 2.5998661518096924, | |
| "learning_rate": 4.5966764198635606e-05, | |
| "loss": 4.8321, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.18444444444444444, | |
| "grad_norm": 4.380634784698486, | |
| "learning_rate": 4.591910783647404e-05, | |
| "loss": 4.6678, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.18555555555555556, | |
| "grad_norm": 2.3288722038269043, | |
| "learning_rate": 4.5871196581585166e-05, | |
| "loss": 4.8369, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.18666666666666668, | |
| "grad_norm": 2.959716320037842, | |
| "learning_rate": 4.5823031017752485e-05, | |
| "loss": 4.0906, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.18777777777777777, | |
| "grad_norm": 2.6955947875976562, | |
| "learning_rate": 4.577461173185821e-05, | |
| "loss": 4.6623, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.18888888888888888, | |
| "grad_norm": 4.677174091339111, | |
| "learning_rate": 4.572593931387604e-05, | |
| "loss": 4.9871, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 2.5706987380981445, | |
| "learning_rate": 4.567701435686404e-05, | |
| "loss": 4.8683, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.19111111111111112, | |
| "grad_norm": 3.341663122177124, | |
| "learning_rate": 4.562783745695738e-05, | |
| "loss": 4.6751, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.1922222222222222, | |
| "grad_norm": 2.941930055618286, | |
| "learning_rate": 4.557840921336105e-05, | |
| "loss": 4.8538, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.19333333333333333, | |
| "grad_norm": 2.8567423820495605, | |
| "learning_rate": 4.5528730228342605e-05, | |
| "loss": 4.5703, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.19444444444444445, | |
| "grad_norm": 2.6613831520080566, | |
| "learning_rate": 4.54788011072248e-05, | |
| "loss": 4.4107, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.19555555555555557, | |
| "grad_norm": 2.5689127445220947, | |
| "learning_rate": 4.542862245837821e-05, | |
| "loss": 4.7408, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.19666666666666666, | |
| "grad_norm": 3.576414108276367, | |
| "learning_rate": 4.537819489321386e-05, | |
| "loss": 4.5211, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.19777777777777777, | |
| "grad_norm": 3.1265640258789062, | |
| "learning_rate": 4.532751902617569e-05, | |
| "loss": 4.0729, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.1988888888888889, | |
| "grad_norm": 3.3458447456359863, | |
| "learning_rate": 4.527659547473317e-05, | |
| "loss": 5.1058, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 4.459259033203125, | |
| "learning_rate": 4.522542485937369e-05, | |
| "loss": 4.3809, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2011111111111111, | |
| "grad_norm": 2.7210464477539062, | |
| "learning_rate": 4.5174007803595055e-05, | |
| "loss": 4.5236, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.20222222222222222, | |
| "grad_norm": 3.285710334777832, | |
| "learning_rate": 4.512234493389785e-05, | |
| "loss": 4.6732, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.20333333333333334, | |
| "grad_norm": 3.063709020614624, | |
| "learning_rate": 4.5070436879777865e-05, | |
| "loss": 4.2399, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.20444444444444446, | |
| "grad_norm": 2.4527218341827393, | |
| "learning_rate": 4.5018284273718336e-05, | |
| "loss": 4.8007, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.20555555555555555, | |
| "grad_norm": 3.8102920055389404, | |
| "learning_rate": 4.496588775118232e-05, | |
| "loss": 4.8862, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.20666666666666667, | |
| "grad_norm": 4.2287139892578125, | |
| "learning_rate": 4.491324795060491e-05, | |
| "loss": 4.3253, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.20777777777777778, | |
| "grad_norm": 3.0381033420562744, | |
| "learning_rate": 4.4860365513385456e-05, | |
| "loss": 4.5634, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.2088888888888889, | |
| "grad_norm": 3.7136149406433105, | |
| "learning_rate": 4.480724108387977e-05, | |
| "loss": 5.318, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 2.9353251457214355, | |
| "learning_rate": 4.4753875309392266e-05, | |
| "loss": 4.6681, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.2111111111111111, | |
| "grad_norm": 2.4687249660491943, | |
| "learning_rate": 4.4700268840168045e-05, | |
| "loss": 4.7589, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.21222222222222223, | |
| "grad_norm": 2.1315102577209473, | |
| "learning_rate": 4.464642232938505e-05, | |
| "loss": 4.737, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 3.893827438354492, | |
| "learning_rate": 4.4592336433146e-05, | |
| "loss": 4.1764, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.21444444444444444, | |
| "grad_norm": 2.8332619667053223, | |
| "learning_rate": 4.453801181047047e-05, | |
| "loss": 4.7298, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.21555555555555556, | |
| "grad_norm": 2.2660417556762695, | |
| "learning_rate": 4.448344912328686e-05, | |
| "loss": 4.6808, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.21666666666666667, | |
| "grad_norm": 3.361409902572632, | |
| "learning_rate": 4.442864903642428e-05, | |
| "loss": 4.6071, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.21777777777777776, | |
| "grad_norm": 2.6645731925964355, | |
| "learning_rate": 4.4373612217604496e-05, | |
| "loss": 4.552, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.21888888888888888, | |
| "grad_norm": 3.0278093814849854, | |
| "learning_rate": 4.431833933743378e-05, | |
| "loss": 4.9144, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 3.6761045455932617, | |
| "learning_rate": 4.426283106939474e-05, | |
| "loss": 4.6239, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.22111111111111112, | |
| "grad_norm": 3.8143444061279297, | |
| "learning_rate": 4.420708808983809e-05, | |
| "loss": 4.5675, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 3.473196506500244, | |
| "learning_rate": 4.415111107797445e-05, | |
| "loss": 4.6748, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.22333333333333333, | |
| "grad_norm": 3.977616310119629, | |
| "learning_rate": 4.4094900715866064e-05, | |
| "loss": 4.3232, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.22444444444444445, | |
| "grad_norm": 3.6012306213378906, | |
| "learning_rate": 4.403845768841842e-05, | |
| "loss": 4.975, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.22555555555555556, | |
| "grad_norm": 3.368455171585083, | |
| "learning_rate": 4.3981782683372016e-05, | |
| "loss": 4.5316, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.22666666666666666, | |
| "grad_norm": 4.002109050750732, | |
| "learning_rate": 4.3924876391293915e-05, | |
| "loss": 4.2947, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.22777777777777777, | |
| "grad_norm": 2.9084315299987793, | |
| "learning_rate": 4.386773950556931e-05, | |
| "loss": 4.4741, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.2288888888888889, | |
| "grad_norm": 2.937263011932373, | |
| "learning_rate": 4.381037272239311e-05, | |
| "loss": 4.1499, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 2.508908987045288, | |
| "learning_rate": 4.375277674076149e-05, | |
| "loss": 4.856, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.2311111111111111, | |
| "grad_norm": 3.1218552589416504, | |
| "learning_rate": 4.36949522624633e-05, | |
| "loss": 4.7484, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.23222222222222222, | |
| "grad_norm": 3.868100881576538, | |
| "learning_rate": 4.363689999207156e-05, | |
| "loss": 4.4354, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.23333333333333334, | |
| "grad_norm": 2.4734623432159424, | |
| "learning_rate": 4.357862063693486e-05, | |
| "loss": 4.978, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.23444444444444446, | |
| "grad_norm": 3.2189548015594482, | |
| "learning_rate": 4.352011490716875e-05, | |
| "loss": 4.9206, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.23555555555555555, | |
| "grad_norm": 3.757636308670044, | |
| "learning_rate": 4.3461383515647106e-05, | |
| "loss": 4.5211, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.23666666666666666, | |
| "grad_norm": 4.024762153625488, | |
| "learning_rate": 4.3402427177993366e-05, | |
| "loss": 4.5448, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.23777777777777778, | |
| "grad_norm": 3.536659002304077, | |
| "learning_rate": 4.334324661257191e-05, | |
| "loss": 5.0555, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.2388888888888889, | |
| "grad_norm": 2.2506678104400635, | |
| "learning_rate": 4.3283842540479264e-05, | |
| "loss": 4.8103, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 2.8261499404907227, | |
| "learning_rate": 4.3224215685535294e-05, | |
| "loss": 4.449, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.2411111111111111, | |
| "grad_norm": 3.2074854373931885, | |
| "learning_rate": 4.31643667742744e-05, | |
| "loss": 4.3977, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.24222222222222223, | |
| "grad_norm": 2.743082284927368, | |
| "learning_rate": 4.3104296535936695e-05, | |
| "loss": 4.2452, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.24333333333333335, | |
| "grad_norm": 2.7638344764709473, | |
| "learning_rate": 4.304400570245906e-05, | |
| "loss": 4.6636, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.24444444444444444, | |
| "grad_norm": 3.1931586265563965, | |
| "learning_rate": 4.2983495008466276e-05, | |
| "loss": 4.3444, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.24555555555555555, | |
| "grad_norm": 3.946772575378418, | |
| "learning_rate": 4.292276519126207e-05, | |
| "loss": 4.4841, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.24666666666666667, | |
| "grad_norm": 2.5195651054382324, | |
| "learning_rate": 4.2861816990820084e-05, | |
| "loss": 4.7453, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.2477777777777778, | |
| "grad_norm": 2.1805219650268555, | |
| "learning_rate": 4.280065114977492e-05, | |
| "loss": 4.7288, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.24888888888888888, | |
| "grad_norm": 2.361443519592285, | |
| "learning_rate": 4.273926841341302e-05, | |
| "loss": 4.9004, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 3.45947265625, | |
| "learning_rate": 4.267766952966369e-05, | |
| "loss": 4.3567, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.2511111111111111, | |
| "grad_norm": 3.4783213138580322, | |
| "learning_rate": 4.261585524908987e-05, | |
| "loss": 4.9095, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.25222222222222224, | |
| "grad_norm": 2.565812110900879, | |
| "learning_rate": 4.2553826324879064e-05, | |
| "loss": 4.5359, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.25333333333333335, | |
| "grad_norm": 3.349132776260376, | |
| "learning_rate": 4.249158351283414e-05, | |
| "loss": 4.3791, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.2544444444444444, | |
| "grad_norm": 2.278238534927368, | |
| "learning_rate": 4.242912757136412e-05, | |
| "loss": 4.5528, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.25555555555555554, | |
| "grad_norm": 2.851348400115967, | |
| "learning_rate": 4.2366459261474933e-05, | |
| "loss": 4.1264, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.25666666666666665, | |
| "grad_norm": 2.4230828285217285, | |
| "learning_rate": 4.230357934676017e-05, | |
| "loss": 4.8431, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.2577777777777778, | |
| "grad_norm": 3.563849687576294, | |
| "learning_rate": 4.224048859339175e-05, | |
| "loss": 4.2379, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2588888888888889, | |
| "grad_norm": 3.419377088546753, | |
| "learning_rate": 4.2177187770110576e-05, | |
| "loss": 4.5906, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 3.992064952850342, | |
| "learning_rate": 4.211367764821722e-05, | |
| "loss": 4.1902, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.2611111111111111, | |
| "grad_norm": 2.2617876529693604, | |
| "learning_rate": 4.2049959001562464e-05, | |
| "loss": 4.6505, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.26222222222222225, | |
| "grad_norm": 2.8081510066986084, | |
| "learning_rate": 4.198603260653792e-05, | |
| "loss": 4.9376, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2633333333333333, | |
| "grad_norm": 3.759847402572632, | |
| "learning_rate": 4.192189924206652e-05, | |
| "loss": 4.4958, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.2644444444444444, | |
| "grad_norm": 3.2556324005126953, | |
| "learning_rate": 4.185755968959308e-05, | |
| "loss": 4.3312, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.26555555555555554, | |
| "grad_norm": 2.438190221786499, | |
| "learning_rate": 4.179301473307476e-05, | |
| "loss": 4.8255, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 3.00315523147583, | |
| "learning_rate": 4.172826515897146e-05, | |
| "loss": 4.788, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2677777777777778, | |
| "grad_norm": 3.5244944095611572, | |
| "learning_rate": 4.166331175623631e-05, | |
| "loss": 4.5552, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2688888888888889, | |
| "grad_norm": 2.436034917831421, | |
| "learning_rate": 4.1598155316306044e-05, | |
| "loss": 4.3463, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 3.473583698272705, | |
| "learning_rate": 4.1532796633091296e-05, | |
| "loss": 4.6629, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.27111111111111114, | |
| "grad_norm": 2.3865156173706055, | |
| "learning_rate": 4.146723650296701e-05, | |
| "loss": 4.5326, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.2722222222222222, | |
| "grad_norm": 3.0051960945129395, | |
| "learning_rate": 4.140147572476268e-05, | |
| "loss": 4.6408, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.2733333333333333, | |
| "grad_norm": 2.631802797317505, | |
| "learning_rate": 4.133551509975264e-05, | |
| "loss": 4.6096, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.27444444444444444, | |
| "grad_norm": 3.8499889373779297, | |
| "learning_rate": 4.1269355431646274e-05, | |
| "loss": 4.3807, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.27555555555555555, | |
| "grad_norm": 4.838550090789795, | |
| "learning_rate": 4.1202997526578276e-05, | |
| "loss": 4.1733, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.27666666666666667, | |
| "grad_norm": 3.210563898086548, | |
| "learning_rate": 4.113644219309877e-05, | |
| "loss": 4.633, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 3.254894256591797, | |
| "learning_rate": 4.1069690242163484e-05, | |
| "loss": 4.3214, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2788888888888889, | |
| "grad_norm": 2.7834694385528564, | |
| "learning_rate": 4.100274248712389e-05, | |
| "loss": 4.3556, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 2.8591508865356445, | |
| "learning_rate": 4.093559974371725e-05, | |
| "loss": 4.4838, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2811111111111111, | |
| "grad_norm": 3.7769737243652344, | |
| "learning_rate": 4.086826283005669e-05, | |
| "loss": 5.1812, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.2822222222222222, | |
| "grad_norm": 4.0656914710998535, | |
| "learning_rate": 4.080073256662127e-05, | |
| "loss": 4.4083, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.2833333333333333, | |
| "grad_norm": 3.192784547805786, | |
| "learning_rate": 4.073300977624594e-05, | |
| "loss": 4.7642, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.28444444444444444, | |
| "grad_norm": 3.2855887413024902, | |
| "learning_rate": 4.066509528411152e-05, | |
| "loss": 4.2253, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.28555555555555556, | |
| "grad_norm": 4.624244213104248, | |
| "learning_rate": 4.059698991773466e-05, | |
| "loss": 4.4538, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.2866666666666667, | |
| "grad_norm": 3.160623073577881, | |
| "learning_rate": 4.052869450695776e-05, | |
| "loss": 4.1262, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.2877777777777778, | |
| "grad_norm": 3.2087790966033936, | |
| "learning_rate": 4.046020988393885e-05, | |
| "loss": 4.3326, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.28888888888888886, | |
| "grad_norm": 3.1688692569732666, | |
| "learning_rate": 4.039153688314145e-05, | |
| "loss": 4.7679, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 2.6120312213897705, | |
| "learning_rate": 4.0322676341324415e-05, | |
| "loss": 4.9559, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.2911111111111111, | |
| "grad_norm": 3.8062994480133057, | |
| "learning_rate": 4.02536290975317e-05, | |
| "loss": 4.3511, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.2922222222222222, | |
| "grad_norm": 3.093778610229492, | |
| "learning_rate": 4.018439599308217e-05, | |
| "loss": 4.3003, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.29333333333333333, | |
| "grad_norm": 2.905430316925049, | |
| "learning_rate": 4.011497787155938e-05, | |
| "loss": 4.4313, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.29444444444444445, | |
| "grad_norm": 3.0712385177612305, | |
| "learning_rate": 4.0045375578801214e-05, | |
| "loss": 4.4432, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.29555555555555557, | |
| "grad_norm": 3.544624090194702, | |
| "learning_rate": 3.997558996288965e-05, | |
| "loss": 4.5969, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2966666666666667, | |
| "grad_norm": 3.2956557273864746, | |
| "learning_rate": 3.99056218741404e-05, | |
| "loss": 4.5313, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.29777777777777775, | |
| "grad_norm": 2.8312222957611084, | |
| "learning_rate": 3.983547216509254e-05, | |
| "loss": 4.9315, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.29888888888888887, | |
| "grad_norm": 2.3425047397613525, | |
| "learning_rate": 3.976514169049814e-05, | |
| "loss": 4.3562, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 3.2110278606414795, | |
| "learning_rate": 3.969463130731183e-05, | |
| "loss": 4.3582, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3011111111111111, | |
| "grad_norm": 2.364408493041992, | |
| "learning_rate": 3.962394187468039e-05, | |
| "loss": 4.8083, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.3022222222222222, | |
| "grad_norm": 3.1101303100585938, | |
| "learning_rate": 3.955307425393224e-05, | |
| "loss": 4.5352, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.30333333333333334, | |
| "grad_norm": 2.396379232406616, | |
| "learning_rate": 3.948202930856697e-05, | |
| "loss": 4.3274, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.30444444444444446, | |
| "grad_norm": 2.183039426803589, | |
| "learning_rate": 3.941080790424484e-05, | |
| "loss": 4.6847, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.3055555555555556, | |
| "grad_norm": 2.731586456298828, | |
| "learning_rate": 3.933941090877615e-05, | |
| "loss": 4.4002, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.30666666666666664, | |
| "grad_norm": 2.9183268547058105, | |
| "learning_rate": 3.92678391921108e-05, | |
| "loss": 4.7244, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.30777777777777776, | |
| "grad_norm": 2.450711965560913, | |
| "learning_rate": 3.919609362632753e-05, | |
| "loss": 4.5988, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.3088888888888889, | |
| "grad_norm": 3.4320664405822754, | |
| "learning_rate": 3.912417508562345e-05, | |
| "loss": 4.4192, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 3.2206807136535645, | |
| "learning_rate": 3.905208444630327e-05, | |
| "loss": 4.3554, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.3111111111111111, | |
| "grad_norm": 3.739584445953369, | |
| "learning_rate": 3.897982258676867e-05, | |
| "loss": 4.4398, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.31222222222222223, | |
| "grad_norm": 3.239889144897461, | |
| "learning_rate": 3.8907390387507625e-05, | |
| "loss": 4.3802, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.31333333333333335, | |
| "grad_norm": 2.5824975967407227, | |
| "learning_rate": 3.883478873108361e-05, | |
| "loss": 4.7278, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.31444444444444447, | |
| "grad_norm": 2.3867881298065186, | |
| "learning_rate": 3.8762018502124894e-05, | |
| "loss": 4.5456, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.31555555555555553, | |
| "grad_norm": 3.0963456630706787, | |
| "learning_rate": 3.868908058731376e-05, | |
| "loss": 3.8366, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.31666666666666665, | |
| "grad_norm": 2.538454532623291, | |
| "learning_rate": 3.861597587537568e-05, | |
| "loss": 4.2254, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.31777777777777777, | |
| "grad_norm": 3.2098913192749023, | |
| "learning_rate": 3.85427052570685e-05, | |
| "loss": 5.1547, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3188888888888889, | |
| "grad_norm": 2.2710459232330322, | |
| "learning_rate": 3.8469269625171576e-05, | |
| "loss": 4.5172, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 3.4076178073883057, | |
| "learning_rate": 3.8395669874474915e-05, | |
| "loss": 4.2652, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3211111111111111, | |
| "grad_norm": 2.152460813522339, | |
| "learning_rate": 3.832190690176825e-05, | |
| "loss": 4.4623, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.32222222222222224, | |
| "grad_norm": 2.5687735080718994, | |
| "learning_rate": 3.824798160583012e-05, | |
| "loss": 4.7519, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3233333333333333, | |
| "grad_norm": 2.60406494140625, | |
| "learning_rate": 3.8173894887416945e-05, | |
| "loss": 4.7309, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.3244444444444444, | |
| "grad_norm": 3.08526349067688, | |
| "learning_rate": 3.8099647649251986e-05, | |
| "loss": 4.5816, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.32555555555555554, | |
| "grad_norm": 3.4984188079833984, | |
| "learning_rate": 3.802524079601442e-05, | |
| "loss": 4.5818, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.32666666666666666, | |
| "grad_norm": 4.528430938720703, | |
| "learning_rate": 3.795067523432826e-05, | |
| "loss": 4.1268, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3277777777777778, | |
| "grad_norm": 3.826263904571533, | |
| "learning_rate": 3.787595187275136e-05, | |
| "loss": 4.4605, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.3288888888888889, | |
| "grad_norm": 2.9818341732025146, | |
| "learning_rate": 3.780107162176429e-05, | |
| "loss": 4.789, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 3.642854928970337, | |
| "learning_rate": 3.7726035393759285e-05, | |
| "loss": 4.7423, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.33111111111111113, | |
| "grad_norm": 2.6310813426971436, | |
| "learning_rate": 3.765084410302909e-05, | |
| "loss": 4.1751, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3322222222222222, | |
| "grad_norm": 4.018439769744873, | |
| "learning_rate": 3.757549866575588e-05, | |
| "loss": 4.5056, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 3.364558696746826, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 4.3372, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.33444444444444443, | |
| "grad_norm": 2.8973915576934814, | |
| "learning_rate": 3.742434902568889e-05, | |
| "loss": 4.3383, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.33555555555555555, | |
| "grad_norm": 3.0985381603240967, | |
| "learning_rate": 3.7348546664605777e-05, | |
| "loss": 4.3256, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.33666666666666667, | |
| "grad_norm": 2.420278310775757, | |
| "learning_rate": 3.727259384037852e-05, | |
| "loss": 4.1622, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.3377777777777778, | |
| "grad_norm": 2.669018030166626, | |
| "learning_rate": 3.719649147846832e-05, | |
| "loss": 4.6532, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3388888888888889, | |
| "grad_norm": 2.5606846809387207, | |
| "learning_rate": 3.712024050615843e-05, | |
| "loss": 4.8205, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 2.7075791358947754, | |
| "learning_rate": 3.704384185254288e-05, | |
| "loss": 4.7873, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.3411111111111111, | |
| "grad_norm": 2.576284170150757, | |
| "learning_rate": 3.696729644851518e-05, | |
| "loss": 4.2416, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.3422222222222222, | |
| "grad_norm": 2.5355241298675537, | |
| "learning_rate": 3.689060522675689e-05, | |
| "loss": 4.6843, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3433333333333333, | |
| "grad_norm": 3.172502040863037, | |
| "learning_rate": 3.681376912172636e-05, | |
| "loss": 4.8512, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.34444444444444444, | |
| "grad_norm": 3.2482614517211914, | |
| "learning_rate": 3.673678906964727e-05, | |
| "loss": 4.6384, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.34555555555555556, | |
| "grad_norm": 3.223466634750366, | |
| "learning_rate": 3.665966600849728e-05, | |
| "loss": 4.593, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.3466666666666667, | |
| "grad_norm": 3.437298536300659, | |
| "learning_rate": 3.6582400877996546e-05, | |
| "loss": 4.376, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.3477777777777778, | |
| "grad_norm": 3.4591798782348633, | |
| "learning_rate": 3.6504994619596294e-05, | |
| "loss": 4.5369, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.3488888888888889, | |
| "grad_norm": 2.1622931957244873, | |
| "learning_rate": 3.642744817646736e-05, | |
| "loss": 4.4165, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 2.5694704055786133, | |
| "learning_rate": 3.634976249348867e-05, | |
| "loss": 4.4281, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3511111111111111, | |
| "grad_norm": 3.8053269386291504, | |
| "learning_rate": 3.627193851723577e-05, | |
| "loss": 4.4582, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3522222222222222, | |
| "grad_norm": 2.7068371772766113, | |
| "learning_rate": 3.619397719596924e-05, | |
| "loss": 4.141, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.35333333333333333, | |
| "grad_norm": 3.1135122776031494, | |
| "learning_rate": 3.611587947962319e-05, | |
| "loss": 4.4257, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.35444444444444445, | |
| "grad_norm": 2.5378129482269287, | |
| "learning_rate": 3.603764631979363e-05, | |
| "loss": 4.495, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.35555555555555557, | |
| "grad_norm": 3.563612937927246, | |
| "learning_rate": 3.5959278669726935e-05, | |
| "loss": 4.5916, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3566666666666667, | |
| "grad_norm": 3.765002727508545, | |
| "learning_rate": 3.588077748430819e-05, | |
| "loss": 4.7184, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.35777777777777775, | |
| "grad_norm": 2.8586552143096924, | |
| "learning_rate": 3.580214372004956e-05, | |
| "loss": 4.6151, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.35888888888888887, | |
| "grad_norm": 3.664820432662964, | |
| "learning_rate": 3.572337833507865e-05, | |
| "loss": 4.0914, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 2.808751344680786, | |
| "learning_rate": 3.564448228912682e-05, | |
| "loss": 4.4035, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3611111111111111, | |
| "grad_norm": 3.5564141273498535, | |
| "learning_rate": 3.556545654351749e-05, | |
| "loss": 4.4184, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.3622222222222222, | |
| "grad_norm": 3.4853861331939697, | |
| "learning_rate": 3.548630206115443e-05, | |
| "loss": 4.5303, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.36333333333333334, | |
| "grad_norm": 3.2625653743743896, | |
| "learning_rate": 3.540701980651003e-05, | |
| "loss": 4.5295, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.36444444444444446, | |
| "grad_norm": 2.611847162246704, | |
| "learning_rate": 3.532761074561355e-05, | |
| "loss": 4.8323, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.3655555555555556, | |
| "grad_norm": 2.4942116737365723, | |
| "learning_rate": 3.524807584603932e-05, | |
| "loss": 4.245, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.36666666666666664, | |
| "grad_norm": 2.132793664932251, | |
| "learning_rate": 3.516841607689501e-05, | |
| "loss": 4.6669, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.36777777777777776, | |
| "grad_norm": 2.5251405239105225, | |
| "learning_rate": 3.5088632408809755e-05, | |
| "loss": 5.0771, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.3688888888888889, | |
| "grad_norm": 3.042750358581543, | |
| "learning_rate": 3.5008725813922386e-05, | |
| "loss": 4.562, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 3.445188283920288, | |
| "learning_rate": 3.4928697265869515e-05, | |
| "loss": 4.6474, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.3711111111111111, | |
| "grad_norm": 2.4179251194000244, | |
| "learning_rate": 3.484854773977378e-05, | |
| "loss": 4.4356, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.37222222222222223, | |
| "grad_norm": 3.5280330181121826, | |
| "learning_rate": 3.476827821223184e-05, | |
| "loss": 4.1546, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.37333333333333335, | |
| "grad_norm": 3.564363718032837, | |
| "learning_rate": 3.4687889661302576e-05, | |
| "loss": 4.7374, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.37444444444444447, | |
| "grad_norm": 2.1853668689727783, | |
| "learning_rate": 3.460738306649509e-05, | |
| "loss": 4.4073, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.37555555555555553, | |
| "grad_norm": 4.435403823852539, | |
| "learning_rate": 3.452675940875686e-05, | |
| "loss": 4.4693, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.37666666666666665, | |
| "grad_norm": 2.2928574085235596, | |
| "learning_rate": 3.444601967046168e-05, | |
| "loss": 4.1636, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.37777777777777777, | |
| "grad_norm": 2.858842611312866, | |
| "learning_rate": 3.436516483539781e-05, | |
| "loss": 4.7424, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3788888888888889, | |
| "grad_norm": 2.4767720699310303, | |
| "learning_rate": 3.428419588875588e-05, | |
| "loss": 4.6306, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 4.104574680328369, | |
| "learning_rate": 3.4203113817116957e-05, | |
| "loss": 4.2592, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.3811111111111111, | |
| "grad_norm": 3.349961757659912, | |
| "learning_rate": 3.412191960844049e-05, | |
| "loss": 4.2351, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.38222222222222224, | |
| "grad_norm": 2.902287244796753, | |
| "learning_rate": 3.4040614252052305e-05, | |
| "loss": 4.1556, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.38333333333333336, | |
| "grad_norm": 2.7805283069610596, | |
| "learning_rate": 3.39591987386325e-05, | |
| "loss": 4.8241, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.3844444444444444, | |
| "grad_norm": 3.494743585586548, | |
| "learning_rate": 3.387767406020343e-05, | |
| "loss": 4.4414, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.38555555555555554, | |
| "grad_norm": 3.4807887077331543, | |
| "learning_rate": 3.3796041210117546e-05, | |
| "loss": 4.4356, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.38666666666666666, | |
| "grad_norm": 2.875729560852051, | |
| "learning_rate": 3.3714301183045385e-05, | |
| "loss": 4.6104, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.3877777777777778, | |
| "grad_norm": 2.3670778274536133, | |
| "learning_rate": 3.363245497496337e-05, | |
| "loss": 4.6181, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.3888888888888889, | |
| "grad_norm": 3.511547088623047, | |
| "learning_rate": 3.355050358314172e-05, | |
| "loss": 4.5967, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 3.4935336112976074, | |
| "learning_rate": 3.346844800613229e-05, | |
| "loss": 4.4003, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.39111111111111113, | |
| "grad_norm": 3.7835776805877686, | |
| "learning_rate": 3.338628924375638e-05, | |
| "loss": 4.3227, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.39222222222222225, | |
| "grad_norm": 3.659778594970703, | |
| "learning_rate": 3.330402829709258e-05, | |
| "loss": 4.735, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.3933333333333333, | |
| "grad_norm": 2.280862331390381, | |
| "learning_rate": 3.322166616846458e-05, | |
| "loss": 4.3854, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.39444444444444443, | |
| "grad_norm": 3.5552432537078857, | |
| "learning_rate": 3.313920386142892e-05, | |
| "loss": 4.494, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.39555555555555555, | |
| "grad_norm": 3.2624123096466064, | |
| "learning_rate": 3.305664238076278e-05, | |
| "loss": 4.441, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.39666666666666667, | |
| "grad_norm": 2.5981621742248535, | |
| "learning_rate": 3.2973982732451755e-05, | |
| "loss": 4.1962, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.3977777777777778, | |
| "grad_norm": 3.8529744148254395, | |
| "learning_rate": 3.289122592367757e-05, | |
| "loss": 4.1754, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.3988888888888889, | |
| "grad_norm": 3.049600839614868, | |
| "learning_rate": 3.2808372962805816e-05, | |
| "loss": 4.638, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 3.134927988052368, | |
| "learning_rate": 3.272542485937369e-05, | |
| "loss": 4.4763, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4011111111111111, | |
| "grad_norm": 2.298032522201538, | |
| "learning_rate": 3.264238262407764e-05, | |
| "loss": 4.8264, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.4022222222222222, | |
| "grad_norm": 3.3620493412017822, | |
| "learning_rate": 3.2559247268761115e-05, | |
| "loss": 4.3332, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4033333333333333, | |
| "grad_norm": 3.7335774898529053, | |
| "learning_rate": 3.247601980640217e-05, | |
| "loss": 4.351, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.40444444444444444, | |
| "grad_norm": 2.7008893489837646, | |
| "learning_rate": 3.239270125110117e-05, | |
| "loss": 4.3373, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.40555555555555556, | |
| "grad_norm": 2.3377201557159424, | |
| "learning_rate": 3.230929261806842e-05, | |
| "loss": 4.6638, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.4066666666666667, | |
| "grad_norm": 2.796996831893921, | |
| "learning_rate": 3.222579492361179e-05, | |
| "loss": 4.4803, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.4077777777777778, | |
| "grad_norm": 3.004497766494751, | |
| "learning_rate": 3.214220918512434e-05, | |
| "loss": 4.8025, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.4088888888888889, | |
| "grad_norm": 2.762565851211548, | |
| "learning_rate": 3.205853642107192e-05, | |
| "loss": 4.3787, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 3.7421979904174805, | |
| "learning_rate": 3.1974777650980735e-05, | |
| "loss": 4.509, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.4111111111111111, | |
| "grad_norm": 2.6516170501708984, | |
| "learning_rate": 3.1890933895424976e-05, | |
| "loss": 4.8942, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4122222222222222, | |
| "grad_norm": 3.2065646648406982, | |
| "learning_rate": 3.180700617601436e-05, | |
| "loss": 4.4659, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.41333333333333333, | |
| "grad_norm": 3.3278090953826904, | |
| "learning_rate": 3.172299551538164e-05, | |
| "loss": 4.3103, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.41444444444444445, | |
| "grad_norm": 3.3703420162200928, | |
| "learning_rate": 3.163890293717022e-05, | |
| "loss": 4.4312, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.41555555555555557, | |
| "grad_norm": 3.629591464996338, | |
| "learning_rate": 3.155472946602162e-05, | |
| "loss": 4.8446, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 3.0027589797973633, | |
| "learning_rate": 3.147047612756302e-05, | |
| "loss": 4.5204, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.4177777777777778, | |
| "grad_norm": 3.2268624305725098, | |
| "learning_rate": 3.138614394839476e-05, | |
| "loss": 4.1844, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.41888888888888887, | |
| "grad_norm": 3.958721160888672, | |
| "learning_rate": 3.130173395607785e-05, | |
| "loss": 4.3071, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 2.1055030822753906, | |
| "learning_rate": 3.121724717912138e-05, | |
| "loss": 4.2903, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.4211111111111111, | |
| "grad_norm": 3.0535993576049805, | |
| "learning_rate": 3.1132684646970064e-05, | |
| "loss": 4.5096, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4222222222222222, | |
| "grad_norm": 2.5504136085510254, | |
| "learning_rate": 3.104804738999169e-05, | |
| "loss": 4.4653, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.42333333333333334, | |
| "grad_norm": 3.1297409534454346, | |
| "learning_rate": 3.0963336439464526e-05, | |
| "loss": 4.3652, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.42444444444444446, | |
| "grad_norm": 2.2639200687408447, | |
| "learning_rate": 3.087855282756475e-05, | |
| "loss": 4.8119, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.4255555555555556, | |
| "grad_norm": 2.581587314605713, | |
| "learning_rate": 3.079369758735393e-05, | |
| "loss": 4.0573, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.4266666666666667, | |
| "grad_norm": 2.61521577835083, | |
| "learning_rate": 3.0708771752766394e-05, | |
| "loss": 4.3563, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.42777777777777776, | |
| "grad_norm": 3.1898045539855957, | |
| "learning_rate": 3.062377635859663e-05, | |
| "loss": 5.091, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.4288888888888889, | |
| "grad_norm": 3.8822860717773438, | |
| "learning_rate": 3.053871244048669e-05, | |
| "loss": 4.1214, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 3.2095978260040283, | |
| "learning_rate": 3.045358103491357e-05, | |
| "loss": 4.6263, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.4311111111111111, | |
| "grad_norm": 2.3496782779693604, | |
| "learning_rate": 3.0368383179176585e-05, | |
| "loss": 4.2816, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.43222222222222223, | |
| "grad_norm": 3.5458877086639404, | |
| "learning_rate": 3.028311991138472e-05, | |
| "loss": 4.1913, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.43333333333333335, | |
| "grad_norm": 2.9822726249694824, | |
| "learning_rate": 3.0197792270443982e-05, | |
| "loss": 4.5714, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.43444444444444447, | |
| "grad_norm": 2.740432024002075, | |
| "learning_rate": 3.0112401296044757e-05, | |
| "loss": 4.565, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.43555555555555553, | |
| "grad_norm": 3.301563024520874, | |
| "learning_rate": 3.002694802864912e-05, | |
| "loss": 4.4515, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.43666666666666665, | |
| "grad_norm": 3.0773849487304688, | |
| "learning_rate": 2.9941433509478156e-05, | |
| "loss": 4.4196, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.43777777777777777, | |
| "grad_norm": 2.6573879718780518, | |
| "learning_rate": 2.98558587804993e-05, | |
| "loss": 4.2847, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.4388888888888889, | |
| "grad_norm": 3.498271942138672, | |
| "learning_rate": 2.9770224884413623e-05, | |
| "loss": 4.1811, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 3.6309518814086914, | |
| "learning_rate": 2.9684532864643122e-05, | |
| "loss": 4.223, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.4411111111111111, | |
| "grad_norm": 3.3358840942382812, | |
| "learning_rate": 2.9598783765318007e-05, | |
| "loss": 4.1402, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.44222222222222224, | |
| "grad_norm": 2.367056369781494, | |
| "learning_rate": 2.9512978631264006e-05, | |
| "loss": 4.3926, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.44333333333333336, | |
| "grad_norm": 3.7848873138427734, | |
| "learning_rate": 2.9427118507989586e-05, | |
| "loss": 4.1587, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 2.969773769378662, | |
| "learning_rate": 2.9341204441673266e-05, | |
| "loss": 4.1827, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.44555555555555554, | |
| "grad_norm": 3.8604671955108643, | |
| "learning_rate": 2.9255237479150816e-05, | |
| "loss": 4.7396, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.44666666666666666, | |
| "grad_norm": 2.6314451694488525, | |
| "learning_rate": 2.916921866790256e-05, | |
| "loss": 4.7571, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4477777777777778, | |
| "grad_norm": 3.2930240631103516, | |
| "learning_rate": 2.908314905604056e-05, | |
| "loss": 4.6127, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4488888888888889, | |
| "grad_norm": 2.809821367263794, | |
| "learning_rate": 2.8997029692295874e-05, | |
| "loss": 4.9782, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 3.108168363571167, | |
| "learning_rate": 2.8910861626005776e-05, | |
| "loss": 4.8604, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.45111111111111113, | |
| "grad_norm": 3.2190604209899902, | |
| "learning_rate": 2.8824645907100954e-05, | |
| "loss": 4.4294, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.45222222222222225, | |
| "grad_norm": 3.8671491146087646, | |
| "learning_rate": 2.8738383586092745e-05, | |
| "loss": 4.5494, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.4533333333333333, | |
| "grad_norm": 3.078843355178833, | |
| "learning_rate": 2.8652075714060295e-05, | |
| "loss": 4.4064, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.45444444444444443, | |
| "grad_norm": 2.9444501399993896, | |
| "learning_rate": 2.8565723342637796e-05, | |
| "loss": 4.6974, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.45555555555555555, | |
| "grad_norm": 3.3965888023376465, | |
| "learning_rate": 2.8479327524001636e-05, | |
| "loss": 4.6253, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.45666666666666667, | |
| "grad_norm": 2.8222734928131104, | |
| "learning_rate": 2.8392889310857612e-05, | |
| "loss": 4.4421, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.4577777777777778, | |
| "grad_norm": 3.996683359146118, | |
| "learning_rate": 2.8306409756428064e-05, | |
| "loss": 3.9591, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.4588888888888889, | |
| "grad_norm": 3.7605364322662354, | |
| "learning_rate": 2.8219889914439074e-05, | |
| "loss": 4.0585, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 3.199702739715576, | |
| "learning_rate": 2.8133330839107608e-05, | |
| "loss": 4.5694, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.46111111111111114, | |
| "grad_norm": 2.3009800910949707, | |
| "learning_rate": 2.8046733585128687e-05, | |
| "loss": 4.5112, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.4622222222222222, | |
| "grad_norm": 3.8310000896453857, | |
| "learning_rate": 2.7960099207662532e-05, | |
| "loss": 4.1967, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.4633333333333333, | |
| "grad_norm": 4.325408458709717, | |
| "learning_rate": 2.787342876232167e-05, | |
| "loss": 4.1846, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.46444444444444444, | |
| "grad_norm": 3.2290215492248535, | |
| "learning_rate": 2.7786723305158136e-05, | |
| "loss": 4.3148, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.46555555555555556, | |
| "grad_norm": 3.246396541595459, | |
| "learning_rate": 2.7699983892650573e-05, | |
| "loss": 4.2778, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.4666666666666667, | |
| "grad_norm": 3.091440200805664, | |
| "learning_rate": 2.761321158169134e-05, | |
| "loss": 4.6429, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4677777777777778, | |
| "grad_norm": 2.5828170776367188, | |
| "learning_rate": 2.7526407429573657e-05, | |
| "loss": 4.6686, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4688888888888889, | |
| "grad_norm": 2.3290863037109375, | |
| "learning_rate": 2.7439572493978736e-05, | |
| "loss": 4.1659, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 4.536559104919434, | |
| "learning_rate": 2.7352707832962865e-05, | |
| "loss": 3.9368, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.4711111111111111, | |
| "grad_norm": 2.5557074546813965, | |
| "learning_rate": 2.726581450494451e-05, | |
| "loss": 4.7282, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.4722222222222222, | |
| "grad_norm": 2.8363335132598877, | |
| "learning_rate": 2.717889356869146e-05, | |
| "loss": 4.4236, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.47333333333333333, | |
| "grad_norm": 4.480076313018799, | |
| "learning_rate": 2.7091946083307896e-05, | |
| "loss": 4.3532, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.47444444444444445, | |
| "grad_norm": 2.9685823917388916, | |
| "learning_rate": 2.7004973108221472e-05, | |
| "loss": 3.8564, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.47555555555555556, | |
| "grad_norm": 2.388016939163208, | |
| "learning_rate": 2.6917975703170466e-05, | |
| "loss": 4.6472, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.4766666666666667, | |
| "grad_norm": 3.0806846618652344, | |
| "learning_rate": 2.6830954928190794e-05, | |
| "loss": 4.4021, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.4777777777777778, | |
| "grad_norm": 2.4739530086517334, | |
| "learning_rate": 2.674391184360313e-05, | |
| "loss": 4.3841, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.47888888888888886, | |
| "grad_norm": 2.37337327003479, | |
| "learning_rate": 2.6656847510000012e-05, | |
| "loss": 4.5123, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 2.335357904434204, | |
| "learning_rate": 2.656976298823284e-05, | |
| "loss": 4.5225, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.4811111111111111, | |
| "grad_norm": 2.888369083404541, | |
| "learning_rate": 2.6482659339399045e-05, | |
| "loss": 4.0533, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.4822222222222222, | |
| "grad_norm": 3.412767171859741, | |
| "learning_rate": 2.6395537624829096e-05, | |
| "loss": 4.6112, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.48333333333333334, | |
| "grad_norm": 3.0738167762756348, | |
| "learning_rate": 2.63083989060736e-05, | |
| "loss": 4.0807, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.48444444444444446, | |
| "grad_norm": 2.038522720336914, | |
| "learning_rate": 2.6221244244890336e-05, | |
| "loss": 4.3389, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.4855555555555556, | |
| "grad_norm": 2.4719579219818115, | |
| "learning_rate": 2.6134074703231344e-05, | |
| "loss": 4.1841, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.4866666666666667, | |
| "grad_norm": 2.653308153152466, | |
| "learning_rate": 2.604689134322999e-05, | |
| "loss": 4.6983, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.48777777777777775, | |
| "grad_norm": 3.113577127456665, | |
| "learning_rate": 2.5959695227188004e-05, | |
| "loss": 4.46, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.4888888888888889, | |
| "grad_norm": 2.463456153869629, | |
| "learning_rate": 2.587248741756253e-05, | |
| "loss": 4.5327, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 3.733887195587158, | |
| "learning_rate": 2.578526897695321e-05, | |
| "loss": 4.2684, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.4911111111111111, | |
| "grad_norm": 3.1210319995880127, | |
| "learning_rate": 2.5698040968089225e-05, | |
| "loss": 4.2219, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.4922222222222222, | |
| "grad_norm": 2.173025369644165, | |
| "learning_rate": 2.5610804453816333e-05, | |
| "loss": 4.7141, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.49333333333333335, | |
| "grad_norm": 3.4339637756347656, | |
| "learning_rate": 2.5523560497083926e-05, | |
| "loss": 4.8913, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.49444444444444446, | |
| "grad_norm": 3.317446231842041, | |
| "learning_rate": 2.5436310160932092e-05, | |
| "loss": 4.6514, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.4955555555555556, | |
| "grad_norm": 3.317653179168701, | |
| "learning_rate": 2.5349054508478637e-05, | |
| "loss": 4.7476, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.49666666666666665, | |
| "grad_norm": 2.9672698974609375, | |
| "learning_rate": 2.5261794602906145e-05, | |
| "loss": 4.6061, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.49777777777777776, | |
| "grad_norm": 3.32000470161438, | |
| "learning_rate": 2.517453150744904e-05, | |
| "loss": 4.1218, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.4988888888888889, | |
| "grad_norm": 4.345942497253418, | |
| "learning_rate": 2.5087266285380596e-05, | |
| "loss": 4.4872, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 3.055283784866333, | |
| "learning_rate": 2.5e-05, | |
| "loss": 4.2396, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5011111111111111, | |
| "grad_norm": 3.0910513401031494, | |
| "learning_rate": 2.4912733714619417e-05, | |
| "loss": 4.9336, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.5022222222222222, | |
| "grad_norm": 2.6645710468292236, | |
| "learning_rate": 2.4825468492550964e-05, | |
| "loss": 4.7848, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5033333333333333, | |
| "grad_norm": 3.63999342918396, | |
| "learning_rate": 2.4738205397093864e-05, | |
| "loss": 4.357, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.5044444444444445, | |
| "grad_norm": 2.507779598236084, | |
| "learning_rate": 2.4650945491521372e-05, | |
| "loss": 4.5025, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5055555555555555, | |
| "grad_norm": 2.3816704750061035, | |
| "learning_rate": 2.4563689839067913e-05, | |
| "loss": 4.5438, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.5066666666666667, | |
| "grad_norm": 3.367776870727539, | |
| "learning_rate": 2.447643950291608e-05, | |
| "loss": 3.8387, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5077777777777778, | |
| "grad_norm": 3.0507357120513916, | |
| "learning_rate": 2.4389195546183673e-05, | |
| "loss": 4.3121, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.5088888888888888, | |
| "grad_norm": 4.110062122344971, | |
| "learning_rate": 2.4301959031910784e-05, | |
| "loss": 4.2072, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 3.110203504562378, | |
| "learning_rate": 2.4214731023046793e-05, | |
| "loss": 4.3653, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5111111111111111, | |
| "grad_norm": 3.3167712688446045, | |
| "learning_rate": 2.4127512582437485e-05, | |
| "loss": 4.2322, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5122222222222222, | |
| "grad_norm": 2.507969856262207, | |
| "learning_rate": 2.4040304772812002e-05, | |
| "loss": 4.382, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.5133333333333333, | |
| "grad_norm": 3.365709066390991, | |
| "learning_rate": 2.3953108656770016e-05, | |
| "loss": 4.0521, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5144444444444445, | |
| "grad_norm": 2.8844547271728516, | |
| "learning_rate": 2.386592529676866e-05, | |
| "loss": 4.5033, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.5155555555555555, | |
| "grad_norm": 3.0520310401916504, | |
| "learning_rate": 2.377875575510967e-05, | |
| "loss": 4.8204, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5166666666666667, | |
| "grad_norm": 4.429820537567139, | |
| "learning_rate": 2.3691601093926404e-05, | |
| "loss": 4.3879, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.5177777777777778, | |
| "grad_norm": 2.3193674087524414, | |
| "learning_rate": 2.3604462375170906e-05, | |
| "loss": 4.0077, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5188888888888888, | |
| "grad_norm": 3.9970319271087646, | |
| "learning_rate": 2.3517340660600964e-05, | |
| "loss": 4.2043, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 2.8194077014923096, | |
| "learning_rate": 2.3430237011767167e-05, | |
| "loss": 4.5441, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5211111111111111, | |
| "grad_norm": 2.7785353660583496, | |
| "learning_rate": 2.3343152490000004e-05, | |
| "loss": 5.0854, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.5222222222222223, | |
| "grad_norm": 3.0042474269866943, | |
| "learning_rate": 2.3256088156396868e-05, | |
| "loss": 4.7248, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5233333333333333, | |
| "grad_norm": 3.4051711559295654, | |
| "learning_rate": 2.3169045071809215e-05, | |
| "loss": 4.1062, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.5244444444444445, | |
| "grad_norm": 3.4314067363739014, | |
| "learning_rate": 2.3082024296829536e-05, | |
| "loss": 4.1021, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5255555555555556, | |
| "grad_norm": 3.356543779373169, | |
| "learning_rate": 2.299502689177853e-05, | |
| "loss": 4.1495, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.5266666666666666, | |
| "grad_norm": 4.4954633712768555, | |
| "learning_rate": 2.2908053916692117e-05, | |
| "loss": 4.1691, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5277777777777778, | |
| "grad_norm": 3.4520392417907715, | |
| "learning_rate": 2.2821106431308544e-05, | |
| "loss": 4.5688, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5288888888888889, | |
| "grad_norm": 2.547987699508667, | |
| "learning_rate": 2.2734185495055503e-05, | |
| "loss": 4.8845, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 2.991994619369507, | |
| "learning_rate": 2.2647292167037144e-05, | |
| "loss": 4.5536, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.5311111111111111, | |
| "grad_norm": 3.409557819366455, | |
| "learning_rate": 2.2560427506021266e-05, | |
| "loss": 4.674, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5322222222222223, | |
| "grad_norm": 2.5158376693725586, | |
| "learning_rate": 2.247359257042634e-05, | |
| "loss": 3.8624, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 5.460207939147949, | |
| "learning_rate": 2.238678841830867e-05, | |
| "loss": 3.9847, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5344444444444445, | |
| "grad_norm": 3.18215274810791, | |
| "learning_rate": 2.230001610734943e-05, | |
| "loss": 4.4479, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.5355555555555556, | |
| "grad_norm": 3.3176145553588867, | |
| "learning_rate": 2.2213276694841866e-05, | |
| "loss": 4.5647, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5366666666666666, | |
| "grad_norm": 2.4659605026245117, | |
| "learning_rate": 2.212657123767834e-05, | |
| "loss": 4.6482, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.5377777777777778, | |
| "grad_norm": 3.418905019760132, | |
| "learning_rate": 2.2039900792337474e-05, | |
| "loss": 4.0517, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5388888888888889, | |
| "grad_norm": 2.7777280807495117, | |
| "learning_rate": 2.195326641487132e-05, | |
| "loss": 3.9006, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 2.1448440551757812, | |
| "learning_rate": 2.186666916089239e-05, | |
| "loss": 4.5849, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5411111111111111, | |
| "grad_norm": 2.304466485977173, | |
| "learning_rate": 2.1780110085560935e-05, | |
| "loss": 4.6166, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.5422222222222223, | |
| "grad_norm": 4.101543426513672, | |
| "learning_rate": 2.1693590243571938e-05, | |
| "loss": 4.1855, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5433333333333333, | |
| "grad_norm": 4.019572734832764, | |
| "learning_rate": 2.1607110689142393e-05, | |
| "loss": 4.511, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.5444444444444444, | |
| "grad_norm": 3.2479324340820312, | |
| "learning_rate": 2.1520672475998373e-05, | |
| "loss": 4.694, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5455555555555556, | |
| "grad_norm": 2.5916500091552734, | |
| "learning_rate": 2.1434276657362213e-05, | |
| "loss": 4.4817, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.5466666666666666, | |
| "grad_norm": 2.4654204845428467, | |
| "learning_rate": 2.1347924285939714e-05, | |
| "loss": 4.241, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5477777777777778, | |
| "grad_norm": 3.4962589740753174, | |
| "learning_rate": 2.1261616413907265e-05, | |
| "loss": 4.2751, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5488888888888889, | |
| "grad_norm": 2.41613507270813, | |
| "learning_rate": 2.117535409289905e-05, | |
| "loss": 4.6923, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 2.8937885761260986, | |
| "learning_rate": 2.1089138373994223e-05, | |
| "loss": 4.5164, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5511111111111111, | |
| "grad_norm": 3.6244256496429443, | |
| "learning_rate": 2.1002970307704132e-05, | |
| "loss": 4.0702, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5522222222222222, | |
| "grad_norm": 2.670847177505493, | |
| "learning_rate": 2.0916850943959452e-05, | |
| "loss": 4.1953, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.5533333333333333, | |
| "grad_norm": 3.030318021774292, | |
| "learning_rate": 2.0830781332097446e-05, | |
| "loss": 4.2784, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5544444444444444, | |
| "grad_norm": 3.4792563915252686, | |
| "learning_rate": 2.0744762520849193e-05, | |
| "loss": 4.7421, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 4.749427318572998, | |
| "learning_rate": 2.0658795558326743e-05, | |
| "loss": 4.0809, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5566666666666666, | |
| "grad_norm": 2.382559061050415, | |
| "learning_rate": 2.057288149201042e-05, | |
| "loss": 4.2373, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.5577777777777778, | |
| "grad_norm": 4.044615745544434, | |
| "learning_rate": 2.0487021368736003e-05, | |
| "loss": 4.3278, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5588888888888889, | |
| "grad_norm": 2.4403457641601562, | |
| "learning_rate": 2.0401216234681995e-05, | |
| "loss": 4.6996, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 2.6123414039611816, | |
| "learning_rate": 2.031546713535688e-05, | |
| "loss": 3.7175, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5611111111111111, | |
| "grad_norm": 2.930072784423828, | |
| "learning_rate": 2.022977511558638e-05, | |
| "loss": 3.8051, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5622222222222222, | |
| "grad_norm": 2.092438220977783, | |
| "learning_rate": 2.0144141219500705e-05, | |
| "loss": 4.6131, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5633333333333334, | |
| "grad_norm": 3.791438579559326, | |
| "learning_rate": 2.0058566490521847e-05, | |
| "loss": 4.5328, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.5644444444444444, | |
| "grad_norm": 2.1941120624542236, | |
| "learning_rate": 1.9973051971350888e-05, | |
| "loss": 4.0611, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5655555555555556, | |
| "grad_norm": 2.723223924636841, | |
| "learning_rate": 1.9887598703955242e-05, | |
| "loss": 4.2184, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.5666666666666667, | |
| "grad_norm": 2.9518237113952637, | |
| "learning_rate": 1.980220772955602e-05, | |
| "loss": 4.3172, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5677777777777778, | |
| "grad_norm": 3.06872296333313, | |
| "learning_rate": 1.9716880088615285e-05, | |
| "loss": 4.2687, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.5688888888888889, | |
| "grad_norm": 2.8538174629211426, | |
| "learning_rate": 1.963161682082342e-05, | |
| "loss": 4.265, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 3.3108673095703125, | |
| "learning_rate": 1.9546418965086442e-05, | |
| "loss": 4.5094, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5711111111111111, | |
| "grad_norm": 3.3742525577545166, | |
| "learning_rate": 1.946128755951332e-05, | |
| "loss": 4.4563, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5722222222222222, | |
| "grad_norm": 2.6200695037841797, | |
| "learning_rate": 1.937622364140338e-05, | |
| "loss": 4.3567, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5733333333333334, | |
| "grad_norm": 2.5701615810394287, | |
| "learning_rate": 1.9291228247233605e-05, | |
| "loss": 4.2645, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5744444444444444, | |
| "grad_norm": 4.248501777648926, | |
| "learning_rate": 1.920630241264607e-05, | |
| "loss": 3.8413, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5755555555555556, | |
| "grad_norm": 3.4751811027526855, | |
| "learning_rate": 1.912144717243525e-05, | |
| "loss": 4.258, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5766666666666667, | |
| "grad_norm": 2.8151302337646484, | |
| "learning_rate": 1.9036663560535483e-05, | |
| "loss": 4.4939, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5777777777777777, | |
| "grad_norm": 3.2205138206481934, | |
| "learning_rate": 1.895195261000831e-05, | |
| "loss": 4.7516, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5788888888888889, | |
| "grad_norm": 2.6713924407958984, | |
| "learning_rate": 1.8867315353029935e-05, | |
| "loss": 4.3591, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 3.1901915073394775, | |
| "learning_rate": 1.8782752820878634e-05, | |
| "loss": 4.2607, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5811111111111111, | |
| "grad_norm": 3.473564863204956, | |
| "learning_rate": 1.869826604392216e-05, | |
| "loss": 4.125, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5822222222222222, | |
| "grad_norm": 2.8697259426116943, | |
| "learning_rate": 1.8613856051605243e-05, | |
| "loss": 4.2696, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5833333333333334, | |
| "grad_norm": 2.74684739112854, | |
| "learning_rate": 1.852952387243698e-05, | |
| "loss": 4.2943, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5844444444444444, | |
| "grad_norm": 2.807659387588501, | |
| "learning_rate": 1.8445270533978388e-05, | |
| "loss": 4.6892, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5855555555555556, | |
| "grad_norm": 2.5258119106292725, | |
| "learning_rate": 1.8361097062829778e-05, | |
| "loss": 4.4269, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5866666666666667, | |
| "grad_norm": 4.046256065368652, | |
| "learning_rate": 1.827700448461836e-05, | |
| "loss": 4.4027, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5877777777777777, | |
| "grad_norm": 2.256350517272949, | |
| "learning_rate": 1.8192993823985643e-05, | |
| "loss": 4.1628, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.5888888888888889, | |
| "grad_norm": 2.6388349533081055, | |
| "learning_rate": 1.8109066104575023e-05, | |
| "loss": 5.2738, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 3.1763365268707275, | |
| "learning_rate": 1.802522234901927e-05, | |
| "loss": 4.4906, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.5911111111111111, | |
| "grad_norm": 2.969287157058716, | |
| "learning_rate": 1.7941463578928086e-05, | |
| "loss": 5.1068, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5922222222222222, | |
| "grad_norm": 4.471690654754639, | |
| "learning_rate": 1.7857790814875663e-05, | |
| "loss": 4.1047, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.5933333333333334, | |
| "grad_norm": 3.2363221645355225, | |
| "learning_rate": 1.7774205076388206e-05, | |
| "loss": 4.587, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5944444444444444, | |
| "grad_norm": 2.6446151733398438, | |
| "learning_rate": 1.7690707381931583e-05, | |
| "loss": 4.4606, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5955555555555555, | |
| "grad_norm": 3.1010208129882812, | |
| "learning_rate": 1.7607298748898842e-05, | |
| "loss": 4.3764, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5966666666666667, | |
| "grad_norm": 2.4426517486572266, | |
| "learning_rate": 1.7523980193597836e-05, | |
| "loss": 4.4518, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.5977777777777777, | |
| "grad_norm": 1.913076400756836, | |
| "learning_rate": 1.744075273123889e-05, | |
| "loss": 4.2522, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5988888888888889, | |
| "grad_norm": 2.492178440093994, | |
| "learning_rate": 1.735761737592236e-05, | |
| "loss": 4.5726, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.457730531692505, | |
| "learning_rate": 1.7274575140626318e-05, | |
| "loss": 4.662, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6011111111111112, | |
| "grad_norm": 2.3263602256774902, | |
| "learning_rate": 1.7191627037194186e-05, | |
| "loss": 4.4692, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.6022222222222222, | |
| "grad_norm": 3.4461264610290527, | |
| "learning_rate": 1.7108774076322443e-05, | |
| "loss": 4.287, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6033333333333334, | |
| "grad_norm": 3.4049248695373535, | |
| "learning_rate": 1.702601726754825e-05, | |
| "loss": 3.8536, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.6044444444444445, | |
| "grad_norm": 3.2425801753997803, | |
| "learning_rate": 1.6943357619237226e-05, | |
| "loss": 3.8095, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6055555555555555, | |
| "grad_norm": 3.209322452545166, | |
| "learning_rate": 1.686079613857109e-05, | |
| "loss": 4.0113, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.6066666666666667, | |
| "grad_norm": 2.507138729095459, | |
| "learning_rate": 1.677833383153542e-05, | |
| "loss": 4.3496, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6077777777777778, | |
| "grad_norm": 3.377285957336426, | |
| "learning_rate": 1.6695971702907426e-05, | |
| "loss": 4.2639, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.6088888888888889, | |
| "grad_norm": 3.625976324081421, | |
| "learning_rate": 1.6613710756243626e-05, | |
| "loss": 4.1149, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 2.757136821746826, | |
| "learning_rate": 1.6531551993867717e-05, | |
| "loss": 4.0329, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.6111111111111112, | |
| "grad_norm": 3.1707332134246826, | |
| "learning_rate": 1.6449496416858284e-05, | |
| "loss": 4.3594, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6122222222222222, | |
| "grad_norm": 3.1145691871643066, | |
| "learning_rate": 1.6367545025036636e-05, | |
| "loss": 4.6404, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.6133333333333333, | |
| "grad_norm": 3.2511072158813477, | |
| "learning_rate": 1.6285698816954624e-05, | |
| "loss": 4.0102, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6144444444444445, | |
| "grad_norm": 3.0803847312927246, | |
| "learning_rate": 1.6203958789882456e-05, | |
| "loss": 4.2037, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.6155555555555555, | |
| "grad_norm": 2.6308162212371826, | |
| "learning_rate": 1.612232593979658e-05, | |
| "loss": 4.1265, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6166666666666667, | |
| "grad_norm": 2.719158172607422, | |
| "learning_rate": 1.6040801261367493e-05, | |
| "loss": 4.4969, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.6177777777777778, | |
| "grad_norm": 4.231455326080322, | |
| "learning_rate": 1.5959385747947698e-05, | |
| "loss": 3.8209, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6188888888888889, | |
| "grad_norm": 3.452610731124878, | |
| "learning_rate": 1.5878080391559508e-05, | |
| "loss": 4.8977, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 2.3357810974121094, | |
| "learning_rate": 1.5796886182883053e-05, | |
| "loss": 4.2065, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6211111111111111, | |
| "grad_norm": 3.3810150623321533, | |
| "learning_rate": 1.5715804111244137e-05, | |
| "loss": 4.1377, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.6222222222222222, | |
| "grad_norm": 2.561292886734009, | |
| "learning_rate": 1.56348351646022e-05, | |
| "loss": 4.304, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6233333333333333, | |
| "grad_norm": 4.378098011016846, | |
| "learning_rate": 1.5553980329538326e-05, | |
| "loss": 4.2043, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.6244444444444445, | |
| "grad_norm": 2.817155599594116, | |
| "learning_rate": 1.547324059124315e-05, | |
| "loss": 4.8234, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6255555555555555, | |
| "grad_norm": 2.7013378143310547, | |
| "learning_rate": 1.539261693350491e-05, | |
| "loss": 4.5842, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.6266666666666667, | |
| "grad_norm": 3.0469796657562256, | |
| "learning_rate": 1.5312110338697426e-05, | |
| "loss": 4.475, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6277777777777778, | |
| "grad_norm": 2.944330930709839, | |
| "learning_rate": 1.523172178776816e-05, | |
| "loss": 4.7153, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.6288888888888889, | |
| "grad_norm": 3.1219630241394043, | |
| "learning_rate": 1.5151452260226224e-05, | |
| "loss": 4.2081, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 3.091395139694214, | |
| "learning_rate": 1.5071302734130489e-05, | |
| "loss": 3.8313, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.6311111111111111, | |
| "grad_norm": 3.610748767852783, | |
| "learning_rate": 1.4991274186077632e-05, | |
| "loss": 4.3207, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6322222222222222, | |
| "grad_norm": 2.4512412548065186, | |
| "learning_rate": 1.4911367591190248e-05, | |
| "loss": 4.6405, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.6333333333333333, | |
| "grad_norm": 2.7447104454040527, | |
| "learning_rate": 1.4831583923104999e-05, | |
| "loss": 4.3182, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6344444444444445, | |
| "grad_norm": 2.834606409072876, | |
| "learning_rate": 1.475192415396068e-05, | |
| "loss": 4.0676, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.6355555555555555, | |
| "grad_norm": 2.1336636543273926, | |
| "learning_rate": 1.467238925438646e-05, | |
| "loss": 4.4181, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6366666666666667, | |
| "grad_norm": 2.7370517253875732, | |
| "learning_rate": 1.4592980193489975e-05, | |
| "loss": 4.2872, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.6377777777777778, | |
| "grad_norm": 2.991546392440796, | |
| "learning_rate": 1.4513697938845572e-05, | |
| "loss": 3.8864, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6388888888888888, | |
| "grad_norm": 2.664534330368042, | |
| "learning_rate": 1.443454345648252e-05, | |
| "loss": 4.5979, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 2.904681921005249, | |
| "learning_rate": 1.4355517710873184e-05, | |
| "loss": 4.2368, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6411111111111111, | |
| "grad_norm": 3.317148447036743, | |
| "learning_rate": 1.4276621664921357e-05, | |
| "loss": 3.8986, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.6422222222222222, | |
| "grad_norm": 3.1722943782806396, | |
| "learning_rate": 1.4197856279950438e-05, | |
| "loss": 4.3489, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.6433333333333333, | |
| "grad_norm": 2.8325436115264893, | |
| "learning_rate": 1.4119222515691816e-05, | |
| "loss": 4.7594, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.6444444444444445, | |
| "grad_norm": 3.2218034267425537, | |
| "learning_rate": 1.4040721330273062e-05, | |
| "loss": 4.2976, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6455555555555555, | |
| "grad_norm": 3.36842679977417, | |
| "learning_rate": 1.3962353680206373e-05, | |
| "loss": 4.2204, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.6466666666666666, | |
| "grad_norm": 3.8246467113494873, | |
| "learning_rate": 1.388412052037682e-05, | |
| "loss": 4.1247, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6477777777777778, | |
| "grad_norm": 3.131218910217285, | |
| "learning_rate": 1.380602280403076e-05, | |
| "loss": 4.3663, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.6488888888888888, | |
| "grad_norm": 3.3939664363861084, | |
| "learning_rate": 1.3728061482764238e-05, | |
| "loss": 4.3556, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 2.252523183822632, | |
| "learning_rate": 1.3650237506511331e-05, | |
| "loss": 4.0815, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6511111111111111, | |
| "grad_norm": 4.023004055023193, | |
| "learning_rate": 1.3572551823532654e-05, | |
| "loss": 4.6529, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6522222222222223, | |
| "grad_norm": 2.3044891357421875, | |
| "learning_rate": 1.349500538040371e-05, | |
| "loss": 4.7574, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.6533333333333333, | |
| "grad_norm": 2.99569034576416, | |
| "learning_rate": 1.3417599122003464e-05, | |
| "loss": 4.1623, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6544444444444445, | |
| "grad_norm": 3.384570360183716, | |
| "learning_rate": 1.3340333991502724e-05, | |
| "loss": 4.3638, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.6555555555555556, | |
| "grad_norm": 3.1633384227752686, | |
| "learning_rate": 1.3263210930352737e-05, | |
| "loss": 4.2265, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6566666666666666, | |
| "grad_norm": 2.922513484954834, | |
| "learning_rate": 1.3186230878273653e-05, | |
| "loss": 4.2723, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.6577777777777778, | |
| "grad_norm": 2.408703327178955, | |
| "learning_rate": 1.3109394773243117e-05, | |
| "loss": 4.3487, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6588888888888889, | |
| "grad_norm": 2.839890718460083, | |
| "learning_rate": 1.3032703551484832e-05, | |
| "loss": 4.3941, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 2.723208427429199, | |
| "learning_rate": 1.2956158147457115e-05, | |
| "loss": 4.5581, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6611111111111111, | |
| "grad_norm": 3.162594795227051, | |
| "learning_rate": 1.2879759493841575e-05, | |
| "loss": 4.6902, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6622222222222223, | |
| "grad_norm": 2.862002372741699, | |
| "learning_rate": 1.280350852153168e-05, | |
| "loss": 4.6792, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6633333333333333, | |
| "grad_norm": 3.029798746109009, | |
| "learning_rate": 1.272740615962148e-05, | |
| "loss": 4.2943, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.6644444444444444, | |
| "grad_norm": 2.505032539367676, | |
| "learning_rate": 1.2651453335394231e-05, | |
| "loss": 4.373, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6655555555555556, | |
| "grad_norm": 3.039720058441162, | |
| "learning_rate": 1.2575650974311119e-05, | |
| "loss": 4.5615, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 1.9847793579101562, | |
| "learning_rate": 1.2500000000000006e-05, | |
| "loss": 4.7977, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6677777777777778, | |
| "grad_norm": 2.6300618648529053, | |
| "learning_rate": 1.2424501334244123e-05, | |
| "loss": 4.3867, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.6688888888888889, | |
| "grad_norm": 3.1111793518066406, | |
| "learning_rate": 1.234915589697091e-05, | |
| "loss": 4.3681, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 3.274426221847534, | |
| "learning_rate": 1.2273964606240718e-05, | |
| "loss": 4.3242, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.6711111111111111, | |
| "grad_norm": 2.1483371257781982, | |
| "learning_rate": 1.2198928378235716e-05, | |
| "loss": 4.468, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.6722222222222223, | |
| "grad_norm": 3.4644694328308105, | |
| "learning_rate": 1.2124048127248644e-05, | |
| "loss": 4.5052, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6733333333333333, | |
| "grad_norm": 2.960430860519409, | |
| "learning_rate": 1.2049324765671749e-05, | |
| "loss": 4.3371, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6744444444444444, | |
| "grad_norm": 3.3487277030944824, | |
| "learning_rate": 1.19747592039856e-05, | |
| "loss": 4.8553, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.6755555555555556, | |
| "grad_norm": 3.0505776405334473, | |
| "learning_rate": 1.1900352350748026e-05, | |
| "loss": 4.2182, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6766666666666666, | |
| "grad_norm": 3.3368704319000244, | |
| "learning_rate": 1.1826105112583061e-05, | |
| "loss": 3.9389, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.6777777777777778, | |
| "grad_norm": 3.4237194061279297, | |
| "learning_rate": 1.175201839416988e-05, | |
| "loss": 4.4959, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6788888888888889, | |
| "grad_norm": 2.389981269836426, | |
| "learning_rate": 1.167809309823175e-05, | |
| "loss": 4.1658, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 4.309886455535889, | |
| "learning_rate": 1.1604330125525079e-05, | |
| "loss": 3.8112, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6811111111111111, | |
| "grad_norm": 2.3204903602600098, | |
| "learning_rate": 1.1530730374828422e-05, | |
| "loss": 4.2661, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.6822222222222222, | |
| "grad_norm": 3.4007372856140137, | |
| "learning_rate": 1.1457294742931507e-05, | |
| "loss": 4.3726, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6833333333333333, | |
| "grad_norm": 3.3282923698425293, | |
| "learning_rate": 1.1384024124624324e-05, | |
| "loss": 4.3936, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6844444444444444, | |
| "grad_norm": 3.03949236869812, | |
| "learning_rate": 1.1310919412686247e-05, | |
| "loss": 3.7927, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6855555555555556, | |
| "grad_norm": 2.703687906265259, | |
| "learning_rate": 1.123798149787511e-05, | |
| "loss": 4.5, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.6866666666666666, | |
| "grad_norm": 3.9763340950012207, | |
| "learning_rate": 1.11652112689164e-05, | |
| "loss": 3.7004, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.6877777777777778, | |
| "grad_norm": 4.126737594604492, | |
| "learning_rate": 1.109260961249238e-05, | |
| "loss": 4.308, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.6888888888888889, | |
| "grad_norm": 2.2291946411132812, | |
| "learning_rate": 1.1020177413231334e-05, | |
| "loss": 4.4126, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 3.0533227920532227, | |
| "learning_rate": 1.0947915553696742e-05, | |
| "loss": 3.8855, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.6911111111111111, | |
| "grad_norm": 3.887996196746826, | |
| "learning_rate": 1.0875824914376553e-05, | |
| "loss": 4.3823, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6922222222222222, | |
| "grad_norm": 2.5938265323638916, | |
| "learning_rate": 1.0803906373672476e-05, | |
| "loss": 4.2245, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.6933333333333334, | |
| "grad_norm": 3.3216285705566406, | |
| "learning_rate": 1.0732160807889211e-05, | |
| "loss": 4.1916, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6944444444444444, | |
| "grad_norm": 3.3373751640319824, | |
| "learning_rate": 1.0660589091223855e-05, | |
| "loss": 4.3503, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6955555555555556, | |
| "grad_norm": 2.2890090942382812, | |
| "learning_rate": 1.058919209575517e-05, | |
| "loss": 4.0717, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6966666666666667, | |
| "grad_norm": 4.276199817657471, | |
| "learning_rate": 1.0517970691433035e-05, | |
| "loss": 4.7337, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.6977777777777778, | |
| "grad_norm": 2.4809815883636475, | |
| "learning_rate": 1.0446925746067768e-05, | |
| "loss": 4.5435, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6988888888888889, | |
| "grad_norm": 4.138044357299805, | |
| "learning_rate": 1.0376058125319613e-05, | |
| "loss": 4.2157, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 2.8675029277801514, | |
| "learning_rate": 1.0305368692688174e-05, | |
| "loss": 4.7462, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7011111111111111, | |
| "grad_norm": 2.406512498855591, | |
| "learning_rate": 1.0234858309501862e-05, | |
| "loss": 3.879, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.7022222222222222, | |
| "grad_norm": 2.695511817932129, | |
| "learning_rate": 1.0164527834907467e-05, | |
| "loss": 4.4111, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7033333333333334, | |
| "grad_norm": 2.458010673522949, | |
| "learning_rate": 1.0094378125859602e-05, | |
| "loss": 4.5713, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.7044444444444444, | |
| "grad_norm": 3.0153543949127197, | |
| "learning_rate": 1.0024410037110357e-05, | |
| "loss": 4.3096, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7055555555555556, | |
| "grad_norm": 2.6395087242126465, | |
| "learning_rate": 9.954624421198792e-06, | |
| "loss": 4.4095, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.7066666666666667, | |
| "grad_norm": 2.174259662628174, | |
| "learning_rate": 9.88502212844063e-06, | |
| "loss": 4.396, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.7077777777777777, | |
| "grad_norm": 2.7941372394561768, | |
| "learning_rate": 9.815604006917839e-06, | |
| "loss": 4.1945, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.7088888888888889, | |
| "grad_norm": 3.5960729122161865, | |
| "learning_rate": 9.746370902468311e-06, | |
| "loss": 4.4889, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 3.816847562789917, | |
| "learning_rate": 9.677323658675594e-06, | |
| "loss": 4.6575, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.7111111111111111, | |
| "grad_norm": 2.2747933864593506, | |
| "learning_rate": 9.608463116858542e-06, | |
| "loss": 4.5163, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7122222222222222, | |
| "grad_norm": 3.0500824451446533, | |
| "learning_rate": 9.539790116061151e-06, | |
| "loss": 4.5815, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.7133333333333334, | |
| "grad_norm": 3.7249062061309814, | |
| "learning_rate": 9.471305493042243e-06, | |
| "loss": 4.206, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7144444444444444, | |
| "grad_norm": 3.3056654930114746, | |
| "learning_rate": 9.403010082265351e-06, | |
| "loss": 4.3168, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.7155555555555555, | |
| "grad_norm": 3.2421038150787354, | |
| "learning_rate": 9.334904715888495e-06, | |
| "loss": 4.815, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7166666666666667, | |
| "grad_norm": 3.044701337814331, | |
| "learning_rate": 9.266990223754069e-06, | |
| "loss": 4.4632, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.7177777777777777, | |
| "grad_norm": 3.229823589324951, | |
| "learning_rate": 9.199267433378727e-06, | |
| "loss": 4.5471, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7188888888888889, | |
| "grad_norm": 2.7537453174591064, | |
| "learning_rate": 9.131737169943314e-06, | |
| "loss": 4.0644, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 2.645606517791748, | |
| "learning_rate": 9.064400256282757e-06, | |
| "loss": 4.3359, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7211111111111111, | |
| "grad_norm": 2.989220380783081, | |
| "learning_rate": 8.997257512876108e-06, | |
| "loss": 4.7121, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.7222222222222222, | |
| "grad_norm": 2.0871849060058594, | |
| "learning_rate": 8.930309757836517e-06, | |
| "loss": 4.7126, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7233333333333334, | |
| "grad_norm": 2.9518845081329346, | |
| "learning_rate": 8.863557806901233e-06, | |
| "loss": 3.8763, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.7244444444444444, | |
| "grad_norm": 3.165712356567383, | |
| "learning_rate": 8.797002473421728e-06, | |
| "loss": 4.0458, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7255555555555555, | |
| "grad_norm": 2.202949285507202, | |
| "learning_rate": 8.73064456835373e-06, | |
| "loss": 4.5542, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.7266666666666667, | |
| "grad_norm": 3.328310489654541, | |
| "learning_rate": 8.664484900247363e-06, | |
| "loss": 4.3315, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.7277777777777777, | |
| "grad_norm": 3.02006459236145, | |
| "learning_rate": 8.598524275237322e-06, | |
| "loss": 4.4394, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.7288888888888889, | |
| "grad_norm": 2.5074918270111084, | |
| "learning_rate": 8.532763497032987e-06, | |
| "loss": 4.2377, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 2.847383975982666, | |
| "learning_rate": 8.467203366908707e-06, | |
| "loss": 4.1128, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.7311111111111112, | |
| "grad_norm": 2.9188661575317383, | |
| "learning_rate": 8.40184468369396e-06, | |
| "loss": 4.2968, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7322222222222222, | |
| "grad_norm": 3.0603976249694824, | |
| "learning_rate": 8.33668824376369e-06, | |
| "loss": 4.175, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.7333333333333333, | |
| "grad_norm": 3.114797592163086, | |
| "learning_rate": 8.271734841028553e-06, | |
| "loss": 3.9943, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7344444444444445, | |
| "grad_norm": 3.7101423740386963, | |
| "learning_rate": 8.206985266925249e-06, | |
| "loss": 4.4357, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.7355555555555555, | |
| "grad_norm": 4.916779041290283, | |
| "learning_rate": 8.142440310406924e-06, | |
| "loss": 4.9196, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7366666666666667, | |
| "grad_norm": 3.456704616546631, | |
| "learning_rate": 8.078100757933485e-06, | |
| "loss": 4.7176, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.7377777777777778, | |
| "grad_norm": 3.1686041355133057, | |
| "learning_rate": 8.013967393462094e-06, | |
| "loss": 4.3609, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.7388888888888889, | |
| "grad_norm": 2.5040740966796875, | |
| "learning_rate": 7.950040998437542e-06, | |
| "loss": 4.0855, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 3.923576831817627, | |
| "learning_rate": 7.886322351782783e-06, | |
| "loss": 3.7909, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7411111111111112, | |
| "grad_norm": 3.781975269317627, | |
| "learning_rate": 7.822812229889428e-06, | |
| "loss": 4.4285, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.7422222222222222, | |
| "grad_norm": 2.2183735370635986, | |
| "learning_rate": 7.759511406608255e-06, | |
| "loss": 4.2021, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.7433333333333333, | |
| "grad_norm": 2.5517868995666504, | |
| "learning_rate": 7.696420653239833e-06, | |
| "loss": 4.0788, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.7444444444444445, | |
| "grad_norm": 3.1512372493743896, | |
| "learning_rate": 7.633540738525066e-06, | |
| "loss": 4.128, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7455555555555555, | |
| "grad_norm": 2.474193811416626, | |
| "learning_rate": 7.570872428635889e-06, | |
| "loss": 4.6547, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.7466666666666667, | |
| "grad_norm": 3.1348423957824707, | |
| "learning_rate": 7.508416487165862e-06, | |
| "loss": 4.6837, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.7477777777777778, | |
| "grad_norm": 3.7456905841827393, | |
| "learning_rate": 7.4461736751209405e-06, | |
| "loss": 4.5965, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.7488888888888889, | |
| "grad_norm": 2.9826486110687256, | |
| "learning_rate": 7.384144750910133e-06, | |
| "loss": 4.1727, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 3.4859273433685303, | |
| "learning_rate": 7.3223304703363135e-06, | |
| "loss": 4.188, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.7511111111111111, | |
| "grad_norm": 3.679555892944336, | |
| "learning_rate": 7.260731586586983e-06, | |
| "loss": 4.3418, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7522222222222222, | |
| "grad_norm": 4.997726917266846, | |
| "learning_rate": 7.19934885022509e-06, | |
| "loss": 4.0094, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.7533333333333333, | |
| "grad_norm": 3.3679285049438477, | |
| "learning_rate": 7.138183009179922e-06, | |
| "loss": 4.3927, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7544444444444445, | |
| "grad_norm": 3.4834442138671875, | |
| "learning_rate": 7.0772348087379315e-06, | |
| "loss": 4.0955, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.7555555555555555, | |
| "grad_norm": 3.499994993209839, | |
| "learning_rate": 7.016504991533726e-06, | |
| "loss": 4.2323, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7566666666666667, | |
| "grad_norm": 2.678922176361084, | |
| "learning_rate": 6.9559942975409465e-06, | |
| "loss": 4.549, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.7577777777777778, | |
| "grad_norm": 2.43112850189209, | |
| "learning_rate": 6.895703464063319e-06, | |
| "loss": 4.4337, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7588888888888888, | |
| "grad_norm": 2.440561294555664, | |
| "learning_rate": 6.835633225725605e-06, | |
| "loss": 4.0068, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 3.2796149253845215, | |
| "learning_rate": 6.775784314464717e-06, | |
| "loss": 4.2503, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7611111111111111, | |
| "grad_norm": 3.672053098678589, | |
| "learning_rate": 6.716157459520739e-06, | |
| "loss": 3.8174, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.7622222222222222, | |
| "grad_norm": 4.125499248504639, | |
| "learning_rate": 6.656753387428089e-06, | |
| "loss": 4.1682, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7633333333333333, | |
| "grad_norm": 2.379180669784546, | |
| "learning_rate": 6.5975728220066425e-06, | |
| "loss": 3.9803, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.7644444444444445, | |
| "grad_norm": 2.5495798587799072, | |
| "learning_rate": 6.538616484352902e-06, | |
| "loss": 4.4606, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7655555555555555, | |
| "grad_norm": 3.079115629196167, | |
| "learning_rate": 6.47988509283125e-06, | |
| "loss": 4.4226, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.7666666666666667, | |
| "grad_norm": 3.088437795639038, | |
| "learning_rate": 6.421379363065142e-06, | |
| "loss": 4.5023, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7677777777777778, | |
| "grad_norm": 6.411847114562988, | |
| "learning_rate": 6.363100007928446e-06, | |
| "loss": 4.488, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.7688888888888888, | |
| "grad_norm": 2.622467517852783, | |
| "learning_rate": 6.305047737536707e-06, | |
| "loss": 3.7526, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 3.189143657684326, | |
| "learning_rate": 6.247223259238511e-06, | |
| "loss": 4.0969, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.7711111111111111, | |
| "grad_norm": 3.9353489875793457, | |
| "learning_rate": 6.189627277606894e-06, | |
| "loss": 4.0019, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7722222222222223, | |
| "grad_norm": 2.4755685329437256, | |
| "learning_rate": 6.1322604944307e-06, | |
| "loss": 4.0007, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7733333333333333, | |
| "grad_norm": 3.4721150398254395, | |
| "learning_rate": 6.075123608706093e-06, | |
| "loss": 4.1602, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7744444444444445, | |
| "grad_norm": 2.571910858154297, | |
| "learning_rate": 6.01821731662798e-06, | |
| "loss": 4.5001, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.7755555555555556, | |
| "grad_norm": 2.0862197875976562, | |
| "learning_rate": 5.961542311581586e-06, | |
| "loss": 4.2366, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.7766666666666666, | |
| "grad_norm": 2.0852468013763428, | |
| "learning_rate": 5.905099284133952e-06, | |
| "loss": 4.1254, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.7777777777777778, | |
| "grad_norm": 3.239201784133911, | |
| "learning_rate": 5.848888922025553e-06, | |
| "loss": 4.0255, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7788888888888889, | |
| "grad_norm": 3.3855128288269043, | |
| "learning_rate": 5.792911910161922e-06, | |
| "loss": 4.4192, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 2.8477554321289062, | |
| "learning_rate": 5.737168930605272e-06, | |
| "loss": 4.6303, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7811111111111111, | |
| "grad_norm": 2.2901785373687744, | |
| "learning_rate": 5.681660662566224e-06, | |
| "loss": 4.3732, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.7822222222222223, | |
| "grad_norm": 3.0778727531433105, | |
| "learning_rate": 5.626387782395512e-06, | |
| "loss": 3.9875, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.7833333333333333, | |
| "grad_norm": 2.725858449935913, | |
| "learning_rate": 5.571350963575728e-06, | |
| "loss": 4.274, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.7844444444444445, | |
| "grad_norm": 2.9397945404052734, | |
| "learning_rate": 5.5165508767131415e-06, | |
| "loss": 3.8244, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.7855555555555556, | |
| "grad_norm": 3.8967740535736084, | |
| "learning_rate": 5.461988189529529e-06, | |
| "loss": 4.3882, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.7866666666666666, | |
| "grad_norm": 3.6964597702026367, | |
| "learning_rate": 5.4076635668540075e-06, | |
| "loss": 4.6633, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7877777777777778, | |
| "grad_norm": 3.322463035583496, | |
| "learning_rate": 5.3535776706149505e-06, | |
| "loss": 4.0363, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.7888888888888889, | |
| "grad_norm": 3.0608179569244385, | |
| "learning_rate": 5.299731159831953e-06, | |
| "loss": 3.9402, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 3.0244638919830322, | |
| "learning_rate": 5.24612469060774e-06, | |
| "loss": 4.7072, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.7911111111111111, | |
| "grad_norm": 3.1370954513549805, | |
| "learning_rate": 5.192758916120236e-06, | |
| "loss": 3.6739, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.7922222222222223, | |
| "grad_norm": 2.96083402633667, | |
| "learning_rate": 5.139634486614544e-06, | |
| "loss": 4.561, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.7933333333333333, | |
| "grad_norm": 3.8822271823883057, | |
| "learning_rate": 5.086752049395094e-06, | |
| "loss": 4.6279, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7944444444444444, | |
| "grad_norm": 3.556574583053589, | |
| "learning_rate": 5.034112248817685e-06, | |
| "loss": 4.029, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.7955555555555556, | |
| "grad_norm": 2.4491796493530273, | |
| "learning_rate": 4.981715726281666e-06, | |
| "loss": 4.0463, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.7966666666666666, | |
| "grad_norm": 3.314884901046753, | |
| "learning_rate": 4.929563120222141e-06, | |
| "loss": 4.2907, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.7977777777777778, | |
| "grad_norm": 2.998528480529785, | |
| "learning_rate": 4.877655066102149e-06, | |
| "loss": 4.1238, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.7988888888888889, | |
| "grad_norm": 2.5107343196868896, | |
| "learning_rate": 4.825992196404957e-06, | |
| "loss": 4.8033, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 3.2697060108184814, | |
| "learning_rate": 4.7745751406263165e-06, | |
| "loss": 3.598, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8011111111111111, | |
| "grad_norm": 2.1032586097717285, | |
| "learning_rate": 4.723404525266839e-06, | |
| "loss": 4.0662, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.8022222222222222, | |
| "grad_norm": 2.2804932594299316, | |
| "learning_rate": 4.672480973824311e-06, | |
| "loss": 4.6152, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.8033333333333333, | |
| "grad_norm": 3.123626470565796, | |
| "learning_rate": 4.621805106786142e-06, | |
| "loss": 4.8937, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.8044444444444444, | |
| "grad_norm": 3.6036055088043213, | |
| "learning_rate": 4.571377541621788e-06, | |
| "loss": 4.5689, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.8055555555555556, | |
| "grad_norm": 3.6055924892425537, | |
| "learning_rate": 4.521198892775203e-06, | |
| "loss": 3.8795, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.8066666666666666, | |
| "grad_norm": 2.7153923511505127, | |
| "learning_rate": 4.4712697716574e-06, | |
| "loss": 4.3875, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.8077777777777778, | |
| "grad_norm": 3.3169379234313965, | |
| "learning_rate": 4.421590786638951e-06, | |
| "loss": 3.9778, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.8088888888888889, | |
| "grad_norm": 3.1773722171783447, | |
| "learning_rate": 4.372162543042624e-06, | |
| "loss": 4.7571, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 2.2997097969055176, | |
| "learning_rate": 4.322985643135952e-06, | |
| "loss": 4.4563, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.8111111111111111, | |
| "grad_norm": 3.0270705223083496, | |
| "learning_rate": 4.274060686123959e-06, | |
| "loss": 4.1449, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8122222222222222, | |
| "grad_norm": 3.159769296646118, | |
| "learning_rate": 4.225388268141797e-06, | |
| "loss": 4.2249, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.8133333333333334, | |
| "grad_norm": 3.2132275104522705, | |
| "learning_rate": 4.176968982247514e-06, | |
| "loss": 3.7882, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8144444444444444, | |
| "grad_norm": 2.145144462585449, | |
| "learning_rate": 4.128803418414839e-06, | |
| "loss": 3.8867, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.8155555555555556, | |
| "grad_norm": 3.366910696029663, | |
| "learning_rate": 4.08089216352596e-06, | |
| "loss": 4.5035, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8166666666666667, | |
| "grad_norm": 3.334970235824585, | |
| "learning_rate": 4.0332358013644016e-06, | |
| "loss": 3.9257, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.8177777777777778, | |
| "grad_norm": 3.1020681858062744, | |
| "learning_rate": 3.985834912607894e-06, | |
| "loss": 4.4492, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.8188888888888889, | |
| "grad_norm": 2.6478145122528076, | |
| "learning_rate": 3.938690074821313e-06, | |
| "loss": 3.7261, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 4.352097988128662, | |
| "learning_rate": 3.891801862449629e-06, | |
| "loss": 3.9445, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.8211111111111111, | |
| "grad_norm": 2.18900728225708, | |
| "learning_rate": 3.845170846810902e-06, | |
| "loss": 4.1073, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.8222222222222222, | |
| "grad_norm": 3.0373637676239014, | |
| "learning_rate": 3.798797596089351e-06, | |
| "loss": 4.4156, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.8233333333333334, | |
| "grad_norm": 2.5862083435058594, | |
| "learning_rate": 3.752682675328406e-06, | |
| "loss": 4.5457, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.8244444444444444, | |
| "grad_norm": 2.2058353424072266, | |
| "learning_rate": 3.7068266464238084e-06, | |
| "loss": 3.9846, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.8255555555555556, | |
| "grad_norm": 2.6216089725494385, | |
| "learning_rate": 3.661230068116811e-06, | |
| "loss": 4.0467, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.8266666666666667, | |
| "grad_norm": 2.514681816101074, | |
| "learning_rate": 3.6158934959873353e-06, | |
| "loss": 4.6638, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.8277777777777777, | |
| "grad_norm": 3.9455208778381348, | |
| "learning_rate": 3.5708174824471947e-06, | |
| "loss": 3.9174, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.8288888888888889, | |
| "grad_norm": 3.5356788635253906, | |
| "learning_rate": 3.5260025767333893e-06, | |
| "loss": 4.3817, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 2.5251104831695557, | |
| "learning_rate": 3.4814493249014116e-06, | |
| "loss": 4.3528, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.8311111111111111, | |
| "grad_norm": 3.5254111289978027, | |
| "learning_rate": 3.4371582698185633e-06, | |
| "loss": 3.896, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.8322222222222222, | |
| "grad_norm": 3.865900754928589, | |
| "learning_rate": 3.393129951157384e-06, | |
| "loss": 4.0786, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 3.2861948013305664, | |
| "learning_rate": 3.3493649053890326e-06, | |
| "loss": 4.2953, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.8344444444444444, | |
| "grad_norm": 2.263437509536743, | |
| "learning_rate": 3.305863665776793e-06, | |
| "loss": 3.9898, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.8355555555555556, | |
| "grad_norm": 3.1119070053100586, | |
| "learning_rate": 3.262626762369525e-06, | |
| "loss": 4.3465, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.8366666666666667, | |
| "grad_norm": 3.526019334793091, | |
| "learning_rate": 3.219654721995266e-06, | |
| "loss": 4.4787, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.8377777777777777, | |
| "grad_norm": 3.2192864418029785, | |
| "learning_rate": 3.176948068254762e-06, | |
| "loss": 4.0421, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.8388888888888889, | |
| "grad_norm": 3.149193525314331, | |
| "learning_rate": 3.1345073215151066e-06, | |
| "loss": 4.7629, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 4.5595197677612305, | |
| "learning_rate": 3.092332998903416e-06, | |
| "loss": 4.4459, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.8411111111111111, | |
| "grad_norm": 2.972254991531372, | |
| "learning_rate": 3.0504256143004866e-06, | |
| "loss": 4.1769, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.8422222222222222, | |
| "grad_norm": 2.9443609714508057, | |
| "learning_rate": 3.0087856783345914e-06, | |
| "loss": 3.9543, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.8433333333333334, | |
| "grad_norm": 2.5057802200317383, | |
| "learning_rate": 2.967413698375196e-06, | |
| "loss": 4.6031, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.8444444444444444, | |
| "grad_norm": 3.078894853591919, | |
| "learning_rate": 2.9263101785268254e-06, | |
| "loss": 3.9584, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8455555555555555, | |
| "grad_norm": 2.3315460681915283, | |
| "learning_rate": 2.8854756196229016e-06, | |
| "loss": 4.0571, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.8466666666666667, | |
| "grad_norm": 3.303471326828003, | |
| "learning_rate": 2.8449105192196316e-06, | |
| "loss": 4.3249, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8477777777777777, | |
| "grad_norm": 3.592991590499878, | |
| "learning_rate": 2.8046153715899692e-06, | |
| "loss": 3.8605, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.8488888888888889, | |
| "grad_norm": 2.9544084072113037, | |
| "learning_rate": 2.764590667717562e-06, | |
| "loss": 4.0353, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 3.3249425888061523, | |
| "learning_rate": 2.7248368952908053e-06, | |
| "loss": 4.319, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.8511111111111112, | |
| "grad_norm": 4.874053001403809, | |
| "learning_rate": 2.6853545386968606e-06, | |
| "loss": 4.458, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.8522222222222222, | |
| "grad_norm": 2.6893086433410645, | |
| "learning_rate": 2.646144079015797e-06, | |
| "loss": 4.0627, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.8533333333333334, | |
| "grad_norm": 2.3588078022003174, | |
| "learning_rate": 2.6072059940146775e-06, | |
| "loss": 4.5383, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.8544444444444445, | |
| "grad_norm": 2.887497663497925, | |
| "learning_rate": 2.5685407581417907e-06, | |
| "loss": 4.0523, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.8555555555555555, | |
| "grad_norm": 2.968337059020996, | |
| "learning_rate": 2.5301488425208296e-06, | |
| "loss": 3.6994, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8566666666666667, | |
| "grad_norm": 2.897677183151245, | |
| "learning_rate": 2.492030714945162e-06, | |
| "loss": 4.2895, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.8577777777777778, | |
| "grad_norm": 4.195917129516602, | |
| "learning_rate": 2.454186839872158e-06, | |
| "loss": 3.8239, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.8588888888888889, | |
| "grad_norm": 3.7905352115631104, | |
| "learning_rate": 2.4166176784174795e-06, | |
| "loss": 4.0074, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 3.2009356021881104, | |
| "learning_rate": 2.379323688349516e-06, | |
| "loss": 4.156, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.8611111111111112, | |
| "grad_norm": 3.1370296478271484, | |
| "learning_rate": 2.3423053240837515e-06, | |
| "loss": 4.7336, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8622222222222222, | |
| "grad_norm": 2.4123268127441406, | |
| "learning_rate": 2.3055630366772856e-06, | |
| "loss": 4.3871, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8633333333333333, | |
| "grad_norm": 2.304750919342041, | |
| "learning_rate": 2.269097273823287e-06, | |
| "loss": 4.492, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.8644444444444445, | |
| "grad_norm": 3.0814766883850098, | |
| "learning_rate": 2.2329084798455746e-06, | |
| "loss": 4.9373, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8655555555555555, | |
| "grad_norm": 3.1735880374908447, | |
| "learning_rate": 2.1969970956931762e-06, | |
| "loss": 4.2959, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.8666666666666667, | |
| "grad_norm": 2.108222007751465, | |
| "learning_rate": 2.1613635589349756e-06, | |
| "loss": 4.3394, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8677777777777778, | |
| "grad_norm": 3.263927459716797, | |
| "learning_rate": 2.1260083037543817e-06, | |
| "loss": 4.5798, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.8688888888888889, | |
| "grad_norm": 2.2533228397369385, | |
| "learning_rate": 2.0909317609440095e-06, | |
| "loss": 4.4232, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 3.333630323410034, | |
| "learning_rate": 2.0561343579004715e-06, | |
| "loss": 4.1046, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.8711111111111111, | |
| "grad_norm": 3.1644370555877686, | |
| "learning_rate": 2.0216165186191407e-06, | |
| "loss": 4.2146, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.8722222222222222, | |
| "grad_norm": 3.5359654426574707, | |
| "learning_rate": 1.9873786636889906e-06, | |
| "loss": 3.8819, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.8733333333333333, | |
| "grad_norm": 4.151428699493408, | |
| "learning_rate": 1.95342121028749e-06, | |
| "loss": 3.9319, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8744444444444445, | |
| "grad_norm": 3.5409927368164062, | |
| "learning_rate": 1.9197445721754776e-06, | |
| "loss": 4.463, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.8755555555555555, | |
| "grad_norm": 2.9611496925354004, | |
| "learning_rate": 1.8863491596921745e-06, | |
| "loss": 3.84, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.8766666666666667, | |
| "grad_norm": 2.815295934677124, | |
| "learning_rate": 1.8532353797501318e-06, | |
| "loss": 4.3042, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.8777777777777778, | |
| "grad_norm": 3.664135456085205, | |
| "learning_rate": 1.8204036358303173e-06, | |
| "loss": 3.7069, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8788888888888889, | |
| "grad_norm": 2.666962146759033, | |
| "learning_rate": 1.787854327977162e-06, | |
| "loss": 3.9498, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 3.5214672088623047, | |
| "learning_rate": 1.7555878527937164e-06, | |
| "loss": 4.15, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.8811111111111111, | |
| "grad_norm": 3.4584808349609375, | |
| "learning_rate": 1.7236046034367958e-06, | |
| "loss": 4.2487, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.8822222222222222, | |
| "grad_norm": 3.4059367179870605, | |
| "learning_rate": 1.6919049696121958e-06, | |
| "loss": 4.1058, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.8833333333333333, | |
| "grad_norm": 2.7756285667419434, | |
| "learning_rate": 1.6604893375699594e-06, | |
| "loss": 4.0675, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.8844444444444445, | |
| "grad_norm": 2.357132911682129, | |
| "learning_rate": 1.629358090099639e-06, | |
| "loss": 4.3934, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.8855555555555555, | |
| "grad_norm": 3.2852492332458496, | |
| "learning_rate": 1.5985116065256684e-06, | |
| "loss": 4.4737, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.8866666666666667, | |
| "grad_norm": 2.238274335861206, | |
| "learning_rate": 1.5679502627027136e-06, | |
| "loss": 4.4498, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.8877777777777778, | |
| "grad_norm": 2.4234659671783447, | |
| "learning_rate": 1.5376744310111019e-06, | |
| "loss": 4.5667, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 3.9129526615142822, | |
| "learning_rate": 1.5076844803522922e-06, | |
| "loss": 4.4, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 2.525909900665283, | |
| "learning_rate": 1.4779807761443636e-06, | |
| "loss": 3.7143, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.8911111111111111, | |
| "grad_norm": 3.369392156600952, | |
| "learning_rate": 1.4485636803175829e-06, | |
| "loss": 3.9016, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.8922222222222222, | |
| "grad_norm": 3.97363543510437, | |
| "learning_rate": 1.4194335513099761e-06, | |
| "loss": 4.5144, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.8933333333333333, | |
| "grad_norm": 3.287062883377075, | |
| "learning_rate": 1.3905907440629752e-06, | |
| "loss": 3.7106, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.8944444444444445, | |
| "grad_norm": 3.274184465408325, | |
| "learning_rate": 1.362035610017079e-06, | |
| "loss": 4.6111, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.8955555555555555, | |
| "grad_norm": 2.367525815963745, | |
| "learning_rate": 1.333768497107593e-06, | |
| "loss": 4.1843, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.8966666666666666, | |
| "grad_norm": 3.559544324874878, | |
| "learning_rate": 1.305789749760361e-06, | |
| "loss": 4.0371, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.8977777777777778, | |
| "grad_norm": 2.5813517570495605, | |
| "learning_rate": 1.2780997088875869e-06, | |
| "loss": 4.2183, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.8988888888888888, | |
| "grad_norm": 3.533015012741089, | |
| "learning_rate": 1.250698711883691e-06, | |
| "loss": 4.4354, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 3.268846273422241, | |
| "learning_rate": 1.2235870926211619e-06, | |
| "loss": 4.167, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9011111111111111, | |
| "grad_norm": 2.674999237060547, | |
| "learning_rate": 1.1967651814465354e-06, | |
| "loss": 4.5461, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.9022222222222223, | |
| "grad_norm": 2.9661448001861572, | |
| "learning_rate": 1.170233305176327e-06, | |
| "loss": 4.4819, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9033333333333333, | |
| "grad_norm": 3.1719062328338623, | |
| "learning_rate": 1.1439917870930793e-06, | |
| "loss": 4.7351, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.9044444444444445, | |
| "grad_norm": 2.9525768756866455, | |
| "learning_rate": 1.1180409469414094e-06, | |
| "loss": 4.0522, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9055555555555556, | |
| "grad_norm": 3.972069025039673, | |
| "learning_rate": 1.0923811009241142e-06, | |
| "loss": 3.8434, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.9066666666666666, | |
| "grad_norm": 2.4678852558135986, | |
| "learning_rate": 1.067012561698319e-06, | |
| "loss": 4.2084, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.9077777777777778, | |
| "grad_norm": 2.2604753971099854, | |
| "learning_rate": 1.0419356383716688e-06, | |
| "loss": 4.3375, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.9088888888888889, | |
| "grad_norm": 3.653369426727295, | |
| "learning_rate": 1.0171506364985622e-06, | |
| "loss": 4.2001, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 2.612713575363159, | |
| "learning_rate": 9.926578580764234e-07, | |
| "loss": 3.8888, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.9111111111111111, | |
| "grad_norm": 3.2685186862945557, | |
| "learning_rate": 9.684576015420278e-07, | |
| "loss": 3.8959, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.9122222222222223, | |
| "grad_norm": 3.556123733520508, | |
| "learning_rate": 9.445501617678654e-07, | |
| "loss": 3.9459, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.9133333333333333, | |
| "grad_norm": 5.377536773681641, | |
| "learning_rate": 9.209358300585474e-07, | |
| "loss": 3.5557, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.9144444444444444, | |
| "grad_norm": 2.2200653553009033, | |
| "learning_rate": 8.976148941472501e-07, | |
| "loss": 4.1984, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.9155555555555556, | |
| "grad_norm": 2.290778875350952, | |
| "learning_rate": 8.745876381922147e-07, | |
| "loss": 3.9423, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.9166666666666666, | |
| "grad_norm": 3.241596221923828, | |
| "learning_rate": 8.51854342773295e-07, | |
| "loss": 4.2498, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.9177777777777778, | |
| "grad_norm": 4.267802715301514, | |
| "learning_rate": 8.294152848885157e-07, | |
| "loss": 3.9039, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.9188888888888889, | |
| "grad_norm": 2.7669568061828613, | |
| "learning_rate": 8.072707379507216e-07, | |
| "loss": 4.0191, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 3.132392168045044, | |
| "learning_rate": 7.854209717842231e-07, | |
| "loss": 4.8574, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.9211111111111111, | |
| "grad_norm": 2.444495916366577, | |
| "learning_rate": 7.638662526215284e-07, | |
| "loss": 4.0902, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.9222222222222223, | |
| "grad_norm": 3.362197160720825, | |
| "learning_rate": 7.426068431000882e-07, | |
| "loss": 4.5238, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.9233333333333333, | |
| "grad_norm": 4.276552200317383, | |
| "learning_rate": 7.216430022591008e-07, | |
| "loss": 3.5991, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.9244444444444444, | |
| "grad_norm": 2.867725372314453, | |
| "learning_rate": 7.009749855363456e-07, | |
| "loss": 4.3119, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.9255555555555556, | |
| "grad_norm": 3.021606922149658, | |
| "learning_rate": 6.806030447650879e-07, | |
| "loss": 3.9519, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.9266666666666666, | |
| "grad_norm": 3.299363851547241, | |
| "learning_rate": 6.605274281709928e-07, | |
| "loss": 3.9062, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.9277777777777778, | |
| "grad_norm": 3.487799644470215, | |
| "learning_rate": 6.407483803691216e-07, | |
| "loss": 4.1561, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.9288888888888889, | |
| "grad_norm": 3.5065345764160156, | |
| "learning_rate": 6.212661423609184e-07, | |
| "loss": 3.9922, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 2.3422510623931885, | |
| "learning_rate": 6.020809515313142e-07, | |
| "loss": 4.5369, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.9311111111111111, | |
| "grad_norm": 3.0932629108428955, | |
| "learning_rate": 5.83193041645802e-07, | |
| "loss": 3.8776, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.9322222222222222, | |
| "grad_norm": 2.2441000938415527, | |
| "learning_rate": 5.646026428476031e-07, | |
| "loss": 4.301, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.9333333333333333, | |
| "grad_norm": 3.151946783065796, | |
| "learning_rate": 5.463099816548579e-07, | |
| "loss": 4.5223, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9344444444444444, | |
| "grad_norm": 2.9941930770874023, | |
| "learning_rate": 5.283152809578751e-07, | |
| "loss": 4.2136, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.9355555555555556, | |
| "grad_norm": 3.5801138877868652, | |
| "learning_rate": 5.106187600163987e-07, | |
| "loss": 4.0182, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.9366666666666666, | |
| "grad_norm": 2.327622413635254, | |
| "learning_rate": 4.932206344569562e-07, | |
| "loss": 4.3929, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.9377777777777778, | |
| "grad_norm": 2.793179512023926, | |
| "learning_rate": 4.7612111627021175e-07, | |
| "loss": 4.1198, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.9388888888888889, | |
| "grad_norm": 2.1341638565063477, | |
| "learning_rate": 4.5932041380840065e-07, | |
| "loss": 4.2686, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 2.8225035667419434, | |
| "learning_rate": 4.4281873178278475e-07, | |
| "loss": 4.3253, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.9411111111111111, | |
| "grad_norm": 2.065812349319458, | |
| "learning_rate": 4.26616271261146e-07, | |
| "loss": 4.5926, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.9422222222222222, | |
| "grad_norm": 3.443786859512329, | |
| "learning_rate": 4.107132296653549e-07, | |
| "loss": 4.6865, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.9433333333333334, | |
| "grad_norm": 3.73835825920105, | |
| "learning_rate": 3.95109800768953e-07, | |
| "loss": 4.357, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.9444444444444444, | |
| "grad_norm": 3.801854372024536, | |
| "learning_rate": 3.7980617469479953e-07, | |
| "loss": 4.4399, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.9455555555555556, | |
| "grad_norm": 2.6715407371520996, | |
| "learning_rate": 3.6480253791274786e-07, | |
| "loss": 4.0495, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.9466666666666667, | |
| "grad_norm": 2.8497438430786133, | |
| "learning_rate": 3.5009907323737825e-07, | |
| "loss": 4.6042, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9477777777777778, | |
| "grad_norm": 2.0767159461975098, | |
| "learning_rate": 3.3569595982576583e-07, | |
| "loss": 4.3431, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.9488888888888889, | |
| "grad_norm": 2.8185853958129883, | |
| "learning_rate": 3.215933731753024e-07, | |
| "loss": 4.2526, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 2.341989040374756, | |
| "learning_rate": 3.077914851215585e-07, | |
| "loss": 4.798, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.9511111111111111, | |
| "grad_norm": 2.7204387187957764, | |
| "learning_rate": 2.942904638361804e-07, | |
| "loss": 4.4965, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9522222222222222, | |
| "grad_norm": 3.139683485031128, | |
| "learning_rate": 2.810904738248549e-07, | |
| "loss": 4.7001, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.9533333333333334, | |
| "grad_norm": 3.4762489795684814, | |
| "learning_rate": 2.681916759252917e-07, | |
| "loss": 4.2364, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.9544444444444444, | |
| "grad_norm": 3.4384474754333496, | |
| "learning_rate": 2.555942273052753e-07, | |
| "loss": 4.2242, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.9555555555555556, | |
| "grad_norm": 7.5540571212768555, | |
| "learning_rate": 2.4329828146074095e-07, | |
| "loss": 4.4938, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9566666666666667, | |
| "grad_norm": 3.0467262268066406, | |
| "learning_rate": 2.3130398821391007e-07, | |
| "loss": 3.6816, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.9577777777777777, | |
| "grad_norm": 2.601795196533203, | |
| "learning_rate": 2.1961149371145795e-07, | |
| "loss": 4.1958, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.9588888888888889, | |
| "grad_norm": 4.4815216064453125, | |
| "learning_rate": 2.0822094042274032e-07, | |
| "loss": 3.994, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 3.6530420780181885, | |
| "learning_rate": 1.9713246713805588e-07, | |
| "loss": 4.3858, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9611111111111111, | |
| "grad_norm": 3.1802806854248047, | |
| "learning_rate": 1.8634620896695043e-07, | |
| "loss": 4.0963, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.9622222222222222, | |
| "grad_norm": 3.68103289604187, | |
| "learning_rate": 1.7586229733657644e-07, | |
| "loss": 4.4613, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.9633333333333334, | |
| "grad_norm": 2.8591842651367188, | |
| "learning_rate": 1.6568085999008888e-07, | |
| "loss": 4.3585, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.9644444444444444, | |
| "grad_norm": 2.928494691848755, | |
| "learning_rate": 1.5580202098509077e-07, | |
| "loss": 4.3471, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.9655555555555555, | |
| "grad_norm": 2.9359610080718994, | |
| "learning_rate": 1.4622590069211516e-07, | |
| "loss": 4.5431, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.9666666666666667, | |
| "grad_norm": 2.3987393379211426, | |
| "learning_rate": 1.3695261579316777e-07, | |
| "loss": 3.9504, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9677777777777777, | |
| "grad_norm": 2.556596040725708, | |
| "learning_rate": 1.2798227928029482e-07, | |
| "loss": 3.7707, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.9688888888888889, | |
| "grad_norm": 2.1739792823791504, | |
| "learning_rate": 1.193150004542204e-07, | |
| "loss": 4.1831, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 2.5820603370666504, | |
| "learning_rate": 1.109508849230001e-07, | |
| "loss": 4.4525, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.9711111111111111, | |
| "grad_norm": 4.120064735412598, | |
| "learning_rate": 1.0289003460074165e-07, | |
| "loss": 3.9264, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.9722222222222222, | |
| "grad_norm": 3.187326669692993, | |
| "learning_rate": 9.513254770636137e-08, | |
| "loss": 4.4163, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.9733333333333334, | |
| "grad_norm": 2.66953706741333, | |
| "learning_rate": 8.767851876239074e-08, | |
| "loss": 4.3193, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.9744444444444444, | |
| "grad_norm": 3.3307957649230957, | |
| "learning_rate": 8.052803859382174e-08, | |
| "loss": 4.6534, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.9755555555555555, | |
| "grad_norm": 3.617739200592041, | |
| "learning_rate": 7.368119432699383e-08, | |
| "loss": 4.4387, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.9766666666666667, | |
| "grad_norm": 2.623586654663086, | |
| "learning_rate": 6.71380693885476e-08, | |
| "loss": 4.3379, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.9777777777777777, | |
| "grad_norm": 3.5560302734375, | |
| "learning_rate": 6.089874350439506e-08, | |
| "loss": 4.536, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9788888888888889, | |
| "grad_norm": 3.401707887649536, | |
| "learning_rate": 5.496329269875089e-08, | |
| "loss": 4.1187, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 2.7142131328582764, | |
| "learning_rate": 4.9331789293211026e-08, | |
| "loss": 4.5272, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.9811111111111112, | |
| "grad_norm": 3.471452236175537, | |
| "learning_rate": 4.400430190586724e-08, | |
| "loss": 4.081, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.9822222222222222, | |
| "grad_norm": 3.4180078506469727, | |
| "learning_rate": 3.8980895450474455e-08, | |
| "loss": 4.3884, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.9833333333333333, | |
| "grad_norm": 3.2058937549591064, | |
| "learning_rate": 3.426163113565417e-08, | |
| "loss": 4.178, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.9844444444444445, | |
| "grad_norm": 2.5083703994750977, | |
| "learning_rate": 2.9846566464150626e-08, | |
| "loss": 4.1643, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.9855555555555555, | |
| "grad_norm": 2.149996042251587, | |
| "learning_rate": 2.5735755232134118e-08, | |
| "loss": 4.0516, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.9866666666666667, | |
| "grad_norm": 3.1104636192321777, | |
| "learning_rate": 2.192924752854042e-08, | |
| "loss": 4.1596, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.9877777777777778, | |
| "grad_norm": 3.0319011211395264, | |
| "learning_rate": 1.842708973447127e-08, | |
| "loss": 4.1681, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.9888888888888889, | |
| "grad_norm": 2.075939893722534, | |
| "learning_rate": 1.522932452260595e-08, | |
| "loss": 4.1082, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 2.8124101161956787, | |
| "learning_rate": 1.233599085671e-08, | |
| "loss": 4.0637, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.9911111111111112, | |
| "grad_norm": 3.1139042377471924, | |
| "learning_rate": 9.747123991141194e-09, | |
| "loss": 3.8563, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.9922222222222222, | |
| "grad_norm": 2.973275661468506, | |
| "learning_rate": 7.462755470422078e-09, | |
| "loss": 4.8157, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.9933333333333333, | |
| "grad_norm": 3.155707359313965, | |
| "learning_rate": 5.48291312886251e-09, | |
| "loss": 4.6411, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.9944444444444445, | |
| "grad_norm": 4.164730548858643, | |
| "learning_rate": 3.807621090218261e-09, | |
| "loss": 3.6138, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.9955555555555555, | |
| "grad_norm": 2.4967424869537354, | |
| "learning_rate": 2.4368997673940297e-09, | |
| "loss": 3.9694, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.9966666666666667, | |
| "grad_norm": 2.2673118114471436, | |
| "learning_rate": 1.3707658621964215e-09, | |
| "loss": 4.1773, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.9977777777777778, | |
| "grad_norm": 2.343024730682373, | |
| "learning_rate": 6.092323651313292e-10, | |
| "loss": 4.2347, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.9988888888888889, | |
| "grad_norm": 3.1533758640289307, | |
| "learning_rate": 1.5230855524017708e-10, | |
| "loss": 4.2303, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.3015313148498535, | |
| "learning_rate": 0.0, | |
| "loss": 4.4286, | |
| "step": 900 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 900, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0254817236897792e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |