| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.999803343166175, |
| "eval_steps": 500, |
| "global_step": 2542, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00039331366764995085, |
| "grad_norm": 7.214968204498291, |
| "learning_rate": 1.9607843137254902e-08, |
| "loss": 0.1717, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0007866273352999017, |
| "grad_norm": 8.45617961883545, |
| "learning_rate": 3.9215686274509804e-08, |
| "loss": 0.1394, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0011799410029498525, |
| "grad_norm": 9.644225120544434, |
| "learning_rate": 5.882352941176471e-08, |
| "loss": 0.1416, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0015732546705998034, |
| "grad_norm": 6.772904872894287, |
| "learning_rate": 7.843137254901961e-08, |
| "loss": 0.1696, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0019665683382497543, |
| "grad_norm": 11.89709758758545, |
| "learning_rate": 9.803921568627452e-08, |
| "loss": 0.2043, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002359882005899705, |
| "grad_norm": 30.768009185791016, |
| "learning_rate": 1.1764705882352942e-07, |
| "loss": 0.1557, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0027531956735496557, |
| "grad_norm": 7.8864569664001465, |
| "learning_rate": 1.3725490196078432e-07, |
| "loss": 0.1478, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.003146509341199607, |
| "grad_norm": 10.4628267288208, |
| "learning_rate": 1.5686274509803921e-07, |
| "loss": 0.162, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0035398230088495575, |
| "grad_norm": 8.983762741088867, |
| "learning_rate": 1.7647058823529414e-07, |
| "loss": 0.1482, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.003933136676499509, |
| "grad_norm": 9.961833953857422, |
| "learning_rate": 1.9607843137254904e-07, |
| "loss": 0.1851, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004326450344149459, |
| "grad_norm": 7.383552074432373, |
| "learning_rate": 2.1568627450980394e-07, |
| "loss": 0.1483, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00471976401179941, |
| "grad_norm": 10.243701934814453, |
| "learning_rate": 2.3529411764705883e-07, |
| "loss": 0.1457, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.005113077679449361, |
| "grad_norm": 9.73193645477295, |
| "learning_rate": 2.5490196078431376e-07, |
| "loss": 0.1623, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.005506391347099311, |
| "grad_norm": 6.044100284576416, |
| "learning_rate": 2.7450980392156863e-07, |
| "loss": 0.1346, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0058997050147492625, |
| "grad_norm": 28.241085052490234, |
| "learning_rate": 2.9411764705882356e-07, |
| "loss": 0.1583, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.006293018682399214, |
| "grad_norm": 11.225924491882324, |
| "learning_rate": 3.1372549019607843e-07, |
| "loss": 0.1781, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.006686332350049164, |
| "grad_norm": 9.774815559387207, |
| "learning_rate": 3.3333333333333335e-07, |
| "loss": 0.1567, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.007079646017699115, |
| "grad_norm": 10.569445610046387, |
| "learning_rate": 3.529411764705883e-07, |
| "loss": 0.1362, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.007472959685349066, |
| "grad_norm": 6.202274322509766, |
| "learning_rate": 3.7254901960784315e-07, |
| "loss": 0.1493, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.007866273352999017, |
| "grad_norm": 9.480630874633789, |
| "learning_rate": 3.921568627450981e-07, |
| "loss": 0.1528, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.008259587020648967, |
| "grad_norm": 13.586874008178711, |
| "learning_rate": 4.1176470588235295e-07, |
| "loss": 0.1266, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.008652900688298918, |
| "grad_norm": 11.455598831176758, |
| "learning_rate": 4.3137254901960787e-07, |
| "loss": 0.1225, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.00904621435594887, |
| "grad_norm": 12.348589897155762, |
| "learning_rate": 4.509803921568628e-07, |
| "loss": 0.1863, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.00943952802359882, |
| "grad_norm": 7.493137836456299, |
| "learning_rate": 4.7058823529411767e-07, |
| "loss": 0.1214, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.00983284169124877, |
| "grad_norm": 11.203600883483887, |
| "learning_rate": 4.901960784313725e-07, |
| "loss": 0.1511, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.010226155358898722, |
| "grad_norm": 10.017373085021973, |
| "learning_rate": 5.098039215686275e-07, |
| "loss": 0.1464, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.010619469026548672, |
| "grad_norm": 7.930361270904541, |
| "learning_rate": 5.294117647058824e-07, |
| "loss": 0.1716, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.011012782694198623, |
| "grad_norm": 6.609414577484131, |
| "learning_rate": 5.490196078431373e-07, |
| "loss": 0.1499, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.011406096361848575, |
| "grad_norm": 9.198175430297852, |
| "learning_rate": 5.686274509803922e-07, |
| "loss": 0.1513, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.011799410029498525, |
| "grad_norm": 7.527069091796875, |
| "learning_rate": 5.882352941176471e-07, |
| "loss": 0.1344, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.012192723697148475, |
| "grad_norm": 25.97745704650879, |
| "learning_rate": 6.07843137254902e-07, |
| "loss": 0.1025, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.012586037364798427, |
| "grad_norm": 6.214263916015625, |
| "learning_rate": 6.274509803921569e-07, |
| "loss": 0.1387, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.012979351032448377, |
| "grad_norm": 7.101906776428223, |
| "learning_rate": 6.470588235294118e-07, |
| "loss": 0.1335, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.013372664700098328, |
| "grad_norm": 7.696187496185303, |
| "learning_rate": 6.666666666666667e-07, |
| "loss": 0.1277, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01376597836774828, |
| "grad_norm": 9.324244499206543, |
| "learning_rate": 6.862745098039217e-07, |
| "loss": 0.1512, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01415929203539823, |
| "grad_norm": 3.9664223194122314, |
| "learning_rate": 7.058823529411766e-07, |
| "loss": 0.0816, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01455260570304818, |
| "grad_norm": 4.77344274520874, |
| "learning_rate": 7.254901960784315e-07, |
| "loss": 0.1036, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.014945919370698132, |
| "grad_norm": 5.8425612449646, |
| "learning_rate": 7.450980392156863e-07, |
| "loss": 0.0857, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.015339233038348082, |
| "grad_norm": 4.707705020904541, |
| "learning_rate": 7.647058823529413e-07, |
| "loss": 0.0905, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.015732546705998034, |
| "grad_norm": 8.28884220123291, |
| "learning_rate": 7.843137254901962e-07, |
| "loss": 0.1273, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.016125860373647983, |
| "grad_norm": 5.381669998168945, |
| "learning_rate": 8.039215686274511e-07, |
| "loss": 0.0938, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.016519174041297935, |
| "grad_norm": 4.281416893005371, |
| "learning_rate": 8.235294117647059e-07, |
| "loss": 0.0935, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.016912487708947887, |
| "grad_norm": 6.621143817901611, |
| "learning_rate": 8.431372549019609e-07, |
| "loss": 0.1002, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.017305801376597835, |
| "grad_norm": 4.4914350509643555, |
| "learning_rate": 8.627450980392157e-07, |
| "loss": 0.097, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.017699115044247787, |
| "grad_norm": 3.7035109996795654, |
| "learning_rate": 8.823529411764707e-07, |
| "loss": 0.0887, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01809242871189774, |
| "grad_norm": 4.306455612182617, |
| "learning_rate": 9.019607843137256e-07, |
| "loss": 0.1027, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.018485742379547688, |
| "grad_norm": 5.768416881561279, |
| "learning_rate": 9.215686274509806e-07, |
| "loss": 0.1006, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.01887905604719764, |
| "grad_norm": 19.471040725708008, |
| "learning_rate": 9.411764705882353e-07, |
| "loss": 0.1178, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.019272369714847592, |
| "grad_norm": 6.249476432800293, |
| "learning_rate": 9.607843137254904e-07, |
| "loss": 0.1, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.01966568338249754, |
| "grad_norm": 5.785927772521973, |
| "learning_rate": 9.80392156862745e-07, |
| "loss": 0.078, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.020058997050147492, |
| "grad_norm": 6.312557220458984, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.1117, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.020452310717797444, |
| "grad_norm": 3.5102477073669434, |
| "learning_rate": 1.019607843137255e-06, |
| "loss": 0.0913, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.020845624385447393, |
| "grad_norm": 6.845943450927734, |
| "learning_rate": 1.03921568627451e-06, |
| "loss": 0.1353, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.021238938053097345, |
| "grad_norm": 5.505466461181641, |
| "learning_rate": 1.0588235294117648e-06, |
| "loss": 0.0965, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.021632251720747297, |
| "grad_norm": 4.362204551696777, |
| "learning_rate": 1.0784313725490197e-06, |
| "loss": 0.0844, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.022025565388397245, |
| "grad_norm": 4.358127117156982, |
| "learning_rate": 1.0980392156862745e-06, |
| "loss": 0.1155, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.022418879056047197, |
| "grad_norm": 7.55561637878418, |
| "learning_rate": 1.1176470588235296e-06, |
| "loss": 0.0742, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02281219272369715, |
| "grad_norm": 5.882073879241943, |
| "learning_rate": 1.1372549019607845e-06, |
| "loss": 0.1112, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.023205506391347098, |
| "grad_norm": 2.456120491027832, |
| "learning_rate": 1.1568627450980394e-06, |
| "loss": 0.0605, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.02359882005899705, |
| "grad_norm": 19.60419273376465, |
| "learning_rate": 1.1764705882352942e-06, |
| "loss": 0.1267, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.023992133726647002, |
| "grad_norm": 3.074788808822632, |
| "learning_rate": 1.196078431372549e-06, |
| "loss": 0.0821, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.02438544739429695, |
| "grad_norm": 3.561314344406128, |
| "learning_rate": 1.215686274509804e-06, |
| "loss": 0.0572, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.024778761061946902, |
| "grad_norm": 13.668036460876465, |
| "learning_rate": 1.235294117647059e-06, |
| "loss": 0.1268, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.025172074729596854, |
| "grad_norm": 3.8883397579193115, |
| "learning_rate": 1.2549019607843137e-06, |
| "loss": 0.0849, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.025565388397246803, |
| "grad_norm": 4.154886245727539, |
| "learning_rate": 1.2745098039215686e-06, |
| "loss": 0.1071, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.025958702064896755, |
| "grad_norm": 5.3974127769470215, |
| "learning_rate": 1.2941176470588237e-06, |
| "loss": 0.0749, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.026352015732546707, |
| "grad_norm": 3.088780164718628, |
| "learning_rate": 1.3137254901960785e-06, |
| "loss": 0.0768, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.026745329400196655, |
| "grad_norm": 3.2044262886047363, |
| "learning_rate": 1.3333333333333334e-06, |
| "loss": 0.0641, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.027138643067846607, |
| "grad_norm": 5.424925327301025, |
| "learning_rate": 1.3529411764705883e-06, |
| "loss": 0.061, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.02753195673549656, |
| "grad_norm": 4.061574935913086, |
| "learning_rate": 1.3725490196078434e-06, |
| "loss": 0.0851, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.027925270403146508, |
| "grad_norm": 5.696750164031982, |
| "learning_rate": 1.3921568627450982e-06, |
| "loss": 0.1107, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.02831858407079646, |
| "grad_norm": 4.410640716552734, |
| "learning_rate": 1.4117647058823531e-06, |
| "loss": 0.0714, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.028711897738446412, |
| "grad_norm": 6.307974815368652, |
| "learning_rate": 1.4313725490196078e-06, |
| "loss": 0.0866, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.02910521140609636, |
| "grad_norm": 2.53486967086792, |
| "learning_rate": 1.450980392156863e-06, |
| "loss": 0.0613, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.029498525073746312, |
| "grad_norm": 6.9410881996154785, |
| "learning_rate": 1.4705882352941177e-06, |
| "loss": 0.086, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.029891838741396264, |
| "grad_norm": 2.5871775150299072, |
| "learning_rate": 1.4901960784313726e-06, |
| "loss": 0.0507, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.030285152409046213, |
| "grad_norm": 2.2673654556274414, |
| "learning_rate": 1.5098039215686275e-06, |
| "loss": 0.0676, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.030678466076696165, |
| "grad_norm": 2.789076805114746, |
| "learning_rate": 1.5294117647058826e-06, |
| "loss": 0.0632, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.031071779744346117, |
| "grad_norm": 6.127337455749512, |
| "learning_rate": 1.5490196078431374e-06, |
| "loss": 0.0498, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.03146509341199607, |
| "grad_norm": 2.758253574371338, |
| "learning_rate": 1.5686274509803923e-06, |
| "loss": 0.0706, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03185840707964602, |
| "grad_norm": 6.687328815460205, |
| "learning_rate": 1.5882352941176472e-06, |
| "loss": 0.0961, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.032251720747295966, |
| "grad_norm": 7.499604225158691, |
| "learning_rate": 1.6078431372549023e-06, |
| "loss": 0.0715, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03264503441494592, |
| "grad_norm": 6.008899211883545, |
| "learning_rate": 1.6274509803921571e-06, |
| "loss": 0.123, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03303834808259587, |
| "grad_norm": 4.841026306152344, |
| "learning_rate": 1.6470588235294118e-06, |
| "loss": 0.0647, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03343166175024582, |
| "grad_norm": 3.0710766315460205, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.0372, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.033824975417895774, |
| "grad_norm": 3.3783321380615234, |
| "learning_rate": 1.6862745098039217e-06, |
| "loss": 0.0843, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.03421828908554572, |
| "grad_norm": 2.6547350883483887, |
| "learning_rate": 1.7058823529411766e-06, |
| "loss": 0.0589, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.03461160275319567, |
| "grad_norm": 3.6741859912872314, |
| "learning_rate": 1.7254901960784315e-06, |
| "loss": 0.0308, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.035004916420845626, |
| "grad_norm": 3.555490493774414, |
| "learning_rate": 1.7450980392156864e-06, |
| "loss": 0.0497, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.035398230088495575, |
| "grad_norm": 3.1174697875976562, |
| "learning_rate": 1.7647058823529414e-06, |
| "loss": 0.063, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03579154375614552, |
| "grad_norm": 4.790848255157471, |
| "learning_rate": 1.7843137254901963e-06, |
| "loss": 0.0834, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.03618485742379548, |
| "grad_norm": 3.2931265830993652, |
| "learning_rate": 1.8039215686274512e-06, |
| "loss": 0.0531, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.03657817109144543, |
| "grad_norm": 13.777477264404297, |
| "learning_rate": 1.8235294117647058e-06, |
| "loss": 0.0786, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.036971484759095376, |
| "grad_norm": 4.943524360656738, |
| "learning_rate": 1.8431372549019611e-06, |
| "loss": 0.0602, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.03736479842674533, |
| "grad_norm": 6.189723014831543, |
| "learning_rate": 1.8627450980392158e-06, |
| "loss": 0.0697, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03775811209439528, |
| "grad_norm": 3.5542352199554443, |
| "learning_rate": 1.8823529411764707e-06, |
| "loss": 0.0863, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.03815142576204523, |
| "grad_norm": 5.407109260559082, |
| "learning_rate": 1.9019607843137255e-06, |
| "loss": 0.088, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.038544739429695184, |
| "grad_norm": 3.3334732055664062, |
| "learning_rate": 1.921568627450981e-06, |
| "loss": 0.0889, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.03893805309734513, |
| "grad_norm": 2.48398756980896, |
| "learning_rate": 1.9411764705882353e-06, |
| "loss": 0.0483, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03933136676499508, |
| "grad_norm": 2.3380913734436035, |
| "learning_rate": 1.96078431372549e-06, |
| "loss": 0.0707, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.039724680432645036, |
| "grad_norm": 4.355076789855957, |
| "learning_rate": 1.980392156862745e-06, |
| "loss": 0.0639, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.040117994100294985, |
| "grad_norm": 4.081620693206787, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.06, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.04051130776794493, |
| "grad_norm": 4.437114715576172, |
| "learning_rate": 2.019607843137255e-06, |
| "loss": 0.1017, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.04090462143559489, |
| "grad_norm": 4.925793647766113, |
| "learning_rate": 2.03921568627451e-06, |
| "loss": 0.0934, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.04129793510324484, |
| "grad_norm": 2.085400104522705, |
| "learning_rate": 2.058823529411765e-06, |
| "loss": 0.058, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.041691248770894786, |
| "grad_norm": 2.8664395809173584, |
| "learning_rate": 2.07843137254902e-06, |
| "loss": 0.0709, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.04208456243854474, |
| "grad_norm": 1.7521601915359497, |
| "learning_rate": 2.0980392156862747e-06, |
| "loss": 0.031, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.04247787610619469, |
| "grad_norm": 3.7575159072875977, |
| "learning_rate": 2.1176470588235296e-06, |
| "loss": 0.0777, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.04287118977384464, |
| "grad_norm": 4.240278720855713, |
| "learning_rate": 2.1372549019607844e-06, |
| "loss": 0.0965, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.043264503441494594, |
| "grad_norm": 3.841932773590088, |
| "learning_rate": 2.1568627450980393e-06, |
| "loss": 0.0844, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04365781710914454, |
| "grad_norm": 4.4334397315979, |
| "learning_rate": 2.176470588235294e-06, |
| "loss": 0.0956, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.04405113077679449, |
| "grad_norm": 4.255678653717041, |
| "learning_rate": 2.196078431372549e-06, |
| "loss": 0.0855, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.044444444444444446, |
| "grad_norm": 2.3486170768737793, |
| "learning_rate": 2.215686274509804e-06, |
| "loss": 0.0417, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.044837758112094395, |
| "grad_norm": 2.222768783569336, |
| "learning_rate": 2.2352941176470592e-06, |
| "loss": 0.0556, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.04523107177974434, |
| "grad_norm": 2.750119686126709, |
| "learning_rate": 2.254901960784314e-06, |
| "loss": 0.0481, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0456243854473943, |
| "grad_norm": 4.375302314758301, |
| "learning_rate": 2.274509803921569e-06, |
| "loss": 0.098, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.04601769911504425, |
| "grad_norm": 3.7654221057891846, |
| "learning_rate": 2.2941176470588234e-06, |
| "loss": 0.1025, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.046411012782694196, |
| "grad_norm": 2.422442674636841, |
| "learning_rate": 2.3137254901960787e-06, |
| "loss": 0.0675, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.04680432645034415, |
| "grad_norm": 3.3458054065704346, |
| "learning_rate": 2.3333333333333336e-06, |
| "loss": 0.067, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0471976401179941, |
| "grad_norm": 2.7424211502075195, |
| "learning_rate": 2.3529411764705885e-06, |
| "loss": 0.0774, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04759095378564405, |
| "grad_norm": 3.4825127124786377, |
| "learning_rate": 2.3725490196078433e-06, |
| "loss": 0.086, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.047984267453294004, |
| "grad_norm": 55.36836242675781, |
| "learning_rate": 2.392156862745098e-06, |
| "loss": 0.0938, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.04837758112094395, |
| "grad_norm": 2.256223201751709, |
| "learning_rate": 2.411764705882353e-06, |
| "loss": 0.0673, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0487708947885939, |
| "grad_norm": 3.8095710277557373, |
| "learning_rate": 2.431372549019608e-06, |
| "loss": 0.0728, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.049164208456243856, |
| "grad_norm": 1.8562949895858765, |
| "learning_rate": 2.450980392156863e-06, |
| "loss": 0.0629, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.049557522123893805, |
| "grad_norm": 4.999472618103027, |
| "learning_rate": 2.470588235294118e-06, |
| "loss": 0.059, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.04995083579154375, |
| "grad_norm": 3.9088096618652344, |
| "learning_rate": 2.490196078431373e-06, |
| "loss": 0.0662, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.05034414945919371, |
| "grad_norm": 4.975748062133789, |
| "learning_rate": 2.5098039215686274e-06, |
| "loss": 0.0688, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.05073746312684366, |
| "grad_norm": 2.183948516845703, |
| "learning_rate": 2.5294117647058823e-06, |
| "loss": 0.0477, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.051130776794493606, |
| "grad_norm": 4.890422821044922, |
| "learning_rate": 2.549019607843137e-06, |
| "loss": 0.0793, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05152409046214356, |
| "grad_norm": 4.04612398147583, |
| "learning_rate": 2.568627450980392e-06, |
| "loss": 0.0705, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.05191740412979351, |
| "grad_norm": 2.8650074005126953, |
| "learning_rate": 2.5882352941176473e-06, |
| "loss": 0.0777, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.05231071779744346, |
| "grad_norm": 3.9029088020324707, |
| "learning_rate": 2.6078431372549022e-06, |
| "loss": 0.0766, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.052704031465093414, |
| "grad_norm": 2.4210422039031982, |
| "learning_rate": 2.627450980392157e-06, |
| "loss": 0.0663, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.05309734513274336, |
| "grad_norm": 3.0176892280578613, |
| "learning_rate": 2.647058823529412e-06, |
| "loss": 0.0703, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.05349065880039331, |
| "grad_norm": 13.886055946350098, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 0.064, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.053883972468043266, |
| "grad_norm": 2.40460205078125, |
| "learning_rate": 2.6862745098039217e-06, |
| "loss": 0.0492, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.054277286135693215, |
| "grad_norm": 3.829288959503174, |
| "learning_rate": 2.7058823529411766e-06, |
| "loss": 0.0564, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.05467059980334316, |
| "grad_norm": 2.2005629539489746, |
| "learning_rate": 2.7254901960784314e-06, |
| "loss": 0.0483, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.05506391347099312, |
| "grad_norm": 14.79651927947998, |
| "learning_rate": 2.7450980392156867e-06, |
| "loss": 0.0937, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05545722713864307, |
| "grad_norm": 1.6898876428604126, |
| "learning_rate": 2.7647058823529416e-06, |
| "loss": 0.0693, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.055850540806293016, |
| "grad_norm": 3.5447332859039307, |
| "learning_rate": 2.7843137254901965e-06, |
| "loss": 0.1311, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.05624385447394297, |
| "grad_norm": 2.291607618331909, |
| "learning_rate": 2.8039215686274514e-06, |
| "loss": 0.061, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.05663716814159292, |
| "grad_norm": 4.079521656036377, |
| "learning_rate": 2.8235294117647062e-06, |
| "loss": 0.1169, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.05703048180924287, |
| "grad_norm": 5.1168012619018555, |
| "learning_rate": 2.843137254901961e-06, |
| "loss": 0.0436, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.057423795476892824, |
| "grad_norm": 4.056823253631592, |
| "learning_rate": 2.8627450980392155e-06, |
| "loss": 0.09, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.05781710914454277, |
| "grad_norm": 2.1756484508514404, |
| "learning_rate": 2.8823529411764704e-06, |
| "loss": 0.0747, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.05821042281219272, |
| "grad_norm": 2.8064467906951904, |
| "learning_rate": 2.901960784313726e-06, |
| "loss": 0.0261, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.058603736479842676, |
| "grad_norm": 2.9834907054901123, |
| "learning_rate": 2.9215686274509806e-06, |
| "loss": 0.0735, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.058997050147492625, |
| "grad_norm": 15.821993827819824, |
| "learning_rate": 2.9411764705882355e-06, |
| "loss": 0.0835, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05939036381514257, |
| "grad_norm": 6.1172709465026855, |
| "learning_rate": 2.9607843137254903e-06, |
| "loss": 0.0621, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.05978367748279253, |
| "grad_norm": 3.961477041244507, |
| "learning_rate": 2.980392156862745e-06, |
| "loss": 0.0777, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.06017699115044248, |
| "grad_norm": 3.682879686355591, |
| "learning_rate": 3e-06, |
| "loss": 0.0836, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.060570304818092426, |
| "grad_norm": 1.2253718376159668, |
| "learning_rate": 3.019607843137255e-06, |
| "loss": 0.0255, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.06096361848574238, |
| "grad_norm": 2.107466220855713, |
| "learning_rate": 3.03921568627451e-06, |
| "loss": 0.0698, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.06135693215339233, |
| "grad_norm": 2.720797061920166, |
| "learning_rate": 3.058823529411765e-06, |
| "loss": 0.0683, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.06175024582104228, |
| "grad_norm": 2.0135252475738525, |
| "learning_rate": 3.07843137254902e-06, |
| "loss": 0.0594, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.062143559488692234, |
| "grad_norm": 2.011382579803467, |
| "learning_rate": 3.098039215686275e-06, |
| "loss": 0.0643, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.06253687315634218, |
| "grad_norm": 3.047201156616211, |
| "learning_rate": 3.1176470588235297e-06, |
| "loss": 0.0564, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.06293018682399214, |
| "grad_norm": 2.3302555084228516, |
| "learning_rate": 3.1372549019607846e-06, |
| "loss": 0.0404, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06332350049164208, |
| "grad_norm": 2.7288010120391846, |
| "learning_rate": 3.1568627450980395e-06, |
| "loss": 0.1009, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.06371681415929203, |
| "grad_norm": 2.852647304534912, |
| "learning_rate": 3.1764705882352943e-06, |
| "loss": 0.0508, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.06411012782694199, |
| "grad_norm": 2.101698637008667, |
| "learning_rate": 3.1960784313725492e-06, |
| "loss": 0.0814, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.06450344149459193, |
| "grad_norm": 2.864086151123047, |
| "learning_rate": 3.2156862745098045e-06, |
| "loss": 0.0543, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.06489675516224189, |
| "grad_norm": 2.587751865386963, |
| "learning_rate": 3.2352941176470594e-06, |
| "loss": 0.0753, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.06529006882989184, |
| "grad_norm": 1.5767340660095215, |
| "learning_rate": 3.2549019607843143e-06, |
| "loss": 0.0399, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.06568338249754178, |
| "grad_norm": 3.7279415130615234, |
| "learning_rate": 3.2745098039215687e-06, |
| "loss": 0.0804, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.06607669616519174, |
| "grad_norm": 2.9727795124053955, |
| "learning_rate": 3.2941176470588236e-06, |
| "loss": 0.0548, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.0664700098328417, |
| "grad_norm": 2.0582468509674072, |
| "learning_rate": 3.3137254901960785e-06, |
| "loss": 0.0656, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.06686332350049164, |
| "grad_norm": 7.246119499206543, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.0499, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06725663716814159, |
| "grad_norm": 70.4866714477539, |
| "learning_rate": 3.352941176470588e-06, |
| "loss": 0.0764, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.06764995083579155, |
| "grad_norm": 1.8262776136398315, |
| "learning_rate": 3.3725490196078435e-06, |
| "loss": 0.0497, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.06804326450344149, |
| "grad_norm": 2.6392412185668945, |
| "learning_rate": 3.3921568627450984e-06, |
| "loss": 0.072, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.06843657817109144, |
| "grad_norm": 1.2957279682159424, |
| "learning_rate": 3.4117647058823532e-06, |
| "loss": 0.0749, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.0688298918387414, |
| "grad_norm": 1.5801424980163574, |
| "learning_rate": 3.431372549019608e-06, |
| "loss": 0.0504, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06922320550639134, |
| "grad_norm": 1.6194735765457153, |
| "learning_rate": 3.450980392156863e-06, |
| "loss": 0.0396, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0696165191740413, |
| "grad_norm": 3.31343674659729, |
| "learning_rate": 3.470588235294118e-06, |
| "loss": 0.0624, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.07000983284169125, |
| "grad_norm": 2.1785762310028076, |
| "learning_rate": 3.4901960784313727e-06, |
| "loss": 0.0548, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.0704031465093412, |
| "grad_norm": 1.3683737516403198, |
| "learning_rate": 3.5098039215686276e-06, |
| "loss": 0.0274, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.07079646017699115, |
| "grad_norm": 3.2981035709381104, |
| "learning_rate": 3.529411764705883e-06, |
| "loss": 0.0816, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0711897738446411, |
| "grad_norm": 2.3660190105438232, |
| "learning_rate": 3.5490196078431378e-06, |
| "loss": 0.0445, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.07158308751229105, |
| "grad_norm": 3.4103376865386963, |
| "learning_rate": 3.5686274509803926e-06, |
| "loss": 0.0959, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.071976401179941, |
| "grad_norm": 2.7939486503601074, |
| "learning_rate": 3.5882352941176475e-06, |
| "loss": 0.096, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.07236971484759096, |
| "grad_norm": 2.009209632873535, |
| "learning_rate": 3.6078431372549024e-06, |
| "loss": 0.0548, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0727630285152409, |
| "grad_norm": 1.9003010988235474, |
| "learning_rate": 3.6274509803921573e-06, |
| "loss": 0.058, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.07315634218289085, |
| "grad_norm": 2.788331985473633, |
| "learning_rate": 3.6470588235294117e-06, |
| "loss": 0.0828, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.07354965585054081, |
| "grad_norm": 2.2508130073547363, |
| "learning_rate": 3.6666666666666666e-06, |
| "loss": 0.089, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.07394296951819075, |
| "grad_norm": 14.532478332519531, |
| "learning_rate": 3.6862745098039223e-06, |
| "loss": 0.0878, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.0743362831858407, |
| "grad_norm": 1.3768811225891113, |
| "learning_rate": 3.7058823529411767e-06, |
| "loss": 0.0534, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.07472959685349066, |
| "grad_norm": 2.9948389530181885, |
| "learning_rate": 3.7254901960784316e-06, |
| "loss": 0.0704, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0751229105211406, |
| "grad_norm": 1.4626399278640747, |
| "learning_rate": 3.7450980392156865e-06, |
| "loss": 0.0306, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.07551622418879056, |
| "grad_norm": 3.062840700149536, |
| "learning_rate": 3.7647058823529414e-06, |
| "loss": 0.0802, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.07590953785644051, |
| "grad_norm": 5.729097843170166, |
| "learning_rate": 3.7843137254901962e-06, |
| "loss": 0.1013, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.07630285152409046, |
| "grad_norm": 1.8716782331466675, |
| "learning_rate": 3.803921568627451e-06, |
| "loss": 0.0664, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.07669616519174041, |
| "grad_norm": 2.058469533920288, |
| "learning_rate": 3.8235294117647055e-06, |
| "loss": 0.0683, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.07708947885939037, |
| "grad_norm": 12.551715850830078, |
| "learning_rate": 3.843137254901962e-06, |
| "loss": 0.09, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.07748279252704031, |
| "grad_norm": 2.2984426021575928, |
| "learning_rate": 3.862745098039216e-06, |
| "loss": 0.0672, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.07787610619469026, |
| "grad_norm": 4.480764865875244, |
| "learning_rate": 3.882352941176471e-06, |
| "loss": 0.051, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.07826941986234022, |
| "grad_norm": 1.4032012224197388, |
| "learning_rate": 3.901960784313726e-06, |
| "loss": 0.0289, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.07866273352999016, |
| "grad_norm": 3.133589029312134, |
| "learning_rate": 3.92156862745098e-06, |
| "loss": 0.0807, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07905604719764012, |
| "grad_norm": 4.1782307624816895, |
| "learning_rate": 3.941176470588236e-06, |
| "loss": 0.0683, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.07944936086529007, |
| "grad_norm": 11.163358688354492, |
| "learning_rate": 3.96078431372549e-06, |
| "loss": 0.0421, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.07984267453294001, |
| "grad_norm": 1.3736735582351685, |
| "learning_rate": 3.980392156862745e-06, |
| "loss": 0.0339, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.08023598820058997, |
| "grad_norm": 6.474332332611084, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0606, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.08062930186823992, |
| "grad_norm": 2.8827829360961914, |
| "learning_rate": 4.019607843137255e-06, |
| "loss": 0.1104, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.08102261553588987, |
| "grad_norm": 1.8476606607437134, |
| "learning_rate": 4.03921568627451e-06, |
| "loss": 0.0479, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.08141592920353982, |
| "grad_norm": 3.2202746868133545, |
| "learning_rate": 4.058823529411765e-06, |
| "loss": 0.088, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.08180924287118978, |
| "grad_norm": 3.4121432304382324, |
| "learning_rate": 4.07843137254902e-06, |
| "loss": 0.1051, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.08220255653883972, |
| "grad_norm": 2.4771883487701416, |
| "learning_rate": 4.098039215686275e-06, |
| "loss": 0.0477, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.08259587020648967, |
| "grad_norm": 2.9881558418273926, |
| "learning_rate": 4.11764705882353e-06, |
| "loss": 0.0472, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08298918387413963, |
| "grad_norm": 2.8722712993621826, |
| "learning_rate": 4.137254901960784e-06, |
| "loss": 0.0856, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.08338249754178957, |
| "grad_norm": 1.9073129892349243, |
| "learning_rate": 4.15686274509804e-06, |
| "loss": 0.0542, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.08377581120943953, |
| "grad_norm": 3.5067648887634277, |
| "learning_rate": 4.176470588235295e-06, |
| "loss": 0.0567, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.08416912487708948, |
| "grad_norm": 2.5827410221099854, |
| "learning_rate": 4.196078431372549e-06, |
| "loss": 0.1062, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.08456243854473942, |
| "grad_norm": 1.8257296085357666, |
| "learning_rate": 4.215686274509805e-06, |
| "loss": 0.0821, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.08495575221238938, |
| "grad_norm": 3.9571404457092285, |
| "learning_rate": 4.235294117647059e-06, |
| "loss": 0.1143, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.08534906588003933, |
| "grad_norm": 2.6589484214782715, |
| "learning_rate": 4.254901960784314e-06, |
| "loss": 0.0814, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.08574237954768928, |
| "grad_norm": 0.915239155292511, |
| "learning_rate": 4.274509803921569e-06, |
| "loss": 0.0355, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.08613569321533923, |
| "grad_norm": 2.9066381454467773, |
| "learning_rate": 4.294117647058823e-06, |
| "loss": 0.0783, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.08652900688298919, |
| "grad_norm": 1.581722378730774, |
| "learning_rate": 4.313725490196079e-06, |
| "loss": 0.0589, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08692232055063913, |
| "grad_norm": 2.2173354625701904, |
| "learning_rate": 4.333333333333334e-06, |
| "loss": 0.0791, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.08731563421828908, |
| "grad_norm": 1.784740686416626, |
| "learning_rate": 4.352941176470588e-06, |
| "loss": 0.0616, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.08770894788593904, |
| "grad_norm": 1.9993363618850708, |
| "learning_rate": 4.372549019607844e-06, |
| "loss": 0.0864, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.08810226155358898, |
| "grad_norm": 4.089532375335693, |
| "learning_rate": 4.392156862745098e-06, |
| "loss": 0.0982, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.08849557522123894, |
| "grad_norm": 2.5914440155029297, |
| "learning_rate": 4.411764705882353e-06, |
| "loss": 0.0702, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 2.555253028869629, |
| "learning_rate": 4.431372549019608e-06, |
| "loss": 0.0831, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.08928220255653883, |
| "grad_norm": 2.2960548400878906, |
| "learning_rate": 4.450980392156863e-06, |
| "loss": 0.0641, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.08967551622418879, |
| "grad_norm": 1.402106761932373, |
| "learning_rate": 4.4705882352941184e-06, |
| "loss": 0.0594, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.09006882989183874, |
| "grad_norm": 3.1225955486297607, |
| "learning_rate": 4.490196078431373e-06, |
| "loss": 0.1042, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.09046214355948869, |
| "grad_norm": 1.7568937540054321, |
| "learning_rate": 4.509803921568628e-06, |
| "loss": 0.0689, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.09085545722713864, |
| "grad_norm": 2.8846213817596436, |
| "learning_rate": 4.529411764705883e-06, |
| "loss": 0.0955, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.0912487708947886, |
| "grad_norm": 4.436802387237549, |
| "learning_rate": 4.549019607843138e-06, |
| "loss": 0.1668, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.09164208456243854, |
| "grad_norm": 2.784074068069458, |
| "learning_rate": 4.568627450980392e-06, |
| "loss": 0.083, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.0920353982300885, |
| "grad_norm": 2.276759147644043, |
| "learning_rate": 4.588235294117647e-06, |
| "loss": 0.0725, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.09242871189773845, |
| "grad_norm": 2.5278875827789307, |
| "learning_rate": 4.607843137254902e-06, |
| "loss": 0.0744, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.09282202556538839, |
| "grad_norm": 1.711602807044983, |
| "learning_rate": 4.627450980392157e-06, |
| "loss": 0.0749, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.09321533923303835, |
| "grad_norm": 1.4517807960510254, |
| "learning_rate": 4.647058823529412e-06, |
| "loss": 0.0587, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.0936086529006883, |
| "grad_norm": 1.090840220451355, |
| "learning_rate": 4.666666666666667e-06, |
| "loss": 0.0719, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.09400196656833824, |
| "grad_norm": 1.8589414358139038, |
| "learning_rate": 4.686274509803922e-06, |
| "loss": 0.0563, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.0943952802359882, |
| "grad_norm": 2.264702081680298, |
| "learning_rate": 4.705882352941177e-06, |
| "loss": 0.0648, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09478859390363815, |
| "grad_norm": 1.4464210271835327, |
| "learning_rate": 4.725490196078431e-06, |
| "loss": 0.0238, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.0951819075712881, |
| "grad_norm": 1.9937217235565186, |
| "learning_rate": 4.745098039215687e-06, |
| "loss": 0.0493, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.09557522123893805, |
| "grad_norm": 2.2047340869903564, |
| "learning_rate": 4.764705882352941e-06, |
| "loss": 0.091, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.09596853490658801, |
| "grad_norm": 4.057810306549072, |
| "learning_rate": 4.784313725490196e-06, |
| "loss": 0.0938, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.09636184857423795, |
| "grad_norm": 1.6187644004821777, |
| "learning_rate": 4.803921568627452e-06, |
| "loss": 0.0673, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.0967551622418879, |
| "grad_norm": 2.7249605655670166, |
| "learning_rate": 4.823529411764706e-06, |
| "loss": 0.0848, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.09714847590953786, |
| "grad_norm": 1.7594577074050903, |
| "learning_rate": 4.8431372549019614e-06, |
| "loss": 0.0594, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.0975417895771878, |
| "grad_norm": 2.6266980171203613, |
| "learning_rate": 4.862745098039216e-06, |
| "loss": 0.0866, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.09793510324483776, |
| "grad_norm": 3.3526737689971924, |
| "learning_rate": 4.882352941176471e-06, |
| "loss": 0.1115, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.09832841691248771, |
| "grad_norm": 2.7514872550964355, |
| "learning_rate": 4.901960784313726e-06, |
| "loss": 0.0694, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09872173058013765, |
| "grad_norm": 2.44143009185791, |
| "learning_rate": 4.921568627450981e-06, |
| "loss": 0.0715, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.09911504424778761, |
| "grad_norm": 2.214268207550049, |
| "learning_rate": 4.941176470588236e-06, |
| "loss": 0.0576, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.09950835791543756, |
| "grad_norm": 1.7012481689453125, |
| "learning_rate": 4.960784313725491e-06, |
| "loss": 0.0754, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.0999016715830875, |
| "grad_norm": 1.8335487842559814, |
| "learning_rate": 4.980392156862746e-06, |
| "loss": 0.0617, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.10029498525073746, |
| "grad_norm": 2.3848774433135986, |
| "learning_rate": 5e-06, |
| "loss": 0.1011, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.10068829891838742, |
| "grad_norm": 2.1847634315490723, |
| "learning_rate": 4.999997641274725e-06, |
| "loss": 0.0793, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.10108161258603736, |
| "grad_norm": 1.5467146635055542, |
| "learning_rate": 4.999990565103349e-06, |
| "loss": 0.0685, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.10147492625368731, |
| "grad_norm": 1.5211800336837769, |
| "learning_rate": 4.999978771499224e-06, |
| "loss": 0.0453, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.10186823992133727, |
| "grad_norm": 1.944356918334961, |
| "learning_rate": 4.999962260484607e-06, |
| "loss": 0.0726, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.10226155358898721, |
| "grad_norm": 2.206536054611206, |
| "learning_rate": 4.999941032090652e-06, |
| "loss": 0.0963, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10265486725663717, |
| "grad_norm": 0.9998722076416016, |
| "learning_rate": 4.999915086357417e-06, |
| "loss": 0.0425, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.10304818092428712, |
| "grad_norm": 2.102257013320923, |
| "learning_rate": 4.99988442333386e-06, |
| "loss": 0.0857, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.10344149459193706, |
| "grad_norm": 2.055304765701294, |
| "learning_rate": 4.999849043077843e-06, |
| "loss": 0.058, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.10383480825958702, |
| "grad_norm": 2.11883544921875, |
| "learning_rate": 4.999808945656128e-06, |
| "loss": 0.1135, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.10422812192723697, |
| "grad_norm": 1.4651076793670654, |
| "learning_rate": 4.999764131144377e-06, |
| "loss": 0.0609, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.10462143559488692, |
| "grad_norm": 1.3278563022613525, |
| "learning_rate": 4.999714599627155e-06, |
| "loss": 0.0506, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.10501474926253687, |
| "grad_norm": 3.376959800720215, |
| "learning_rate": 4.999660351197926e-06, |
| "loss": 0.0505, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.10540806293018683, |
| "grad_norm": 14.901459693908691, |
| "learning_rate": 4.999601385959056e-06, |
| "loss": 0.0717, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.10580137659783677, |
| "grad_norm": 1.7644176483154297, |
| "learning_rate": 4.999537704021812e-06, |
| "loss": 0.1109, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.10619469026548672, |
| "grad_norm": 1.3101154565811157, |
| "learning_rate": 4.99946930550636e-06, |
| "loss": 0.0433, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10658800393313668, |
| "grad_norm": 3.403160572052002, |
| "learning_rate": 4.999396190541766e-06, |
| "loss": 0.1082, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.10698131760078662, |
| "grad_norm": 2.1354033946990967, |
| "learning_rate": 4.999318359265998e-06, |
| "loss": 0.0698, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.10737463126843658, |
| "grad_norm": 1.1540406942367554, |
| "learning_rate": 4.999235811825921e-06, |
| "loss": 0.0857, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.10776794493608653, |
| "grad_norm": 1.4908989667892456, |
| "learning_rate": 4.9991485483773e-06, |
| "loss": 0.0627, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.10816125860373647, |
| "grad_norm": 1.5307058095932007, |
| "learning_rate": 4.999056569084801e-06, |
| "loss": 0.0555, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.10855457227138643, |
| "grad_norm": 2.4000704288482666, |
| "learning_rate": 4.998959874121986e-06, |
| "loss": 0.068, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.10894788593903638, |
| "grad_norm": 1.2169445753097534, |
| "learning_rate": 4.998858463671316e-06, |
| "loss": 0.0716, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.10934119960668633, |
| "grad_norm": 1.496738076210022, |
| "learning_rate": 4.998752337924152e-06, |
| "loss": 0.063, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.10973451327433628, |
| "grad_norm": 1.3070656061172485, |
| "learning_rate": 4.998641497080749e-06, |
| "loss": 0.0444, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.11012782694198624, |
| "grad_norm": 3.1283788681030273, |
| "learning_rate": 4.998525941350264e-06, |
| "loss": 0.1097, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11052114060963618, |
| "grad_norm": 2.3517940044403076, |
| "learning_rate": 4.998405670950747e-06, |
| "loss": 0.0778, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.11091445427728613, |
| "grad_norm": 1.4366756677627563, |
| "learning_rate": 4.998280686109146e-06, |
| "loss": 0.0645, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.11130776794493609, |
| "grad_norm": 1.5536798238754272, |
| "learning_rate": 4.998150987061304e-06, |
| "loss": 0.0483, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.11170108161258603, |
| "grad_norm": 2.191906690597534, |
| "learning_rate": 4.9980165740519625e-06, |
| "loss": 0.061, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.11209439528023599, |
| "grad_norm": 2.2331135272979736, |
| "learning_rate": 4.997877447334754e-06, |
| "loss": 0.073, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.11248770894788594, |
| "grad_norm": 2.7030222415924072, |
| "learning_rate": 4.99773360717221e-06, |
| "loss": 0.0924, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.11288102261553588, |
| "grad_norm": 1.2399053573608398, |
| "learning_rate": 4.997585053835754e-06, |
| "loss": 0.0603, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.11327433628318584, |
| "grad_norm": 1.5186935663223267, |
| "learning_rate": 4.997431787605701e-06, |
| "loss": 0.0733, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.1136676499508358, |
| "grad_norm": 5.53955078125, |
| "learning_rate": 4.997273808771263e-06, |
| "loss": 0.0735, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.11406096361848574, |
| "grad_norm": 1.861646294593811, |
| "learning_rate": 4.997111117630543e-06, |
| "loss": 0.0365, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11445427728613569, |
| "grad_norm": 1.5158923864364624, |
| "learning_rate": 4.996943714490535e-06, |
| "loss": 0.0598, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.11484759095378565, |
| "grad_norm": 3.7808361053466797, |
| "learning_rate": 4.996771599667126e-06, |
| "loss": 0.09, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.11524090462143559, |
| "grad_norm": 1.3470269441604614, |
| "learning_rate": 4.996594773485093e-06, |
| "loss": 0.0304, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.11563421828908554, |
| "grad_norm": 2.0843825340270996, |
| "learning_rate": 4.996413236278104e-06, |
| "loss": 0.0556, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.1160275319567355, |
| "grad_norm": 1.6657154560089111, |
| "learning_rate": 4.996226988388716e-06, |
| "loss": 0.0628, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.11642084562438544, |
| "grad_norm": 1.9300707578659058, |
| "learning_rate": 4.9960360301683755e-06, |
| "loss": 0.0701, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.1168141592920354, |
| "grad_norm": 1.6507627964019775, |
| "learning_rate": 4.995840361977416e-06, |
| "loss": 0.0783, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.11720747295968535, |
| "grad_norm": 1.9679419994354248, |
| "learning_rate": 4.995639984185059e-06, |
| "loss": 0.0714, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1176007866273353, |
| "grad_norm": 1.7199714183807373, |
| "learning_rate": 4.9954348971694146e-06, |
| "loss": 0.046, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.11799410029498525, |
| "grad_norm": 1.3099826574325562, |
| "learning_rate": 4.995225101317478e-06, |
| "loss": 0.0542, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1183874139626352, |
| "grad_norm": 1.4102526903152466, |
| "learning_rate": 4.99501059702513e-06, |
| "loss": 0.07, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.11878072763028515, |
| "grad_norm": 2.6054928302764893, |
| "learning_rate": 4.9947913846971345e-06, |
| "loss": 0.0753, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.1191740412979351, |
| "grad_norm": 2.4399526119232178, |
| "learning_rate": 4.994567464747141e-06, |
| "loss": 0.1051, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.11956735496558506, |
| "grad_norm": 3.065548896789551, |
| "learning_rate": 4.994338837597683e-06, |
| "loss": 0.0955, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.119960668633235, |
| "grad_norm": 1.3317792415618896, |
| "learning_rate": 4.994105503680176e-06, |
| "loss": 0.0595, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.12035398230088495, |
| "grad_norm": 1.5237491130828857, |
| "learning_rate": 4.993867463434916e-06, |
| "loss": 0.0909, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.12074729596853491, |
| "grad_norm": 0.8940740823745728, |
| "learning_rate": 4.9936247173110785e-06, |
| "loss": 0.0628, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.12114060963618485, |
| "grad_norm": 2.6642251014709473, |
| "learning_rate": 4.993377265766723e-06, |
| "loss": 0.0679, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1215339233038348, |
| "grad_norm": 2.868943452835083, |
| "learning_rate": 4.993125109268784e-06, |
| "loss": 0.047, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.12192723697148476, |
| "grad_norm": 1.1550475358963013, |
| "learning_rate": 4.992868248293077e-06, |
| "loss": 0.0771, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1223205506391347, |
| "grad_norm": 1.7380859851837158, |
| "learning_rate": 4.9926066833242926e-06, |
| "loss": 0.0573, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.12271386430678466, |
| "grad_norm": 1.8886913061141968, |
| "learning_rate": 4.9923404148559995e-06, |
| "loss": 0.1034, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.12310717797443461, |
| "grad_norm": 1.5682885646820068, |
| "learning_rate": 4.992069443390641e-06, |
| "loss": 0.0595, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.12350049164208456, |
| "grad_norm": 2.2674522399902344, |
| "learning_rate": 4.991793769439534e-06, |
| "loss": 0.0855, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.12389380530973451, |
| "grad_norm": 1.3800448179244995, |
| "learning_rate": 4.991513393522871e-06, |
| "loss": 0.0537, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.12428711897738447, |
| "grad_norm": 1.9727108478546143, |
| "learning_rate": 4.991228316169715e-06, |
| "loss": 0.0698, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.12468043264503441, |
| "grad_norm": 1.1997886896133423, |
| "learning_rate": 4.990938537918001e-06, |
| "loss": 0.0513, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.12507374631268436, |
| "grad_norm": 1.0357115268707275, |
| "learning_rate": 4.990644059314536e-06, |
| "loss": 0.0537, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1254670599803343, |
| "grad_norm": 2.9861936569213867, |
| "learning_rate": 4.990344880914994e-06, |
| "loss": 0.0836, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.12586037364798427, |
| "grad_norm": 1.0183316469192505, |
| "learning_rate": 4.990041003283921e-06, |
| "loss": 0.0595, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.12625368731563422, |
| "grad_norm": 3.085170269012451, |
| "learning_rate": 4.989732426994725e-06, |
| "loss": 0.1097, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.12664700098328416, |
| "grad_norm": 1.6864210367202759, |
| "learning_rate": 4.989419152629685e-06, |
| "loss": 0.0546, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.12704031465093413, |
| "grad_norm": 1.678736686706543, |
| "learning_rate": 4.9891011807799435e-06, |
| "loss": 0.0436, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.12743362831858407, |
| "grad_norm": 1.6153947114944458, |
| "learning_rate": 4.988778512045507e-06, |
| "loss": 0.0885, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.127826941986234, |
| "grad_norm": 2.239644765853882, |
| "learning_rate": 4.9884511470352456e-06, |
| "loss": 0.0841, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.12822025565388398, |
| "grad_norm": 2.258629560470581, |
| "learning_rate": 4.9881190863668895e-06, |
| "loss": 0.0547, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.12861356932153392, |
| "grad_norm": 1.519643783569336, |
| "learning_rate": 4.98778233066703e-06, |
| "loss": 0.076, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.12900688298918386, |
| "grad_norm": 2.382768154144287, |
| "learning_rate": 4.987440880571121e-06, |
| "loss": 0.0754, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.12940019665683383, |
| "grad_norm": 1.1717922687530518, |
| "learning_rate": 4.98709473672347e-06, |
| "loss": 0.0431, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.12979351032448377, |
| "grad_norm": 2.597674608230591, |
| "learning_rate": 4.986743899777244e-06, |
| "loss": 0.0831, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.13018682399213372, |
| "grad_norm": 2.2018444538116455, |
| "learning_rate": 4.986388370394466e-06, |
| "loss": 0.0967, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.13058013765978368, |
| "grad_norm": 2.4188756942749023, |
| "learning_rate": 4.986028149246013e-06, |
| "loss": 0.0706, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.13097345132743363, |
| "grad_norm": 1.3178000450134277, |
| "learning_rate": 4.985663237011614e-06, |
| "loss": 0.0814, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.13136676499508357, |
| "grad_norm": 1.007521390914917, |
| "learning_rate": 4.985293634379852e-06, |
| "loss": 0.0518, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.13176007866273354, |
| "grad_norm": 2.3999087810516357, |
| "learning_rate": 4.984919342048159e-06, |
| "loss": 0.0526, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.13215339233038348, |
| "grad_norm": 2.07135272026062, |
| "learning_rate": 4.984540360722819e-06, |
| "loss": 0.0493, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.13254670599803342, |
| "grad_norm": 1.2785420417785645, |
| "learning_rate": 4.98415669111896e-06, |
| "loss": 0.0671, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.1329400196656834, |
| "grad_norm": 1.264936089515686, |
| "learning_rate": 4.9837683339605615e-06, |
| "loss": 0.0619, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 2.3385870456695557, |
| "learning_rate": 4.983375289980443e-06, |
| "loss": 0.1164, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.13372664700098327, |
| "grad_norm": 2.5312047004699707, |
| "learning_rate": 4.982977559920273e-06, |
| "loss": 0.1017, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.13411996066863324, |
| "grad_norm": 1.6104050874710083, |
| "learning_rate": 4.982575144530559e-06, |
| "loss": 0.0647, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.13451327433628318, |
| "grad_norm": 1.557822346687317, |
| "learning_rate": 4.982168044570652e-06, |
| "loss": 0.0546, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.13490658800393313, |
| "grad_norm": 1.430794596672058, |
| "learning_rate": 4.981756260808741e-06, |
| "loss": 0.0553, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.1352999016715831, |
| "grad_norm": 1.718525767326355, |
| "learning_rate": 4.981339794021853e-06, |
| "loss": 0.0633, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.13569321533923304, |
| "grad_norm": 0.9465076327323914, |
| "learning_rate": 4.9809186449958536e-06, |
| "loss": 0.0468, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.13608652900688298, |
| "grad_norm": 1.7588387727737427, |
| "learning_rate": 4.980492814525442e-06, |
| "loss": 0.0687, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.13647984267453295, |
| "grad_norm": 1.392269492149353, |
| "learning_rate": 4.980062303414152e-06, |
| "loss": 0.0363, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.1368731563421829, |
| "grad_norm": 2.146742582321167, |
| "learning_rate": 4.97962711247435e-06, |
| "loss": 0.0604, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.13726647000983283, |
| "grad_norm": 2.926267385482788, |
| "learning_rate": 4.979187242527233e-06, |
| "loss": 0.086, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1376597836774828, |
| "grad_norm": 1.9409819841384888, |
| "learning_rate": 4.978742694402825e-06, |
| "loss": 0.0588, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.13805309734513274, |
| "grad_norm": 1.8433561325073242, |
| "learning_rate": 4.978293468939982e-06, |
| "loss": 0.0676, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.13844641101278268, |
| "grad_norm": 2.0934383869171143, |
| "learning_rate": 4.977839566986382e-06, |
| "loss": 0.0713, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.13883972468043265, |
| "grad_norm": 1.8030976057052612, |
| "learning_rate": 4.977380989398529e-06, |
| "loss": 0.1169, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1392330383480826, |
| "grad_norm": 2.014277935028076, |
| "learning_rate": 4.976917737041751e-06, |
| "loss": 0.0376, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.13962635201573254, |
| "grad_norm": 1.3366997241973877, |
| "learning_rate": 4.976449810790196e-06, |
| "loss": 0.0644, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.1400196656833825, |
| "grad_norm": 1.63720703125, |
| "learning_rate": 4.97597721152683e-06, |
| "loss": 0.067, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.14041297935103245, |
| "grad_norm": 2.317793846130371, |
| "learning_rate": 4.975499940143439e-06, |
| "loss": 0.0732, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.1408062930186824, |
| "grad_norm": 1.352824330329895, |
| "learning_rate": 4.975017997540625e-06, |
| "loss": 0.0721, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.14119960668633236, |
| "grad_norm": 1.2860400676727295, |
| "learning_rate": 4.974531384627805e-06, |
| "loss": 0.0604, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1415929203539823, |
| "grad_norm": 2.315216064453125, |
| "learning_rate": 4.974040102323207e-06, |
| "loss": 0.0492, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.14198623402163224, |
| "grad_norm": 1.771453857421875, |
| "learning_rate": 4.973544151553869e-06, |
| "loss": 0.0554, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.1423795476892822, |
| "grad_norm": 0.9052230715751648, |
| "learning_rate": 4.973043533255645e-06, |
| "loss": 0.0524, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.14277286135693215, |
| "grad_norm": 2.327606439590454, |
| "learning_rate": 4.972538248373188e-06, |
| "loss": 0.0583, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.1431661750245821, |
| "grad_norm": 2.986643075942993, |
| "learning_rate": 4.9720282978599625e-06, |
| "loss": 0.0726, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.14355948869223206, |
| "grad_norm": 1.1824491024017334, |
| "learning_rate": 4.971513682678234e-06, |
| "loss": 0.0749, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.143952802359882, |
| "grad_norm": 3.0968868732452393, |
| "learning_rate": 4.970994403799072e-06, |
| "loss": 0.0547, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.14434611602753195, |
| "grad_norm": 1.2194032669067383, |
| "learning_rate": 4.970470462202343e-06, |
| "loss": 0.0651, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.14473942969518191, |
| "grad_norm": 1.3438714742660522, |
| "learning_rate": 4.969941858876719e-06, |
| "loss": 0.0416, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.14513274336283186, |
| "grad_norm": 1.4193546772003174, |
| "learning_rate": 4.96940859481966e-06, |
| "loss": 0.06, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.1455260570304818, |
| "grad_norm": 1.2842000722885132, |
| "learning_rate": 4.968870671037427e-06, |
| "loss": 0.0598, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.14591937069813177, |
| "grad_norm": 2.3905892372131348, |
| "learning_rate": 4.96832808854507e-06, |
| "loss": 0.0652, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.1463126843657817, |
| "grad_norm": 1.5380994081497192, |
| "learning_rate": 4.967780848366432e-06, |
| "loss": 0.1034, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.14670599803343165, |
| "grad_norm": 1.3698018789291382, |
| "learning_rate": 4.967228951534144e-06, |
| "loss": 0.0695, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.14709931170108162, |
| "grad_norm": 1.6553199291229248, |
| "learning_rate": 4.966672399089626e-06, |
| "loss": 0.0358, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.14749262536873156, |
| "grad_norm": 1.966484546661377, |
| "learning_rate": 4.966111192083081e-06, |
| "loss": 0.0396, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1478859390363815, |
| "grad_norm": 1.1057041883468628, |
| "learning_rate": 4.965545331573493e-06, |
| "loss": 0.0294, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.14827925270403147, |
| "grad_norm": 1.3603320121765137, |
| "learning_rate": 4.964974818628633e-06, |
| "loss": 0.0431, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.1486725663716814, |
| "grad_norm": 3.8050637245178223, |
| "learning_rate": 4.964399654325045e-06, |
| "loss": 0.063, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.14906588003933136, |
| "grad_norm": 1.361873984336853, |
| "learning_rate": 4.963819839748055e-06, |
| "loss": 0.0258, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.14945919370698132, |
| "grad_norm": 1.0739333629608154, |
| "learning_rate": 4.96323537599176e-06, |
| "loss": 0.0553, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.14985250737463127, |
| "grad_norm": 1.5606439113616943, |
| "learning_rate": 4.962646264159031e-06, |
| "loss": 0.0341, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1502458210422812, |
| "grad_norm": 1.526953101158142, |
| "learning_rate": 4.962052505361512e-06, |
| "loss": 0.0693, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.15063913470993118, |
| "grad_norm": 3.761380195617676, |
| "learning_rate": 4.9614541007196136e-06, |
| "loss": 0.0685, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.15103244837758112, |
| "grad_norm": 2.7432498931884766, |
| "learning_rate": 4.960851051362514e-06, |
| "loss": 0.0501, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.15142576204523106, |
| "grad_norm": 2.669240951538086, |
| "learning_rate": 4.960243358428154e-06, |
| "loss": 0.1198, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.15181907571288103, |
| "grad_norm": 1.5905970335006714, |
| "learning_rate": 4.959631023063238e-06, |
| "loss": 0.0803, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.15221238938053097, |
| "grad_norm": 1.1858878135681152, |
| "learning_rate": 4.959014046423233e-06, |
| "loss": 0.0654, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.1526057030481809, |
| "grad_norm": 1.7795485258102417, |
| "learning_rate": 4.9583924296723606e-06, |
| "loss": 0.0598, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.15299901671583088, |
| "grad_norm": 1.2830811738967896, |
| "learning_rate": 4.957766173983598e-06, |
| "loss": 0.0437, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.15339233038348082, |
| "grad_norm": 0.8960599303245544, |
| "learning_rate": 4.9571352805386795e-06, |
| "loss": 0.0455, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.15378564405113077, |
| "grad_norm": 2.005126714706421, |
| "learning_rate": 4.956499750528086e-06, |
| "loss": 0.0755, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.15417895771878073, |
| "grad_norm": 1.5545151233673096, |
| "learning_rate": 4.955859585151054e-06, |
| "loss": 0.0449, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.15457227138643068, |
| "grad_norm": 1.0876412391662598, |
| "learning_rate": 4.955214785615558e-06, |
| "loss": 0.0718, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.15496558505408062, |
| "grad_norm": 1.9705466032028198, |
| "learning_rate": 4.9545653531383255e-06, |
| "loss": 0.0612, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1553588987217306, |
| "grad_norm": 1.3790346384048462, |
| "learning_rate": 4.953911288944821e-06, |
| "loss": 0.0371, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.15575221238938053, |
| "grad_norm": 1.0736052989959717, |
| "learning_rate": 4.953252594269252e-06, |
| "loss": 0.056, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.15614552605703047, |
| "grad_norm": 1.919756531715393, |
| "learning_rate": 4.9525892703545604e-06, |
| "loss": 0.0737, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.15653883972468044, |
| "grad_norm": 1.333601713180542, |
| "learning_rate": 4.951921318452428e-06, |
| "loss": 0.0628, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.15693215339233038, |
| "grad_norm": 1.5093313455581665, |
| "learning_rate": 4.951248739823264e-06, |
| "loss": 0.0677, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.15732546705998032, |
| "grad_norm": 1.5697554349899292, |
| "learning_rate": 4.950571535736214e-06, |
| "loss": 0.0672, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1577187807276303, |
| "grad_norm": 1.4692028760910034, |
| "learning_rate": 4.949889707469145e-06, |
| "loss": 0.0472, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.15811209439528023, |
| "grad_norm": 0.9199762940406799, |
| "learning_rate": 4.949203256308658e-06, |
| "loss": 0.0661, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.15850540806293018, |
| "grad_norm": 1.4585742950439453, |
| "learning_rate": 4.948512183550068e-06, |
| "loss": 0.0776, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.15889872173058014, |
| "grad_norm": 1.2560405731201172, |
| "learning_rate": 4.947816490497419e-06, |
| "loss": 0.0932, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1592920353982301, |
| "grad_norm": 1.6395833492279053, |
| "learning_rate": 4.947116178463469e-06, |
| "loss": 0.0399, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.15968534906588003, |
| "grad_norm": 0.8655360341072083, |
| "learning_rate": 4.946411248769693e-06, |
| "loss": 0.0421, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.16007866273353, |
| "grad_norm": 0.9741353392601013, |
| "learning_rate": 4.945701702746279e-06, |
| "loss": 0.0469, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.16047197640117994, |
| "grad_norm": 0.9401141405105591, |
| "learning_rate": 4.944987541732126e-06, |
| "loss": 0.0668, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.16086529006882988, |
| "grad_norm": 0.8718335032463074, |
| "learning_rate": 4.944268767074842e-06, |
| "loss": 0.0597, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.16125860373647985, |
| "grad_norm": 1.3456203937530518, |
| "learning_rate": 4.943545380130742e-06, |
| "loss": 0.0755, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1616519174041298, |
| "grad_norm": 1.1579302549362183, |
| "learning_rate": 4.942817382264842e-06, |
| "loss": 0.0583, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.16204523107177973, |
| "grad_norm": 1.664872169494629, |
| "learning_rate": 4.942084774850858e-06, |
| "loss": 0.0777, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.1624385447394297, |
| "grad_norm": 2.256772518157959, |
| "learning_rate": 4.941347559271208e-06, |
| "loss": 0.0734, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.16283185840707964, |
| "grad_norm": 1.235349416732788, |
| "learning_rate": 4.9406057369170015e-06, |
| "loss": 0.051, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.16322517207472959, |
| "grad_norm": 1.6716983318328857, |
| "learning_rate": 4.939859309188044e-06, |
| "loss": 0.0728, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.16361848574237955, |
| "grad_norm": 1.3591656684875488, |
| "learning_rate": 4.939108277492829e-06, |
| "loss": 0.0725, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1640117994100295, |
| "grad_norm": 0.6709238886833191, |
| "learning_rate": 4.9383526432485375e-06, |
| "loss": 0.0452, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.16440511307767944, |
| "grad_norm": 1.2356040477752686, |
| "learning_rate": 4.937592407881039e-06, |
| "loss": 0.0682, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.1647984267453294, |
| "grad_norm": 1.0750470161437988, |
| "learning_rate": 4.93682757282488e-06, |
| "loss": 0.0383, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.16519174041297935, |
| "grad_norm": 1.5483283996582031, |
| "learning_rate": 4.936058139523291e-06, |
| "loss": 0.0645, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1655850540806293, |
| "grad_norm": 2.0328383445739746, |
| "learning_rate": 4.935284109428177e-06, |
| "loss": 0.0623, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.16597836774827926, |
| "grad_norm": 1.5979444980621338, |
| "learning_rate": 4.934505484000116e-06, |
| "loss": 0.0751, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1663716814159292, |
| "grad_norm": 1.1430745124816895, |
| "learning_rate": 4.93372226470836e-06, |
| "loss": 0.0542, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.16676499508357914, |
| "grad_norm": 2.062899112701416, |
| "learning_rate": 4.932934453030829e-06, |
| "loss": 0.0873, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.1671583087512291, |
| "grad_norm": 3.2697086334228516, |
| "learning_rate": 4.932142050454107e-06, |
| "loss": 0.0733, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.16755162241887905, |
| "grad_norm": 1.2826026678085327, |
| "learning_rate": 4.931345058473443e-06, |
| "loss": 0.0497, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.167944936086529, |
| "grad_norm": 2.3819937705993652, |
| "learning_rate": 4.930543478592743e-06, |
| "loss": 0.0789, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.16833824975417896, |
| "grad_norm": 2.840121030807495, |
| "learning_rate": 4.929737312324574e-06, |
| "loss": 0.054, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1687315634218289, |
| "grad_norm": 0.6918103098869324, |
| "learning_rate": 4.928926561190155e-06, |
| "loss": 0.0448, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.16912487708947885, |
| "grad_norm": 0.8336203694343567, |
| "learning_rate": 4.928111226719359e-06, |
| "loss": 0.0629, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.16951819075712882, |
| "grad_norm": 1.9415661096572876, |
| "learning_rate": 4.927291310450705e-06, |
| "loss": 0.0731, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.16991150442477876, |
| "grad_norm": 1.3499138355255127, |
| "learning_rate": 4.926466813931358e-06, |
| "loss": 0.0562, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1703048180924287, |
| "grad_norm": 1.0689488649368286, |
| "learning_rate": 4.925637738717127e-06, |
| "loss": 0.0706, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.17069813176007867, |
| "grad_norm": 2.7924535274505615, |
| "learning_rate": 4.924804086372462e-06, |
| "loss": 0.0671, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.1710914454277286, |
| "grad_norm": 0.8586186170578003, |
| "learning_rate": 4.9239658584704466e-06, |
| "loss": 0.049, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.17148475909537855, |
| "grad_norm": 1.8235011100769043, |
| "learning_rate": 4.923123056592801e-06, |
| "loss": 0.0715, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.17187807276302852, |
| "grad_norm": 1.1591852903366089, |
| "learning_rate": 4.922275682329876e-06, |
| "loss": 0.0799, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.17227138643067846, |
| "grad_norm": 1.2786961793899536, |
| "learning_rate": 4.921423737280649e-06, |
| "loss": 0.0561, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1726647000983284, |
| "grad_norm": 1.602005958557129, |
| "learning_rate": 4.9205672230527254e-06, |
| "loss": 0.0517, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.17305801376597837, |
| "grad_norm": 1.3069565296173096, |
| "learning_rate": 4.919706141262329e-06, |
| "loss": 0.063, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.17345132743362832, |
| "grad_norm": 1.4721592664718628, |
| "learning_rate": 4.918840493534305e-06, |
| "loss": 0.0789, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.17384464110127826, |
| "grad_norm": 2.0551934242248535, |
| "learning_rate": 4.917970281502112e-06, |
| "loss": 0.0711, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.17423795476892823, |
| "grad_norm": 1.175560474395752, |
| "learning_rate": 4.917095506807824e-06, |
| "loss": 0.0646, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.17463126843657817, |
| "grad_norm": 1.3429381847381592, |
| "learning_rate": 4.916216171102124e-06, |
| "loss": 0.0609, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.1750245821042281, |
| "grad_norm": 1.306825041770935, |
| "learning_rate": 4.9153322760443015e-06, |
| "loss": 0.0529, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.17541789577187808, |
| "grad_norm": 1.4618321657180786, |
| "learning_rate": 4.914443823302246e-06, |
| "loss": 0.0509, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.17581120943952802, |
| "grad_norm": 1.054541826248169, |
| "learning_rate": 4.913550814552454e-06, |
| "loss": 0.0613, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.17620452310717796, |
| "grad_norm": 0.9349273443222046, |
| "learning_rate": 4.912653251480013e-06, |
| "loss": 0.0531, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.17659783677482793, |
| "grad_norm": 1.302675724029541, |
| "learning_rate": 4.9117511357786075e-06, |
| "loss": 0.0661, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.17699115044247787, |
| "grad_norm": 2.327521562576294, |
| "learning_rate": 4.910844469150512e-06, |
| "loss": 0.08, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.17738446411012782, |
| "grad_norm": 1.7499988079071045, |
| "learning_rate": 4.909933253306588e-06, |
| "loss": 0.0368, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 1.1263257265090942, |
| "learning_rate": 4.909017489966283e-06, |
| "loss": 0.0322, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.17817109144542773, |
| "grad_norm": 2.8002772331237793, |
| "learning_rate": 4.9080971808576226e-06, |
| "loss": 0.0597, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.17856440511307767, |
| "grad_norm": 2.0555684566497803, |
| "learning_rate": 4.907172327717214e-06, |
| "loss": 0.0754, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.17895771878072764, |
| "grad_norm": 2.3041601181030273, |
| "learning_rate": 4.906242932290234e-06, |
| "loss": 0.0838, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.17935103244837758, |
| "grad_norm": 2.3882484436035156, |
| "learning_rate": 4.905308996330437e-06, |
| "loss": 0.063, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.17974434611602752, |
| "grad_norm": 1.4339286088943481, |
| "learning_rate": 4.904370521600138e-06, |
| "loss": 0.0723, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.1801376597836775, |
| "grad_norm": 1.387052059173584, |
| "learning_rate": 4.903427509870222e-06, |
| "loss": 0.0708, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.18053097345132743, |
| "grad_norm": 0.8694115877151489, |
| "learning_rate": 4.902479962920134e-06, |
| "loss": 0.0519, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.18092428711897737, |
| "grad_norm": 1.0308964252471924, |
| "learning_rate": 4.901527882537876e-06, |
| "loss": 0.054, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.18131760078662734, |
| "grad_norm": 2.4914846420288086, |
| "learning_rate": 4.900571270520004e-06, |
| "loss": 0.115, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.18171091445427728, |
| "grad_norm": 2.637059450149536, |
| "learning_rate": 4.899610128671626e-06, |
| "loss": 0.0851, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.18210422812192723, |
| "grad_norm": 1.9722718000411987, |
| "learning_rate": 4.898644458806398e-06, |
| "loss": 0.0637, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.1824975417895772, |
| "grad_norm": 0.9795344471931458, |
| "learning_rate": 4.897674262746522e-06, |
| "loss": 0.0622, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.18289085545722714, |
| "grad_norm": 1.2904670238494873, |
| "learning_rate": 4.896699542322736e-06, |
| "loss": 0.0384, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.18328416912487708, |
| "grad_norm": 1.4417036771774292, |
| "learning_rate": 4.895720299374319e-06, |
| "loss": 0.1118, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.18367748279252705, |
| "grad_norm": 1.6243058443069458, |
| "learning_rate": 4.894736535749083e-06, |
| "loss": 0.0756, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.184070796460177, |
| "grad_norm": 1.0999799966812134, |
| "learning_rate": 4.89374825330337e-06, |
| "loss": 0.0525, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.18446411012782693, |
| "grad_norm": 1.9067320823669434, |
| "learning_rate": 4.892755453902051e-06, |
| "loss": 0.066, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.1848574237954769, |
| "grad_norm": 1.1623554229736328, |
| "learning_rate": 4.8917581394185175e-06, |
| "loss": 0.0547, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.18525073746312684, |
| "grad_norm": 1.2230125665664673, |
| "learning_rate": 4.890756311734683e-06, |
| "loss": 0.0753, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.18564405113077678, |
| "grad_norm": 1.376905083656311, |
| "learning_rate": 4.8897499727409755e-06, |
| "loss": 0.0637, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.18603736479842675, |
| "grad_norm": 2.381087064743042, |
| "learning_rate": 4.888739124336338e-06, |
| "loss": 0.0818, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.1864306784660767, |
| "grad_norm": 1.5327961444854736, |
| "learning_rate": 4.8877237684282205e-06, |
| "loss": 0.0689, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.18682399213372664, |
| "grad_norm": 1.7480573654174805, |
| "learning_rate": 4.8867039069325804e-06, |
| "loss": 0.0713, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.1872173058013766, |
| "grad_norm": 1.2657626867294312, |
| "learning_rate": 4.8856795417738754e-06, |
| "loss": 0.0742, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.18761061946902655, |
| "grad_norm": 1.0295419692993164, |
| "learning_rate": 4.884650674885062e-06, |
| "loss": 0.0448, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.1880039331366765, |
| "grad_norm": 1.9904601573944092, |
| "learning_rate": 4.883617308207592e-06, |
| "loss": 0.0801, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.18839724680432646, |
| "grad_norm": 1.4027286767959595, |
| "learning_rate": 4.88257944369141e-06, |
| "loss": 0.0502, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.1887905604719764, |
| "grad_norm": 2.087235689163208, |
| "learning_rate": 4.8815370832949425e-06, |
| "loss": 0.1021, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.18918387413962634, |
| "grad_norm": 0.8643338680267334, |
| "learning_rate": 4.880490228985104e-06, |
| "loss": 0.0732, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.1895771878072763, |
| "grad_norm": 1.4668515920639038, |
| "learning_rate": 4.8794388827372884e-06, |
| "loss": 0.0548, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.18997050147492625, |
| "grad_norm": 1.8225198984146118, |
| "learning_rate": 4.878383046535366e-06, |
| "loss": 0.0882, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.1903638151425762, |
| "grad_norm": 1.6394109725952148, |
| "learning_rate": 4.877322722371677e-06, |
| "loss": 0.1029, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.19075712881022616, |
| "grad_norm": 0.9612401723861694, |
| "learning_rate": 4.876257912247033e-06, |
| "loss": 0.0442, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.1911504424778761, |
| "grad_norm": 2.0715410709381104, |
| "learning_rate": 4.8751886181707105e-06, |
| "loss": 0.0793, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.19154375614552605, |
| "grad_norm": 1.14213228225708, |
| "learning_rate": 4.874114842160445e-06, |
| "loss": 0.0782, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.19193706981317601, |
| "grad_norm": 1.7314140796661377, |
| "learning_rate": 4.873036586242431e-06, |
| "loss": 0.0478, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.19233038348082596, |
| "grad_norm": 0.6948450803756714, |
| "learning_rate": 4.871953852451316e-06, |
| "loss": 0.0546, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.1927236971484759, |
| "grad_norm": 1.9421541690826416, |
| "learning_rate": 4.8708666428301975e-06, |
| "loss": 0.0793, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.19311701081612587, |
| "grad_norm": 0.5670569539070129, |
| "learning_rate": 4.869774959430619e-06, |
| "loss": 0.0506, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.1935103244837758, |
| "grad_norm": 1.437902808189392, |
| "learning_rate": 4.868678804312565e-06, |
| "loss": 0.0545, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.19390363815142575, |
| "grad_norm": 1.8984867334365845, |
| "learning_rate": 4.867578179544457e-06, |
| "loss": 0.0658, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.19429695181907572, |
| "grad_norm": 2.0684666633605957, |
| "learning_rate": 4.866473087203154e-06, |
| "loss": 0.0565, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.19469026548672566, |
| "grad_norm": 1.5473408699035645, |
| "learning_rate": 4.865363529373944e-06, |
| "loss": 0.0481, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.1950835791543756, |
| "grad_norm": 1.678281545639038, |
| "learning_rate": 4.864249508150539e-06, |
| "loss": 0.056, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.19547689282202557, |
| "grad_norm": 1.3713724613189697, |
| "learning_rate": 4.863131025635076e-06, |
| "loss": 0.0474, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.1958702064896755, |
| "grad_norm": 2.0483641624450684, |
| "learning_rate": 4.862008083938109e-06, |
| "loss": 0.0712, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.19626352015732546, |
| "grad_norm": 1.701915979385376, |
| "learning_rate": 4.8608806851786075e-06, |
| "loss": 0.0642, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.19665683382497542, |
| "grad_norm": 1.4159979820251465, |
| "learning_rate": 4.859748831483949e-06, |
| "loss": 0.0706, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19705014749262537, |
| "grad_norm": 0.9921556711196899, |
| "learning_rate": 4.858612524989921e-06, |
| "loss": 0.0311, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.1974434611602753, |
| "grad_norm": 0.6453993320465088, |
| "learning_rate": 4.857471767840709e-06, |
| "loss": 0.0304, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.19783677482792528, |
| "grad_norm": 2.1691184043884277, |
| "learning_rate": 4.856326562188902e-06, |
| "loss": 0.0573, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.19823008849557522, |
| "grad_norm": 1.424170732498169, |
| "learning_rate": 4.855176910195479e-06, |
| "loss": 0.0371, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.19862340216322516, |
| "grad_norm": 2.0996835231781006, |
| "learning_rate": 4.854022814029809e-06, |
| "loss": 0.06, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.19901671583087513, |
| "grad_norm": 2.2325479984283447, |
| "learning_rate": 4.852864275869652e-06, |
| "loss": 0.0686, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.19941002949852507, |
| "grad_norm": 1.8133199214935303, |
| "learning_rate": 4.851701297901144e-06, |
| "loss": 0.0811, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.199803343166175, |
| "grad_norm": 1.4886740446090698, |
| "learning_rate": 4.850533882318803e-06, |
| "loss": 0.0516, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.20019665683382498, |
| "grad_norm": 1.685327172279358, |
| "learning_rate": 4.849362031325518e-06, |
| "loss": 0.0427, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.20058997050147492, |
| "grad_norm": 2.726207733154297, |
| "learning_rate": 4.8481857471325485e-06, |
| "loss": 0.0686, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.20098328416912487, |
| "grad_norm": 1.1494991779327393, |
| "learning_rate": 4.847005031959521e-06, |
| "loss": 0.0642, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.20137659783677483, |
| "grad_norm": 2.118980884552002, |
| "learning_rate": 4.84581988803442e-06, |
| "loss": 0.0504, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.20176991150442478, |
| "grad_norm": 1.4535127878189087, |
| "learning_rate": 4.84463031759359e-06, |
| "loss": 0.0482, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.20216322517207472, |
| "grad_norm": 0.8411951065063477, |
| "learning_rate": 4.843436322881725e-06, |
| "loss": 0.0491, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.2025565388397247, |
| "grad_norm": 0.9351110458374023, |
| "learning_rate": 4.8422379061518705e-06, |
| "loss": 0.0278, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.20294985250737463, |
| "grad_norm": 1.2653199434280396, |
| "learning_rate": 4.841035069665416e-06, |
| "loss": 0.0494, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.20334316617502457, |
| "grad_norm": 2.1194064617156982, |
| "learning_rate": 4.83982781569209e-06, |
| "loss": 0.0985, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.20373647984267454, |
| "grad_norm": 0.9621169567108154, |
| "learning_rate": 4.838616146509956e-06, |
| "loss": 0.0681, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.20412979351032448, |
| "grad_norm": 2.935671091079712, |
| "learning_rate": 4.83740006440541e-06, |
| "loss": 0.1056, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.20452310717797442, |
| "grad_norm": 1.5503019094467163, |
| "learning_rate": 4.8361795716731744e-06, |
| "loss": 0.0736, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2049164208456244, |
| "grad_norm": 1.5426656007766724, |
| "learning_rate": 4.8349546706162965e-06, |
| "loss": 0.0768, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.20530973451327433, |
| "grad_norm": 1.788036823272705, |
| "learning_rate": 4.833725363546139e-06, |
| "loss": 0.0785, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.20570304818092428, |
| "grad_norm": 1.3642781972885132, |
| "learning_rate": 4.8324916527823795e-06, |
| "loss": 0.0582, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.20609636184857424, |
| "grad_norm": 2.6498544216156006, |
| "learning_rate": 4.831253540653007e-06, |
| "loss": 0.068, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.20648967551622419, |
| "grad_norm": 1.3358078002929688, |
| "learning_rate": 4.8300110294943145e-06, |
| "loss": 0.0689, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.20688298918387413, |
| "grad_norm": 2.4475595951080322, |
| "learning_rate": 4.828764121650896e-06, |
| "loss": 0.0685, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2072763028515241, |
| "grad_norm": 1.8231087923049927, |
| "learning_rate": 4.827512819475641e-06, |
| "loss": 0.061, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.20766961651917404, |
| "grad_norm": 1.6098417043685913, |
| "learning_rate": 4.826257125329733e-06, |
| "loss": 0.0775, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.20806293018682398, |
| "grad_norm": 1.2955044507980347, |
| "learning_rate": 4.824997041582641e-06, |
| "loss": 0.0828, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.20845624385447395, |
| "grad_norm": 1.600419282913208, |
| "learning_rate": 4.82373257061212e-06, |
| "loss": 0.0868, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2088495575221239, |
| "grad_norm": 1.2169928550720215, |
| "learning_rate": 4.8224637148042e-06, |
| "loss": 0.0543, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.20924287118977383, |
| "grad_norm": 1.6863512992858887, |
| "learning_rate": 4.821190476553186e-06, |
| "loss": 0.0703, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.2096361848574238, |
| "grad_norm": 1.9771099090576172, |
| "learning_rate": 4.819912858261656e-06, |
| "loss": 0.0799, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.21002949852507374, |
| "grad_norm": 1.276354432106018, |
| "learning_rate": 4.818630862340449e-06, |
| "loss": 0.0661, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.21042281219272368, |
| "grad_norm": 1.1068519353866577, |
| "learning_rate": 4.817344491208665e-06, |
| "loss": 0.0496, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.21081612586037365, |
| "grad_norm": 1.1699997186660767, |
| "learning_rate": 4.816053747293663e-06, |
| "loss": 0.0395, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2112094395280236, |
| "grad_norm": 1.290640115737915, |
| "learning_rate": 4.814758633031049e-06, |
| "loss": 0.0526, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.21160275319567354, |
| "grad_norm": 1.8085367679595947, |
| "learning_rate": 4.813459150864681e-06, |
| "loss": 0.0593, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.2119960668633235, |
| "grad_norm": 1.6277810335159302, |
| "learning_rate": 4.812155303246653e-06, |
| "loss": 0.0645, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.21238938053097345, |
| "grad_norm": 0.9544056057929993, |
| "learning_rate": 4.810847092637301e-06, |
| "loss": 0.063, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.2127826941986234, |
| "grad_norm": 1.349601149559021, |
| "learning_rate": 4.809534521505192e-06, |
| "loss": 0.0877, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.21317600786627336, |
| "grad_norm": 1.6013360023498535, |
| "learning_rate": 4.8082175923271235e-06, |
| "loss": 0.0637, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2135693215339233, |
| "grad_norm": 1.130764365196228, |
| "learning_rate": 4.806896307588113e-06, |
| "loss": 0.086, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.21396263520157324, |
| "grad_norm": 1.40028715133667, |
| "learning_rate": 4.805570669781399e-06, |
| "loss": 0.0876, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2143559488692232, |
| "grad_norm": 1.7551463842391968, |
| "learning_rate": 4.804240681408434e-06, |
| "loss": 0.0593, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.21474926253687315, |
| "grad_norm": 1.648735523223877, |
| "learning_rate": 4.802906344978881e-06, |
| "loss": 0.0772, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.2151425762045231, |
| "grad_norm": 0.8385063409805298, |
| "learning_rate": 4.801567663010605e-06, |
| "loss": 0.0706, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.21553588987217306, |
| "grad_norm": 1.8120150566101074, |
| "learning_rate": 4.800224638029672e-06, |
| "loss": 0.0696, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.215929203539823, |
| "grad_norm": 0.5346795916557312, |
| "learning_rate": 4.798877272570343e-06, |
| "loss": 0.0494, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.21632251720747295, |
| "grad_norm": 1.4182865619659424, |
| "learning_rate": 4.797525569175073e-06, |
| "loss": 0.0711, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.21671583087512292, |
| "grad_norm": 0.9838932752609253, |
| "learning_rate": 4.796169530394498e-06, |
| "loss": 0.0843, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.21710914454277286, |
| "grad_norm": 1.5188270807266235, |
| "learning_rate": 4.7948091587874355e-06, |
| "loss": 0.0663, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2175024582104228, |
| "grad_norm": 1.796202540397644, |
| "learning_rate": 4.793444456920881e-06, |
| "loss": 0.0655, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.21789577187807277, |
| "grad_norm": 1.4925826787948608, |
| "learning_rate": 4.7920754273699985e-06, |
| "loss": 0.0607, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.2182890855457227, |
| "grad_norm": 1.2840732336044312, |
| "learning_rate": 4.790702072718121e-06, |
| "loss": 0.0634, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.21868239921337265, |
| "grad_norm": 1.0566197633743286, |
| "learning_rate": 4.789324395556741e-06, |
| "loss": 0.0475, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.21907571288102262, |
| "grad_norm": 1.2299338579177856, |
| "learning_rate": 4.7879423984855085e-06, |
| "loss": 0.054, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.21946902654867256, |
| "grad_norm": 1.7808493375778198, |
| "learning_rate": 4.786556084112224e-06, |
| "loss": 0.0905, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2198623402163225, |
| "grad_norm": 1.054694652557373, |
| "learning_rate": 4.785165455052836e-06, |
| "loss": 0.0561, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.22025565388397247, |
| "grad_norm": 2.180976629257202, |
| "learning_rate": 4.783770513931433e-06, |
| "loss": 0.0705, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.22064896755162242, |
| "grad_norm": 0.9467242956161499, |
| "learning_rate": 4.782371263380242e-06, |
| "loss": 0.0471, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.22104228121927236, |
| "grad_norm": 1.0072274208068848, |
| "learning_rate": 4.780967706039622e-06, |
| "loss": 0.0642, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.22143559488692233, |
| "grad_norm": 0.9987531304359436, |
| "learning_rate": 4.779559844558056e-06, |
| "loss": 0.0556, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.22182890855457227, |
| "grad_norm": 1.5135668516159058, |
| "learning_rate": 4.778147681592152e-06, |
| "loss": 0.051, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 1.6369942426681519, |
| "learning_rate": 4.776731219806634e-06, |
| "loss": 0.1089, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.22261553588987218, |
| "grad_norm": 1.8307068347930908, |
| "learning_rate": 4.775310461874337e-06, |
| "loss": 0.0555, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.22300884955752212, |
| "grad_norm": 1.2417643070220947, |
| "learning_rate": 4.773885410476202e-06, |
| "loss": 0.0356, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.22340216322517206, |
| "grad_norm": 0.8904944658279419, |
| "learning_rate": 4.7724560683012735e-06, |
| "loss": 0.0649, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.22379547689282203, |
| "grad_norm": 1.3853691816329956, |
| "learning_rate": 4.771022438046693e-06, |
| "loss": 0.0429, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.22418879056047197, |
| "grad_norm": 1.6937843561172485, |
| "learning_rate": 4.769584522417691e-06, |
| "loss": 0.0831, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.22458210422812191, |
| "grad_norm": 1.6160171031951904, |
| "learning_rate": 4.768142324127586e-06, |
| "loss": 0.0754, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.22497541789577188, |
| "grad_norm": 1.2548290491104126, |
| "learning_rate": 4.766695845897778e-06, |
| "loss": 0.073, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.22536873156342183, |
| "grad_norm": 2.645967483520508, |
| "learning_rate": 4.765245090457744e-06, |
| "loss": 0.1022, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.22576204523107177, |
| "grad_norm": 1.2090085744857788, |
| "learning_rate": 4.763790060545028e-06, |
| "loss": 0.0449, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.22615535889872174, |
| "grad_norm": 1.5384302139282227, |
| "learning_rate": 4.762330758905246e-06, |
| "loss": 0.0523, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.22654867256637168, |
| "grad_norm": 1.3840306997299194, |
| "learning_rate": 4.760867188292068e-06, |
| "loss": 0.0409, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.22694198623402162, |
| "grad_norm": 0.8169382214546204, |
| "learning_rate": 4.7593993514672255e-06, |
| "loss": 0.0526, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2273352999016716, |
| "grad_norm": 0.6939831972122192, |
| "learning_rate": 4.757927251200497e-06, |
| "loss": 0.0497, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.22772861356932153, |
| "grad_norm": 2.4073455333709717, |
| "learning_rate": 4.756450890269705e-06, |
| "loss": 0.0703, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.22812192723697147, |
| "grad_norm": 1.4490169286727905, |
| "learning_rate": 4.754970271460714e-06, |
| "loss": 0.0429, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.22851524090462144, |
| "grad_norm": 0.8039276599884033, |
| "learning_rate": 4.753485397567424e-06, |
| "loss": 0.0525, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.22890855457227138, |
| "grad_norm": 0.9220805764198303, |
| "learning_rate": 4.751996271391761e-06, |
| "loss": 0.056, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.22930186823992132, |
| "grad_norm": 2.1960690021514893, |
| "learning_rate": 4.750502895743677e-06, |
| "loss": 0.0636, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2296951819075713, |
| "grad_norm": 1.5164406299591064, |
| "learning_rate": 4.749005273441143e-06, |
| "loss": 0.0557, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.23008849557522124, |
| "grad_norm": 1.8541299104690552, |
| "learning_rate": 4.747503407310142e-06, |
| "loss": 0.0679, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.23048180924287118, |
| "grad_norm": 5.52957010269165, |
| "learning_rate": 4.745997300184666e-06, |
| "loss": 0.0805, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.23087512291052115, |
| "grad_norm": 1.318687915802002, |
| "learning_rate": 4.744486954906709e-06, |
| "loss": 0.0499, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.2312684365781711, |
| "grad_norm": 1.1736847162246704, |
| "learning_rate": 4.742972374326262e-06, |
| "loss": 0.0371, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.23166175024582103, |
| "grad_norm": 1.7209968566894531, |
| "learning_rate": 4.74145356130131e-06, |
| "loss": 0.0553, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.232055063913471, |
| "grad_norm": 1.392303228378296, |
| "learning_rate": 4.739930518697823e-06, |
| "loss": 0.0468, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.23244837758112094, |
| "grad_norm": 1.6198259592056274, |
| "learning_rate": 4.738403249389752e-06, |
| "loss": 0.0671, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.23284169124877088, |
| "grad_norm": 1.394888997077942, |
| "learning_rate": 4.736871756259023e-06, |
| "loss": 0.0851, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.23323500491642085, |
| "grad_norm": 1.2976491451263428, |
| "learning_rate": 4.7353360421955345e-06, |
| "loss": 0.0614, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.2336283185840708, |
| "grad_norm": 1.2485517263412476, |
| "learning_rate": 4.733796110097148e-06, |
| "loss": 0.0429, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.23402163225172073, |
| "grad_norm": 2.0384671688079834, |
| "learning_rate": 4.732251962869685e-06, |
| "loss": 0.0549, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.2344149459193707, |
| "grad_norm": 2.514827251434326, |
| "learning_rate": 4.730703603426921e-06, |
| "loss": 0.0934, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.23480825958702065, |
| "grad_norm": 1.5746873617172241, |
| "learning_rate": 4.729151034690579e-06, |
| "loss": 0.0797, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.2352015732546706, |
| "grad_norm": 1.458757996559143, |
| "learning_rate": 4.727594259590326e-06, |
| "loss": 0.07, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.23559488692232056, |
| "grad_norm": 1.9289155006408691, |
| "learning_rate": 4.726033281063766e-06, |
| "loss": 0.0447, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.2359882005899705, |
| "grad_norm": 2.641873359680176, |
| "learning_rate": 4.724468102056434e-06, |
| "loss": 0.1165, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.23638151425762044, |
| "grad_norm": 0.6296206116676331, |
| "learning_rate": 4.722898725521793e-06, |
| "loss": 0.0597, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.2367748279252704, |
| "grad_norm": 1.7393361330032349, |
| "learning_rate": 4.721325154421224e-06, |
| "loss": 0.0508, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.23716814159292035, |
| "grad_norm": 1.639045000076294, |
| "learning_rate": 4.7197473917240255e-06, |
| "loss": 0.0433, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2375614552605703, |
| "grad_norm": 1.4411070346832275, |
| "learning_rate": 4.718165440407404e-06, |
| "loss": 0.0626, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.23795476892822026, |
| "grad_norm": 1.7141265869140625, |
| "learning_rate": 4.716579303456471e-06, |
| "loss": 0.0641, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.2383480825958702, |
| "grad_norm": 1.1153072118759155, |
| "learning_rate": 4.714988983864235e-06, |
| "loss": 0.0524, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.23874139626352014, |
| "grad_norm": 0.6169893741607666, |
| "learning_rate": 4.713394484631598e-06, |
| "loss": 0.0485, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.23913470993117011, |
| "grad_norm": 2.24593186378479, |
| "learning_rate": 4.711795808767348e-06, |
| "loss": 0.0767, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.23952802359882006, |
| "grad_norm": 0.8726077675819397, |
| "learning_rate": 4.7101929592881545e-06, |
| "loss": 0.0506, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.23992133726647, |
| "grad_norm": 1.0482176542282104, |
| "learning_rate": 4.708585939218564e-06, |
| "loss": 0.0374, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.24031465093411997, |
| "grad_norm": 1.031867265701294, |
| "learning_rate": 4.7069747515909905e-06, |
| "loss": 0.0513, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.2407079646017699, |
| "grad_norm": 1.548361897468567, |
| "learning_rate": 4.7053593994457135e-06, |
| "loss": 0.0524, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.24110127826941985, |
| "grad_norm": 2.367420196533203, |
| "learning_rate": 4.70373988583087e-06, |
| "loss": 0.0915, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.24149459193706982, |
| "grad_norm": 1.440256953239441, |
| "learning_rate": 4.7021162138024524e-06, |
| "loss": 0.0829, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.24188790560471976, |
| "grad_norm": 1.6830074787139893, |
| "learning_rate": 4.700488386424294e-06, |
| "loss": 0.0706, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2422812192723697, |
| "grad_norm": 2.811821699142456, |
| "learning_rate": 4.698856406768076e-06, |
| "loss": 0.0531, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.24267453294001967, |
| "grad_norm": 2.031094551086426, |
| "learning_rate": 4.697220277913311e-06, |
| "loss": 0.0751, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.2430678466076696, |
| "grad_norm": 1.9269078969955444, |
| "learning_rate": 4.695580002947341e-06, |
| "loss": 0.0624, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.24346116027531955, |
| "grad_norm": 1.3828526735305786, |
| "learning_rate": 4.6939355849653325e-06, |
| "loss": 0.0776, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.24385447394296952, |
| "grad_norm": 1.0781844854354858, |
| "learning_rate": 4.69228702707027e-06, |
| "loss": 0.0477, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.24424778761061947, |
| "grad_norm": 1.0195046663284302, |
| "learning_rate": 4.69063433237295e-06, |
| "loss": 0.06, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2446411012782694, |
| "grad_norm": 0.6686704158782959, |
| "learning_rate": 4.688977503991975e-06, |
| "loss": 0.0713, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.24503441494591938, |
| "grad_norm": 1.7740367650985718, |
| "learning_rate": 4.687316545053746e-06, |
| "loss": 0.092, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.24542772861356932, |
| "grad_norm": 1.1935254335403442, |
| "learning_rate": 4.68565145869246e-06, |
| "loss": 0.0697, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.24582104228121926, |
| "grad_norm": 0.7092412710189819, |
| "learning_rate": 4.683982248050103e-06, |
| "loss": 0.0647, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.24621435594886923, |
| "grad_norm": 2.2962708473205566, |
| "learning_rate": 4.6823089162764425e-06, |
| "loss": 0.07, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.24660766961651917, |
| "grad_norm": 1.1462363004684448, |
| "learning_rate": 4.6806314665290205e-06, |
| "loss": 0.0519, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.2470009832841691, |
| "grad_norm": 2.2198500633239746, |
| "learning_rate": 4.678949901973154e-06, |
| "loss": 0.0411, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.24739429695181908, |
| "grad_norm": 0.703561007976532, |
| "learning_rate": 4.677264225781921e-06, |
| "loss": 0.0505, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.24778761061946902, |
| "grad_norm": 1.4070128202438354, |
| "learning_rate": 4.6755744411361585e-06, |
| "loss": 0.0659, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.24818092428711896, |
| "grad_norm": 0.9832798838615417, |
| "learning_rate": 4.6738805512244575e-06, |
| "loss": 0.0917, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.24857423795476893, |
| "grad_norm": 0.9056950807571411, |
| "learning_rate": 4.672182559243155e-06, |
| "loss": 0.0484, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.24896755162241888, |
| "grad_norm": 2.0713984966278076, |
| "learning_rate": 4.670480468396327e-06, |
| "loss": 0.0729, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.24936086529006882, |
| "grad_norm": 0.9963469505310059, |
| "learning_rate": 4.668774281895786e-06, |
| "loss": 0.0507, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.2497541789577188, |
| "grad_norm": 0.9695498943328857, |
| "learning_rate": 4.667064002961073e-06, |
| "loss": 0.0538, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.25014749262536873, |
| "grad_norm": 1.3090274333953857, |
| "learning_rate": 4.66534963481945e-06, |
| "loss": 0.0931, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.25054080629301867, |
| "grad_norm": 1.2280491590499878, |
| "learning_rate": 4.663631180705894e-06, |
| "loss": 0.0488, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.2509341199606686, |
| "grad_norm": 1.050603985786438, |
| "learning_rate": 4.661908643863096e-06, |
| "loss": 0.0723, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.2513274336283186, |
| "grad_norm": 1.2820688486099243, |
| "learning_rate": 4.66018202754145e-06, |
| "loss": 0.0854, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.25172074729596855, |
| "grad_norm": 0.9909592866897583, |
| "learning_rate": 4.658451334999043e-06, |
| "loss": 0.0613, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2521140609636185, |
| "grad_norm": 0.7117825746536255, |
| "learning_rate": 4.656716569501661e-06, |
| "loss": 0.0249, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.25250737463126843, |
| "grad_norm": 1.803819179534912, |
| "learning_rate": 4.654977734322772e-06, |
| "loss": 0.0744, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.2529006882989184, |
| "grad_norm": 1.2123903036117554, |
| "learning_rate": 4.653234832743521e-06, |
| "loss": 0.0893, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.2532940019665683, |
| "grad_norm": 1.3053680658340454, |
| "learning_rate": 4.651487868052731e-06, |
| "loss": 0.0794, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.2536873156342183, |
| "grad_norm": 1.5112253427505493, |
| "learning_rate": 4.64973684354689e-06, |
| "loss": 0.1139, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.25408062930186825, |
| "grad_norm": 0.4444582164287567, |
| "learning_rate": 4.647981762530145e-06, |
| "loss": 0.031, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.2544739429695182, |
| "grad_norm": 0.863317608833313, |
| "learning_rate": 4.6462226283143e-06, |
| "loss": 0.0336, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.25486725663716814, |
| "grad_norm": 2.007761001586914, |
| "learning_rate": 4.644459444218807e-06, |
| "loss": 0.0531, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.2552605703048181, |
| "grad_norm": 2.1189866065979004, |
| "learning_rate": 4.642692213570759e-06, |
| "loss": 0.0906, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.255653883972468, |
| "grad_norm": 0.7463569045066833, |
| "learning_rate": 4.640920939704885e-06, |
| "loss": 0.0449, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.256047197640118, |
| "grad_norm": 2.031602144241333, |
| "learning_rate": 4.639145625963544e-06, |
| "loss": 0.0673, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.25644051130776796, |
| "grad_norm": 2.0455472469329834, |
| "learning_rate": 4.637366275696718e-06, |
| "loss": 0.0495, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.2568338249754179, |
| "grad_norm": 1.2602909803390503, |
| "learning_rate": 4.635582892262006e-06, |
| "loss": 0.0442, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.25722713864306784, |
| "grad_norm": 1.3121466636657715, |
| "learning_rate": 4.633795479024616e-06, |
| "loss": 0.0404, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.2576204523107178, |
| "grad_norm": 1.028448224067688, |
| "learning_rate": 4.632004039357364e-06, |
| "loss": 0.0497, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2580137659783677, |
| "grad_norm": 0.9586936235427856, |
| "learning_rate": 4.630208576640659e-06, |
| "loss": 0.0499, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.2584070796460177, |
| "grad_norm": 1.3646454811096191, |
| "learning_rate": 4.628409094262504e-06, |
| "loss": 0.0383, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.25880039331366766, |
| "grad_norm": 1.6489843130111694, |
| "learning_rate": 4.6266055956184865e-06, |
| "loss": 0.0458, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.2591937069813176, |
| "grad_norm": 1.8696314096450806, |
| "learning_rate": 4.624798084111773e-06, |
| "loss": 0.0783, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.25958702064896755, |
| "grad_norm": 1.5261452198028564, |
| "learning_rate": 4.622986563153104e-06, |
| "loss": 0.0465, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2599803343166175, |
| "grad_norm": 1.8203606605529785, |
| "learning_rate": 4.621171036160781e-06, |
| "loss": 0.0767, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.26037364798426743, |
| "grad_norm": 1.3250322341918945, |
| "learning_rate": 4.6193515065606675e-06, |
| "loss": 0.0607, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.26076696165191743, |
| "grad_norm": 1.298017978668213, |
| "learning_rate": 4.617527977786182e-06, |
| "loss": 0.0619, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.26116027531956737, |
| "grad_norm": 1.0446304082870483, |
| "learning_rate": 4.615700453278285e-06, |
| "loss": 0.0268, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.2615535889872173, |
| "grad_norm": 1.0812922716140747, |
| "learning_rate": 4.61386893648548e-06, |
| "loss": 0.0519, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.26194690265486725, |
| "grad_norm": 1.8242236375808716, |
| "learning_rate": 4.612033430863804e-06, |
| "loss": 0.0565, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.2623402163225172, |
| "grad_norm": 1.567988634109497, |
| "learning_rate": 4.610193939876818e-06, |
| "loss": 0.0476, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.26273352999016714, |
| "grad_norm": 3.7344436645507812, |
| "learning_rate": 4.608350466995606e-06, |
| "loss": 0.0519, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.26312684365781713, |
| "grad_norm": 3.131584882736206, |
| "learning_rate": 4.606503015698765e-06, |
| "loss": 0.0696, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.2635201573254671, |
| "grad_norm": 1.2186100482940674, |
| "learning_rate": 4.6046515894723985e-06, |
| "loss": 0.0596, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.263913470993117, |
| "grad_norm": 0.8804354667663574, |
| "learning_rate": 4.602796191810113e-06, |
| "loss": 0.0465, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.26430678466076696, |
| "grad_norm": 1.961540937423706, |
| "learning_rate": 4.600936826213004e-06, |
| "loss": 0.0756, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.2647000983284169, |
| "grad_norm": 0.739213764667511, |
| "learning_rate": 4.59907349618966e-06, |
| "loss": 0.0475, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.26509341199606684, |
| "grad_norm": 0.8394540548324585, |
| "learning_rate": 4.597206205256147e-06, |
| "loss": 0.0538, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.26548672566371684, |
| "grad_norm": 1.5452135801315308, |
| "learning_rate": 4.595334956936007e-06, |
| "loss": 0.0664, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.2658800393313668, |
| "grad_norm": 1.613324522972107, |
| "learning_rate": 4.593459754760248e-06, |
| "loss": 0.0673, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.2662733529990167, |
| "grad_norm": 1.4427350759506226, |
| "learning_rate": 4.591580602267338e-06, |
| "loss": 0.0509, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 1.7156988382339478, |
| "learning_rate": 4.589697503003203e-06, |
| "loss": 0.0601, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.2670599803343166, |
| "grad_norm": 1.4072953462600708, |
| "learning_rate": 4.587810460521213e-06, |
| "loss": 0.0678, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.26745329400196655, |
| "grad_norm": 0.7101967930793762, |
| "learning_rate": 4.585919478382178e-06, |
| "loss": 0.0522, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.26784660766961654, |
| "grad_norm": 0.5038359761238098, |
| "learning_rate": 4.584024560154348e-06, |
| "loss": 0.0408, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.2682399213372665, |
| "grad_norm": 1.1651291847229004, |
| "learning_rate": 4.582125709413392e-06, |
| "loss": 0.0719, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.2686332350049164, |
| "grad_norm": 1.0390863418579102, |
| "learning_rate": 4.580222929742407e-06, |
| "loss": 0.0402, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.26902654867256637, |
| "grad_norm": 1.8808722496032715, |
| "learning_rate": 4.5783162247318986e-06, |
| "loss": 0.0612, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.2694198623402163, |
| "grad_norm": 1.4362890720367432, |
| "learning_rate": 4.576405597979782e-06, |
| "loss": 0.0367, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.26981317600786625, |
| "grad_norm": 0.9547756910324097, |
| "learning_rate": 4.5744910530913725e-06, |
| "loss": 0.0799, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.27020648967551625, |
| "grad_norm": 1.8914170265197754, |
| "learning_rate": 4.572572593679379e-06, |
| "loss": 0.048, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.2705998033431662, |
| "grad_norm": 1.460436224937439, |
| "learning_rate": 4.5706502233638935e-06, |
| "loss": 0.0633, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.27099311701081613, |
| "grad_norm": 1.7330501079559326, |
| "learning_rate": 4.568723945772394e-06, |
| "loss": 0.0332, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.2713864306784661, |
| "grad_norm": 1.1326316595077515, |
| "learning_rate": 4.5667937645397276e-06, |
| "loss": 0.0555, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.271779744346116, |
| "grad_norm": 0.8753216862678528, |
| "learning_rate": 4.564859683308107e-06, |
| "loss": 0.0416, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.27217305801376596, |
| "grad_norm": 0.8659785389900208, |
| "learning_rate": 4.562921705727106e-06, |
| "loss": 0.0551, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.27256637168141595, |
| "grad_norm": 0.502169668674469, |
| "learning_rate": 4.5609798354536495e-06, |
| "loss": 0.0284, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.2729596853490659, |
| "grad_norm": 2.1083321571350098, |
| "learning_rate": 4.559034076152009e-06, |
| "loss": 0.0779, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.27335299901671584, |
| "grad_norm": 1.5410869121551514, |
| "learning_rate": 4.557084431493793e-06, |
| "loss": 0.0788, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.2737463126843658, |
| "grad_norm": 1.707189679145813, |
| "learning_rate": 4.555130905157943e-06, |
| "loss": 0.0921, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.2741396263520157, |
| "grad_norm": 1.2371059656143188, |
| "learning_rate": 4.553173500830724e-06, |
| "loss": 0.0562, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.27453294001966566, |
| "grad_norm": 1.6234147548675537, |
| "learning_rate": 4.55121222220572e-06, |
| "loss": 0.0471, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.27492625368731566, |
| "grad_norm": 1.2629426717758179, |
| "learning_rate": 4.549247072983825e-06, |
| "loss": 0.0795, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.2753195673549656, |
| "grad_norm": 1.7955608367919922, |
| "learning_rate": 4.5472780568732356e-06, |
| "loss": 0.0468, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.27571288102261554, |
| "grad_norm": 7.252640724182129, |
| "learning_rate": 4.545305177589448e-06, |
| "loss": 0.0699, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.2761061946902655, |
| "grad_norm": 1.8121711015701294, |
| "learning_rate": 4.5433284388552435e-06, |
| "loss": 0.0718, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.2764995083579154, |
| "grad_norm": 0.901907742023468, |
| "learning_rate": 4.541347844400692e-06, |
| "loss": 0.0255, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.27689282202556537, |
| "grad_norm": 0.7126281261444092, |
| "learning_rate": 4.539363397963134e-06, |
| "loss": 0.0509, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.27728613569321536, |
| "grad_norm": 2.012707233428955, |
| "learning_rate": 4.537375103287183e-06, |
| "loss": 0.0904, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.2776794493608653, |
| "grad_norm": 1.7197178602218628, |
| "learning_rate": 4.53538296412471e-06, |
| "loss": 0.0617, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.27807276302851525, |
| "grad_norm": 2.5714545249938965, |
| "learning_rate": 4.533386984234841e-06, |
| "loss": 0.0825, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.2784660766961652, |
| "grad_norm": 1.3491824865341187, |
| "learning_rate": 4.5313871673839525e-06, |
| "loss": 0.0545, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.27885939036381513, |
| "grad_norm": 1.0081161260604858, |
| "learning_rate": 4.52938351734566e-06, |
| "loss": 0.046, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.27925270403146507, |
| "grad_norm": 1.3097039461135864, |
| "learning_rate": 4.52737603790081e-06, |
| "loss": 0.0678, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.27964601769911507, |
| "grad_norm": 1.264832615852356, |
| "learning_rate": 4.525364732837476e-06, |
| "loss": 0.0408, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.280039331366765, |
| "grad_norm": 1.6724627017974854, |
| "learning_rate": 4.523349605950953e-06, |
| "loss": 0.0583, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.28043264503441495, |
| "grad_norm": 1.2600414752960205, |
| "learning_rate": 4.521330661043744e-06, |
| "loss": 0.0762, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.2808259587020649, |
| "grad_norm": 0.8454362750053406, |
| "learning_rate": 4.519307901925558e-06, |
| "loss": 0.0433, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.28121927236971483, |
| "grad_norm": 2.131969451904297, |
| "learning_rate": 4.517281332413302e-06, |
| "loss": 0.0738, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.2816125860373648, |
| "grad_norm": 2.226288080215454, |
| "learning_rate": 4.515250956331072e-06, |
| "loss": 0.0892, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.2820058997050148, |
| "grad_norm": 1.6737391948699951, |
| "learning_rate": 4.513216777510149e-06, |
| "loss": 0.0556, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.2823992133726647, |
| "grad_norm": 1.5575467348098755, |
| "learning_rate": 4.511178799788987e-06, |
| "loss": 0.0561, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.28279252704031466, |
| "grad_norm": 1.7405011653900146, |
| "learning_rate": 4.50913702701321e-06, |
| "loss": 0.0653, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.2831858407079646, |
| "grad_norm": 1.097738265991211, |
| "learning_rate": 4.507091463035601e-06, |
| "loss": 0.0772, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.28357915437561454, |
| "grad_norm": 0.8409376740455627, |
| "learning_rate": 4.505042111716103e-06, |
| "loss": 0.0645, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.2839724680432645, |
| "grad_norm": 1.1851140260696411, |
| "learning_rate": 4.502988976921797e-06, |
| "loss": 0.0462, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.2843657817109145, |
| "grad_norm": 1.7740516662597656, |
| "learning_rate": 4.50093206252691e-06, |
| "loss": 0.0717, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.2847590953785644, |
| "grad_norm": 2.491065263748169, |
| "learning_rate": 4.498871372412798e-06, |
| "loss": 0.0575, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.28515240904621436, |
| "grad_norm": 1.446291446685791, |
| "learning_rate": 4.496806910467944e-06, |
| "loss": 0.0566, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.2855457227138643, |
| "grad_norm": 1.2584576606750488, |
| "learning_rate": 4.494738680587946e-06, |
| "loss": 0.053, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.28593903638151424, |
| "grad_norm": 1.188159704208374, |
| "learning_rate": 4.492666686675511e-06, |
| "loss": 0.0627, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.2863323500491642, |
| "grad_norm": 1.2687791585922241, |
| "learning_rate": 4.490590932640453e-06, |
| "loss": 0.0676, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.2867256637168142, |
| "grad_norm": 1.7722615003585815, |
| "learning_rate": 4.488511422399677e-06, |
| "loss": 0.0548, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.2871189773844641, |
| "grad_norm": 3.2244741916656494, |
| "learning_rate": 4.48642815987718e-06, |
| "loss": 0.0763, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.28751229105211407, |
| "grad_norm": 1.1106655597686768, |
| "learning_rate": 4.484341149004035e-06, |
| "loss": 0.0862, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.287905604719764, |
| "grad_norm": 0.6258023381233215, |
| "learning_rate": 4.482250393718392e-06, |
| "loss": 0.0526, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.28829891838741395, |
| "grad_norm": 0.7904531955718994, |
| "learning_rate": 4.480155897965463e-06, |
| "loss": 0.0367, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.2886922320550639, |
| "grad_norm": 1.5454163551330566, |
| "learning_rate": 4.47805766569752e-06, |
| "loss": 0.0747, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.2890855457227139, |
| "grad_norm": 2.1076667308807373, |
| "learning_rate": 4.475955700873888e-06, |
| "loss": 0.0939, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.28947885939036383, |
| "grad_norm": 1.407893419265747, |
| "learning_rate": 4.473850007460932e-06, |
| "loss": 0.0524, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.28987217305801377, |
| "grad_norm": 1.957629680633545, |
| "learning_rate": 4.471740589432053e-06, |
| "loss": 0.0541, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.2902654867256637, |
| "grad_norm": 1.0253725051879883, |
| "learning_rate": 4.469627450767682e-06, |
| "loss": 0.0478, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.29065880039331365, |
| "grad_norm": 1.5762360095977783, |
| "learning_rate": 4.46751059545527e-06, |
| "loss": 0.0936, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.2910521140609636, |
| "grad_norm": 1.2460707426071167, |
| "learning_rate": 4.465390027489279e-06, |
| "loss": 0.0596, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2914454277286136, |
| "grad_norm": 1.042962670326233, |
| "learning_rate": 4.463265750871182e-06, |
| "loss": 0.0615, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.29183874139626353, |
| "grad_norm": 1.554513692855835, |
| "learning_rate": 4.461137769609445e-06, |
| "loss": 0.0562, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.2922320550639135, |
| "grad_norm": 1.5099841356277466, |
| "learning_rate": 4.459006087719527e-06, |
| "loss": 0.0462, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.2926253687315634, |
| "grad_norm": 0.8272073864936829, |
| "learning_rate": 4.45687070922387e-06, |
| "loss": 0.0311, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.29301868239921336, |
| "grad_norm": 1.1962639093399048, |
| "learning_rate": 4.4547316381518905e-06, |
| "loss": 0.054, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.2934119960668633, |
| "grad_norm": 0.7265387773513794, |
| "learning_rate": 4.4525888785399725e-06, |
| "loss": 0.0322, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.2938053097345133, |
| "grad_norm": 2.045783042907715, |
| "learning_rate": 4.450442434431463e-06, |
| "loss": 0.0668, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.29419862340216324, |
| "grad_norm": 1.417593240737915, |
| "learning_rate": 4.448292309876657e-06, |
| "loss": 0.0499, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.2945919370698132, |
| "grad_norm": 1.4235261678695679, |
| "learning_rate": 4.4461385089328e-06, |
| "loss": 0.0904, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.2949852507374631, |
| "grad_norm": 1.050933837890625, |
| "learning_rate": 4.44398103566407e-06, |
| "loss": 0.05, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.29537856440511306, |
| "grad_norm": 1.3113094568252563, |
| "learning_rate": 4.4418198941415756e-06, |
| "loss": 0.0717, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.295771878072763, |
| "grad_norm": 1.1153532266616821, |
| "learning_rate": 4.4396550884433495e-06, |
| "loss": 0.0613, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.296165191740413, |
| "grad_norm": 1.6574000120162964, |
| "learning_rate": 4.437486622654337e-06, |
| "loss": 0.08, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.29655850540806294, |
| "grad_norm": 1.037023901939392, |
| "learning_rate": 4.43531450086639e-06, |
| "loss": 0.059, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.2969518190757129, |
| "grad_norm": 1.3382397890090942, |
| "learning_rate": 4.433138727178259e-06, |
| "loss": 0.0504, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.2973451327433628, |
| "grad_norm": 2.023531198501587, |
| "learning_rate": 4.4309593056955865e-06, |
| "loss": 0.0682, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.29773844641101277, |
| "grad_norm": 1.3962974548339844, |
| "learning_rate": 4.4287762405308974e-06, |
| "loss": 0.0678, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.2981317600786627, |
| "grad_norm": 0.6099796295166016, |
| "learning_rate": 4.426589535803593e-06, |
| "loss": 0.0496, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.2985250737463127, |
| "grad_norm": 1.6071325540542603, |
| "learning_rate": 4.424399195639941e-06, |
| "loss": 0.0519, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.29891838741396265, |
| "grad_norm": 1.116490125656128, |
| "learning_rate": 4.422205224173071e-06, |
| "loss": 0.0651, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2993117010816126, |
| "grad_norm": 1.163526177406311, |
| "learning_rate": 4.420007625542963e-06, |
| "loss": 0.042, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.29970501474926253, |
| "grad_norm": 0.6789044737815857, |
| "learning_rate": 4.417806403896442e-06, |
| "loss": 0.0652, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.3000983284169125, |
| "grad_norm": 1.6137206554412842, |
| "learning_rate": 4.41560156338717e-06, |
| "loss": 0.073, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3004916420845624, |
| "grad_norm": 1.9308634996414185, |
| "learning_rate": 4.413393108175637e-06, |
| "loss": 0.0805, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.3008849557522124, |
| "grad_norm": 1.6792504787445068, |
| "learning_rate": 4.411181042429156e-06, |
| "loss": 0.0471, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.30127826941986235, |
| "grad_norm": 1.1271363496780396, |
| "learning_rate": 4.40896537032185e-06, |
| "loss": 0.0378, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.3016715830875123, |
| "grad_norm": 1.0671911239624023, |
| "learning_rate": 4.406746096034647e-06, |
| "loss": 0.0548, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.30206489675516224, |
| "grad_norm": 1.2227768898010254, |
| "learning_rate": 4.4045232237552756e-06, |
| "loss": 0.0701, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.3024582104228122, |
| "grad_norm": 1.471924901008606, |
| "learning_rate": 4.4022967576782525e-06, |
| "loss": 0.0568, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3028515240904621, |
| "grad_norm": 1.6219385862350464, |
| "learning_rate": 4.400066702004874e-06, |
| "loss": 0.05, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3032448377581121, |
| "grad_norm": 1.4471542835235596, |
| "learning_rate": 4.39783306094321e-06, |
| "loss": 0.0685, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.30363815142576206, |
| "grad_norm": 1.525600552558899, |
| "learning_rate": 4.395595838708099e-06, |
| "loss": 0.0513, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.304031465093412, |
| "grad_norm": 1.3881157636642456, |
| "learning_rate": 4.393355039521134e-06, |
| "loss": 0.0812, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.30442477876106194, |
| "grad_norm": 1.1738461256027222, |
| "learning_rate": 4.391110667610658e-06, |
| "loss": 0.0595, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3048180924287119, |
| "grad_norm": 1.1576417684555054, |
| "learning_rate": 4.388862727211759e-06, |
| "loss": 0.0541, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.3052114060963618, |
| "grad_norm": 1.283400058746338, |
| "learning_rate": 4.386611222566254e-06, |
| "loss": 0.0505, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3056047197640118, |
| "grad_norm": 1.4386646747589111, |
| "learning_rate": 4.384356157922688e-06, |
| "loss": 0.0706, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.30599803343166176, |
| "grad_norm": 2.0160024166107178, |
| "learning_rate": 4.382097537536322e-06, |
| "loss": 0.0596, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.3063913470993117, |
| "grad_norm": 1.3747514486312866, |
| "learning_rate": 4.379835365669132e-06, |
| "loss": 0.0561, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.30678466076696165, |
| "grad_norm": 1.5668084621429443, |
| "learning_rate": 4.377569646589789e-06, |
| "loss": 0.0522, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3071779744346116, |
| "grad_norm": 1.6369160413742065, |
| "learning_rate": 4.375300384573659e-06, |
| "loss": 0.05, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.30757128810226153, |
| "grad_norm": 1.2633172273635864, |
| "learning_rate": 4.373027583902796e-06, |
| "loss": 0.0447, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.30796460176991153, |
| "grad_norm": 1.3119875192642212, |
| "learning_rate": 4.370751248865929e-06, |
| "loss": 0.062, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.30835791543756147, |
| "grad_norm": 2.1404073238372803, |
| "learning_rate": 4.368471383758459e-06, |
| "loss": 0.0446, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.3087512291052114, |
| "grad_norm": 0.7563901543617249, |
| "learning_rate": 4.366187992882444e-06, |
| "loss": 0.0429, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.30914454277286135, |
| "grad_norm": 0.7048685550689697, |
| "learning_rate": 4.3639010805466e-06, |
| "loss": 0.0299, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3095378564405113, |
| "grad_norm": 0.7395270466804504, |
| "learning_rate": 4.361610651066283e-06, |
| "loss": 0.0334, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.30993117010816124, |
| "grad_norm": 1.2910830974578857, |
| "learning_rate": 4.35931670876349e-06, |
| "loss": 0.0666, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.31032448377581123, |
| "grad_norm": 3.32393217086792, |
| "learning_rate": 4.357019257966844e-06, |
| "loss": 0.0773, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.3107177974434612, |
| "grad_norm": 1.2098692655563354, |
| "learning_rate": 4.354718303011588e-06, |
| "loss": 0.0524, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3111111111111111, |
| "grad_norm": 1.650527834892273, |
| "learning_rate": 4.352413848239579e-06, |
| "loss": 0.0518, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.31150442477876106, |
| "grad_norm": 0.8377374410629272, |
| "learning_rate": 4.35010589799928e-06, |
| "loss": 0.0482, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.311897738446411, |
| "grad_norm": 1.225882649421692, |
| "learning_rate": 4.347794456645744e-06, |
| "loss": 0.0405, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.31229105211406094, |
| "grad_norm": 2.0014147758483887, |
| "learning_rate": 4.345479528540618e-06, |
| "loss": 0.053, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.31268436578171094, |
| "grad_norm": 1.2061558961868286, |
| "learning_rate": 4.343161118052123e-06, |
| "loss": 0.045, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.3130776794493609, |
| "grad_norm": 0.8555061221122742, |
| "learning_rate": 4.340839229555056e-06, |
| "loss": 0.0673, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.3134709931170108, |
| "grad_norm": 1.4630858898162842, |
| "learning_rate": 4.338513867430773e-06, |
| "loss": 0.0414, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.31386430678466076, |
| "grad_norm": 1.101480484008789, |
| "learning_rate": 4.336185036067187e-06, |
| "loss": 0.0383, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3142576204523107, |
| "grad_norm": 0.6861633658409119, |
| "learning_rate": 4.3338527398587575e-06, |
| "loss": 0.0393, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.31465093411996065, |
| "grad_norm": 1.0716795921325684, |
| "learning_rate": 4.33151698320648e-06, |
| "loss": 0.0407, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.31504424778761064, |
| "grad_norm": 1.0103176832199097, |
| "learning_rate": 4.329177770517881e-06, |
| "loss": 0.0467, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.3154375614552606, |
| "grad_norm": 1.1415047645568848, |
| "learning_rate": 4.32683510620701e-06, |
| "loss": 0.0518, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3158308751229105, |
| "grad_norm": 1.0959949493408203, |
| "learning_rate": 4.324488994694427e-06, |
| "loss": 0.0447, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.31622418879056047, |
| "grad_norm": 3.7971184253692627, |
| "learning_rate": 4.322139440407198e-06, |
| "loss": 0.1218, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3166175024582104, |
| "grad_norm": 1.0682744979858398, |
| "learning_rate": 4.319786447778887e-06, |
| "loss": 0.0271, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.31701081612586035, |
| "grad_norm": 0.7397903800010681, |
| "learning_rate": 4.317430021249543e-06, |
| "loss": 0.0313, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.31740412979351035, |
| "grad_norm": 1.9803013801574707, |
| "learning_rate": 4.315070165265695e-06, |
| "loss": 0.0832, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.3177974434611603, |
| "grad_norm": 0.9591525793075562, |
| "learning_rate": 4.312706884280349e-06, |
| "loss": 0.0611, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.31819075712881023, |
| "grad_norm": 0.7980911731719971, |
| "learning_rate": 4.310340182752965e-06, |
| "loss": 0.0163, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3185840707964602, |
| "grad_norm": 0.8986029028892517, |
| "learning_rate": 4.307970065149464e-06, |
| "loss": 0.0382, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3189773844641101, |
| "grad_norm": 0.9218258857727051, |
| "learning_rate": 4.305596535942211e-06, |
| "loss": 0.0362, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.31937069813176006, |
| "grad_norm": 1.9387575387954712, |
| "learning_rate": 4.303219599610009e-06, |
| "loss": 0.045, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.31976401179941005, |
| "grad_norm": 2.1032979488372803, |
| "learning_rate": 4.300839260638089e-06, |
| "loss": 0.0583, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.32015732546706, |
| "grad_norm": 0.8777870535850525, |
| "learning_rate": 4.298455523518102e-06, |
| "loss": 0.0611, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.32055063913470994, |
| "grad_norm": 1.7572643756866455, |
| "learning_rate": 4.296068392748116e-06, |
| "loss": 0.053, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.3209439528023599, |
| "grad_norm": 1.3729215860366821, |
| "learning_rate": 4.293677872832599e-06, |
| "loss": 0.1014, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.3213372664700098, |
| "grad_norm": 2.968247175216675, |
| "learning_rate": 4.291283968282413e-06, |
| "loss": 0.0422, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.32173058013765976, |
| "grad_norm": 1.2367733716964722, |
| "learning_rate": 4.288886683614809e-06, |
| "loss": 0.0598, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.32212389380530976, |
| "grad_norm": 2.149622678756714, |
| "learning_rate": 4.286486023353417e-06, |
| "loss": 0.0834, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.3225172074729597, |
| "grad_norm": 2.1104652881622314, |
| "learning_rate": 4.284081992028235e-06, |
| "loss": 0.0764, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.32291052114060964, |
| "grad_norm": 1.5311528444290161, |
| "learning_rate": 4.281674594175621e-06, |
| "loss": 0.0586, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.3233038348082596, |
| "grad_norm": 1.432000756263733, |
| "learning_rate": 4.2792638343382894e-06, |
| "loss": 0.0787, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.3236971484759095, |
| "grad_norm": 1.2007765769958496, |
| "learning_rate": 4.276849717065295e-06, |
| "loss": 0.0462, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.32409046214355947, |
| "grad_norm": 1.0811890363693237, |
| "learning_rate": 4.2744322469120296e-06, |
| "loss": 0.0624, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.32448377581120946, |
| "grad_norm": 1.440487265586853, |
| "learning_rate": 4.272011428440212e-06, |
| "loss": 0.0557, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.3248770894788594, |
| "grad_norm": 2.677267551422119, |
| "learning_rate": 4.269587266217878e-06, |
| "loss": 0.0804, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.32527040314650935, |
| "grad_norm": 1.07245671749115, |
| "learning_rate": 4.2671597648193745e-06, |
| "loss": 0.0542, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.3256637168141593, |
| "grad_norm": 1.0649880170822144, |
| "learning_rate": 4.264728928825347e-06, |
| "loss": 0.0573, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.32605703048180923, |
| "grad_norm": 1.880872130393982, |
| "learning_rate": 4.262294762822738e-06, |
| "loss": 0.0892, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.32645034414945917, |
| "grad_norm": 1.7007864713668823, |
| "learning_rate": 4.259857271404767e-06, |
| "loss": 0.097, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.32684365781710917, |
| "grad_norm": 0.9796857237815857, |
| "learning_rate": 4.257416459170935e-06, |
| "loss": 0.0372, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.3272369714847591, |
| "grad_norm": 1.3802924156188965, |
| "learning_rate": 4.254972330727004e-06, |
| "loss": 0.0388, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.32763028515240905, |
| "grad_norm": 1.8189585208892822, |
| "learning_rate": 4.252524890685e-06, |
| "loss": 0.0504, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.328023598820059, |
| "grad_norm": 1.2440087795257568, |
| "learning_rate": 4.250074143663189e-06, |
| "loss": 0.055, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.32841691248770893, |
| "grad_norm": 1.26856529712677, |
| "learning_rate": 4.247620094286085e-06, |
| "loss": 0.0528, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.3288102261553589, |
| "grad_norm": 1.8983615636825562, |
| "learning_rate": 4.2451627471844305e-06, |
| "loss": 0.0527, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.3292035398230089, |
| "grad_norm": 0.9810947179794312, |
| "learning_rate": 4.24270210699519e-06, |
| "loss": 0.04, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.3295968534906588, |
| "grad_norm": 1.2199605703353882, |
| "learning_rate": 4.240238178361543e-06, |
| "loss": 0.0443, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.32999016715830876, |
| "grad_norm": 0.5256842374801636, |
| "learning_rate": 4.237770965932875e-06, |
| "loss": 0.0267, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.3303834808259587, |
| "grad_norm": 1.456432819366455, |
| "learning_rate": 4.235300474364766e-06, |
| "loss": 0.0623, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.33077679449360864, |
| "grad_norm": 1.4406569004058838, |
| "learning_rate": 4.232826708318985e-06, |
| "loss": 0.0453, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3311701081612586, |
| "grad_norm": 1.9302328824996948, |
| "learning_rate": 4.230349672463481e-06, |
| "loss": 0.0655, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.3315634218289086, |
| "grad_norm": 0.7055051922798157, |
| "learning_rate": 4.22786937147237e-06, |
| "loss": 0.0405, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.3319567354965585, |
| "grad_norm": 2.823591947555542, |
| "learning_rate": 4.2253858100259304e-06, |
| "loss": 0.1111, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.33235004916420846, |
| "grad_norm": 1.458694577217102, |
| "learning_rate": 4.222898992810596e-06, |
| "loss": 0.0688, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3327433628318584, |
| "grad_norm": 1.3440479040145874, |
| "learning_rate": 4.220408924518939e-06, |
| "loss": 0.0654, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.33313667649950834, |
| "grad_norm": 1.2197304964065552, |
| "learning_rate": 4.217915609849671e-06, |
| "loss": 0.0269, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.3335299901671583, |
| "grad_norm": 1.0218877792358398, |
| "learning_rate": 4.215419053507626e-06, |
| "loss": 0.0525, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.3339233038348083, |
| "grad_norm": 1.4025174379348755, |
| "learning_rate": 4.212919260203757e-06, |
| "loss": 0.0947, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3343166175024582, |
| "grad_norm": 0.7898326516151428, |
| "learning_rate": 4.210416234655125e-06, |
| "loss": 0.0337, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.33470993117010817, |
| "grad_norm": 1.196540355682373, |
| "learning_rate": 4.207909981584889e-06, |
| "loss": 0.0578, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.3351032448377581, |
| "grad_norm": 0.926796555519104, |
| "learning_rate": 4.2054005057223e-06, |
| "loss": 0.0672, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.33549655850540805, |
| "grad_norm": 1.2736568450927734, |
| "learning_rate": 4.202887811802687e-06, |
| "loss": 0.0484, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.335889872173058, |
| "grad_norm": 1.2440752983093262, |
| "learning_rate": 4.200371904567457e-06, |
| "loss": 0.0478, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.336283185840708, |
| "grad_norm": 1.4759784936904907, |
| "learning_rate": 4.197852788764075e-06, |
| "loss": 0.0458, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.33667649950835793, |
| "grad_norm": 0.7424830794334412, |
| "learning_rate": 4.195330469146063e-06, |
| "loss": 0.0327, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.33706981317600787, |
| "grad_norm": 1.2250968217849731, |
| "learning_rate": 4.1928049504729886e-06, |
| "loss": 0.0637, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.3374631268436578, |
| "grad_norm": 1.2263579368591309, |
| "learning_rate": 4.1902762375104555e-06, |
| "loss": 0.0733, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.33785644051130775, |
| "grad_norm": 0.5867930054664612, |
| "learning_rate": 4.187744335030095e-06, |
| "loss": 0.055, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.3382497541789577, |
| "grad_norm": 2.040759563446045, |
| "learning_rate": 4.185209247809557e-06, |
| "loss": 0.0664, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.3386430678466077, |
| "grad_norm": 2.09037709236145, |
| "learning_rate": 4.182670980632501e-06, |
| "loss": 0.0728, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.33903638151425763, |
| "grad_norm": 3.822634220123291, |
| "learning_rate": 4.180129538288587e-06, |
| "loss": 0.0912, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.3394296951819076, |
| "grad_norm": 1.7590773105621338, |
| "learning_rate": 4.177584925573466e-06, |
| "loss": 0.0623, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.3398230088495575, |
| "grad_norm": 1.2151440382003784, |
| "learning_rate": 4.175037147288772e-06, |
| "loss": 0.044, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.34021632251720746, |
| "grad_norm": 0.765602171421051, |
| "learning_rate": 4.172486208242113e-06, |
| "loss": 0.0811, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3406096361848574, |
| "grad_norm": 0.9690750241279602, |
| "learning_rate": 4.169932113247059e-06, |
| "loss": 0.0587, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.3410029498525074, |
| "grad_norm": 0.6641612648963928, |
| "learning_rate": 4.167374867123138e-06, |
| "loss": 0.0336, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.34139626352015734, |
| "grad_norm": 0.9194386601448059, |
| "learning_rate": 4.164814474695823e-06, |
| "loss": 0.0566, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.3417895771878073, |
| "grad_norm": 2.2128334045410156, |
| "learning_rate": 4.162250940796523e-06, |
| "loss": 0.074, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.3421828908554572, |
| "grad_norm": 1.8464068174362183, |
| "learning_rate": 4.159684270262576e-06, |
| "loss": 0.0736, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.34257620452310716, |
| "grad_norm": 0.9694234728813171, |
| "learning_rate": 4.157114467937239e-06, |
| "loss": 0.0413, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.3429695181907571, |
| "grad_norm": 1.4554444551467896, |
| "learning_rate": 4.154541538669677e-06, |
| "loss": 0.0468, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.3433628318584071, |
| "grad_norm": 1.3524583578109741, |
| "learning_rate": 4.151965487314959e-06, |
| "loss": 0.049, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.34375614552605704, |
| "grad_norm": 1.6620694398880005, |
| "learning_rate": 4.1493863187340415e-06, |
| "loss": 0.0686, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.344149459193707, |
| "grad_norm": 0.8126603364944458, |
| "learning_rate": 4.146804037793763e-06, |
| "loss": 0.0335, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.3445427728613569, |
| "grad_norm": 1.852401852607727, |
| "learning_rate": 4.144218649366839e-06, |
| "loss": 0.0488, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.34493608652900687, |
| "grad_norm": 1.165703296661377, |
| "learning_rate": 4.141630158331845e-06, |
| "loss": 0.0464, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3453294001966568, |
| "grad_norm": 2.391685962677002, |
| "learning_rate": 4.139038569573213e-06, |
| "loss": 0.0829, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.3457227138643068, |
| "grad_norm": 1.832273006439209, |
| "learning_rate": 4.1364438879812194e-06, |
| "loss": 0.0406, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.34611602753195675, |
| "grad_norm": 1.1527806520462036, |
| "learning_rate": 4.1338461184519776e-06, |
| "loss": 0.0682, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3465093411996067, |
| "grad_norm": 1.8680974245071411, |
| "learning_rate": 4.131245265887426e-06, |
| "loss": 0.0847, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.34690265486725663, |
| "grad_norm": 1.7685651779174805, |
| "learning_rate": 4.1286413351953235e-06, |
| "loss": 0.0461, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.3472959685349066, |
| "grad_norm": 2.0602667331695557, |
| "learning_rate": 4.126034331289235e-06, |
| "loss": 0.0992, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.3476892822025565, |
| "grad_norm": 1.4323168992996216, |
| "learning_rate": 4.123424259088525e-06, |
| "loss": 0.0992, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.3480825958702065, |
| "grad_norm": 0.9091783165931702, |
| "learning_rate": 4.120811123518349e-06, |
| "loss": 0.0519, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.34847590953785645, |
| "grad_norm": 1.3111385107040405, |
| "learning_rate": 4.1181949295096415e-06, |
| "loss": 0.0811, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.3488692232055064, |
| "grad_norm": 2.218848705291748, |
| "learning_rate": 4.11557568199911e-06, |
| "loss": 0.0743, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.34926253687315634, |
| "grad_norm": 0.9991410970687866, |
| "learning_rate": 4.112953385929221e-06, |
| "loss": 0.0488, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.3496558505408063, |
| "grad_norm": 1.4411261081695557, |
| "learning_rate": 4.110328046248196e-06, |
| "loss": 0.0704, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.3500491642084562, |
| "grad_norm": 1.3707761764526367, |
| "learning_rate": 4.107699667909999e-06, |
| "loss": 0.0514, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.3504424778761062, |
| "grad_norm": 1.438081979751587, |
| "learning_rate": 4.105068255874328e-06, |
| "loss": 0.0622, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.35083579154375616, |
| "grad_norm": 1.0999984741210938, |
| "learning_rate": 4.102433815106606e-06, |
| "loss": 0.0423, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.3512291052114061, |
| "grad_norm": 1.6553218364715576, |
| "learning_rate": 4.09979635057797e-06, |
| "loss": 0.0621, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.35162241887905604, |
| "grad_norm": 2.6534736156463623, |
| "learning_rate": 4.097155867265264e-06, |
| "loss": 0.0956, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.352015732546706, |
| "grad_norm": 1.2164000272750854, |
| "learning_rate": 4.094512370151027e-06, |
| "loss": 0.064, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.3524090462143559, |
| "grad_norm": 1.4759900569915771, |
| "learning_rate": 4.091865864223487e-06, |
| "loss": 0.0496, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.3528023598820059, |
| "grad_norm": 1.3511669635772705, |
| "learning_rate": 4.089216354476545e-06, |
| "loss": 0.0662, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.35319567354965586, |
| "grad_norm": 1.4343103170394897, |
| "learning_rate": 4.086563845909779e-06, |
| "loss": 0.0543, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.3535889872173058, |
| "grad_norm": 0.5085878968238831, |
| "learning_rate": 4.083908343528415e-06, |
| "loss": 0.0457, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.35398230088495575, |
| "grad_norm": 0.9629530906677246, |
| "learning_rate": 4.081249852343336e-06, |
| "loss": 0.0422, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3543756145526057, |
| "grad_norm": 1.697277307510376, |
| "learning_rate": 4.078588377371062e-06, |
| "loss": 0.0583, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.35476892822025563, |
| "grad_norm": 1.2820713520050049, |
| "learning_rate": 4.075923923633745e-06, |
| "loss": 0.0621, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.3551622418879056, |
| "grad_norm": 0.9127804636955261, |
| "learning_rate": 4.073256496159153e-06, |
| "loss": 0.0616, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 1.4303189516067505, |
| "learning_rate": 4.070586099980672e-06, |
| "loss": 0.0556, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.3559488692232055, |
| "grad_norm": 0.8110685348510742, |
| "learning_rate": 4.067912740137285e-06, |
| "loss": 0.0665, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.35634218289085545, |
| "grad_norm": 1.490004062652588, |
| "learning_rate": 4.06523642167357e-06, |
| "loss": 0.0771, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.3567354965585054, |
| "grad_norm": 1.763295292854309, |
| "learning_rate": 4.062557149639688e-06, |
| "loss": 0.0824, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.35712881022615534, |
| "grad_norm": 2.5675792694091797, |
| "learning_rate": 4.059874929091369e-06, |
| "loss": 0.0886, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.35752212389380533, |
| "grad_norm": 1.442456841468811, |
| "learning_rate": 4.057189765089914e-06, |
| "loss": 0.0507, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.3579154375614553, |
| "grad_norm": 1.2593395709991455, |
| "learning_rate": 4.054501662702172e-06, |
| "loss": 0.0555, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.3583087512291052, |
| "grad_norm": 1.1391284465789795, |
| "learning_rate": 4.05181062700054e-06, |
| "loss": 0.058, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.35870206489675516, |
| "grad_norm": 0.7833881378173828, |
| "learning_rate": 4.049116663062949e-06, |
| "loss": 0.0588, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.3590953785644051, |
| "grad_norm": 1.7920033931732178, |
| "learning_rate": 4.046419775972855e-06, |
| "loss": 0.1015, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.35948869223205504, |
| "grad_norm": 1.4693628549575806, |
| "learning_rate": 4.043719970819231e-06, |
| "loss": 0.0734, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.35988200589970504, |
| "grad_norm": 0.9692854285240173, |
| "learning_rate": 4.041017252696556e-06, |
| "loss": 0.0537, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.360275319567355, |
| "grad_norm": 0.9593791961669922, |
| "learning_rate": 4.038311626704806e-06, |
| "loss": 0.0599, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.3606686332350049, |
| "grad_norm": 1.1619371175765991, |
| "learning_rate": 4.035603097949444e-06, |
| "loss": 0.0597, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.36106194690265486, |
| "grad_norm": 1.3384184837341309, |
| "learning_rate": 4.032891671541409e-06, |
| "loss": 0.0513, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.3614552605703048, |
| "grad_norm": 0.7744063138961792, |
| "learning_rate": 4.030177352597109e-06, |
| "loss": 0.0428, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.36184857423795475, |
| "grad_norm": 1.1778054237365723, |
| "learning_rate": 4.027460146238411e-06, |
| "loss": 0.0733, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.36224188790560474, |
| "grad_norm": 1.161788821220398, |
| "learning_rate": 4.02474005759263e-06, |
| "loss": 0.0735, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3626352015732547, |
| "grad_norm": 2.0623209476470947, |
| "learning_rate": 4.022017091792518e-06, |
| "loss": 0.065, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.3630285152409046, |
| "grad_norm": 1.3139375448226929, |
| "learning_rate": 4.01929125397626e-06, |
| "loss": 0.0582, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.36342182890855457, |
| "grad_norm": 2.0761849880218506, |
| "learning_rate": 4.016562549287455e-06, |
| "loss": 0.0557, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.3638151425762045, |
| "grad_norm": 1.474522352218628, |
| "learning_rate": 4.013830982875117e-06, |
| "loss": 0.0665, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.36420845624385445, |
| "grad_norm": 1.7274634838104248, |
| "learning_rate": 4.0110965598936565e-06, |
| "loss": 0.0735, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.36460176991150445, |
| "grad_norm": 0.7064616084098816, |
| "learning_rate": 4.008359285502877e-06, |
| "loss": 0.0449, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.3649950835791544, |
| "grad_norm": 0.8762916922569275, |
| "learning_rate": 4.005619164867959e-06, |
| "loss": 0.0582, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.36538839724680433, |
| "grad_norm": 1.2766094207763672, |
| "learning_rate": 4.002876203159458e-06, |
| "loss": 0.0467, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.36578171091445427, |
| "grad_norm": 1.4357662200927734, |
| "learning_rate": 4.000130405553287e-06, |
| "loss": 0.0676, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3661750245821042, |
| "grad_norm": 1.755672574043274, |
| "learning_rate": 3.997381777230714e-06, |
| "loss": 0.0647, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.36656833824975416, |
| "grad_norm": 0.9483436942100525, |
| "learning_rate": 3.994630323378344e-06, |
| "loss": 0.0601, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.36696165191740415, |
| "grad_norm": 1.6659551858901978, |
| "learning_rate": 3.991876049188116e-06, |
| "loss": 0.0738, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.3673549655850541, |
| "grad_norm": 1.5737981796264648, |
| "learning_rate": 3.989118959857293e-06, |
| "loss": 0.0483, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.36774827925270404, |
| "grad_norm": 1.5014865398406982, |
| "learning_rate": 3.986359060588446e-06, |
| "loss": 0.0458, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.368141592920354, |
| "grad_norm": 1.5164520740509033, |
| "learning_rate": 3.983596356589452e-06, |
| "loss": 0.0617, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.3685349065880039, |
| "grad_norm": 2.2842421531677246, |
| "learning_rate": 3.980830853073476e-06, |
| "loss": 0.0816, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.36892822025565386, |
| "grad_norm": 1.5114701986312866, |
| "learning_rate": 3.978062555258972e-06, |
| "loss": 0.0355, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.36932153392330386, |
| "grad_norm": 1.2816709280014038, |
| "learning_rate": 3.975291468369661e-06, |
| "loss": 0.0556, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.3697148475909538, |
| "grad_norm": 2.0237350463867188, |
| "learning_rate": 3.97251759763453e-06, |
| "loss": 0.0622, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.37010816125860374, |
| "grad_norm": 1.3120791912078857, |
| "learning_rate": 3.969740948287817e-06, |
| "loss": 0.0414, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.3705014749262537, |
| "grad_norm": 1.3838061094284058, |
| "learning_rate": 3.966961525569005e-06, |
| "loss": 0.0653, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.3708947885939036, |
| "grad_norm": 0.6813984513282776, |
| "learning_rate": 3.964179334722811e-06, |
| "loss": 0.0345, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.37128810226155357, |
| "grad_norm": 0.8976694345474243, |
| "learning_rate": 3.961394380999173e-06, |
| "loss": 0.0314, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.37168141592920356, |
| "grad_norm": 0.9033572673797607, |
| "learning_rate": 3.958606669653243e-06, |
| "loss": 0.0542, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.3720747295968535, |
| "grad_norm": 0.901779055595398, |
| "learning_rate": 3.955816205945378e-06, |
| "loss": 0.0359, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.37246804326450345, |
| "grad_norm": 2.198181390762329, |
| "learning_rate": 3.953022995141128e-06, |
| "loss": 0.0473, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.3728613569321534, |
| "grad_norm": 1.4871481657028198, |
| "learning_rate": 3.950227042511226e-06, |
| "loss": 0.0888, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.37325467059980333, |
| "grad_norm": 1.3157522678375244, |
| "learning_rate": 3.947428353331579e-06, |
| "loss": 0.041, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.37364798426745327, |
| "grad_norm": 1.431186318397522, |
| "learning_rate": 3.94462693288326e-06, |
| "loss": 0.0799, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.37404129793510327, |
| "grad_norm": 1.389054775238037, |
| "learning_rate": 3.941822786452491e-06, |
| "loss": 0.0457, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.3744346116027532, |
| "grad_norm": 1.6102625131607056, |
| "learning_rate": 3.939015919330643e-06, |
| "loss": 0.0926, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.37482792527040315, |
| "grad_norm": 0.8472495675086975, |
| "learning_rate": 3.936206336814219e-06, |
| "loss": 0.0408, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.3752212389380531, |
| "grad_norm": 0.8631911873817444, |
| "learning_rate": 3.933394044204843e-06, |
| "loss": 0.0405, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.37561455260570303, |
| "grad_norm": 5.559257507324219, |
| "learning_rate": 3.930579046809259e-06, |
| "loss": 0.048, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.376007866273353, |
| "grad_norm": 1.6139276027679443, |
| "learning_rate": 3.92776134993931e-06, |
| "loss": 0.0596, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.376401179941003, |
| "grad_norm": 1.7035290002822876, |
| "learning_rate": 3.924940958911933e-06, |
| "loss": 0.061, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.3767944936086529, |
| "grad_norm": 0.8409842848777771, |
| "learning_rate": 3.922117879049152e-06, |
| "loss": 0.0416, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.37718780727630286, |
| "grad_norm": 1.9367414712905884, |
| "learning_rate": 3.91929211567806e-06, |
| "loss": 0.0617, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.3775811209439528, |
| "grad_norm": 1.0128939151763916, |
| "learning_rate": 3.916463674130821e-06, |
| "loss": 0.0477, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.37797443461160274, |
| "grad_norm": 1.9125791788101196, |
| "learning_rate": 3.913632559744645e-06, |
| "loss": 0.0571, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.3783677482792527, |
| "grad_norm": 1.4633182287216187, |
| "learning_rate": 3.910798777861788e-06, |
| "loss": 0.0511, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.3787610619469027, |
| "grad_norm": 0.9891822934150696, |
| "learning_rate": 3.9079623338295436e-06, |
| "loss": 0.0485, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.3791543756145526, |
| "grad_norm": 1.2277315855026245, |
| "learning_rate": 3.9051232330002245e-06, |
| "loss": 0.0449, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.37954768928220256, |
| "grad_norm": 0.49736377596855164, |
| "learning_rate": 3.902281480731156e-06, |
| "loss": 0.0213, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.3799410029498525, |
| "grad_norm": 0.982218861579895, |
| "learning_rate": 3.899437082384671e-06, |
| "loss": 0.0581, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.38033431661750244, |
| "grad_norm": 0.8971213102340698, |
| "learning_rate": 3.89659004332809e-06, |
| "loss": 0.0458, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.3807276302851524, |
| "grad_norm": 0.4127979874610901, |
| "learning_rate": 3.893740368933722e-06, |
| "loss": 0.0313, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.3811209439528024, |
| "grad_norm": 2.5857155323028564, |
| "learning_rate": 3.8908880645788464e-06, |
| "loss": 0.0711, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.3815142576204523, |
| "grad_norm": 1.2110406160354614, |
| "learning_rate": 3.888033135645702e-06, |
| "loss": 0.0508, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.38190757128810227, |
| "grad_norm": 1.58492112159729, |
| "learning_rate": 3.885175587521486e-06, |
| "loss": 0.0662, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.3823008849557522, |
| "grad_norm": 0.8792701363563538, |
| "learning_rate": 3.882315425598334e-06, |
| "loss": 0.0767, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.38269419862340215, |
| "grad_norm": 1.797515869140625, |
| "learning_rate": 3.879452655273316e-06, |
| "loss": 0.0585, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.3830875122910521, |
| "grad_norm": 1.6386829614639282, |
| "learning_rate": 3.876587281948422e-06, |
| "loss": 0.08, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.3834808259587021, |
| "grad_norm": 1.1229251623153687, |
| "learning_rate": 3.873719311030556e-06, |
| "loss": 0.0585, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.38387413962635203, |
| "grad_norm": 1.2260591983795166, |
| "learning_rate": 3.8708487479315204e-06, |
| "loss": 0.0647, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.38426745329400197, |
| "grad_norm": 1.565321683883667, |
| "learning_rate": 3.867975598068012e-06, |
| "loss": 0.067, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.3846607669616519, |
| "grad_norm": 1.4004123210906982, |
| "learning_rate": 3.8650998668616085e-06, |
| "loss": 0.0765, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.38505408062930185, |
| "grad_norm": 1.5652803182601929, |
| "learning_rate": 3.862221559738757e-06, |
| "loss": 0.0672, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.3854473942969518, |
| "grad_norm": 4.284322738647461, |
| "learning_rate": 3.859340682130766e-06, |
| "loss": 0.0692, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.3858407079646018, |
| "grad_norm": 1.21330988407135, |
| "learning_rate": 3.856457239473795e-06, |
| "loss": 0.0828, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.38623402163225173, |
| "grad_norm": 2.4526336193084717, |
| "learning_rate": 3.853571237208843e-06, |
| "loss": 0.0694, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.3866273352999017, |
| "grad_norm": 1.0117402076721191, |
| "learning_rate": 3.8506826807817395e-06, |
| "loss": 0.0362, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.3870206489675516, |
| "grad_norm": 1.1363615989685059, |
| "learning_rate": 3.847791575643134e-06, |
| "loss": 0.0543, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.38741396263520156, |
| "grad_norm": 1.1766973733901978, |
| "learning_rate": 3.844897927248483e-06, |
| "loss": 0.0488, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.3878072763028515, |
| "grad_norm": 0.8534460067749023, |
| "learning_rate": 3.842001741058045e-06, |
| "loss": 0.0603, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.3882005899705015, |
| "grad_norm": 1.5655368566513062, |
| "learning_rate": 3.839103022536865e-06, |
| "loss": 0.0713, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.38859390363815144, |
| "grad_norm": 0.6574957966804504, |
| "learning_rate": 3.836201777154769e-06, |
| "loss": 0.0583, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.3889872173058014, |
| "grad_norm": 0.8077657222747803, |
| "learning_rate": 3.833298010386347e-06, |
| "loss": 0.05, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.3893805309734513, |
| "grad_norm": 1.513853669166565, |
| "learning_rate": 3.830391727710954e-06, |
| "loss": 0.0502, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.38977384464110126, |
| "grad_norm": 2.019428253173828, |
| "learning_rate": 3.827482934612684e-06, |
| "loss": 0.0557, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.3901671583087512, |
| "grad_norm": 1.0257922410964966, |
| "learning_rate": 3.824571636580372e-06, |
| "loss": 0.0625, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.3905604719764012, |
| "grad_norm": 0.5803849697113037, |
| "learning_rate": 3.821657839107583e-06, |
| "loss": 0.0442, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.39095378564405114, |
| "grad_norm": 0.8499471545219421, |
| "learning_rate": 3.818741547692593e-06, |
| "loss": 0.0342, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.3913470993117011, |
| "grad_norm": 0.4951908588409424, |
| "learning_rate": 3.815822767838386e-06, |
| "loss": 0.0343, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.391740412979351, |
| "grad_norm": 1.5221655368804932, |
| "learning_rate": 3.812901505052642e-06, |
| "loss": 0.0465, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.39213372664700097, |
| "grad_norm": 1.7891956567764282, |
| "learning_rate": 3.8099777648477264e-06, |
| "loss": 0.0821, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.3925270403146509, |
| "grad_norm": 0.8419029116630554, |
| "learning_rate": 3.8070515527406803e-06, |
| "loss": 0.0546, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.3929203539823009, |
| "grad_norm": 0.9236086010932922, |
| "learning_rate": 3.8041228742532064e-06, |
| "loss": 0.0423, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.39331366764995085, |
| "grad_norm": 1.0892646312713623, |
| "learning_rate": 3.8011917349116633e-06, |
| "loss": 0.0531, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3937069813176008, |
| "grad_norm": 1.6544411182403564, |
| "learning_rate": 3.7982581402470536e-06, |
| "loss": 0.0404, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.39410029498525073, |
| "grad_norm": 1.8338655233383179, |
| "learning_rate": 3.795322095795012e-06, |
| "loss": 0.0535, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.3944936086529007, |
| "grad_norm": 1.4561970233917236, |
| "learning_rate": 3.7923836070957963e-06, |
| "loss": 0.0506, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.3948869223205506, |
| "grad_norm": 1.1206718683242798, |
| "learning_rate": 3.7894426796942773e-06, |
| "loss": 0.07, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.3952802359882006, |
| "grad_norm": 1.5864077806472778, |
| "learning_rate": 3.786499319139926e-06, |
| "loss": 0.0511, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.39567354965585055, |
| "grad_norm": 1.6479477882385254, |
| "learning_rate": 3.7835535309868055e-06, |
| "loss": 0.1065, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.3960668633235005, |
| "grad_norm": 1.173240303993225, |
| "learning_rate": 3.78060532079356e-06, |
| "loss": 0.0366, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.39646017699115044, |
| "grad_norm": 1.512009859085083, |
| "learning_rate": 3.777654694123404e-06, |
| "loss": 0.0333, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.3968534906588004, |
| "grad_norm": 0.7629926800727844, |
| "learning_rate": 3.7747016565441112e-06, |
| "loss": 0.0293, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.3972468043264503, |
| "grad_norm": 1.325535774230957, |
| "learning_rate": 3.771746213628006e-06, |
| "loss": 0.0494, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3976401179941003, |
| "grad_norm": 0.9456796050071716, |
| "learning_rate": 3.7687883709519496e-06, |
| "loss": 0.0347, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.39803343166175026, |
| "grad_norm": 1.6305729150772095, |
| "learning_rate": 3.7658281340973336e-06, |
| "loss": 0.0782, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.3984267453294002, |
| "grad_norm": 2.3638815879821777, |
| "learning_rate": 3.7628655086500654e-06, |
| "loss": 0.0746, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.39882005899705014, |
| "grad_norm": 1.1770771741867065, |
| "learning_rate": 3.7599005002005616e-06, |
| "loss": 0.0436, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.3992133726647001, |
| "grad_norm": 1.2992199659347534, |
| "learning_rate": 3.7569331143437336e-06, |
| "loss": 0.0565, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.39960668633235, |
| "grad_norm": 1.2094827890396118, |
| "learning_rate": 3.7539633566789812e-06, |
| "loss": 0.0536, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.641381859779358, |
| "learning_rate": 3.750991232810177e-06, |
| "loss": 0.0373, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.40039331366764996, |
| "grad_norm": 0.7891103029251099, |
| "learning_rate": 3.7480167483456603e-06, |
| "loss": 0.0632, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.4007866273352999, |
| "grad_norm": 0.7216825485229492, |
| "learning_rate": 3.7450399088982247e-06, |
| "loss": 0.0513, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.40117994100294985, |
| "grad_norm": 0.7158090472221375, |
| "learning_rate": 3.742060720085107e-06, |
| "loss": 0.0456, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.4015732546705998, |
| "grad_norm": 0.58232182264328, |
| "learning_rate": 3.739079187527978e-06, |
| "loss": 0.027, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.40196656833824973, |
| "grad_norm": 1.546899437904358, |
| "learning_rate": 3.73609531685293e-06, |
| "loss": 0.1034, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4023598820058997, |
| "grad_norm": 1.1753488779067993, |
| "learning_rate": 3.733109113690469e-06, |
| "loss": 0.0609, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.40275319567354967, |
| "grad_norm": 1.5217546224594116, |
| "learning_rate": 3.7301205836755006e-06, |
| "loss": 0.0853, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.4031465093411996, |
| "grad_norm": 0.9366397857666016, |
| "learning_rate": 3.727129732447322e-06, |
| "loss": 0.0511, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.40353982300884955, |
| "grad_norm": 0.8296689391136169, |
| "learning_rate": 3.7241365656496103e-06, |
| "loss": 0.0336, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.4039331366764995, |
| "grad_norm": 0.8638429641723633, |
| "learning_rate": 3.7211410889304117e-06, |
| "loss": 0.0675, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.40432645034414944, |
| "grad_norm": 0.6674923896789551, |
| "learning_rate": 3.7181433079421316e-06, |
| "loss": 0.0299, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.40471976401179943, |
| "grad_norm": 1.5683988332748413, |
| "learning_rate": 3.7151432283415244e-06, |
| "loss": 0.0814, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.4051130776794494, |
| "grad_norm": 0.6941884756088257, |
| "learning_rate": 3.712140855789679e-06, |
| "loss": 0.0428, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.4055063913470993, |
| "grad_norm": 0.8299364447593689, |
| "learning_rate": 3.709136195952015e-06, |
| "loss": 0.0534, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.40589970501474926, |
| "grad_norm": 1.065128207206726, |
| "learning_rate": 3.706129254498266e-06, |
| "loss": 0.0527, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.4062930186823992, |
| "grad_norm": 1.3388938903808594, |
| "learning_rate": 3.703120037102469e-06, |
| "loss": 0.0619, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.40668633235004914, |
| "grad_norm": 1.6854989528656006, |
| "learning_rate": 3.7001085494429596e-06, |
| "loss": 0.0605, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.40707964601769914, |
| "grad_norm": 1.7878034114837646, |
| "learning_rate": 3.697094797202355e-06, |
| "loss": 0.0644, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.4074729596853491, |
| "grad_norm": 0.7512350082397461, |
| "learning_rate": 3.694078786067546e-06, |
| "loss": 0.0561, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.407866273352999, |
| "grad_norm": 0.5946680307388306, |
| "learning_rate": 3.691060521729686e-06, |
| "loss": 0.032, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.40825958702064896, |
| "grad_norm": 0.7464413642883301, |
| "learning_rate": 3.6880400098841794e-06, |
| "loss": 0.0581, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.4086529006882989, |
| "grad_norm": 1.3339935541152954, |
| "learning_rate": 3.6850172562306735e-06, |
| "loss": 0.065, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.40904621435594885, |
| "grad_norm": 1.2734817266464233, |
| "learning_rate": 3.681992266473044e-06, |
| "loss": 0.0302, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.40943952802359884, |
| "grad_norm": 1.6477503776550293, |
| "learning_rate": 3.6789650463193864e-06, |
| "loss": 0.0454, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.4098328416912488, |
| "grad_norm": 1.9478659629821777, |
| "learning_rate": 3.675935601482006e-06, |
| "loss": 0.0906, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.4102261553588987, |
| "grad_norm": 1.2177263498306274, |
| "learning_rate": 3.6729039376774055e-06, |
| "loss": 0.0708, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.41061946902654867, |
| "grad_norm": 1.3361903429031372, |
| "learning_rate": 3.6698700606262733e-06, |
| "loss": 0.0542, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.4110127826941986, |
| "grad_norm": 0.7786129117012024, |
| "learning_rate": 3.6668339760534768e-06, |
| "loss": 0.0666, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.41140609636184855, |
| "grad_norm": 0.4651035964488983, |
| "learning_rate": 3.6637956896880465e-06, |
| "loss": 0.0442, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.41179941002949855, |
| "grad_norm": 0.28553763031959534, |
| "learning_rate": 3.6607552072631685e-06, |
| "loss": 0.0266, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.4121927236971485, |
| "grad_norm": 1.054947018623352, |
| "learning_rate": 3.6577125345161748e-06, |
| "loss": 0.0533, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.41258603736479843, |
| "grad_norm": 0.6713748574256897, |
| "learning_rate": 3.6546676771885257e-06, |
| "loss": 0.0347, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.41297935103244837, |
| "grad_norm": 1.4435083866119385, |
| "learning_rate": 3.6516206410258092e-06, |
| "loss": 0.0384, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4133726647000983, |
| "grad_norm": 1.4494538307189941, |
| "learning_rate": 3.6485714317777223e-06, |
| "loss": 0.068, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.41376597836774826, |
| "grad_norm": 1.666913390159607, |
| "learning_rate": 3.6455200551980605e-06, |
| "loss": 0.0685, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.41415929203539825, |
| "grad_norm": 2.99609375, |
| "learning_rate": 3.642466517044713e-06, |
| "loss": 0.1213, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.4145526057030482, |
| "grad_norm": 1.6199326515197754, |
| "learning_rate": 3.6394108230796455e-06, |
| "loss": 0.0557, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.41494591937069814, |
| "grad_norm": 0.6611631512641907, |
| "learning_rate": 3.636352979068891e-06, |
| "loss": 0.0333, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.4153392330383481, |
| "grad_norm": 0.8349502086639404, |
| "learning_rate": 3.6332929907825426e-06, |
| "loss": 0.0285, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.415732546705998, |
| "grad_norm": 1.6354492902755737, |
| "learning_rate": 3.630230863994736e-06, |
| "loss": 0.0808, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.41612586037364796, |
| "grad_norm": 0.8214701414108276, |
| "learning_rate": 3.6271666044836433e-06, |
| "loss": 0.0355, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.41651917404129796, |
| "grad_norm": 1.321581244468689, |
| "learning_rate": 3.624100218031464e-06, |
| "loss": 0.0444, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.4169124877089479, |
| "grad_norm": 0.7428562641143799, |
| "learning_rate": 3.621031710424407e-06, |
| "loss": 0.0259, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.41730580137659784, |
| "grad_norm": 0.7929845452308655, |
| "learning_rate": 3.6179610874526856e-06, |
| "loss": 0.0345, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.4176991150442478, |
| "grad_norm": 0.6758319139480591, |
| "learning_rate": 3.614888354910505e-06, |
| "loss": 0.037, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.4180924287118977, |
| "grad_norm": 1.5147916078567505, |
| "learning_rate": 3.6118135185960507e-06, |
| "loss": 0.0855, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.41848574237954766, |
| "grad_norm": 1.0528610944747925, |
| "learning_rate": 3.6087365843114773e-06, |
| "loss": 0.0324, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.41887905604719766, |
| "grad_norm": 1.3274002075195312, |
| "learning_rate": 3.6056575578629006e-06, |
| "loss": 0.0475, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.4192723697148476, |
| "grad_norm": 0.5520153641700745, |
| "learning_rate": 3.6025764450603808e-06, |
| "loss": 0.022, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.41966568338249755, |
| "grad_norm": 1.81023371219635, |
| "learning_rate": 3.5994932517179182e-06, |
| "loss": 0.043, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.4200589970501475, |
| "grad_norm": 1.3602193593978882, |
| "learning_rate": 3.596407983653436e-06, |
| "loss": 0.073, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.42045231071779743, |
| "grad_norm": 1.921582579612732, |
| "learning_rate": 3.5933206466887755e-06, |
| "loss": 0.0759, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.42084562438544737, |
| "grad_norm": 0.8578033447265625, |
| "learning_rate": 3.59023124664968e-06, |
| "loss": 0.0249, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.42123893805309737, |
| "grad_norm": 1.7219325304031372, |
| "learning_rate": 3.5871397893657867e-06, |
| "loss": 0.0596, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.4216322517207473, |
| "grad_norm": 0.9463638663291931, |
| "learning_rate": 3.5840462806706126e-06, |
| "loss": 0.0454, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.42202556538839725, |
| "grad_norm": 1.9718307256698608, |
| "learning_rate": 3.5809507264015502e-06, |
| "loss": 0.0623, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.4224188790560472, |
| "grad_norm": 2.0382165908813477, |
| "learning_rate": 3.5778531323998465e-06, |
| "loss": 0.0497, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.42281219272369713, |
| "grad_norm": 1.496324062347412, |
| "learning_rate": 3.574753504510602e-06, |
| "loss": 0.0826, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4232055063913471, |
| "grad_norm": 0.49463126063346863, |
| "learning_rate": 3.571651848582753e-06, |
| "loss": 0.0415, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.42359882005899707, |
| "grad_norm": 1.1558905839920044, |
| "learning_rate": 3.5685481704690617e-06, |
| "loss": 0.0473, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.423992133726647, |
| "grad_norm": 3.914982795715332, |
| "learning_rate": 3.5654424760261082e-06, |
| "loss": 0.0853, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.42438544739429696, |
| "grad_norm": 1.7288295030593872, |
| "learning_rate": 3.5623347711142764e-06, |
| "loss": 0.0817, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.4247787610619469, |
| "grad_norm": 1.0033987760543823, |
| "learning_rate": 3.5592250615977434e-06, |
| "loss": 0.0552, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.42517207472959684, |
| "grad_norm": 1.461305856704712, |
| "learning_rate": 3.5561133533444703e-06, |
| "loss": 0.0659, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.4255653883972468, |
| "grad_norm": 0.7007796168327332, |
| "learning_rate": 3.552999652226189e-06, |
| "loss": 0.0332, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.4259587020648968, |
| "grad_norm": 0.7041943073272705, |
| "learning_rate": 3.549883964118392e-06, |
| "loss": 0.0205, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.4263520157325467, |
| "grad_norm": 1.5797779560089111, |
| "learning_rate": 3.54676629490032e-06, |
| "loss": 0.0564, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.42674532940019666, |
| "grad_norm": 1.4408408403396606, |
| "learning_rate": 3.543646650454955e-06, |
| "loss": 0.0347, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.4271386430678466, |
| "grad_norm": 0.709080159664154, |
| "learning_rate": 3.5405250366690023e-06, |
| "loss": 0.0259, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.42753195673549654, |
| "grad_norm": 1.4579590559005737, |
| "learning_rate": 3.5374014594328877e-06, |
| "loss": 0.0712, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.4279252704031465, |
| "grad_norm": 0.9378184676170349, |
| "learning_rate": 3.5342759246407378e-06, |
| "loss": 0.0583, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.4283185840707965, |
| "grad_norm": 0.9149574041366577, |
| "learning_rate": 3.5311484381903754e-06, |
| "loss": 0.0594, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.4287118977384464, |
| "grad_norm": 1.2301528453826904, |
| "learning_rate": 3.528019005983306e-06, |
| "loss": 0.0603, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.42910521140609637, |
| "grad_norm": 1.222373127937317, |
| "learning_rate": 3.5248876339247053e-06, |
| "loss": 0.0331, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.4294985250737463, |
| "grad_norm": 1.5141066312789917, |
| "learning_rate": 3.521754327923412e-06, |
| "loss": 0.0662, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.42989183874139625, |
| "grad_norm": 1.581040620803833, |
| "learning_rate": 3.5186190938919106e-06, |
| "loss": 0.0634, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.4302851524090462, |
| "grad_norm": 1.1250847578048706, |
| "learning_rate": 3.515481937746327e-06, |
| "loss": 0.0428, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.4306784660766962, |
| "grad_norm": 1.6886603832244873, |
| "learning_rate": 3.5123428654064134e-06, |
| "loss": 0.043, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.43107177974434613, |
| "grad_norm": 2.050182819366455, |
| "learning_rate": 3.509201882795536e-06, |
| "loss": 0.1201, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.43146509341199607, |
| "grad_norm": 1.2001996040344238, |
| "learning_rate": 3.5060589958406677e-06, |
| "loss": 0.0453, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.431858407079646, |
| "grad_norm": 1.0683172941207886, |
| "learning_rate": 3.5029142104723725e-06, |
| "loss": 0.0331, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.43225172074729595, |
| "grad_norm": 2.0737650394439697, |
| "learning_rate": 3.4997675326247993e-06, |
| "loss": 0.0526, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.4326450344149459, |
| "grad_norm": 0.8983532190322876, |
| "learning_rate": 3.4966189682356677e-06, |
| "loss": 0.0532, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4330383480825959, |
| "grad_norm": 1.8358802795410156, |
| "learning_rate": 3.493468523246255e-06, |
| "loss": 0.0598, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.43343166175024583, |
| "grad_norm": 2.076266050338745, |
| "learning_rate": 3.4903162036013894e-06, |
| "loss": 0.0836, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.4338249754178958, |
| "grad_norm": 2.4419870376586914, |
| "learning_rate": 3.487162015249436e-06, |
| "loss": 0.0758, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.4342182890855457, |
| "grad_norm": 1.3942052125930786, |
| "learning_rate": 3.484005964142285e-06, |
| "loss": 0.0803, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.43461160275319566, |
| "grad_norm": 1.3950960636138916, |
| "learning_rate": 3.4808480562353426e-06, |
| "loss": 0.0675, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.4350049164208456, |
| "grad_norm": 1.5000733137130737, |
| "learning_rate": 3.477688297487519e-06, |
| "loss": 0.0448, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.4353982300884956, |
| "grad_norm": 1.5005849599838257, |
| "learning_rate": 3.474526693861216e-06, |
| "loss": 0.0729, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.43579154375614554, |
| "grad_norm": 0.6299577951431274, |
| "learning_rate": 3.4713632513223178e-06, |
| "loss": 0.039, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.4361848574237955, |
| "grad_norm": 0.8964212536811829, |
| "learning_rate": 3.4681979758401767e-06, |
| "loss": 0.0521, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.4365781710914454, |
| "grad_norm": 1.3757152557373047, |
| "learning_rate": 3.465030873387606e-06, |
| "loss": 0.0598, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.43697148475909536, |
| "grad_norm": 0.48663070797920227, |
| "learning_rate": 3.461861949940865e-06, |
| "loss": 0.0442, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.4373647984267453, |
| "grad_norm": 0.8878856897354126, |
| "learning_rate": 3.458691211479649e-06, |
| "loss": 0.023, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.4377581120943953, |
| "grad_norm": 1.1162179708480835, |
| "learning_rate": 3.4555186639870795e-06, |
| "loss": 0.0493, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.43815142576204524, |
| "grad_norm": 1.1180258989334106, |
| "learning_rate": 3.4523443134496916e-06, |
| "loss": 0.0577, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.4385447394296952, |
| "grad_norm": 0.6240465641021729, |
| "learning_rate": 3.4491681658574205e-06, |
| "loss": 0.0295, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.4389380530973451, |
| "grad_norm": 2.439685106277466, |
| "learning_rate": 3.445990227203594e-06, |
| "loss": 0.0676, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.43933136676499507, |
| "grad_norm": 1.1544771194458008, |
| "learning_rate": 3.442810503484921e-06, |
| "loss": 0.0487, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.439724680432645, |
| "grad_norm": 1.794083833694458, |
| "learning_rate": 3.4396290007014752e-06, |
| "loss": 0.043, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.440117994100295, |
| "grad_norm": 0.8073402643203735, |
| "learning_rate": 3.4364457248566913e-06, |
| "loss": 0.0404, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.44051130776794495, |
| "grad_norm": 0.4391036331653595, |
| "learning_rate": 3.433260681957346e-06, |
| "loss": 0.0394, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.4409046214355949, |
| "grad_norm": 1.0611299276351929, |
| "learning_rate": 3.430073878013554e-06, |
| "loss": 0.0263, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.44129793510324483, |
| "grad_norm": 0.48767581582069397, |
| "learning_rate": 3.4268853190387496e-06, |
| "loss": 0.0341, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.4416912487708948, |
| "grad_norm": 0.6423639059066772, |
| "learning_rate": 3.423695011049683e-06, |
| "loss": 0.0234, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.4420845624385447, |
| "grad_norm": 1.0390664339065552, |
| "learning_rate": 3.4205029600663996e-06, |
| "loss": 0.0593, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.4424778761061947, |
| "grad_norm": 1.2516858577728271, |
| "learning_rate": 3.4173091721122375e-06, |
| "loss": 0.0375, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.44287118977384465, |
| "grad_norm": 1.670310139656067, |
| "learning_rate": 3.414113653213812e-06, |
| "loss": 0.0504, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.4432645034414946, |
| "grad_norm": 2.317314624786377, |
| "learning_rate": 3.410916409401004e-06, |
| "loss": 0.0911, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.44365781710914454, |
| "grad_norm": 1.418398141860962, |
| "learning_rate": 3.407717446706948e-06, |
| "loss": 0.0439, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.4440511307767945, |
| "grad_norm": 1.1104565858840942, |
| "learning_rate": 3.4045167711680244e-06, |
| "loss": 0.0485, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 1.8792333602905273, |
| "learning_rate": 3.4013143888238455e-06, |
| "loss": 0.064, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4448377581120944, |
| "grad_norm": 1.7921650409698486, |
| "learning_rate": 3.398110305717241e-06, |
| "loss": 0.0495, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.44523107177974436, |
| "grad_norm": 1.4747095108032227, |
| "learning_rate": 3.3949045278942545e-06, |
| "loss": 0.0743, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.4456243854473943, |
| "grad_norm": 0.6847875118255615, |
| "learning_rate": 3.3916970614041244e-06, |
| "loss": 0.0224, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.44601769911504424, |
| "grad_norm": 0.7522935271263123, |
| "learning_rate": 3.3884879122992762e-06, |
| "loss": 0.0334, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.4464110127826942, |
| "grad_norm": 1.5176104307174683, |
| "learning_rate": 3.3852770866353125e-06, |
| "loss": 0.0729, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.4468043264503441, |
| "grad_norm": 1.188468337059021, |
| "learning_rate": 3.382064590470996e-06, |
| "loss": 0.0315, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.4471976401179941, |
| "grad_norm": 0.5583229660987854, |
| "learning_rate": 3.378850429868244e-06, |
| "loss": 0.0292, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.44759095378564406, |
| "grad_norm": 0.7804880738258362, |
| "learning_rate": 3.3756346108921145e-06, |
| "loss": 0.0378, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.447984267453294, |
| "grad_norm": 1.090079426765442, |
| "learning_rate": 3.372417139610793e-06, |
| "loss": 0.0549, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.44837758112094395, |
| "grad_norm": 1.363856554031372, |
| "learning_rate": 3.369198022095585e-06, |
| "loss": 0.0859, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4487708947885939, |
| "grad_norm": 1.162818431854248, |
| "learning_rate": 3.3659772644209023e-06, |
| "loss": 0.0292, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.44916420845624383, |
| "grad_norm": 0.8213643431663513, |
| "learning_rate": 3.36275487266425e-06, |
| "loss": 0.0435, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.4495575221238938, |
| "grad_norm": 0.8050291538238525, |
| "learning_rate": 3.3595308529062176e-06, |
| "loss": 0.0279, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.44995083579154377, |
| "grad_norm": 1.1065354347229004, |
| "learning_rate": 3.3563052112304674e-06, |
| "loss": 0.0425, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.4503441494591937, |
| "grad_norm": 0.9072518348693848, |
| "learning_rate": 3.3530779537237194e-06, |
| "loss": 0.0315, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.45073746312684365, |
| "grad_norm": 0.8572150468826294, |
| "learning_rate": 3.349849086475747e-06, |
| "loss": 0.0306, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.4511307767944936, |
| "grad_norm": 1.552173137664795, |
| "learning_rate": 3.346618615579359e-06, |
| "loss": 0.0671, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.45152409046214353, |
| "grad_norm": 0.9978398084640503, |
| "learning_rate": 3.3433865471303876e-06, |
| "loss": 0.0667, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.45191740412979353, |
| "grad_norm": 2.7961080074310303, |
| "learning_rate": 3.3401528872276847e-06, |
| "loss": 0.0696, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.4523107177974435, |
| "grad_norm": 1.520912528038025, |
| "learning_rate": 3.3369176419731004e-06, |
| "loss": 0.0722, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4527040314650934, |
| "grad_norm": 0.8389769196510315, |
| "learning_rate": 3.33368081747148e-06, |
| "loss": 0.0444, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.45309734513274336, |
| "grad_norm": 2.075424909591675, |
| "learning_rate": 3.3304424198306464e-06, |
| "loss": 0.0826, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.4534906588003933, |
| "grad_norm": 0.7416201829910278, |
| "learning_rate": 3.3272024551613926e-06, |
| "loss": 0.0283, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.45388397246804324, |
| "grad_norm": 1.0457786321640015, |
| "learning_rate": 3.3239609295774667e-06, |
| "loss": 0.0418, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.45427728613569324, |
| "grad_norm": 0.9312077760696411, |
| "learning_rate": 3.3207178491955656e-06, |
| "loss": 0.0341, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.4546705998033432, |
| "grad_norm": 0.9886119365692139, |
| "learning_rate": 3.3174732201353155e-06, |
| "loss": 0.0623, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.4550639134709931, |
| "grad_norm": 1.2970693111419678, |
| "learning_rate": 3.3142270485192683e-06, |
| "loss": 0.087, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.45545722713864306, |
| "grad_norm": 1.273305892944336, |
| "learning_rate": 3.3109793404728855e-06, |
| "loss": 0.0654, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.455850540806293, |
| "grad_norm": 0.8121715188026428, |
| "learning_rate": 3.3077301021245285e-06, |
| "loss": 0.0257, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.45624385447394294, |
| "grad_norm": 1.6593793630599976, |
| "learning_rate": 3.3044793396054447e-06, |
| "loss": 0.0679, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.45663716814159294, |
| "grad_norm": 1.2623846530914307, |
| "learning_rate": 3.3012270590497596e-06, |
| "loss": 0.071, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.4570304818092429, |
| "grad_norm": 0.9096400737762451, |
| "learning_rate": 3.2979732665944615e-06, |
| "loss": 0.067, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.4574237954768928, |
| "grad_norm": 0.9472593069076538, |
| "learning_rate": 3.2947179683793928e-06, |
| "loss": 0.0395, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.45781710914454277, |
| "grad_norm": 0.9576103091239929, |
| "learning_rate": 3.291461170547237e-06, |
| "loss": 0.049, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.4582104228121927, |
| "grad_norm": 0.9918181300163269, |
| "learning_rate": 3.2882028792435072e-06, |
| "loss": 0.0318, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.45860373647984265, |
| "grad_norm": 1.843493938446045, |
| "learning_rate": 3.2849431006165343e-06, |
| "loss": 0.0634, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.45899705014749265, |
| "grad_norm": 0.8672575950622559, |
| "learning_rate": 3.2816818408174567e-06, |
| "loss": 0.0826, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.4593903638151426, |
| "grad_norm": 1.5660734176635742, |
| "learning_rate": 3.278419106000206e-06, |
| "loss": 0.0695, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.45978367748279253, |
| "grad_norm": 1.3234399557113647, |
| "learning_rate": 3.2751549023214995e-06, |
| "loss": 0.0381, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.46017699115044247, |
| "grad_norm": 1.7596269845962524, |
| "learning_rate": 3.2718892359408245e-06, |
| "loss": 0.0438, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.4605703048180924, |
| "grad_norm": 0.6878931522369385, |
| "learning_rate": 3.2686221130204287e-06, |
| "loss": 0.0347, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.46096361848574235, |
| "grad_norm": 1.0857138633728027, |
| "learning_rate": 3.265353539725309e-06, |
| "loss": 0.0609, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.46135693215339235, |
| "grad_norm": 0.777098536491394, |
| "learning_rate": 3.2620835222231972e-06, |
| "loss": 0.0597, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.4617502458210423, |
| "grad_norm": 4.028940677642822, |
| "learning_rate": 3.2588120666845534e-06, |
| "loss": 0.0702, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.46214355948869223, |
| "grad_norm": 1.3609766960144043, |
| "learning_rate": 3.255539179282548e-06, |
| "loss": 0.0478, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.4625368731563422, |
| "grad_norm": 1.3808916807174683, |
| "learning_rate": 3.2522648661930558e-06, |
| "loss": 0.0787, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.4629301868239921, |
| "grad_norm": 1.464201807975769, |
| "learning_rate": 3.2489891335946413e-06, |
| "loss": 0.0565, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.46332350049164206, |
| "grad_norm": 1.4196548461914062, |
| "learning_rate": 3.245711987668545e-06, |
| "loss": 0.0747, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.46371681415929206, |
| "grad_norm": 1.5526188611984253, |
| "learning_rate": 3.2424334345986787e-06, |
| "loss": 0.0384, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.464110127826942, |
| "grad_norm": 1.4707880020141602, |
| "learning_rate": 3.239153480571605e-06, |
| "loss": 0.0669, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.46450344149459194, |
| "grad_norm": 1.5997252464294434, |
| "learning_rate": 3.2358721317765344e-06, |
| "loss": 0.063, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.4648967551622419, |
| "grad_norm": 0.7773184180259705, |
| "learning_rate": 3.2325893944053066e-06, |
| "loss": 0.0515, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.4652900688298918, |
| "grad_norm": 1.1635929346084595, |
| "learning_rate": 3.2293052746523814e-06, |
| "loss": 0.0494, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.46568338249754176, |
| "grad_norm": 0.9854192137718201, |
| "learning_rate": 3.2260197787148277e-06, |
| "loss": 0.0559, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.46607669616519176, |
| "grad_norm": 1.9313583374023438, |
| "learning_rate": 3.222732912792313e-06, |
| "loss": 0.0447, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.4664700098328417, |
| "grad_norm": 2.149656295776367, |
| "learning_rate": 3.2194446830870865e-06, |
| "loss": 0.0772, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.46686332350049164, |
| "grad_norm": 1.784822940826416, |
| "learning_rate": 3.2161550958039732e-06, |
| "loss": 0.0746, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.4672566371681416, |
| "grad_norm": 1.5821526050567627, |
| "learning_rate": 3.2128641571503594e-06, |
| "loss": 0.0613, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.46764995083579153, |
| "grad_norm": 1.6123450994491577, |
| "learning_rate": 3.2095718733361803e-06, |
| "loss": 0.0419, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.46804326450344147, |
| "grad_norm": 1.5458816289901733, |
| "learning_rate": 3.2062782505739125e-06, |
| "loss": 0.0854, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.46843657817109147, |
| "grad_norm": 1.5308221578598022, |
| "learning_rate": 3.202983295078555e-06, |
| "loss": 0.063, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.4688298918387414, |
| "grad_norm": 1.166703224182129, |
| "learning_rate": 3.199687013067624e-06, |
| "loss": 0.0759, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.46922320550639135, |
| "grad_norm": 1.2040659189224243, |
| "learning_rate": 3.1963894107611395e-06, |
| "loss": 0.0648, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.4696165191740413, |
| "grad_norm": 0.8159343004226685, |
| "learning_rate": 3.1930904943816104e-06, |
| "loss": 0.0252, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.47000983284169123, |
| "grad_norm": 0.5714221596717834, |
| "learning_rate": 3.189790270154028e-06, |
| "loss": 0.0402, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.4704031465093412, |
| "grad_norm": 1.1028029918670654, |
| "learning_rate": 3.186488744305849e-06, |
| "loss": 0.0358, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.47079646017699117, |
| "grad_norm": 1.1706167459487915, |
| "learning_rate": 3.183185923066988e-06, |
| "loss": 0.0405, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.4711897738446411, |
| "grad_norm": 2.2323551177978516, |
| "learning_rate": 3.179881812669804e-06, |
| "loss": 0.0626, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.47158308751229105, |
| "grad_norm": 1.4933780431747437, |
| "learning_rate": 3.1765764193490863e-06, |
| "loss": 0.0421, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.471976401179941, |
| "grad_norm": 1.759582281112671, |
| "learning_rate": 3.173269749342047e-06, |
| "loss": 0.0386, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.47236971484759094, |
| "grad_norm": 0.9716536998748779, |
| "learning_rate": 3.1699618088883094e-06, |
| "loss": 0.0469, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.4727630285152409, |
| "grad_norm": 1.4588727951049805, |
| "learning_rate": 3.1666526042298883e-06, |
| "loss": 0.062, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.4731563421828909, |
| "grad_norm": 0.7807295918464661, |
| "learning_rate": 3.16334214161119e-06, |
| "loss": 0.0516, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.4735496558505408, |
| "grad_norm": 0.9360034465789795, |
| "learning_rate": 3.1600304272789904e-06, |
| "loss": 0.0413, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.47394296951819076, |
| "grad_norm": 3.0252861976623535, |
| "learning_rate": 3.1567174674824303e-06, |
| "loss": 0.0517, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.4743362831858407, |
| "grad_norm": 1.2127926349639893, |
| "learning_rate": 3.1534032684729978e-06, |
| "loss": 0.0634, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.47472959685349064, |
| "grad_norm": 1.008239984512329, |
| "learning_rate": 3.1500878365045217e-06, |
| "loss": 0.035, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.4751229105211406, |
| "grad_norm": 0.8630732893943787, |
| "learning_rate": 3.1467711778331573e-06, |
| "loss": 0.0432, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.4755162241887906, |
| "grad_norm": 0.5713632702827454, |
| "learning_rate": 3.143453298717373e-06, |
| "loss": 0.0293, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.4759095378564405, |
| "grad_norm": 1.3503292798995972, |
| "learning_rate": 3.14013420541794e-06, |
| "loss": 0.0488, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.47630285152409046, |
| "grad_norm": 0.6340729594230652, |
| "learning_rate": 3.1368139041979235e-06, |
| "loss": 0.0352, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.4766961651917404, |
| "grad_norm": 2.0643789768218994, |
| "learning_rate": 3.133492401322666e-06, |
| "loss": 0.0602, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.47708947885939035, |
| "grad_norm": 1.456824779510498, |
| "learning_rate": 3.1301697030597772e-06, |
| "loss": 0.0576, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.4774827925270403, |
| "grad_norm": 1.6788169145584106, |
| "learning_rate": 3.126845815679123e-06, |
| "loss": 0.0473, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.4778761061946903, |
| "grad_norm": 0.9894094467163086, |
| "learning_rate": 3.1235207454528137e-06, |
| "loss": 0.0486, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.47826941986234023, |
| "grad_norm": 0.6644244194030762, |
| "learning_rate": 3.12019449865519e-06, |
| "loss": 0.0348, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.47866273352999017, |
| "grad_norm": 1.8796205520629883, |
| "learning_rate": 3.116867081562815e-06, |
| "loss": 0.0711, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.4790560471976401, |
| "grad_norm": 0.71921706199646, |
| "learning_rate": 3.1135385004544584e-06, |
| "loss": 0.0439, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.47944936086529005, |
| "grad_norm": 1.4723786115646362, |
| "learning_rate": 3.1102087616110866e-06, |
| "loss": 0.0948, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.47984267453294, |
| "grad_norm": 1.0385109186172485, |
| "learning_rate": 3.1068778713158515e-06, |
| "loss": 0.0481, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.48023598820059, |
| "grad_norm": 1.8688119649887085, |
| "learning_rate": 3.1035458358540764e-06, |
| "loss": 0.0962, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.48062930186823993, |
| "grad_norm": 0.988058865070343, |
| "learning_rate": 3.100212661513247e-06, |
| "loss": 0.0862, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.4810226155358899, |
| "grad_norm": 0.7118948698043823, |
| "learning_rate": 3.096878354582998e-06, |
| "loss": 0.0492, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.4814159292035398, |
| "grad_norm": 1.1759183406829834, |
| "learning_rate": 3.093542921355099e-06, |
| "loss": 0.0278, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.48180924287118976, |
| "grad_norm": 0.8185058832168579, |
| "learning_rate": 3.0902063681234473e-06, |
| "loss": 0.0618, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.4822025565388397, |
| "grad_norm": 1.0773781538009644, |
| "learning_rate": 3.086868701184054e-06, |
| "loss": 0.0393, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.4825958702064897, |
| "grad_norm": 1.4859130382537842, |
| "learning_rate": 3.083529926835028e-06, |
| "loss": 0.0425, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.48298918387413964, |
| "grad_norm": 0.8524113297462463, |
| "learning_rate": 3.0801900513765732e-06, |
| "loss": 0.0667, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.4833824975417896, |
| "grad_norm": 1.2344658374786377, |
| "learning_rate": 3.076849081110967e-06, |
| "loss": 0.0469, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.4837758112094395, |
| "grad_norm": 1.4112597703933716, |
| "learning_rate": 3.073507022342554e-06, |
| "loss": 0.0439, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.48416912487708946, |
| "grad_norm": 1.0202746391296387, |
| "learning_rate": 3.070163881377734e-06, |
| "loss": 0.0953, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.4845624385447394, |
| "grad_norm": 1.2902711629867554, |
| "learning_rate": 3.066819664524947e-06, |
| "loss": 0.0378, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.4849557522123894, |
| "grad_norm": 0.8746582269668579, |
| "learning_rate": 3.063474378094665e-06, |
| "loss": 0.0404, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.48534906588003934, |
| "grad_norm": 1.8847814798355103, |
| "learning_rate": 3.060128028399376e-06, |
| "loss": 0.0779, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.4857423795476893, |
| "grad_norm": 1.2793282270431519, |
| "learning_rate": 3.056780621753577e-06, |
| "loss": 0.0433, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.4861356932153392, |
| "grad_norm": 1.4302126169204712, |
| "learning_rate": 3.0534321644737574e-06, |
| "loss": 0.0565, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.48652900688298917, |
| "grad_norm": 0.8506616353988647, |
| "learning_rate": 3.0500826628783903e-06, |
| "loss": 0.0448, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.4869223205506391, |
| "grad_norm": 1.7796978950500488, |
| "learning_rate": 3.046732123287918e-06, |
| "loss": 0.0449, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.4873156342182891, |
| "grad_norm": 1.4967756271362305, |
| "learning_rate": 3.043380552024744e-06, |
| "loss": 0.0409, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.48770894788593905, |
| "grad_norm": 1.2920217514038086, |
| "learning_rate": 3.0400279554132157e-06, |
| "loss": 0.0465, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.488102261553589, |
| "grad_norm": 1.9115070104599, |
| "learning_rate": 3.0366743397796166e-06, |
| "loss": 0.0591, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.48849557522123893, |
| "grad_norm": 0.988409161567688, |
| "learning_rate": 3.033319711452154e-06, |
| "loss": 0.042, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.4888888888888889, |
| "grad_norm": 2.1158268451690674, |
| "learning_rate": 3.0299640767609447e-06, |
| "loss": 0.0792, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.4892822025565388, |
| "grad_norm": 1.1518357992172241, |
| "learning_rate": 3.0266074420380043e-06, |
| "loss": 0.0554, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.4896755162241888, |
| "grad_norm": 1.3400568962097168, |
| "learning_rate": 3.023249813617238e-06, |
| "loss": 0.0545, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.49006882989183875, |
| "grad_norm": 0.8380603790283203, |
| "learning_rate": 3.0198911978344213e-06, |
| "loss": 0.0377, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.4904621435594887, |
| "grad_norm": 1.3251253366470337, |
| "learning_rate": 3.0165316010271982e-06, |
| "loss": 0.0419, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.49085545722713864, |
| "grad_norm": 0.7429760098457336, |
| "learning_rate": 3.0131710295350615e-06, |
| "loss": 0.0487, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.4912487708947886, |
| "grad_norm": 1.619492530822754, |
| "learning_rate": 3.0098094896993413e-06, |
| "loss": 0.0364, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.4916420845624385, |
| "grad_norm": 1.8555465936660767, |
| "learning_rate": 3.0064469878631986e-06, |
| "loss": 0.0327, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4920353982300885, |
| "grad_norm": 2.1514008045196533, |
| "learning_rate": 3.003083530371606e-06, |
| "loss": 0.0961, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.49242871189773846, |
| "grad_norm": 1.1894843578338623, |
| "learning_rate": 2.9997191235713435e-06, |
| "loss": 0.0773, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.4928220255653884, |
| "grad_norm": 1.375878095626831, |
| "learning_rate": 2.9963537738109783e-06, |
| "loss": 0.0635, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.49321533923303834, |
| "grad_norm": 0.9740056395530701, |
| "learning_rate": 2.9929874874408595e-06, |
| "loss": 0.0581, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.4936086529006883, |
| "grad_norm": 1.21156907081604, |
| "learning_rate": 2.9896202708131027e-06, |
| "loss": 0.0524, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.4940019665683382, |
| "grad_norm": 4.271803855895996, |
| "learning_rate": 2.98625213028158e-06, |
| "loss": 0.0437, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.4943952802359882, |
| "grad_norm": 1.0697994232177734, |
| "learning_rate": 2.9828830722019046e-06, |
| "loss": 0.0693, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.49478859390363816, |
| "grad_norm": 1.0657457113265991, |
| "learning_rate": 2.979513102931424e-06, |
| "loss": 0.0788, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.4951819075712881, |
| "grad_norm": 1.6833268404006958, |
| "learning_rate": 2.9761422288292017e-06, |
| "loss": 0.0755, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.49557522123893805, |
| "grad_norm": 0.7139087915420532, |
| "learning_rate": 2.9727704562560124e-06, |
| "loss": 0.0416, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.495968534906588, |
| "grad_norm": 1.025672435760498, |
| "learning_rate": 2.9693977915743227e-06, |
| "loss": 0.057, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.49636184857423793, |
| "grad_norm": 1.6005637645721436, |
| "learning_rate": 2.9660242411482848e-06, |
| "loss": 0.0694, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.4967551622418879, |
| "grad_norm": 1.2426131963729858, |
| "learning_rate": 2.9626498113437215e-06, |
| "loss": 0.0443, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.49714847590953787, |
| "grad_norm": 1.0461783409118652, |
| "learning_rate": 2.9592745085281154e-06, |
| "loss": 0.0449, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.4975417895771878, |
| "grad_norm": 1.1440929174423218, |
| "learning_rate": 2.955898339070596e-06, |
| "loss": 0.0429, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.49793510324483775, |
| "grad_norm": 1.5936861038208008, |
| "learning_rate": 2.9525213093419275e-06, |
| "loss": 0.0517, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.4983284169124877, |
| "grad_norm": 0.9140682220458984, |
| "learning_rate": 2.9491434257144995e-06, |
| "loss": 0.0699, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.49872173058013763, |
| "grad_norm": 0.6656792759895325, |
| "learning_rate": 2.9457646945623107e-06, |
| "loss": 0.023, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.49911504424778763, |
| "grad_norm": 1.1062997579574585, |
| "learning_rate": 2.9423851222609607e-06, |
| "loss": 0.0801, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.4995083579154376, |
| "grad_norm": 0.9155628085136414, |
| "learning_rate": 2.939004715187635e-06, |
| "loss": 0.0704, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4999016715830875, |
| "grad_norm": 0.8905113339424133, |
| "learning_rate": 2.935623479721095e-06, |
| "loss": 0.0442, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.5002949852507375, |
| "grad_norm": 0.8276392817497253, |
| "learning_rate": 2.932241422241665e-06, |
| "loss": 0.0535, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.5006882989183874, |
| "grad_norm": 0.5640360713005066, |
| "learning_rate": 2.9288585491312206e-06, |
| "loss": 0.0411, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.5010816125860373, |
| "grad_norm": 1.5979022979736328, |
| "learning_rate": 2.925474866773176e-06, |
| "loss": 0.0703, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.5014749262536873, |
| "grad_norm": 1.1477428674697876, |
| "learning_rate": 2.922090381552475e-06, |
| "loss": 0.0488, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.5018682399213372, |
| "grad_norm": 1.544410228729248, |
| "learning_rate": 2.9187050998555715e-06, |
| "loss": 0.0689, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.5022615535889872, |
| "grad_norm": 1.16623055934906, |
| "learning_rate": 2.915319028070427e-06, |
| "loss": 0.0681, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.5026548672566372, |
| "grad_norm": 0.2639702558517456, |
| "learning_rate": 2.9119321725864914e-06, |
| "loss": 0.0321, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.5030481809242872, |
| "grad_norm": 0.9400918483734131, |
| "learning_rate": 2.908544539794693e-06, |
| "loss": 0.0726, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.5034414945919371, |
| "grad_norm": 2.083108425140381, |
| "learning_rate": 2.9051561360874297e-06, |
| "loss": 0.0567, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.503834808259587, |
| "grad_norm": 0.9149637818336487, |
| "learning_rate": 2.901766967858551e-06, |
| "loss": 0.0626, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.504228121927237, |
| "grad_norm": 0.6115841269493103, |
| "learning_rate": 2.8983770415033507e-06, |
| "loss": 0.0386, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.5046214355948869, |
| "grad_norm": 1.530674695968628, |
| "learning_rate": 2.8949863634185533e-06, |
| "loss": 0.0743, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.5050147492625369, |
| "grad_norm": 0.9860877990722656, |
| "learning_rate": 2.8915949400022995e-06, |
| "loss": 0.0397, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.5054080629301868, |
| "grad_norm": 1.6740636825561523, |
| "learning_rate": 2.8882027776541406e-06, |
| "loss": 0.0997, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.5058013765978367, |
| "grad_norm": 1.1494807004928589, |
| "learning_rate": 2.8848098827750186e-06, |
| "loss": 0.0639, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.5061946902654867, |
| "grad_norm": 1.5039880275726318, |
| "learning_rate": 2.8814162617672586e-06, |
| "loss": 0.0615, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.5065880039331366, |
| "grad_norm": 1.2192140817642212, |
| "learning_rate": 2.8780219210345573e-06, |
| "loss": 0.0543, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.5069813176007866, |
| "grad_norm": 1.1865425109863281, |
| "learning_rate": 2.8746268669819676e-06, |
| "loss": 0.069, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.5073746312684366, |
| "grad_norm": 1.6422653198242188, |
| "learning_rate": 2.8712311060158904e-06, |
| "loss": 0.0407, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5077679449360866, |
| "grad_norm": 1.0872414112091064, |
| "learning_rate": 2.8678346445440588e-06, |
| "loss": 0.0485, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.5081612586037365, |
| "grad_norm": 1.3887152671813965, |
| "learning_rate": 2.8644374889755284e-06, |
| "loss": 0.0594, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.5085545722713865, |
| "grad_norm": 0.9311152100563049, |
| "learning_rate": 2.861039645720664e-06, |
| "loss": 0.0558, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.5089478859390364, |
| "grad_norm": 0.5611655116081238, |
| "learning_rate": 2.85764112119113e-06, |
| "loss": 0.0326, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.5093411996066863, |
| "grad_norm": 0.6655589938163757, |
| "learning_rate": 2.854241921799874e-06, |
| "loss": 0.0608, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.5097345132743363, |
| "grad_norm": 0.9743668437004089, |
| "learning_rate": 2.850842053961119e-06, |
| "loss": 0.0674, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.5101278269419862, |
| "grad_norm": 0.3803253471851349, |
| "learning_rate": 2.847441524090347e-06, |
| "loss": 0.0318, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.5105211406096362, |
| "grad_norm": 0.9651347398757935, |
| "learning_rate": 2.844040338604291e-06, |
| "loss": 0.0467, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.5109144542772861, |
| "grad_norm": 1.3503124713897705, |
| "learning_rate": 2.8406385039209217e-06, |
| "loss": 0.0353, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.511307767944936, |
| "grad_norm": 1.3085218667984009, |
| "learning_rate": 2.837236026459432e-06, |
| "loss": 0.0677, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.511701081612586, |
| "grad_norm": 0.759332537651062, |
| "learning_rate": 2.833832912640232e-06, |
| "loss": 0.0399, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.512094395280236, |
| "grad_norm": 1.254012107849121, |
| "learning_rate": 2.8304291688849283e-06, |
| "loss": 0.0469, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.512487708947886, |
| "grad_norm": 1.6213202476501465, |
| "learning_rate": 2.827024801616319e-06, |
| "loss": 0.077, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.5128810226155359, |
| "grad_norm": 0.751507580280304, |
| "learning_rate": 2.8236198172583765e-06, |
| "loss": 0.0499, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.5132743362831859, |
| "grad_norm": 0.6438438296318054, |
| "learning_rate": 2.820214222236241e-06, |
| "loss": 0.0638, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.5136676499508358, |
| "grad_norm": 0.8826209902763367, |
| "learning_rate": 2.816808022976201e-06, |
| "loss": 0.0422, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.5140609636184857, |
| "grad_norm": 0.4389915466308594, |
| "learning_rate": 2.813401225905688e-06, |
| "loss": 0.0192, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.5144542772861357, |
| "grad_norm": 0.7698509693145752, |
| "learning_rate": 2.8099938374532615e-06, |
| "loss": 0.043, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.5148475909537856, |
| "grad_norm": 1.0304797887802124, |
| "learning_rate": 2.806585864048594e-06, |
| "loss": 0.0648, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.5152409046214356, |
| "grad_norm": 0.9679722189903259, |
| "learning_rate": 2.8031773121224665e-06, |
| "loss": 0.0528, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5156342182890855, |
| "grad_norm": 0.8979973793029785, |
| "learning_rate": 2.799768188106747e-06, |
| "loss": 0.0493, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.5160275319567355, |
| "grad_norm": 1.266461730003357, |
| "learning_rate": 2.7963584984343856e-06, |
| "loss": 0.0489, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.5164208456243854, |
| "grad_norm": 1.1776021718978882, |
| "learning_rate": 2.7929482495393995e-06, |
| "loss": 0.0453, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.5168141592920354, |
| "grad_norm": 0.89280104637146, |
| "learning_rate": 2.7895374478568608e-06, |
| "loss": 0.0506, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.5172074729596854, |
| "grad_norm": 1.046673059463501, |
| "learning_rate": 2.786126099822885e-06, |
| "loss": 0.0812, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.5176007866273353, |
| "grad_norm": 1.451196312904358, |
| "learning_rate": 2.7827142118746187e-06, |
| "loss": 0.0388, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.5179941002949853, |
| "grad_norm": 0.9998504519462585, |
| "learning_rate": 2.779301790450226e-06, |
| "loss": 0.0505, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.5183874139626352, |
| "grad_norm": 1.0535742044448853, |
| "learning_rate": 2.7758888419888797e-06, |
| "loss": 0.0377, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.5187807276302852, |
| "grad_norm": 0.9973492622375488, |
| "learning_rate": 2.7724753729307454e-06, |
| "loss": 0.0512, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.5191740412979351, |
| "grad_norm": 1.3732929229736328, |
| "learning_rate": 2.769061389716971e-06, |
| "loss": 0.0992, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.519567354965585, |
| "grad_norm": 1.1079411506652832, |
| "learning_rate": 2.765646898789677e-06, |
| "loss": 0.0438, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.519960668633235, |
| "grad_norm": 1.0692771673202515, |
| "learning_rate": 2.762231906591939e-06, |
| "loss": 0.0482, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.5203539823008849, |
| "grad_norm": 0.773914098739624, |
| "learning_rate": 2.75881641956778e-06, |
| "loss": 0.0307, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.5207472959685349, |
| "grad_norm": 0.8193982243537903, |
| "learning_rate": 2.7554004441621562e-06, |
| "loss": 0.0357, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.5211406096361848, |
| "grad_norm": 1.0655934810638428, |
| "learning_rate": 2.7519839868209462e-06, |
| "loss": 0.0564, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.5215339233038349, |
| "grad_norm": 0.668292760848999, |
| "learning_rate": 2.748567053990937e-06, |
| "loss": 0.0394, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.5219272369714848, |
| "grad_norm": 1.5048760175704956, |
| "learning_rate": 2.7451496521198144e-06, |
| "loss": 0.0756, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.5223205506391347, |
| "grad_norm": 1.869588017463684, |
| "learning_rate": 2.741731787656146e-06, |
| "loss": 0.08, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.5227138643067847, |
| "grad_norm": 1.6091140508651733, |
| "learning_rate": 2.7383134670493765e-06, |
| "loss": 0.0618, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.5231071779744346, |
| "grad_norm": 0.5614988207817078, |
| "learning_rate": 2.734894696749808e-06, |
| "loss": 0.022, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5235004916420846, |
| "grad_norm": 1.5846737623214722, |
| "learning_rate": 2.7314754832085926e-06, |
| "loss": 0.0617, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.5238938053097345, |
| "grad_norm": 1.0142868757247925, |
| "learning_rate": 2.728055832877719e-06, |
| "loss": 0.1201, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.5242871189773844, |
| "grad_norm": 0.9764862060546875, |
| "learning_rate": 2.7246357522099996e-06, |
| "loss": 0.0576, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.5246804326450344, |
| "grad_norm": 0.7208642363548279, |
| "learning_rate": 2.721215247659059e-06, |
| "loss": 0.0165, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.5250737463126843, |
| "grad_norm": 1.2766616344451904, |
| "learning_rate": 2.7177943256793214e-06, |
| "loss": 0.0589, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.5254670599803343, |
| "grad_norm": 1.7238527536392212, |
| "learning_rate": 2.7143729927259992e-06, |
| "loss": 0.0415, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.5258603736479842, |
| "grad_norm": 0.9424237608909607, |
| "learning_rate": 2.7109512552550804e-06, |
| "loss": 0.088, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.5262536873156343, |
| "grad_norm": 0.8586751818656921, |
| "learning_rate": 2.707529119723315e-06, |
| "loss": 0.0621, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.5266470009832842, |
| "grad_norm": 0.6910445690155029, |
| "learning_rate": 2.7041065925882054e-06, |
| "loss": 0.0473, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.5270403146509341, |
| "grad_norm": 0.6774911880493164, |
| "learning_rate": 2.7006836803079934e-06, |
| "loss": 0.0401, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5274336283185841, |
| "grad_norm": 1.1810059547424316, |
| "learning_rate": 2.697260389341645e-06, |
| "loss": 0.0464, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.527826941986234, |
| "grad_norm": 0.6813443303108215, |
| "learning_rate": 2.693836726148844e-06, |
| "loss": 0.0502, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.528220255653884, |
| "grad_norm": 1.6458402872085571, |
| "learning_rate": 2.6904126971899754e-06, |
| "loss": 0.0644, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.5286135693215339, |
| "grad_norm": 1.4540367126464844, |
| "learning_rate": 2.686988308926112e-06, |
| "loss": 0.0564, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.5290068829891839, |
| "grad_norm": 0.6865090131759644, |
| "learning_rate": 2.68356356781901e-06, |
| "loss": 0.0448, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.5294001966568338, |
| "grad_norm": 1.91966712474823, |
| "learning_rate": 2.6801384803310855e-06, |
| "loss": 0.0431, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.5297935103244837, |
| "grad_norm": 0.6628435254096985, |
| "learning_rate": 2.676713052925411e-06, |
| "loss": 0.0513, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.5301868239921337, |
| "grad_norm": 1.0600309371948242, |
| "learning_rate": 2.6732872920657018e-06, |
| "loss": 0.0321, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.5305801376597836, |
| "grad_norm": 0.5295042991638184, |
| "learning_rate": 2.6698612042162995e-06, |
| "loss": 0.0299, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.5309734513274337, |
| "grad_norm": 1.229316234588623, |
| "learning_rate": 2.6664347958421647e-06, |
| "loss": 0.0475, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5313667649950836, |
| "grad_norm": 0.8785441517829895, |
| "learning_rate": 2.6630080734088625e-06, |
| "loss": 0.0424, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.5317600786627336, |
| "grad_norm": 1.3285952806472778, |
| "learning_rate": 2.6595810433825496e-06, |
| "loss": 0.0359, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.5321533923303835, |
| "grad_norm": 0.8368435502052307, |
| "learning_rate": 2.6561537122299647e-06, |
| "loss": 0.0503, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.5325467059980334, |
| "grad_norm": 0.790544331073761, |
| "learning_rate": 2.6527260864184135e-06, |
| "loss": 0.0321, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.5329400196656834, |
| "grad_norm": 1.5722286701202393, |
| "learning_rate": 2.6492981724157576e-06, |
| "loss": 0.0765, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.0913268327713013, |
| "learning_rate": 2.6458699766904033e-06, |
| "loss": 0.0526, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.5337266470009833, |
| "grad_norm": 1.2754257917404175, |
| "learning_rate": 2.6424415057112883e-06, |
| "loss": 0.0585, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.5341199606686332, |
| "grad_norm": 2.0785610675811768, |
| "learning_rate": 2.6390127659478698e-06, |
| "loss": 0.0995, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.5345132743362832, |
| "grad_norm": 1.3484556674957275, |
| "learning_rate": 2.6355837638701115e-06, |
| "loss": 0.0462, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.5349065880039331, |
| "grad_norm": 0.7563539147377014, |
| "learning_rate": 2.632154505948472e-06, |
| "loss": 0.0614, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.535299901671583, |
| "grad_norm": 0.7201266288757324, |
| "learning_rate": 2.6287249986538944e-06, |
| "loss": 0.0449, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.5356932153392331, |
| "grad_norm": 1.439516544342041, |
| "learning_rate": 2.62529524845779e-06, |
| "loss": 0.0694, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.536086529006883, |
| "grad_norm": 0.6716679334640503, |
| "learning_rate": 2.6218652618320306e-06, |
| "loss": 0.0302, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.536479842674533, |
| "grad_norm": 1.9574276208877563, |
| "learning_rate": 2.6184350452489317e-06, |
| "loss": 0.0708, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.5368731563421829, |
| "grad_norm": 1.3900701999664307, |
| "learning_rate": 2.615004605181246e-06, |
| "loss": 0.0833, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.5372664700098329, |
| "grad_norm": 0.9019057154655457, |
| "learning_rate": 2.611573948102144e-06, |
| "loss": 0.0625, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.5376597836774828, |
| "grad_norm": 2.0217947959899902, |
| "learning_rate": 2.6081430804852093e-06, |
| "loss": 0.0837, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.5380530973451327, |
| "grad_norm": 1.5341334342956543, |
| "learning_rate": 2.604712008804421e-06, |
| "loss": 0.0734, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.5384464110127827, |
| "grad_norm": 1.3491941690444946, |
| "learning_rate": 2.601280739534143e-06, |
| "loss": 0.0631, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.5388397246804326, |
| "grad_norm": 1.264906406402588, |
| "learning_rate": 2.5978492791491126e-06, |
| "loss": 0.0361, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.5392330383480826, |
| "grad_norm": 1.567254900932312, |
| "learning_rate": 2.594417634124428e-06, |
| "loss": 0.0802, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.5396263520157325, |
| "grad_norm": 0.912144124507904, |
| "learning_rate": 2.590985810935535e-06, |
| "loss": 0.0321, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.5400196656833824, |
| "grad_norm": 0.7098456025123596, |
| "learning_rate": 2.5875538160582176e-06, |
| "loss": 0.0625, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.5404129793510325, |
| "grad_norm": 1.4193458557128906, |
| "learning_rate": 2.58412165596858e-06, |
| "loss": 0.0518, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.5408062930186824, |
| "grad_norm": 1.3003660440444946, |
| "learning_rate": 2.5806893371430413e-06, |
| "loss": 0.0625, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.5411996066863324, |
| "grad_norm": 1.4275062084197998, |
| "learning_rate": 2.57725686605832e-06, |
| "loss": 0.0628, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.5415929203539823, |
| "grad_norm": 1.3604398965835571, |
| "learning_rate": 2.5738242491914206e-06, |
| "loss": 0.0733, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.5419862340216323, |
| "grad_norm": 2.859689235687256, |
| "learning_rate": 2.5703914930196227e-06, |
| "loss": 0.0547, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.5423795476892822, |
| "grad_norm": 0.770262598991394, |
| "learning_rate": 2.5669586040204697e-06, |
| "loss": 0.0644, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.5427728613569321, |
| "grad_norm": 0.7974931001663208, |
| "learning_rate": 2.5635255886717553e-06, |
| "loss": 0.0687, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5431661750245821, |
| "grad_norm": 0.9779230356216431, |
| "learning_rate": 2.560092453451512e-06, |
| "loss": 0.0586, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.543559488692232, |
| "grad_norm": 2.3653101921081543, |
| "learning_rate": 2.5566592048379975e-06, |
| "loss": 0.0697, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.543952802359882, |
| "grad_norm": 1.6566016674041748, |
| "learning_rate": 2.553225849309684e-06, |
| "loss": 0.104, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.5443461160275319, |
| "grad_norm": 1.516684889793396, |
| "learning_rate": 2.5497923933452464e-06, |
| "loss": 0.0423, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.5447394296951819, |
| "grad_norm": 1.3681788444519043, |
| "learning_rate": 2.5463588434235463e-06, |
| "loss": 0.052, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.5451327433628319, |
| "grad_norm": 0.49628522992134094, |
| "learning_rate": 2.542925206023626e-06, |
| "loss": 0.0255, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.5455260570304818, |
| "grad_norm": 0.9334824681282043, |
| "learning_rate": 2.5394914876246916e-06, |
| "loss": 0.0517, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.5459193706981318, |
| "grad_norm": 1.3869428634643555, |
| "learning_rate": 2.5360576947061004e-06, |
| "loss": 0.051, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.5463126843657817, |
| "grad_norm": 0.7261596918106079, |
| "learning_rate": 2.5326238337473537e-06, |
| "loss": 0.0349, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.5467059980334317, |
| "grad_norm": 1.0270626544952393, |
| "learning_rate": 2.5291899112280765e-06, |
| "loss": 0.0574, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5470993117010816, |
| "grad_norm": 0.9097653031349182, |
| "learning_rate": 2.5257559336280145e-06, |
| "loss": 0.0434, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.5474926253687316, |
| "grad_norm": 1.5684995651245117, |
| "learning_rate": 2.522321907427016e-06, |
| "loss": 0.0394, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.5478859390363815, |
| "grad_norm": 0.5134732723236084, |
| "learning_rate": 2.5188878391050187e-06, |
| "loss": 0.0642, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.5482792527040314, |
| "grad_norm": 1.6495331525802612, |
| "learning_rate": 2.515453735142043e-06, |
| "loss": 0.0335, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.5486725663716814, |
| "grad_norm": 0.949030876159668, |
| "learning_rate": 2.5120196020181752e-06, |
| "loss": 0.069, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.5490658800393313, |
| "grad_norm": 0.5853769183158875, |
| "learning_rate": 2.5085854462135556e-06, |
| "loss": 0.035, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.5494591937069813, |
| "grad_norm": 1.0677484273910522, |
| "learning_rate": 2.505151274208369e-06, |
| "loss": 0.0511, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.5498525073746313, |
| "grad_norm": 1.5644643306732178, |
| "learning_rate": 2.50171709248283e-06, |
| "loss": 0.0814, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.5502458210422813, |
| "grad_norm": 0.736179769039154, |
| "learning_rate": 2.4982829075171714e-06, |
| "loss": 0.0452, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.5506391347099312, |
| "grad_norm": 0.8911694288253784, |
| "learning_rate": 2.494848725791632e-06, |
| "loss": 0.0564, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5510324483775811, |
| "grad_norm": 1.9409581422805786, |
| "learning_rate": 2.4914145537864453e-06, |
| "loss": 0.0724, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.5514257620452311, |
| "grad_norm": 1.1989744901657104, |
| "learning_rate": 2.4879803979818256e-06, |
| "loss": 0.0496, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.551819075712881, |
| "grad_norm": 1.8545705080032349, |
| "learning_rate": 2.4845462648579573e-06, |
| "loss": 0.0527, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.552212389380531, |
| "grad_norm": 1.8136131763458252, |
| "learning_rate": 2.481112160894982e-06, |
| "loss": 0.0601, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.5526057030481809, |
| "grad_norm": 1.070971131324768, |
| "learning_rate": 2.4776780925729853e-06, |
| "loss": 0.0612, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.5529990167158308, |
| "grad_norm": 1.127616047859192, |
| "learning_rate": 2.474244066371986e-06, |
| "loss": 0.0503, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.5533923303834808, |
| "grad_norm": 1.5506644248962402, |
| "learning_rate": 2.4708100887719243e-06, |
| "loss": 0.0638, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.5537856440511307, |
| "grad_norm": 1.5224863290786743, |
| "learning_rate": 2.4673761662526475e-06, |
| "loss": 0.0521, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.5541789577187807, |
| "grad_norm": 1.2066714763641357, |
| "learning_rate": 2.4639423052938995e-06, |
| "loss": 0.0533, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.5545722713864307, |
| "grad_norm": 1.389074683189392, |
| "learning_rate": 2.4605085123753097e-06, |
| "loss": 0.0809, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5549655850540807, |
| "grad_norm": 0.6731852293014526, |
| "learning_rate": 2.4570747939763745e-06, |
| "loss": 0.0249, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.5553588987217306, |
| "grad_norm": 1.2953534126281738, |
| "learning_rate": 2.453641156576454e-06, |
| "loss": 0.0473, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.5557522123893806, |
| "grad_norm": 0.9251944422721863, |
| "learning_rate": 2.4502076066547545e-06, |
| "loss": 0.0765, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.5561455260570305, |
| "grad_norm": 1.831679344177246, |
| "learning_rate": 2.4467741506903162e-06, |
| "loss": 0.0798, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.5565388397246804, |
| "grad_norm": 1.2218101024627686, |
| "learning_rate": 2.443340795162003e-06, |
| "loss": 0.0393, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.5569321533923304, |
| "grad_norm": 1.164400577545166, |
| "learning_rate": 2.4399075465484883e-06, |
| "loss": 0.0681, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.5573254670599803, |
| "grad_norm": 1.0514402389526367, |
| "learning_rate": 2.4364744113282455e-06, |
| "loss": 0.0593, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.5577187807276303, |
| "grad_norm": 1.9647271633148193, |
| "learning_rate": 2.433041395979531e-06, |
| "loss": 0.0785, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.5581120943952802, |
| "grad_norm": 0.7550022006034851, |
| "learning_rate": 2.429608506980378e-06, |
| "loss": 0.0443, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.5585054080629301, |
| "grad_norm": 1.2886439561843872, |
| "learning_rate": 2.4261757508085803e-06, |
| "loss": 0.0625, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5588987217305801, |
| "grad_norm": 0.6531363129615784, |
| "learning_rate": 2.422743133941681e-06, |
| "loss": 0.0437, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.5592920353982301, |
| "grad_norm": 1.3166404962539673, |
| "learning_rate": 2.419310662856959e-06, |
| "loss": 0.0363, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.5596853490658801, |
| "grad_norm": 0.9738766551017761, |
| "learning_rate": 2.415878344031421e-06, |
| "loss": 0.0499, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.56007866273353, |
| "grad_norm": 1.1199309825897217, |
| "learning_rate": 2.4124461839417832e-06, |
| "loss": 0.0638, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.56047197640118, |
| "grad_norm": 0.7884669303894043, |
| "learning_rate": 2.4090141890644654e-06, |
| "loss": 0.0219, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.5608652900688299, |
| "grad_norm": 1.508720874786377, |
| "learning_rate": 2.405582365875573e-06, |
| "loss": 0.0722, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.5612586037364798, |
| "grad_norm": 0.9353559017181396, |
| "learning_rate": 2.4021507208508882e-06, |
| "loss": 0.0654, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.5616519174041298, |
| "grad_norm": 1.9918673038482666, |
| "learning_rate": 2.398719260465858e-06, |
| "loss": 0.0741, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.5620452310717797, |
| "grad_norm": 0.9243260622024536, |
| "learning_rate": 2.3952879911955794e-06, |
| "loss": 0.0369, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.5624385447394297, |
| "grad_norm": 1.3456679582595825, |
| "learning_rate": 2.391856919514791e-06, |
| "loss": 0.0811, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.5628318584070796, |
| "grad_norm": 1.5919969081878662, |
| "learning_rate": 2.3884260518978562e-06, |
| "loss": 0.0402, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.5632251720747296, |
| "grad_norm": 0.5894349813461304, |
| "learning_rate": 2.3849953948187552e-06, |
| "loss": 0.0396, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.5636184857423795, |
| "grad_norm": 1.708106517791748, |
| "learning_rate": 2.3815649547510687e-06, |
| "loss": 0.0575, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.5640117994100295, |
| "grad_norm": 1.6241428852081299, |
| "learning_rate": 2.37813473816797e-06, |
| "loss": 0.047, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.5644051130776795, |
| "grad_norm": 1.1760050058364868, |
| "learning_rate": 2.3747047515422102e-06, |
| "loss": 0.049, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.5647984267453294, |
| "grad_norm": 0.6579201221466064, |
| "learning_rate": 2.371275001346106e-06, |
| "loss": 0.0569, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.5651917404129794, |
| "grad_norm": 0.5577812194824219, |
| "learning_rate": 2.367845494051529e-06, |
| "loss": 0.0338, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.5655850540806293, |
| "grad_norm": 0.9575706124305725, |
| "learning_rate": 2.3644162361298897e-06, |
| "loss": 0.0622, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.5659783677482793, |
| "grad_norm": 0.6951814889907837, |
| "learning_rate": 2.360987234052131e-06, |
| "loss": 0.0329, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.5663716814159292, |
| "grad_norm": 1.079609990119934, |
| "learning_rate": 2.357558494288712e-06, |
| "loss": 0.0672, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5667649950835791, |
| "grad_norm": 1.0509586334228516, |
| "learning_rate": 2.354130023309597e-06, |
| "loss": 0.0755, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.5671583087512291, |
| "grad_norm": 0.9782833456993103, |
| "learning_rate": 2.350701827584243e-06, |
| "loss": 0.0319, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.567551622418879, |
| "grad_norm": 1.019370675086975, |
| "learning_rate": 2.3472739135815877e-06, |
| "loss": 0.0696, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.567944936086529, |
| "grad_norm": 1.419137716293335, |
| "learning_rate": 2.343846287770036e-06, |
| "loss": 0.0797, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.5683382497541789, |
| "grad_norm": 1.8223907947540283, |
| "learning_rate": 2.340418956617451e-06, |
| "loss": 0.0462, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.568731563421829, |
| "grad_norm": 1.1286693811416626, |
| "learning_rate": 2.336991926591138e-06, |
| "loss": 0.0735, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.5691248770894789, |
| "grad_norm": 1.7998546361923218, |
| "learning_rate": 2.3335652041578352e-06, |
| "loss": 0.0964, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.5695181907571288, |
| "grad_norm": 1.0016109943389893, |
| "learning_rate": 2.3301387957837017e-06, |
| "loss": 0.0631, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.5699115044247788, |
| "grad_norm": 1.876328706741333, |
| "learning_rate": 2.326712707934299e-06, |
| "loss": 0.0683, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.5703048180924287, |
| "grad_norm": 1.8099371194839478, |
| "learning_rate": 2.3232869470745893e-06, |
| "loss": 0.058, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5706981317600787, |
| "grad_norm": 0.8637019395828247, |
| "learning_rate": 2.3198615196689153e-06, |
| "loss": 0.0655, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.5710914454277286, |
| "grad_norm": 2.1426312923431396, |
| "learning_rate": 2.3164364321809906e-06, |
| "loss": 0.0572, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.5714847590953785, |
| "grad_norm": 1.6157870292663574, |
| "learning_rate": 2.3130116910738874e-06, |
| "loss": 0.0321, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.5718780727630285, |
| "grad_norm": 0.8953425288200378, |
| "learning_rate": 2.309587302810026e-06, |
| "loss": 0.0292, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.5722713864306784, |
| "grad_norm": 0.8132373094558716, |
| "learning_rate": 2.306163273851157e-06, |
| "loss": 0.0517, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.5726647000983284, |
| "grad_norm": 0.8843181729316711, |
| "learning_rate": 2.302739610658356e-06, |
| "loss": 0.0389, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.5730580137659783, |
| "grad_norm": 1.1060006618499756, |
| "learning_rate": 2.2993163196920075e-06, |
| "loss": 0.08, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.5734513274336284, |
| "grad_norm": 1.1257623434066772, |
| "learning_rate": 2.295893407411795e-06, |
| "loss": 0.053, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.5738446411012783, |
| "grad_norm": 1.0160799026489258, |
| "learning_rate": 2.2924708802766857e-06, |
| "loss": 0.0439, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.5742379547689282, |
| "grad_norm": 1.231930136680603, |
| "learning_rate": 2.2890487447449204e-06, |
| "loss": 0.0569, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5746312684365782, |
| "grad_norm": 0.8130099177360535, |
| "learning_rate": 2.285627007274001e-06, |
| "loss": 0.0361, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.5750245821042281, |
| "grad_norm": 0.6949229836463928, |
| "learning_rate": 2.282205674320679e-06, |
| "loss": 0.0598, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.5754178957718781, |
| "grad_norm": 1.0386853218078613, |
| "learning_rate": 2.2787847523409416e-06, |
| "loss": 0.0601, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.575811209439528, |
| "grad_norm": 0.48775455355644226, |
| "learning_rate": 2.2753642477900012e-06, |
| "loss": 0.0483, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.576204523107178, |
| "grad_norm": 1.220493197441101, |
| "learning_rate": 2.2719441671222815e-06, |
| "loss": 0.0398, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.5765978367748279, |
| "grad_norm": 0.747078537940979, |
| "learning_rate": 2.268524516791408e-06, |
| "loss": 0.0313, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.5769911504424778, |
| "grad_norm": 0.7773571014404297, |
| "learning_rate": 2.2651053032501928e-06, |
| "loss": 0.0395, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.5773844641101278, |
| "grad_norm": 0.4083022177219391, |
| "learning_rate": 2.261686532950624e-06, |
| "loss": 0.0255, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.5777777777777777, |
| "grad_norm": 1.0136034488677979, |
| "learning_rate": 2.2582682123438547e-06, |
| "loss": 0.0499, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.5781710914454278, |
| "grad_norm": 1.2290290594100952, |
| "learning_rate": 2.254850347880187e-06, |
| "loss": 0.0649, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5785644051130777, |
| "grad_norm": 1.4913883209228516, |
| "learning_rate": 2.2514329460090633e-06, |
| "loss": 0.0595, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.5789577187807277, |
| "grad_norm": 1.210160732269287, |
| "learning_rate": 2.248016013179054e-06, |
| "loss": 0.0433, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.5793510324483776, |
| "grad_norm": 0.757161557674408, |
| "learning_rate": 2.244599555837844e-06, |
| "loss": 0.035, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.5797443461160275, |
| "grad_norm": 1.0250403881072998, |
| "learning_rate": 2.2411835804322206e-06, |
| "loss": 0.0375, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.5801376597836775, |
| "grad_norm": 1.1955897808074951, |
| "learning_rate": 2.2377680934080625e-06, |
| "loss": 0.0449, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.5805309734513274, |
| "grad_norm": 1.7066453695297241, |
| "learning_rate": 2.2343531012103244e-06, |
| "loss": 0.0722, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.5809242871189774, |
| "grad_norm": 0.6709203720092773, |
| "learning_rate": 2.2309386102830295e-06, |
| "loss": 0.0354, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.5813176007866273, |
| "grad_norm": 0.9403322339057922, |
| "learning_rate": 2.227524627069256e-06, |
| "loss": 0.039, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.5817109144542773, |
| "grad_norm": 1.1907342672348022, |
| "learning_rate": 2.2241111580111207e-06, |
| "loss": 0.0894, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.5821042281219272, |
| "grad_norm": 0.9678034782409668, |
| "learning_rate": 2.220698209549774e-06, |
| "loss": 0.0492, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5824975417895771, |
| "grad_norm": 0.5867919325828552, |
| "learning_rate": 2.2172857881253825e-06, |
| "loss": 0.0329, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.5828908554572272, |
| "grad_norm": 0.9085230827331543, |
| "learning_rate": 2.2138739001771157e-06, |
| "loss": 0.0501, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.5832841691248771, |
| "grad_norm": 1.015177845954895, |
| "learning_rate": 2.2104625521431396e-06, |
| "loss": 0.0297, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.5836774827925271, |
| "grad_norm": 0.48682698607444763, |
| "learning_rate": 2.207051750460601e-06, |
| "loss": 0.0329, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.584070796460177, |
| "grad_norm": 1.861662745475769, |
| "learning_rate": 2.2036415015656148e-06, |
| "loss": 0.0619, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.584464110127827, |
| "grad_norm": 0.9373002648353577, |
| "learning_rate": 2.2002318118932543e-06, |
| "loss": 0.0563, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.5848574237954769, |
| "grad_norm": 0.4820902943611145, |
| "learning_rate": 2.1968226878775347e-06, |
| "loss": 0.0206, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.5852507374631268, |
| "grad_norm": 0.6255022287368774, |
| "learning_rate": 2.1934141359514062e-06, |
| "loss": 0.0319, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.5856440511307768, |
| "grad_norm": 0.8468760848045349, |
| "learning_rate": 2.1900061625467393e-06, |
| "loss": 0.0574, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.5860373647984267, |
| "grad_norm": 0.519826352596283, |
| "learning_rate": 2.1865987740943116e-06, |
| "loss": 0.0595, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5864306784660767, |
| "grad_norm": 1.6838140487670898, |
| "learning_rate": 2.183191977023799e-06, |
| "loss": 0.0549, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.5868239921337266, |
| "grad_norm": 1.3588017225265503, |
| "learning_rate": 2.17978577776376e-06, |
| "loss": 0.058, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.5872173058013765, |
| "grad_norm": 0.9913402199745178, |
| "learning_rate": 2.176380182741624e-06, |
| "loss": 0.021, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.5876106194690266, |
| "grad_norm": 1.7032448053359985, |
| "learning_rate": 2.172975198383682e-06, |
| "loss": 0.0565, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.5880039331366765, |
| "grad_norm": 0.9853689670562744, |
| "learning_rate": 2.169570831115072e-06, |
| "loss": 0.0532, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5883972468043265, |
| "grad_norm": 1.061571717262268, |
| "learning_rate": 2.1661670873597686e-06, |
| "loss": 0.042, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.5887905604719764, |
| "grad_norm": 1.0780665874481201, |
| "learning_rate": 2.1627639735405683e-06, |
| "loss": 0.0412, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.5891838741396264, |
| "grad_norm": 1.1072509288787842, |
| "learning_rate": 2.1593614960790795e-06, |
| "loss": 0.0369, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.5895771878072763, |
| "grad_norm": 0.9231078028678894, |
| "learning_rate": 2.15595966139571e-06, |
| "loss": 0.0388, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.5899705014749262, |
| "grad_norm": 0.8702555894851685, |
| "learning_rate": 2.152558475909654e-06, |
| "loss": 0.0719, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5903638151425762, |
| "grad_norm": 0.910358726978302, |
| "learning_rate": 2.149157946038882e-06, |
| "loss": 0.0468, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.5907571288102261, |
| "grad_norm": 1.3807059526443481, |
| "learning_rate": 2.145758078200126e-06, |
| "loss": 0.0729, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.5911504424778761, |
| "grad_norm": 0.9765854477882385, |
| "learning_rate": 2.1423588788088704e-06, |
| "loss": 0.0407, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.591543756145526, |
| "grad_norm": 1.021924376487732, |
| "learning_rate": 2.1389603542793364e-06, |
| "loss": 0.0342, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.591937069813176, |
| "grad_norm": 1.098352313041687, |
| "learning_rate": 2.1355625110244725e-06, |
| "loss": 0.0668, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.592330383480826, |
| "grad_norm": 1.5986775159835815, |
| "learning_rate": 2.1321653554559425e-06, |
| "loss": 0.0673, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.592723697148476, |
| "grad_norm": 1.2270184755325317, |
| "learning_rate": 2.1287688939841104e-06, |
| "loss": 0.0405, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.5931170108161259, |
| "grad_norm": 0.6227984428405762, |
| "learning_rate": 2.125373133018033e-06, |
| "loss": 0.0362, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.5935103244837758, |
| "grad_norm": 1.1838734149932861, |
| "learning_rate": 2.1219780789654436e-06, |
| "loss": 0.0705, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.5939036381514258, |
| "grad_norm": 1.5811330080032349, |
| "learning_rate": 2.1185837382327422e-06, |
| "loss": 0.0811, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5942969518190757, |
| "grad_norm": 1.6723252534866333, |
| "learning_rate": 2.1151901172249823e-06, |
| "loss": 0.0711, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.5946902654867257, |
| "grad_norm": 1.1075739860534668, |
| "learning_rate": 2.1117972223458598e-06, |
| "loss": 0.0365, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.5950835791543756, |
| "grad_norm": 1.0250906944274902, |
| "learning_rate": 2.108405059997701e-06, |
| "loss": 0.0534, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.5954768928220255, |
| "grad_norm": 1.4097585678100586, |
| "learning_rate": 2.1050136365814484e-06, |
| "loss": 0.0633, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.5958702064896755, |
| "grad_norm": 1.0003234148025513, |
| "learning_rate": 2.10162295849665e-06, |
| "loss": 0.0331, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5962635201573254, |
| "grad_norm": 1.203927755355835, |
| "learning_rate": 2.0982330321414495e-06, |
| "loss": 0.0397, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.5966568338249754, |
| "grad_norm": 1.1078671216964722, |
| "learning_rate": 2.094843863912571e-06, |
| "loss": 0.061, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.5970501474926254, |
| "grad_norm": 0.9437456130981445, |
| "learning_rate": 2.0914554602053072e-06, |
| "loss": 0.0549, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.5974434611602754, |
| "grad_norm": 0.34665971994400024, |
| "learning_rate": 2.0880678274135103e-06, |
| "loss": 0.0374, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.5978367748279253, |
| "grad_norm": 1.6303670406341553, |
| "learning_rate": 2.084680971929574e-06, |
| "loss": 0.0729, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5982300884955752, |
| "grad_norm": 1.1011961698532104, |
| "learning_rate": 2.0812949001444293e-06, |
| "loss": 0.0399, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.5986234021632252, |
| "grad_norm": 0.8066303730010986, |
| "learning_rate": 2.077909618447526e-06, |
| "loss": 0.05, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.5990167158308751, |
| "grad_norm": 1.4448401927947998, |
| "learning_rate": 2.0745251332268238e-06, |
| "loss": 0.0616, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.5994100294985251, |
| "grad_norm": 0.49370574951171875, |
| "learning_rate": 2.07114145086878e-06, |
| "loss": 0.0496, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.599803343166175, |
| "grad_norm": 1.0275585651397705, |
| "learning_rate": 2.0677585777583366e-06, |
| "loss": 0.038, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.600196656833825, |
| "grad_norm": 1.1347780227661133, |
| "learning_rate": 2.0643765202789064e-06, |
| "loss": 0.0324, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.6005899705014749, |
| "grad_norm": 1.2602198123931885, |
| "learning_rate": 2.060995284812366e-06, |
| "loss": 0.0699, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.6009832841691248, |
| "grad_norm": 1.4369268417358398, |
| "learning_rate": 2.0576148777390397e-06, |
| "loss": 0.0664, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.6013765978367748, |
| "grad_norm": 1.8620692491531372, |
| "learning_rate": 2.0542353054376893e-06, |
| "loss": 0.0566, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.6017699115044248, |
| "grad_norm": 1.026005506515503, |
| "learning_rate": 2.0508565742855017e-06, |
| "loss": 0.023, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.6021632251720748, |
| "grad_norm": 0.8947687149047852, |
| "learning_rate": 2.0474786906580733e-06, |
| "loss": 0.0573, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.6025565388397247, |
| "grad_norm": 1.1179437637329102, |
| "learning_rate": 2.044101660929405e-06, |
| "loss": 0.0551, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.6029498525073747, |
| "grad_norm": 0.6822925806045532, |
| "learning_rate": 2.040725491471885e-06, |
| "loss": 0.0393, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.6033431661750246, |
| "grad_norm": 1.8381119966506958, |
| "learning_rate": 2.037350188656279e-06, |
| "loss": 0.0502, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.6037364798426745, |
| "grad_norm": 1.5118048191070557, |
| "learning_rate": 2.0339757588517165e-06, |
| "loss": 0.0403, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.6041297935103245, |
| "grad_norm": 1.0197237730026245, |
| "learning_rate": 2.0306022084256786e-06, |
| "loss": 0.0651, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.6045231071779744, |
| "grad_norm": 2.17777943611145, |
| "learning_rate": 2.027229543743989e-06, |
| "loss": 0.069, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.6049164208456244, |
| "grad_norm": 1.1577013731002808, |
| "learning_rate": 2.0238577711707987e-06, |
| "loss": 0.0615, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.6053097345132743, |
| "grad_norm": 1.1709601879119873, |
| "learning_rate": 2.0204868970685764e-06, |
| "loss": 0.0548, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.6057030481809242, |
| "grad_norm": 0.8054937124252319, |
| "learning_rate": 2.0171169277980954e-06, |
| "loss": 0.0479, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.6060963618485742, |
| "grad_norm": 0.9096735715866089, |
| "learning_rate": 2.0137478697184205e-06, |
| "loss": 0.0655, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.6064896755162242, |
| "grad_norm": 0.9453304409980774, |
| "learning_rate": 2.0103797291868977e-06, |
| "loss": 0.0812, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.6068829891838742, |
| "grad_norm": 0.8558923602104187, |
| "learning_rate": 2.0070125125591414e-06, |
| "loss": 0.0468, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.6072763028515241, |
| "grad_norm": 1.2030149698257446, |
| "learning_rate": 2.0036462261890225e-06, |
| "loss": 0.0542, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.6076696165191741, |
| "grad_norm": 0.9261341691017151, |
| "learning_rate": 2.0002808764286573e-06, |
| "loss": 0.0706, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.608062930186824, |
| "grad_norm": 0.7496268153190613, |
| "learning_rate": 1.9969164696283945e-06, |
| "loss": 0.0298, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.6084562438544739, |
| "grad_norm": 1.2815377712249756, |
| "learning_rate": 1.9935530121368023e-06, |
| "loss": 0.0555, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.6088495575221239, |
| "grad_norm": 0.964885413646698, |
| "learning_rate": 1.990190510300659e-06, |
| "loss": 0.0211, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.6092428711897738, |
| "grad_norm": 0.8117434978485107, |
| "learning_rate": 1.986828970464939e-06, |
| "loss": 0.0417, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.6096361848574238, |
| "grad_norm": 0.4136671721935272, |
| "learning_rate": 1.983468398972802e-06, |
| "loss": 0.0177, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.6100294985250737, |
| "grad_norm": 0.8469100594520569, |
| "learning_rate": 1.980108802165579e-06, |
| "loss": 0.0375, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.6104228121927237, |
| "grad_norm": 0.8030047416687012, |
| "learning_rate": 1.976750186382764e-06, |
| "loss": 0.0237, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.6108161258603736, |
| "grad_norm": 1.6747819185256958, |
| "learning_rate": 1.9733925579619965e-06, |
| "loss": 0.072, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.6112094395280236, |
| "grad_norm": 0.8288264870643616, |
| "learning_rate": 1.970035923239056e-06, |
| "loss": 0.0347, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.6116027531956736, |
| "grad_norm": 0.8544471859931946, |
| "learning_rate": 1.9666802885478463e-06, |
| "loss": 0.0445, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.6119960668633235, |
| "grad_norm": 0.8386610150337219, |
| "learning_rate": 1.963325660220384e-06, |
| "loss": 0.0609, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.6123893805309735, |
| "grad_norm": 1.3670865297317505, |
| "learning_rate": 1.9599720445867856e-06, |
| "loss": 0.0601, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.6127826941986234, |
| "grad_norm": 1.0806509256362915, |
| "learning_rate": 1.956619447975257e-06, |
| "loss": 0.058, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.6131760078662734, |
| "grad_norm": 0.9588520526885986, |
| "learning_rate": 1.9532678767120827e-06, |
| "loss": 0.0422, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.6135693215339233, |
| "grad_norm": 1.370969295501709, |
| "learning_rate": 1.9499173371216105e-06, |
| "loss": 0.0646, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.6139626352015732, |
| "grad_norm": 1.074244499206543, |
| "learning_rate": 1.946567835526243e-06, |
| "loss": 0.0613, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.6143559488692232, |
| "grad_norm": 0.8812416195869446, |
| "learning_rate": 1.943219378246423e-06, |
| "loss": 0.0626, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.6147492625368731, |
| "grad_norm": 1.3703498840332031, |
| "learning_rate": 1.9398719716006246e-06, |
| "loss": 0.0673, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.6151425762045231, |
| "grad_norm": 1.3188180923461914, |
| "learning_rate": 1.936525621905336e-06, |
| "loss": 0.0711, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.615535889872173, |
| "grad_norm": 0.5656819939613342, |
| "learning_rate": 1.9331803354750537e-06, |
| "loss": 0.0496, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.6159292035398231, |
| "grad_norm": 1.2018178701400757, |
| "learning_rate": 1.9298361186222665e-06, |
| "loss": 0.052, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.616322517207473, |
| "grad_norm": 1.197943091392517, |
| "learning_rate": 1.926492977657446e-06, |
| "loss": 0.0667, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.6167158308751229, |
| "grad_norm": 0.6885368227958679, |
| "learning_rate": 1.9231509188890345e-06, |
| "loss": 0.0374, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.6171091445427729, |
| "grad_norm": 0.8017690181732178, |
| "learning_rate": 1.919809948623428e-06, |
| "loss": 0.053, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.6175024582104228, |
| "grad_norm": 1.5223562717437744, |
| "learning_rate": 1.9164700731649723e-06, |
| "loss": 0.0605, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.6178957718780728, |
| "grad_norm": 1.8122631311416626, |
| "learning_rate": 1.913131298815947e-06, |
| "loss": 0.0719, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.6182890855457227, |
| "grad_norm": 1.5113699436187744, |
| "learning_rate": 1.9097936318765527e-06, |
| "loss": 0.0547, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.6186823992133726, |
| "grad_norm": 0.7732280492782593, |
| "learning_rate": 1.906457078644901e-06, |
| "loss": 0.0456, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.6190757128810226, |
| "grad_norm": 1.347740650177002, |
| "learning_rate": 1.903121645417003e-06, |
| "loss": 0.0469, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.6194690265486725, |
| "grad_norm": 0.6614682674407959, |
| "learning_rate": 1.8997873384867534e-06, |
| "loss": 0.0266, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.6198623402163225, |
| "grad_norm": 1.1419849395751953, |
| "learning_rate": 1.8964541641459242e-06, |
| "loss": 0.0465, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.6202556538839724, |
| "grad_norm": 0.9635249972343445, |
| "learning_rate": 1.893122128684149e-06, |
| "loss": 0.0482, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.6206489675516225, |
| "grad_norm": 0.9544531106948853, |
| "learning_rate": 1.8897912383889138e-06, |
| "loss": 0.0689, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.6210422812192724, |
| "grad_norm": 0.7220961451530457, |
| "learning_rate": 1.886461499545543e-06, |
| "loss": 0.0521, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.6214355948869223, |
| "grad_norm": 2.5634989738464355, |
| "learning_rate": 1.883132918437186e-06, |
| "loss": 0.0702, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6218289085545723, |
| "grad_norm": 1.1183925867080688, |
| "learning_rate": 1.8798055013448105e-06, |
| "loss": 0.0623, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "grad_norm": 0.7888696193695068, |
| "learning_rate": 1.8764792545471872e-06, |
| "loss": 0.0452, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.6226155358898722, |
| "grad_norm": 0.4925548732280731, |
| "learning_rate": 1.8731541843208772e-06, |
| "loss": 0.0481, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.6230088495575221, |
| "grad_norm": 1.184525489807129, |
| "learning_rate": 1.869830296940223e-06, |
| "loss": 0.0947, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.6234021632251721, |
| "grad_norm": 1.0969839096069336, |
| "learning_rate": 1.8665075986773346e-06, |
| "loss": 0.0786, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.623795476892822, |
| "grad_norm": 1.2557084560394287, |
| "learning_rate": 1.863186095802077e-06, |
| "loss": 0.048, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.6241887905604719, |
| "grad_norm": 0.9532119631767273, |
| "learning_rate": 1.8598657945820605e-06, |
| "loss": 0.0356, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.6245821042281219, |
| "grad_norm": 0.6121819019317627, |
| "learning_rate": 1.8565467012826282e-06, |
| "loss": 0.0395, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.6249754178957718, |
| "grad_norm": 0.9521839022636414, |
| "learning_rate": 1.853228822166843e-06, |
| "loss": 0.0417, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.6253687315634219, |
| "grad_norm": 1.3007653951644897, |
| "learning_rate": 1.849912163495479e-06, |
| "loss": 0.0376, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.6257620452310718, |
| "grad_norm": 1.0467530488967896, |
| "learning_rate": 1.8465967315270029e-06, |
| "loss": 0.0531, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.6261553588987218, |
| "grad_norm": 0.8435487747192383, |
| "learning_rate": 1.8432825325175707e-06, |
| "loss": 0.0333, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.6265486725663717, |
| "grad_norm": 1.2616933584213257, |
| "learning_rate": 1.8399695727210098e-06, |
| "loss": 0.0556, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.6269419862340216, |
| "grad_norm": 1.1721434593200684, |
| "learning_rate": 1.836657858388811e-06, |
| "loss": 0.0658, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.6273352999016716, |
| "grad_norm": 0.6084288954734802, |
| "learning_rate": 1.8333473957701126e-06, |
| "loss": 0.0385, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.6277286135693215, |
| "grad_norm": 1.4398316144943237, |
| "learning_rate": 1.830038191111692e-06, |
| "loss": 0.0606, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.6281219272369715, |
| "grad_norm": 1.9486684799194336, |
| "learning_rate": 1.8267302506579532e-06, |
| "loss": 0.0853, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.6285152409046214, |
| "grad_norm": 0.7250006794929504, |
| "learning_rate": 1.8234235806509145e-06, |
| "loss": 0.0295, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.6289085545722713, |
| "grad_norm": 1.2927533388137817, |
| "learning_rate": 1.8201181873301967e-06, |
| "loss": 0.046, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.6293018682399213, |
| "grad_norm": 1.2859911918640137, |
| "learning_rate": 1.816814076933012e-06, |
| "loss": 0.0579, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6296951819075712, |
| "grad_norm": 1.900543451309204, |
| "learning_rate": 1.813511255694152e-06, |
| "loss": 0.0567, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.6300884955752213, |
| "grad_norm": 2.090280532836914, |
| "learning_rate": 1.8102097298459732e-06, |
| "loss": 0.0865, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.6304818092428712, |
| "grad_norm": 1.3595722913742065, |
| "learning_rate": 1.80690950561839e-06, |
| "loss": 0.0561, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.6308751229105212, |
| "grad_norm": 1.022291660308838, |
| "learning_rate": 1.8036105892388611e-06, |
| "loss": 0.0382, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.6312684365781711, |
| "grad_norm": 0.8052154779434204, |
| "learning_rate": 1.800312986932376e-06, |
| "loss": 0.0529, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.631661750245821, |
| "grad_norm": 4.667014122009277, |
| "learning_rate": 1.7970167049214466e-06, |
| "loss": 0.0492, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.632055063913471, |
| "grad_norm": 1.5009123086929321, |
| "learning_rate": 1.7937217494260888e-06, |
| "loss": 0.0779, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.6324483775811209, |
| "grad_norm": 1.570505976676941, |
| "learning_rate": 1.7904281266638201e-06, |
| "loss": 0.0577, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.6328416912487709, |
| "grad_norm": 1.3305639028549194, |
| "learning_rate": 1.7871358428496416e-06, |
| "loss": 0.0979, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.6332350049164208, |
| "grad_norm": 0.6136133074760437, |
| "learning_rate": 1.7838449041960276e-06, |
| "loss": 0.0424, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.6336283185840708, |
| "grad_norm": 0.7882452607154846, |
| "learning_rate": 1.7805553169129142e-06, |
| "loss": 0.0656, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.6340216322517207, |
| "grad_norm": 2.1648337841033936, |
| "learning_rate": 1.7772670872076883e-06, |
| "loss": 0.0622, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.6344149459193706, |
| "grad_norm": 0.5130072832107544, |
| "learning_rate": 1.773980221285173e-06, |
| "loss": 0.0394, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.6348082595870207, |
| "grad_norm": 1.0151782035827637, |
| "learning_rate": 1.7706947253476194e-06, |
| "loss": 0.0424, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.6352015732546706, |
| "grad_norm": 0.8527183532714844, |
| "learning_rate": 1.767410605594694e-06, |
| "loss": 0.0394, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.6355948869223206, |
| "grad_norm": 1.3671120405197144, |
| "learning_rate": 1.7641278682234658e-06, |
| "loss": 0.0625, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.6359882005899705, |
| "grad_norm": 0.8969728350639343, |
| "learning_rate": 1.7608465194283958e-06, |
| "loss": 0.0295, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.6363815142576205, |
| "grad_norm": 0.7407302260398865, |
| "learning_rate": 1.757566565401323e-06, |
| "loss": 0.055, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.6367748279252704, |
| "grad_norm": 1.153152346611023, |
| "learning_rate": 1.7542880123314559e-06, |
| "loss": 0.0945, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.6371681415929203, |
| "grad_norm": 1.259879231452942, |
| "learning_rate": 1.75101086640536e-06, |
| "loss": 0.0537, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6375614552605703, |
| "grad_norm": 0.6502655744552612, |
| "learning_rate": 1.7477351338069442e-06, |
| "loss": 0.0443, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.6379547689282202, |
| "grad_norm": 0.9160225987434387, |
| "learning_rate": 1.7444608207174519e-06, |
| "loss": 0.0494, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.6383480825958702, |
| "grad_norm": 1.6503887176513672, |
| "learning_rate": 1.741187933315448e-06, |
| "loss": 0.0415, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.6387413962635201, |
| "grad_norm": 1.2449769973754883, |
| "learning_rate": 1.7379164777768038e-06, |
| "loss": 0.0607, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.63913470993117, |
| "grad_norm": 0.799196720123291, |
| "learning_rate": 1.734646460274692e-06, |
| "loss": 0.0404, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.6395280235988201, |
| "grad_norm": 1.6735135316848755, |
| "learning_rate": 1.7313778869795717e-06, |
| "loss": 0.0626, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.63992133726647, |
| "grad_norm": 1.090598702430725, |
| "learning_rate": 1.728110764059176e-06, |
| "loss": 0.0649, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.64031465093412, |
| "grad_norm": 0.6586104035377502, |
| "learning_rate": 1.7248450976785011e-06, |
| "loss": 0.0501, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.6407079646017699, |
| "grad_norm": 1.8684154748916626, |
| "learning_rate": 1.7215808939997945e-06, |
| "loss": 0.0653, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.6411012782694199, |
| "grad_norm": 1.1549500226974487, |
| "learning_rate": 1.7183181591825437e-06, |
| "loss": 0.0332, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6414945919370698, |
| "grad_norm": 1.295351505279541, |
| "learning_rate": 1.7150568993834666e-06, |
| "loss": 0.0535, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.6418879056047198, |
| "grad_norm": 0.8795567750930786, |
| "learning_rate": 1.7117971207564934e-06, |
| "loss": 0.0866, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.6422812192723697, |
| "grad_norm": 0.6757074594497681, |
| "learning_rate": 1.7085388294527632e-06, |
| "loss": 0.0385, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.6426745329400196, |
| "grad_norm": 0.9733456373214722, |
| "learning_rate": 1.705282031620608e-06, |
| "loss": 0.0923, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.6430678466076696, |
| "grad_norm": 1.0591400861740112, |
| "learning_rate": 1.7020267334055393e-06, |
| "loss": 0.0492, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.6434611602753195, |
| "grad_norm": 0.8595137596130371, |
| "learning_rate": 1.6987729409502412e-06, |
| "loss": 0.0411, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.6438544739429695, |
| "grad_norm": 1.831631064414978, |
| "learning_rate": 1.6955206603945557e-06, |
| "loss": 0.0733, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.6442477876106195, |
| "grad_norm": 0.5861109495162964, |
| "learning_rate": 1.6922698978754726e-06, |
| "loss": 0.045, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.6446411012782695, |
| "grad_norm": 1.3072712421417236, |
| "learning_rate": 1.6890206595271153e-06, |
| "loss": 0.0713, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.6450344149459194, |
| "grad_norm": 0.8035500049591064, |
| "learning_rate": 1.6857729514807325e-06, |
| "loss": 0.0379, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6454277286135693, |
| "grad_norm": 0.7814714312553406, |
| "learning_rate": 1.6825267798646851e-06, |
| "loss": 0.041, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.6458210422812193, |
| "grad_norm": 1.3243709802627563, |
| "learning_rate": 1.6792821508044352e-06, |
| "loss": 0.0633, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.6462143559488692, |
| "grad_norm": 0.8479057550430298, |
| "learning_rate": 1.6760390704225333e-06, |
| "loss": 0.0561, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.6466076696165192, |
| "grad_norm": 1.0051478147506714, |
| "learning_rate": 1.672797544838608e-06, |
| "loss": 0.0372, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.6470009832841691, |
| "grad_norm": 0.962547779083252, |
| "learning_rate": 1.6695575801693549e-06, |
| "loss": 0.0398, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.647394296951819, |
| "grad_norm": 1.314014196395874, |
| "learning_rate": 1.6663191825285214e-06, |
| "loss": 0.0492, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.647787610619469, |
| "grad_norm": 0.6934694647789001, |
| "learning_rate": 1.6630823580269005e-06, |
| "loss": 0.0367, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.6481809242871189, |
| "grad_norm": 1.1256476640701294, |
| "learning_rate": 1.6598471127723162e-06, |
| "loss": 0.0476, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.6485742379547689, |
| "grad_norm": 1.5946294069290161, |
| "learning_rate": 1.6566134528696126e-06, |
| "loss": 0.0484, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.6489675516224189, |
| "grad_norm": 1.1677006483078003, |
| "learning_rate": 1.6533813844206426e-06, |
| "loss": 0.0443, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6493608652900689, |
| "grad_norm": 0.9727287292480469, |
| "learning_rate": 1.6501509135242533e-06, |
| "loss": 0.036, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.6497541789577188, |
| "grad_norm": 1.6365562677383423, |
| "learning_rate": 1.6469220462762807e-06, |
| "loss": 0.0794, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.6501474926253688, |
| "grad_norm": 0.9197725057601929, |
| "learning_rate": 1.6436947887695336e-06, |
| "loss": 0.0314, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.6505408062930187, |
| "grad_norm": 0.9444229006767273, |
| "learning_rate": 1.6404691470937829e-06, |
| "loss": 0.017, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.6509341199606686, |
| "grad_norm": 1.0287470817565918, |
| "learning_rate": 1.6372451273357504e-06, |
| "loss": 0.0674, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.6513274336283186, |
| "grad_norm": 0.9683353900909424, |
| "learning_rate": 1.6340227355790988e-06, |
| "loss": 0.0727, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.6517207472959685, |
| "grad_norm": 0.9869152903556824, |
| "learning_rate": 1.6308019779044154e-06, |
| "loss": 0.0526, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.6521140609636185, |
| "grad_norm": 2.224297046661377, |
| "learning_rate": 1.6275828603892078e-06, |
| "loss": 0.0635, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.6525073746312684, |
| "grad_norm": 0.8496151566505432, |
| "learning_rate": 1.6243653891078864e-06, |
| "loss": 0.0581, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.6529006882989183, |
| "grad_norm": 1.2158007621765137, |
| "learning_rate": 1.6211495701317565e-06, |
| "loss": 0.0728, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6532940019665683, |
| "grad_norm": 0.48335015773773193, |
| "learning_rate": 1.6179354095290051e-06, |
| "loss": 0.0405, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.6536873156342183, |
| "grad_norm": 0.679865300655365, |
| "learning_rate": 1.6147229133646885e-06, |
| "loss": 0.0497, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.6540806293018683, |
| "grad_norm": 2.487617254257202, |
| "learning_rate": 1.611512087700724e-06, |
| "loss": 0.1029, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.6544739429695182, |
| "grad_norm": 1.0901083946228027, |
| "learning_rate": 1.6083029385958762e-06, |
| "loss": 0.0706, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.6548672566371682, |
| "grad_norm": 1.4582974910736084, |
| "learning_rate": 1.6050954721057461e-06, |
| "loss": 0.0651, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.6552605703048181, |
| "grad_norm": 1.1469032764434814, |
| "learning_rate": 1.6018896942827595e-06, |
| "loss": 0.0533, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.655653883972468, |
| "grad_norm": 1.5001522302627563, |
| "learning_rate": 1.5986856111761562e-06, |
| "loss": 0.0688, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.656047197640118, |
| "grad_norm": 0.7778475880622864, |
| "learning_rate": 1.595483228831976e-06, |
| "loss": 0.0457, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.6564405113077679, |
| "grad_norm": 0.910394549369812, |
| "learning_rate": 1.5922825532930526e-06, |
| "loss": 0.0295, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.6568338249754179, |
| "grad_norm": 1.1938371658325195, |
| "learning_rate": 1.5890835905989969e-06, |
| "loss": 0.0533, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6572271386430678, |
| "grad_norm": 0.9362410306930542, |
| "learning_rate": 1.5858863467861882e-06, |
| "loss": 0.054, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.6576204523107178, |
| "grad_norm": 0.5481738448143005, |
| "learning_rate": 1.582690827887763e-06, |
| "loss": 0.037, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.6580137659783677, |
| "grad_norm": 0.8186729550361633, |
| "learning_rate": 1.5794970399336012e-06, |
| "loss": 0.0355, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.6584070796460177, |
| "grad_norm": 0.885360598564148, |
| "learning_rate": 1.576304988950318e-06, |
| "loss": 0.0478, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.6588003933136677, |
| "grad_norm": 1.0103771686553955, |
| "learning_rate": 1.5731146809612508e-06, |
| "loss": 0.0562, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.6591937069813176, |
| "grad_norm": 0.9461012482643127, |
| "learning_rate": 1.569926121986447e-06, |
| "loss": 0.0301, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.6595870206489676, |
| "grad_norm": 1.5684260129928589, |
| "learning_rate": 1.566739318042655e-06, |
| "loss": 0.0339, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.6599803343166175, |
| "grad_norm": 0.7456137537956238, |
| "learning_rate": 1.56355427514331e-06, |
| "loss": 0.0592, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.6603736479842675, |
| "grad_norm": 1.6279810667037964, |
| "learning_rate": 1.5603709992985256e-06, |
| "loss": 0.0452, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.6607669616519174, |
| "grad_norm": 1.3496975898742676, |
| "learning_rate": 1.5571894965150796e-06, |
| "loss": 0.058, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6611602753195673, |
| "grad_norm": 1.0409663915634155, |
| "learning_rate": 1.554009772796406e-06, |
| "loss": 0.0635, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.6615535889872173, |
| "grad_norm": 0.6893079876899719, |
| "learning_rate": 1.55083183414258e-06, |
| "loss": 0.042, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.6619469026548672, |
| "grad_norm": 1.3735069036483765, |
| "learning_rate": 1.5476556865503095e-06, |
| "loss": 0.0418, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.6623402163225172, |
| "grad_norm": 0.9965916275978088, |
| "learning_rate": 1.5444813360129207e-06, |
| "loss": 0.0436, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.6627335299901671, |
| "grad_norm": 0.41811513900756836, |
| "learning_rate": 1.5413087885203515e-06, |
| "loss": 0.032, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.6631268436578172, |
| "grad_norm": 1.2320137023925781, |
| "learning_rate": 1.538138050059136e-06, |
| "loss": 0.0588, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.6635201573254671, |
| "grad_norm": 1.2540123462677002, |
| "learning_rate": 1.5349691266123946e-06, |
| "loss": 0.0527, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.663913470993117, |
| "grad_norm": 0.8406708240509033, |
| "learning_rate": 1.5318020241598248e-06, |
| "loss": 0.0479, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.664306784660767, |
| "grad_norm": 1.1033174991607666, |
| "learning_rate": 1.5286367486776835e-06, |
| "loss": 0.0566, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.6647000983284169, |
| "grad_norm": 1.4875179529190063, |
| "learning_rate": 1.5254733061387846e-06, |
| "loss": 0.0566, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.6650934119960669, |
| "grad_norm": 1.0827391147613525, |
| "learning_rate": 1.5223117025124817e-06, |
| "loss": 0.0333, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.6654867256637168, |
| "grad_norm": 1.2373061180114746, |
| "learning_rate": 1.5191519437646576e-06, |
| "loss": 0.048, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.6658800393313667, |
| "grad_norm": 0.9508680701255798, |
| "learning_rate": 1.5159940358577151e-06, |
| "loss": 0.0499, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.6662733529990167, |
| "grad_norm": 0.4500909447669983, |
| "learning_rate": 1.512837984750565e-06, |
| "loss": 0.0207, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.83719003200531, |
| "learning_rate": 1.5096837963986112e-06, |
| "loss": 0.0541, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.6670599803343166, |
| "grad_norm": 1.0231764316558838, |
| "learning_rate": 1.5065314767537453e-06, |
| "loss": 0.0255, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.6674532940019666, |
| "grad_norm": 0.8618975877761841, |
| "learning_rate": 1.5033810317643327e-06, |
| "loss": 0.0398, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.6678466076696166, |
| "grad_norm": 0.40866029262542725, |
| "learning_rate": 1.5002324673752006e-06, |
| "loss": 0.031, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.6682399213372665, |
| "grad_norm": 0.7475729584693909, |
| "learning_rate": 1.4970857895276285e-06, |
| "loss": 0.0534, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.6686332350049164, |
| "grad_norm": 1.0545064210891724, |
| "learning_rate": 1.4939410041593338e-06, |
| "loss": 0.0451, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6690265486725664, |
| "grad_norm": 1.023006796836853, |
| "learning_rate": 1.4907981172044647e-06, |
| "loss": 0.0594, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.6694198623402163, |
| "grad_norm": 0.9975923299789429, |
| "learning_rate": 1.487657134593587e-06, |
| "loss": 0.0634, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.6698131760078663, |
| "grad_norm": 1.2105883359909058, |
| "learning_rate": 1.4845180622536728e-06, |
| "loss": 0.0482, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.6702064896755162, |
| "grad_norm": 1.007332682609558, |
| "learning_rate": 1.4813809061080893e-06, |
| "loss": 0.0706, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.6705998033431662, |
| "grad_norm": 0.7119497060775757, |
| "learning_rate": 1.4782456720765895e-06, |
| "loss": 0.0409, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.6709931170108161, |
| "grad_norm": 1.0542527437210083, |
| "learning_rate": 1.4751123660752955e-06, |
| "loss": 0.0388, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.671386430678466, |
| "grad_norm": 2.3204405307769775, |
| "learning_rate": 1.4719809940166952e-06, |
| "loss": 0.0724, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.671779744346116, |
| "grad_norm": 0.5740649700164795, |
| "learning_rate": 1.4688515618096252e-06, |
| "loss": 0.0319, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.672173058013766, |
| "grad_norm": 0.9803503155708313, |
| "learning_rate": 1.4657240753592627e-06, |
| "loss": 0.0504, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.672566371681416, |
| "grad_norm": 0.8115725517272949, |
| "learning_rate": 1.462598540567113e-06, |
| "loss": 0.0605, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6729596853490659, |
| "grad_norm": 1.3304479122161865, |
| "learning_rate": 1.4594749633309981e-06, |
| "loss": 0.0758, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.6733529990167159, |
| "grad_norm": 1.208067774772644, |
| "learning_rate": 1.456353349545046e-06, |
| "loss": 0.0706, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.6737463126843658, |
| "grad_norm": 1.1107121706008911, |
| "learning_rate": 1.4532337050996804e-06, |
| "loss": 0.0468, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.6741396263520157, |
| "grad_norm": 1.192116618156433, |
| "learning_rate": 1.4501160358816085e-06, |
| "loss": 0.0657, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.6745329400196657, |
| "grad_norm": 1.0967481136322021, |
| "learning_rate": 1.4470003477738111e-06, |
| "loss": 0.0499, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.6749262536873156, |
| "grad_norm": 1.3263583183288574, |
| "learning_rate": 1.4438866466555308e-06, |
| "loss": 0.0449, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.6753195673549656, |
| "grad_norm": 1.5055456161499023, |
| "learning_rate": 1.4407749384022576e-06, |
| "loss": 0.0489, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.6757128810226155, |
| "grad_norm": 1.5726017951965332, |
| "learning_rate": 1.4376652288857249e-06, |
| "loss": 0.0626, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.6761061946902654, |
| "grad_norm": 1.6234389543533325, |
| "learning_rate": 1.4345575239738928e-06, |
| "loss": 0.0606, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.6764995083579154, |
| "grad_norm": 1.7149680852890015, |
| "learning_rate": 1.431451829530939e-06, |
| "loss": 0.0527, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6768928220255654, |
| "grad_norm": 0.8043215870857239, |
| "learning_rate": 1.4283481514172487e-06, |
| "loss": 0.0454, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.6772861356932154, |
| "grad_norm": 1.3794721364974976, |
| "learning_rate": 1.425246495489399e-06, |
| "loss": 0.0522, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.6776794493608653, |
| "grad_norm": 0.7596322298049927, |
| "learning_rate": 1.4221468676001544e-06, |
| "loss": 0.0507, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.6780727630285153, |
| "grad_norm": 0.9277907013893127, |
| "learning_rate": 1.419049273598451e-06, |
| "loss": 0.0406, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.6784660766961652, |
| "grad_norm": 1.7175707817077637, |
| "learning_rate": 1.4159537193293876e-06, |
| "loss": 0.0477, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.6788593903638152, |
| "grad_norm": 0.5326056480407715, |
| "learning_rate": 1.4128602106342154e-06, |
| "loss": 0.0248, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.6792527040314651, |
| "grad_norm": 1.259993314743042, |
| "learning_rate": 1.4097687533503213e-06, |
| "loss": 0.05, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.679646017699115, |
| "grad_norm": 0.9844882488250732, |
| "learning_rate": 1.4066793533112255e-06, |
| "loss": 0.0407, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.680039331366765, |
| "grad_norm": 1.6221920251846313, |
| "learning_rate": 1.4035920163465648e-06, |
| "loss": 0.0589, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.6804326450344149, |
| "grad_norm": 2.0537407398223877, |
| "learning_rate": 1.400506748282083e-06, |
| "loss": 0.0622, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6808259587020649, |
| "grad_norm": 1.1460561752319336, |
| "learning_rate": 1.3974235549396198e-06, |
| "loss": 0.0448, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.6812192723697148, |
| "grad_norm": 1.2280306816101074, |
| "learning_rate": 1.3943424421370998e-06, |
| "loss": 0.0621, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.6816125860373649, |
| "grad_norm": 1.9272797107696533, |
| "learning_rate": 1.3912634156885235e-06, |
| "loss": 0.0559, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.6820058997050148, |
| "grad_norm": 0.8985779285430908, |
| "learning_rate": 1.3881864814039503e-06, |
| "loss": 0.0568, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.6823992133726647, |
| "grad_norm": 0.5459672808647156, |
| "learning_rate": 1.3851116450894959e-06, |
| "loss": 0.03, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.6827925270403147, |
| "grad_norm": 0.8683139085769653, |
| "learning_rate": 1.382038912547315e-06, |
| "loss": 0.0513, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.6831858407079646, |
| "grad_norm": 0.7696962952613831, |
| "learning_rate": 1.3789682895755935e-06, |
| "loss": 0.0448, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.6835791543756146, |
| "grad_norm": 1.2431952953338623, |
| "learning_rate": 1.3758997819685366e-06, |
| "loss": 0.0493, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.6839724680432645, |
| "grad_norm": 0.9553192853927612, |
| "learning_rate": 1.3728333955163565e-06, |
| "loss": 0.0321, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.6843657817109144, |
| "grad_norm": 1.2432819604873657, |
| "learning_rate": 1.3697691360052646e-06, |
| "loss": 0.0744, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6847590953785644, |
| "grad_norm": 0.6021830439567566, |
| "learning_rate": 1.3667070092174587e-06, |
| "loss": 0.0471, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.6851524090462143, |
| "grad_norm": 1.0340098142623901, |
| "learning_rate": 1.3636470209311093e-06, |
| "loss": 0.0645, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.6855457227138643, |
| "grad_norm": 1.2661107778549194, |
| "learning_rate": 1.360589176920355e-06, |
| "loss": 0.0314, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.6859390363815142, |
| "grad_norm": 1.7685880661010742, |
| "learning_rate": 1.357533482955287e-06, |
| "loss": 0.0635, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.6863323500491643, |
| "grad_norm": 1.249866008758545, |
| "learning_rate": 1.354479944801939e-06, |
| "loss": 0.0257, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.6867256637168142, |
| "grad_norm": 0.8888324499130249, |
| "learning_rate": 1.3514285682222777e-06, |
| "loss": 0.0501, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.6871189773844641, |
| "grad_norm": 0.9306212067604065, |
| "learning_rate": 1.3483793589741901e-06, |
| "loss": 0.0535, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.6875122910521141, |
| "grad_norm": 1.239108920097351, |
| "learning_rate": 1.3453323228114745e-06, |
| "loss": 0.0645, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.687905604719764, |
| "grad_norm": 1.971179723739624, |
| "learning_rate": 1.3422874654838263e-06, |
| "loss": 0.0617, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.688298918387414, |
| "grad_norm": 0.8780958652496338, |
| "learning_rate": 1.3392447927368315e-06, |
| "loss": 0.0303, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6886922320550639, |
| "grad_norm": 0.5229460000991821, |
| "learning_rate": 1.3362043103119537e-06, |
| "loss": 0.0408, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.6890855457227139, |
| "grad_norm": 1.0178303718566895, |
| "learning_rate": 1.3331660239465232e-06, |
| "loss": 0.0692, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.6894788593903638, |
| "grad_norm": 1.1098684072494507, |
| "learning_rate": 1.3301299393737262e-06, |
| "loss": 0.0553, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.6898721730580137, |
| "grad_norm": 0.9905382990837097, |
| "learning_rate": 1.3270960623225953e-06, |
| "loss": 0.0551, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.6902654867256637, |
| "grad_norm": 1.15705406665802, |
| "learning_rate": 1.324064398517994e-06, |
| "loss": 0.0606, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.6906588003933136, |
| "grad_norm": 0.7547001838684082, |
| "learning_rate": 1.3210349536806138e-06, |
| "loss": 0.0375, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.6910521140609637, |
| "grad_norm": 0.9143390655517578, |
| "learning_rate": 1.3180077335269565e-06, |
| "loss": 0.0557, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.6914454277286136, |
| "grad_norm": 1.5813028812408447, |
| "learning_rate": 1.3149827437693267e-06, |
| "loss": 0.0734, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.6918387413962636, |
| "grad_norm": 1.3135156631469727, |
| "learning_rate": 1.3119599901158214e-06, |
| "loss": 0.0454, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.6922320550639135, |
| "grad_norm": 1.3713979721069336, |
| "learning_rate": 1.3089394782703152e-06, |
| "loss": 0.0459, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6926253687315634, |
| "grad_norm": 1.0648804903030396, |
| "learning_rate": 1.3059212139324548e-06, |
| "loss": 0.0562, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.6930186823992134, |
| "grad_norm": 0.8367137312889099, |
| "learning_rate": 1.3029052027976457e-06, |
| "loss": 0.0269, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.6934119960668633, |
| "grad_norm": 1.1222723722457886, |
| "learning_rate": 1.299891450557041e-06, |
| "loss": 0.0458, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.6938053097345133, |
| "grad_norm": 1.087550163269043, |
| "learning_rate": 1.2968799628975311e-06, |
| "loss": 0.0357, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.6941986234021632, |
| "grad_norm": 0.8797011375427246, |
| "learning_rate": 1.2938707455017358e-06, |
| "loss": 0.0459, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.6945919370698131, |
| "grad_norm": 1.4389101266860962, |
| "learning_rate": 1.2908638040479855e-06, |
| "loss": 0.0715, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.6949852507374631, |
| "grad_norm": 0.826977014541626, |
| "learning_rate": 1.2878591442103215e-06, |
| "loss": 0.0498, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.695378564405113, |
| "grad_norm": 1.2073124647140503, |
| "learning_rate": 1.2848567716584764e-06, |
| "loss": 0.0401, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.6957718780727631, |
| "grad_norm": 1.2512377500534058, |
| "learning_rate": 1.2818566920578684e-06, |
| "loss": 0.0545, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.696165191740413, |
| "grad_norm": 1.003304123878479, |
| "learning_rate": 1.2788589110695896e-06, |
| "loss": 0.0657, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.696558505408063, |
| "grad_norm": 1.6829479932785034, |
| "learning_rate": 1.275863434350391e-06, |
| "loss": 0.0488, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.6969518190757129, |
| "grad_norm": 1.0957913398742676, |
| "learning_rate": 1.2728702675526788e-06, |
| "loss": 0.0695, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.6973451327433628, |
| "grad_norm": 1.2029186487197876, |
| "learning_rate": 1.2698794163244998e-06, |
| "loss": 0.0574, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.6977384464110128, |
| "grad_norm": 0.8925944566726685, |
| "learning_rate": 1.2668908863095311e-06, |
| "loss": 0.0424, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.6981317600786627, |
| "grad_norm": 0.8353788256645203, |
| "learning_rate": 1.2639046831470697e-06, |
| "loss": 0.038, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.6985250737463127, |
| "grad_norm": 2.284682273864746, |
| "learning_rate": 1.2609208124720228e-06, |
| "loss": 0.0687, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.6989183874139626, |
| "grad_norm": 0.9992805123329163, |
| "learning_rate": 1.2579392799148938e-06, |
| "loss": 0.0401, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.6993117010816126, |
| "grad_norm": 1.329393744468689, |
| "learning_rate": 1.2549600911017761e-06, |
| "loss": 0.0768, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.6997050147492625, |
| "grad_norm": 1.184579849243164, |
| "learning_rate": 1.25198325165434e-06, |
| "loss": 0.0467, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.7000983284169124, |
| "grad_norm": 0.6934780478477478, |
| "learning_rate": 1.2490087671898234e-06, |
| "loss": 0.0454, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.7004916420845625, |
| "grad_norm": 0.5612182021141052, |
| "learning_rate": 1.24603664332102e-06, |
| "loss": 0.0397, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.7008849557522124, |
| "grad_norm": 1.493826985359192, |
| "learning_rate": 1.243066885656267e-06, |
| "loss": 0.0815, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.7012782694198624, |
| "grad_norm": 0.7363511323928833, |
| "learning_rate": 1.240099499799439e-06, |
| "loss": 0.0496, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.7016715830875123, |
| "grad_norm": 1.6472634077072144, |
| "learning_rate": 1.237134491349935e-06, |
| "loss": 0.0741, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.7020648967551623, |
| "grad_norm": 1.3183567523956299, |
| "learning_rate": 1.234171865902667e-06, |
| "loss": 0.043, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.7024582104228122, |
| "grad_norm": 1.0543493032455444, |
| "learning_rate": 1.2312116290480506e-06, |
| "loss": 0.0401, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.7028515240904621, |
| "grad_norm": 0.8686029314994812, |
| "learning_rate": 1.228253786371995e-06, |
| "loss": 0.0335, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.7032448377581121, |
| "grad_norm": 1.9254342317581177, |
| "learning_rate": 1.2252983434558894e-06, |
| "loss": 0.0361, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.703638151425762, |
| "grad_norm": 0.8810344338417053, |
| "learning_rate": 1.2223453058765966e-06, |
| "loss": 0.0442, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.704031465093412, |
| "grad_norm": 1.138178825378418, |
| "learning_rate": 1.2193946792064403e-06, |
| "loss": 0.0768, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.7044247787610619, |
| "grad_norm": 0.7755922675132751, |
| "learning_rate": 1.2164464690131947e-06, |
| "loss": 0.0303, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.7048180924287119, |
| "grad_norm": 1.5868074893951416, |
| "learning_rate": 1.2135006808600752e-06, |
| "loss": 0.052, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.7052114060963619, |
| "grad_norm": 0.9672881364822388, |
| "learning_rate": 1.2105573203057233e-06, |
| "loss": 0.0432, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.7056047197640118, |
| "grad_norm": 0.9986976981163025, |
| "learning_rate": 1.207616392904204e-06, |
| "loss": 0.0464, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.7059980334316618, |
| "grad_norm": 0.646554708480835, |
| "learning_rate": 1.2046779042049883e-06, |
| "loss": 0.0268, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.7063913470993117, |
| "grad_norm": 0.6818554997444153, |
| "learning_rate": 1.2017418597529464e-06, |
| "loss": 0.0521, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.7067846607669617, |
| "grad_norm": 0.5991765260696411, |
| "learning_rate": 1.1988082650883376e-06, |
| "loss": 0.0538, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.7071779744346116, |
| "grad_norm": 1.1525814533233643, |
| "learning_rate": 1.1958771257467946e-06, |
| "loss": 0.0451, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.7075712881022616, |
| "grad_norm": 0.8486371040344238, |
| "learning_rate": 1.1929484472593205e-06, |
| "loss": 0.0514, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.7079646017699115, |
| "grad_norm": 1.393419623374939, |
| "learning_rate": 1.190022235152274e-06, |
| "loss": 0.0609, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7083579154375614, |
| "grad_norm": 0.7574542760848999, |
| "learning_rate": 1.1870984949473586e-06, |
| "loss": 0.0604, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.7087512291052114, |
| "grad_norm": 1.0601574182510376, |
| "learning_rate": 1.184177232161615e-06, |
| "loss": 0.0459, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.7091445427728613, |
| "grad_norm": 0.7535306811332703, |
| "learning_rate": 1.1812584523074089e-06, |
| "loss": 0.0351, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.7095378564405113, |
| "grad_norm": 1.3023512363433838, |
| "learning_rate": 1.1783421608924183e-06, |
| "loss": 0.0598, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.7099311701081613, |
| "grad_norm": 1.1070560216903687, |
| "learning_rate": 1.1754283634196285e-06, |
| "loss": 0.0471, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.7103244837758113, |
| "grad_norm": 0.9613627791404724, |
| "learning_rate": 1.1725170653873174e-06, |
| "loss": 0.0486, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.7107177974434612, |
| "grad_norm": 0.7932494282722473, |
| "learning_rate": 1.1696082722890474e-06, |
| "loss": 0.0774, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 0.684893786907196, |
| "learning_rate": 1.1667019896136539e-06, |
| "loss": 0.0454, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.7115044247787611, |
| "grad_norm": 1.3207006454467773, |
| "learning_rate": 1.1637982228452329e-06, |
| "loss": 0.0473, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.711897738446411, |
| "grad_norm": 1.3429388999938965, |
| "learning_rate": 1.1608969774631366e-06, |
| "loss": 0.0412, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.712291052114061, |
| "grad_norm": 1.4132349491119385, |
| "learning_rate": 1.1579982589419568e-06, |
| "loss": 0.0549, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.7126843657817109, |
| "grad_norm": 0.7561691999435425, |
| "learning_rate": 1.155102072751518e-06, |
| "loss": 0.0337, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.7130776794493608, |
| "grad_norm": 0.7749929428100586, |
| "learning_rate": 1.152208424356867e-06, |
| "loss": 0.034, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.7134709931170108, |
| "grad_norm": 1.1324396133422852, |
| "learning_rate": 1.1493173192182613e-06, |
| "loss": 0.032, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.7138643067846607, |
| "grad_norm": 0.7702449560165405, |
| "learning_rate": 1.1464287627911577e-06, |
| "loss": 0.0451, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.7142576204523107, |
| "grad_norm": 0.7402438521385193, |
| "learning_rate": 1.1435427605262057e-06, |
| "loss": 0.0489, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.7146509341199607, |
| "grad_norm": 1.3986225128173828, |
| "learning_rate": 1.1406593178692346e-06, |
| "loss": 0.0463, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.7150442477876107, |
| "grad_norm": 0.7235271334648132, |
| "learning_rate": 1.1377784402612439e-06, |
| "loss": 0.0519, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.7154375614552606, |
| "grad_norm": 0.8625795841217041, |
| "learning_rate": 1.1349001331383921e-06, |
| "loss": 0.0375, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.7158308751229105, |
| "grad_norm": 1.5163322687149048, |
| "learning_rate": 1.132024401931988e-06, |
| "loss": 0.0557, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.7162241887905605, |
| "grad_norm": 0.6675801277160645, |
| "learning_rate": 1.12915125206848e-06, |
| "loss": 0.0261, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.7166175024582104, |
| "grad_norm": 0.9029967188835144, |
| "learning_rate": 1.1262806889694455e-06, |
| "loss": 0.037, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.7170108161258604, |
| "grad_norm": 0.716080367565155, |
| "learning_rate": 1.1234127180515787e-06, |
| "loss": 0.0559, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.7174041297935103, |
| "grad_norm": 0.9414195418357849, |
| "learning_rate": 1.1205473447266843e-06, |
| "loss": 0.0466, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.7177974434611603, |
| "grad_norm": 0.9414455890655518, |
| "learning_rate": 1.117684574401666e-06, |
| "loss": 0.0408, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.7181907571288102, |
| "grad_norm": 0.6914128065109253, |
| "learning_rate": 1.1148244124785143e-06, |
| "loss": 0.0286, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.7185840707964601, |
| "grad_norm": 1.238477349281311, |
| "learning_rate": 1.111966864354298e-06, |
| "loss": 0.0606, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.7189773844641101, |
| "grad_norm": 1.5670506954193115, |
| "learning_rate": 1.1091119354211544e-06, |
| "loss": 0.045, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.7193706981317601, |
| "grad_norm": 1.5129029750823975, |
| "learning_rate": 1.1062596310662775e-06, |
| "loss": 0.0352, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.7197640117994101, |
| "grad_norm": 1.0257515907287598, |
| "learning_rate": 1.1034099566719104e-06, |
| "loss": 0.0267, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.72015732546706, |
| "grad_norm": 0.8426341414451599, |
| "learning_rate": 1.1005629176153302e-06, |
| "loss": 0.0331, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.72055063913471, |
| "grad_norm": 1.1478296518325806, |
| "learning_rate": 1.097718519268844e-06, |
| "loss": 0.0601, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.7209439528023599, |
| "grad_norm": 1.6983435153961182, |
| "learning_rate": 1.0948767669997762e-06, |
| "loss": 0.0671, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.7213372664700098, |
| "grad_norm": 0.992310643196106, |
| "learning_rate": 1.092037666170456e-06, |
| "loss": 0.0554, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.7217305801376598, |
| "grad_norm": 1.258967399597168, |
| "learning_rate": 1.0892012221382115e-06, |
| "loss": 0.0423, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.7221238938053097, |
| "grad_norm": 0.8152772188186646, |
| "learning_rate": 1.0863674402553564e-06, |
| "loss": 0.0638, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.7225172074729597, |
| "grad_norm": 0.8680564165115356, |
| "learning_rate": 1.08353632586918e-06, |
| "loss": 0.0322, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.7229105211406096, |
| "grad_norm": 0.4944194257259369, |
| "learning_rate": 1.0807078843219395e-06, |
| "loss": 0.0684, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.7233038348082595, |
| "grad_norm": 1.0787291526794434, |
| "learning_rate": 1.077882120950849e-06, |
| "loss": 0.0355, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.7236971484759095, |
| "grad_norm": 0.4451111853122711, |
| "learning_rate": 1.0750590410880671e-06, |
| "loss": 0.0291, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.7240904621435595, |
| "grad_norm": 0.48384201526641846, |
| "learning_rate": 1.072238650060691e-06, |
| "loss": 0.0344, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.7244837758112095, |
| "grad_norm": 1.1826977729797363, |
| "learning_rate": 1.0694209531907412e-06, |
| "loss": 0.0302, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.7248770894788594, |
| "grad_norm": 0.5904631614685059, |
| "learning_rate": 1.0666059557951566e-06, |
| "loss": 0.0268, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.7252704031465094, |
| "grad_norm": 0.7693639993667603, |
| "learning_rate": 1.0637936631857815e-06, |
| "loss": 0.0329, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.7256637168141593, |
| "grad_norm": 1.1267420053482056, |
| "learning_rate": 1.0609840806693567e-06, |
| "loss": 0.0584, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.7260570304818093, |
| "grad_norm": 0.8826761841773987, |
| "learning_rate": 1.0581772135475089e-06, |
| "loss": 0.0371, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.7264503441494592, |
| "grad_norm": 0.9510964751243591, |
| "learning_rate": 1.0553730671167412e-06, |
| "loss": 0.0366, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.7268436578171091, |
| "grad_norm": 1.4061312675476074, |
| "learning_rate": 1.052571646668421e-06, |
| "loss": 0.0548, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.7272369714847591, |
| "grad_norm": 1.7235345840454102, |
| "learning_rate": 1.0497729574887744e-06, |
| "loss": 0.0729, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.727630285152409, |
| "grad_norm": 1.10977041721344, |
| "learning_rate": 1.0469770048588723e-06, |
| "loss": 0.042, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.728023598820059, |
| "grad_norm": 1.054607629776001, |
| "learning_rate": 1.0441837940546217e-06, |
| "loss": 0.0286, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.7284169124877089, |
| "grad_norm": 1.315953016281128, |
| "learning_rate": 1.0413933303467578e-06, |
| "loss": 0.0415, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.728810226155359, |
| "grad_norm": 1.4497429132461548, |
| "learning_rate": 1.038605619000828e-06, |
| "loss": 0.0566, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.7292035398230089, |
| "grad_norm": 1.1214773654937744, |
| "learning_rate": 1.0358206652771896e-06, |
| "loss": 0.0388, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.7295968534906588, |
| "grad_norm": 0.8499764204025269, |
| "learning_rate": 1.033038474430995e-06, |
| "loss": 0.022, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.7299901671583088, |
| "grad_norm": 0.993175745010376, |
| "learning_rate": 1.0302590517121835e-06, |
| "loss": 0.0351, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.7303834808259587, |
| "grad_norm": 1.3063788414001465, |
| "learning_rate": 1.0274824023654717e-06, |
| "loss": 0.049, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.7307767944936087, |
| "grad_norm": 0.6438285112380981, |
| "learning_rate": 1.0247085316303401e-06, |
| "loss": 0.0322, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.7311701081612586, |
| "grad_norm": 1.801291823387146, |
| "learning_rate": 1.0219374447410289e-06, |
| "loss": 0.0724, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.7315634218289085, |
| "grad_norm": 1.5461159944534302, |
| "learning_rate": 1.019169146926524e-06, |
| "loss": 0.0466, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.7319567354965585, |
| "grad_norm": 1.0814778804779053, |
| "learning_rate": 1.016403643410549e-06, |
| "loss": 0.0532, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.7323500491642084, |
| "grad_norm": 1.1939774751663208, |
| "learning_rate": 1.013640939411554e-06, |
| "loss": 0.0349, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.7327433628318584, |
| "grad_norm": 2.0183346271514893, |
| "learning_rate": 1.010881040142708e-06, |
| "loss": 0.0802, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.7331366764995083, |
| "grad_norm": 1.4486076831817627, |
| "learning_rate": 1.0081239508118842e-06, |
| "loss": 0.0381, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.7335299901671584, |
| "grad_norm": 0.7198472023010254, |
| "learning_rate": 1.0053696766216566e-06, |
| "loss": 0.0332, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.7339233038348083, |
| "grad_norm": 1.0703610181808472, |
| "learning_rate": 1.0026182227692865e-06, |
| "loss": 0.0321, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.7343166175024582, |
| "grad_norm": 0.9748527407646179, |
| "learning_rate": 9.998695944467127e-07, |
| "loss": 0.0312, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.7347099311701082, |
| "grad_norm": 0.6599907279014587, |
| "learning_rate": 9.97123796840543e-07, |
| "loss": 0.05, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.7351032448377581, |
| "grad_norm": 1.033435583114624, |
| "learning_rate": 9.943808351320418e-07, |
| "loss": 0.0482, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.7354965585054081, |
| "grad_norm": 1.139096975326538, |
| "learning_rate": 9.916407144971245e-07, |
| "loss": 0.046, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.735889872173058, |
| "grad_norm": 1.5064547061920166, |
| "learning_rate": 9.889034401063443e-07, |
| "loss": 0.0629, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.736283185840708, |
| "grad_norm": 0.7273301482200623, |
| "learning_rate": 9.861690171248841e-07, |
| "loss": 0.0314, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.7366764995083579, |
| "grad_norm": 0.579467236995697, |
| "learning_rate": 9.834374507125458e-07, |
| "loss": 0.0527, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.7370698131760078, |
| "grad_norm": 0.8448885679244995, |
| "learning_rate": 9.807087460237419e-07, |
| "loss": 0.0326, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.7374631268436578, |
| "grad_norm": 1.0001413822174072, |
| "learning_rate": 9.779829082074827e-07, |
| "loss": 0.0657, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.7378564405113077, |
| "grad_norm": 1.2145143747329712, |
| "learning_rate": 9.752599424073707e-07, |
| "loss": 0.0339, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.7382497541789578, |
| "grad_norm": 1.0525156259536743, |
| "learning_rate": 9.725398537615894e-07, |
| "loss": 0.0459, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.7386430678466077, |
| "grad_norm": 1.2982537746429443, |
| "learning_rate": 9.698226474028913e-07, |
| "loss": 0.0744, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.7390363815142577, |
| "grad_norm": 0.8789856433868408, |
| "learning_rate": 9.671083284585925e-07, |
| "loss": 0.0442, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.7394296951819076, |
| "grad_norm": 2.672044515609741, |
| "learning_rate": 9.643969020505573e-07, |
| "loss": 0.0769, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.7398230088495575, |
| "grad_norm": 1.0391490459442139, |
| "learning_rate": 9.616883732951945e-07, |
| "loss": 0.0721, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.7402163225172075, |
| "grad_norm": 1.1753817796707153, |
| "learning_rate": 9.589827473034443e-07, |
| "loss": 0.0463, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.7406096361848574, |
| "grad_norm": 1.260125994682312, |
| "learning_rate": 9.562800291807695e-07, |
| "loss": 0.0637, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.7410029498525074, |
| "grad_norm": 0.9175117015838623, |
| "learning_rate": 9.535802240271455e-07, |
| "loss": 0.037, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.7413962635201573, |
| "grad_norm": 0.9132412075996399, |
| "learning_rate": 9.508833369370524e-07, |
| "loss": 0.056, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.7417895771878072, |
| "grad_norm": 1.965725302696228, |
| "learning_rate": 9.481893729994609e-07, |
| "loss": 0.0545, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.7421828908554572, |
| "grad_norm": 2.073374032974243, |
| "learning_rate": 9.454983372978288e-07, |
| "loss": 0.0754, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.7425762045231071, |
| "grad_norm": 1.0531790256500244, |
| "learning_rate": 9.428102349100868e-07, |
| "loss": 0.0459, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.7429695181907572, |
| "grad_norm": 1.7750204801559448, |
| "learning_rate": 9.40125070908631e-07, |
| "loss": 0.061, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.7433628318584071, |
| "grad_norm": 0.6801098585128784, |
| "learning_rate": 9.374428503603139e-07, |
| "loss": 0.0597, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.7437561455260571, |
| "grad_norm": 0.6724294424057007, |
| "learning_rate": 9.347635783264309e-07, |
| "loss": 0.0302, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.744149459193707, |
| "grad_norm": 0.7799742817878723, |
| "learning_rate": 9.32087259862716e-07, |
| "loss": 0.0679, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.744542772861357, |
| "grad_norm": 1.623399257659912, |
| "learning_rate": 9.294139000193292e-07, |
| "loss": 0.0553, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.7449360865290069, |
| "grad_norm": 0.8977343440055847, |
| "learning_rate": 9.267435038408479e-07, |
| "loss": 0.0284, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.7453294001966568, |
| "grad_norm": 0.7733441591262817, |
| "learning_rate": 9.240760763662562e-07, |
| "loss": 0.0339, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.7457227138643068, |
| "grad_norm": 1.5382790565490723, |
| "learning_rate": 9.214116226289388e-07, |
| "loss": 0.0746, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.7461160275319567, |
| "grad_norm": 1.144547700881958, |
| "learning_rate": 9.187501476566648e-07, |
| "loss": 0.0351, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.7465093411996067, |
| "grad_norm": 0.7251105904579163, |
| "learning_rate": 9.16091656471586e-07, |
| "loss": 0.0634, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.7469026548672566, |
| "grad_norm": 0.999096155166626, |
| "learning_rate": 9.134361540902225e-07, |
| "loss": 0.0421, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.7472959685349065, |
| "grad_norm": 0.830605685710907, |
| "learning_rate": 9.10783645523455e-07, |
| "loss": 0.0426, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7476892822025566, |
| "grad_norm": 1.5645976066589355, |
| "learning_rate": 9.081341357765145e-07, |
| "loss": 0.0416, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.7480825958702065, |
| "grad_norm": 0.8770972490310669, |
| "learning_rate": 9.054876298489742e-07, |
| "loss": 0.0561, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.7484759095378565, |
| "grad_norm": 1.5209007263183594, |
| "learning_rate": 9.02844132734737e-07, |
| "loss": 0.0419, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.7488692232055064, |
| "grad_norm": 3.409085512161255, |
| "learning_rate": 9.002036494220306e-07, |
| "loss": 0.0752, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.7492625368731564, |
| "grad_norm": 1.448819875717163, |
| "learning_rate": 8.975661848933945e-07, |
| "loss": 0.0523, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.7496558505408063, |
| "grad_norm": 0.998282790184021, |
| "learning_rate": 8.949317441256724e-07, |
| "loss": 0.0733, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.7500491642084562, |
| "grad_norm": 1.4408761262893677, |
| "learning_rate": 8.923003320900014e-07, |
| "loss": 0.0577, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.7504424778761062, |
| "grad_norm": 0.9130271077156067, |
| "learning_rate": 8.896719537518048e-07, |
| "loss": 0.0317, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.7508357915437561, |
| "grad_norm": 1.9195144176483154, |
| "learning_rate": 8.870466140707795e-07, |
| "loss": 0.0666, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.7512291052114061, |
| "grad_norm": 1.457318902015686, |
| "learning_rate": 8.844243180008913e-07, |
| "loss": 0.0762, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.751622418879056, |
| "grad_norm": 1.4528069496154785, |
| "learning_rate": 8.818050704903589e-07, |
| "loss": 0.0423, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.752015732546706, |
| "grad_norm": 0.849536120891571, |
| "learning_rate": 8.791888764816514e-07, |
| "loss": 0.0289, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.752409046214356, |
| "grad_norm": 1.4856075048446655, |
| "learning_rate": 8.765757409114753e-07, |
| "loss": 0.0665, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.752802359882006, |
| "grad_norm": 0.8997237086296082, |
| "learning_rate": 8.739656687107656e-07, |
| "loss": 0.0619, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.7531956735496559, |
| "grad_norm": 0.8566966652870178, |
| "learning_rate": 8.713586648046768e-07, |
| "loss": 0.0476, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.7535889872173058, |
| "grad_norm": 0.9483917355537415, |
| "learning_rate": 8.68754734112574e-07, |
| "loss": 0.0486, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.7539823008849558, |
| "grad_norm": 1.0472768545150757, |
| "learning_rate": 8.661538815480228e-07, |
| "loss": 0.0422, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.7543756145526057, |
| "grad_norm": 1.4821901321411133, |
| "learning_rate": 8.635561120187813e-07, |
| "loss": 0.0408, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.7547689282202557, |
| "grad_norm": 0.7954731583595276, |
| "learning_rate": 8.609614304267877e-07, |
| "loss": 0.059, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.7551622418879056, |
| "grad_norm": 0.9966669082641602, |
| "learning_rate": 8.583698416681555e-07, |
| "loss": 0.0303, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.7555555555555555, |
| "grad_norm": 0.39692261815071106, |
| "learning_rate": 8.557813506331616e-07, |
| "loss": 0.0324, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.7559488692232055, |
| "grad_norm": 1.7129300832748413, |
| "learning_rate": 8.531959622062372e-07, |
| "loss": 0.0397, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.7563421828908554, |
| "grad_norm": 1.0999704599380493, |
| "learning_rate": 8.506136812659601e-07, |
| "loss": 0.0455, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.7567354965585054, |
| "grad_norm": 1.2547434568405151, |
| "learning_rate": 8.480345126850414e-07, |
| "loss": 0.0658, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.7571288102261554, |
| "grad_norm": 1.1041603088378906, |
| "learning_rate": 8.454584613303227e-07, |
| "loss": 0.0339, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.7575221238938054, |
| "grad_norm": 0.8621834516525269, |
| "learning_rate": 8.428855320627613e-07, |
| "loss": 0.0294, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.7579154375614553, |
| "grad_norm": 0.7350767254829407, |
| "learning_rate": 8.403157297374239e-07, |
| "loss": 0.023, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.7583087512291052, |
| "grad_norm": 0.9072149991989136, |
| "learning_rate": 8.377490592034779e-07, |
| "loss": 0.0704, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.7587020648967552, |
| "grad_norm": 0.715020477771759, |
| "learning_rate": 8.35185525304178e-07, |
| "loss": 0.0321, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.7590953785644051, |
| "grad_norm": 0.7303974032402039, |
| "learning_rate": 8.326251328768626e-07, |
| "loss": 0.0207, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.7594886922320551, |
| "grad_norm": 1.534783124923706, |
| "learning_rate": 8.300678867529415e-07, |
| "loss": 0.0715, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.759882005899705, |
| "grad_norm": 0.6678977012634277, |
| "learning_rate": 8.275137917578879e-07, |
| "loss": 0.0454, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.760275319567355, |
| "grad_norm": 0.7839411497116089, |
| "learning_rate": 8.249628527112282e-07, |
| "loss": 0.053, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.7606686332350049, |
| "grad_norm": 0.6599370241165161, |
| "learning_rate": 8.224150744265352e-07, |
| "loss": 0.0312, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.7610619469026548, |
| "grad_norm": 0.8593689799308777, |
| "learning_rate": 8.198704617114143e-07, |
| "loss": 0.0219, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.7614552605703048, |
| "grad_norm": 1.0792686939239502, |
| "learning_rate": 8.173290193674996e-07, |
| "loss": 0.0688, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.7618485742379548, |
| "grad_norm": 1.1030522584915161, |
| "learning_rate": 8.147907521904433e-07, |
| "loss": 0.0598, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.7622418879056048, |
| "grad_norm": 1.4342604875564575, |
| "learning_rate": 8.122556649699051e-07, |
| "loss": 0.072, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.7626352015732547, |
| "grad_norm": 1.555779218673706, |
| "learning_rate": 8.097237624895452e-07, |
| "loss": 0.0875, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.7630285152409046, |
| "grad_norm": 1.7069602012634277, |
| "learning_rate": 8.07195049527012e-07, |
| "loss": 0.0625, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.7634218289085546, |
| "grad_norm": 1.4105464220046997, |
| "learning_rate": 8.046695308539376e-07, |
| "loss": 0.0302, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.7638151425762045, |
| "grad_norm": 0.9220629930496216, |
| "learning_rate": 8.021472112359255e-07, |
| "loss": 0.0788, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.7642084562438545, |
| "grad_norm": 1.7221704721450806, |
| "learning_rate": 7.996280954325433e-07, |
| "loss": 0.0701, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.7646017699115044, |
| "grad_norm": 1.240715503692627, |
| "learning_rate": 7.971121881973126e-07, |
| "loss": 0.0605, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.7649950835791544, |
| "grad_norm": 1.054165005683899, |
| "learning_rate": 7.945994942777016e-07, |
| "loss": 0.0278, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.7653883972468043, |
| "grad_norm": 0.3918832242488861, |
| "learning_rate": 7.92090018415112e-07, |
| "loss": 0.0433, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.7657817109144542, |
| "grad_norm": 1.2010436058044434, |
| "learning_rate": 7.895837653448759e-07, |
| "loss": 0.0645, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.7661750245821042, |
| "grad_norm": 0.6880310773849487, |
| "learning_rate": 7.870807397962438e-07, |
| "loss": 0.0466, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.7665683382497542, |
| "grad_norm": 0.8154659867286682, |
| "learning_rate": 7.845809464923748e-07, |
| "loss": 0.0478, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.7669616519174042, |
| "grad_norm": 0.7172273397445679, |
| "learning_rate": 7.820843901503308e-07, |
| "loss": 0.0352, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7673549655850541, |
| "grad_norm": 1.7781319618225098, |
| "learning_rate": 7.79591075481062e-07, |
| "loss": 0.0732, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.7677482792527041, |
| "grad_norm": 0.6639533638954163, |
| "learning_rate": 7.771010071894052e-07, |
| "loss": 0.0179, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.768141592920354, |
| "grad_norm": 0.8761031627655029, |
| "learning_rate": 7.7461418997407e-07, |
| "loss": 0.0281, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.7685349065880039, |
| "grad_norm": 0.7496312856674194, |
| "learning_rate": 7.721306285276309e-07, |
| "loss": 0.053, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.7689282202556539, |
| "grad_norm": 0.46650174260139465, |
| "learning_rate": 7.696503275365194e-07, |
| "loss": 0.0513, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.7693215339233038, |
| "grad_norm": 1.1080721616744995, |
| "learning_rate": 7.671732916810154e-07, |
| "loss": 0.0507, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.7697148475909538, |
| "grad_norm": 0.6540339589118958, |
| "learning_rate": 7.646995256352346e-07, |
| "loss": 0.028, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.7701081612586037, |
| "grad_norm": 1.099401593208313, |
| "learning_rate": 7.622290340671256e-07, |
| "loss": 0.0623, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.7705014749262536, |
| "grad_norm": 0.9163020253181458, |
| "learning_rate": 7.597618216384576e-07, |
| "loss": 0.0251, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.7708947885939036, |
| "grad_norm": 1.32003915309906, |
| "learning_rate": 7.572978930048108e-07, |
| "loss": 0.0467, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7712881022615536, |
| "grad_norm": 1.0354825258255005, |
| "learning_rate": 7.54837252815571e-07, |
| "loss": 0.0491, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.7716814159292036, |
| "grad_norm": 1.0285413265228271, |
| "learning_rate": 7.523799057139158e-07, |
| "loss": 0.0598, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.7720747295968535, |
| "grad_norm": 1.7109252214431763, |
| "learning_rate": 7.49925856336812e-07, |
| "loss": 0.058, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.7724680432645035, |
| "grad_norm": 1.3561407327651978, |
| "learning_rate": 7.474751093150015e-07, |
| "loss": 0.0351, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.7728613569321534, |
| "grad_norm": 0.4150741696357727, |
| "learning_rate": 7.450276692729957e-07, |
| "loss": 0.0181, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.7732546705998034, |
| "grad_norm": 1.0091959238052368, |
| "learning_rate": 7.425835408290655e-07, |
| "loss": 0.0403, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.7736479842674533, |
| "grad_norm": 2.851815938949585, |
| "learning_rate": 7.40142728595234e-07, |
| "loss": 0.0491, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.7740412979351032, |
| "grad_norm": 1.306333303451538, |
| "learning_rate": 7.377052371772637e-07, |
| "loss": 0.058, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.7744346116027532, |
| "grad_norm": 0.8560998439788818, |
| "learning_rate": 7.352710711746536e-07, |
| "loss": 0.0284, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.7748279252704031, |
| "grad_norm": 1.8746119737625122, |
| "learning_rate": 7.328402351806269e-07, |
| "loss": 0.0654, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.7752212389380531, |
| "grad_norm": 1.0875734090805054, |
| "learning_rate": 7.304127337821229e-07, |
| "loss": 0.0402, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.775614552605703, |
| "grad_norm": 0.8440957069396973, |
| "learning_rate": 7.279885715597896e-07, |
| "loss": 0.0367, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.776007866273353, |
| "grad_norm": 1.528245210647583, |
| "learning_rate": 7.255677530879713e-07, |
| "loss": 0.0336, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.776401179941003, |
| "grad_norm": 1.6772621870040894, |
| "learning_rate": 7.231502829347056e-07, |
| "loss": 0.0388, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.7767944936086529, |
| "grad_norm": 0.85129314661026, |
| "learning_rate": 7.207361656617112e-07, |
| "loss": 0.0521, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.7771878072763029, |
| "grad_norm": 1.1908273696899414, |
| "learning_rate": 7.183254058243791e-07, |
| "loss": 0.0419, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.7775811209439528, |
| "grad_norm": 1.2314374446868896, |
| "learning_rate": 7.159180079717656e-07, |
| "loss": 0.044, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.7779744346116028, |
| "grad_norm": 1.7192610502243042, |
| "learning_rate": 7.135139766465838e-07, |
| "loss": 0.0663, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.7783677482792527, |
| "grad_norm": 1.5432205200195312, |
| "learning_rate": 7.111133163851916e-07, |
| "loss": 0.0267, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.7787610619469026, |
| "grad_norm": 0.759152352809906, |
| "learning_rate": 7.087160317175881e-07, |
| "loss": 0.0299, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7791543756145526, |
| "grad_norm": 0.9122269749641418, |
| "learning_rate": 7.06322127167402e-07, |
| "loss": 0.0301, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.7795476892822025, |
| "grad_norm": 0.7516564130783081, |
| "learning_rate": 7.03931607251884e-07, |
| "loss": 0.0627, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.7799410029498525, |
| "grad_norm": 1.2953605651855469, |
| "learning_rate": 7.015444764818988e-07, |
| "loss": 0.0571, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.7803343166175024, |
| "grad_norm": 0.8770161271095276, |
| "learning_rate": 6.991607393619129e-07, |
| "loss": 0.0322, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.7807276302851525, |
| "grad_norm": 0.8347287774085999, |
| "learning_rate": 6.967804003899925e-07, |
| "loss": 0.0497, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.7811209439528024, |
| "grad_norm": 0.5185628533363342, |
| "learning_rate": 6.944034640577896e-07, |
| "loss": 0.0292, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.7815142576204523, |
| "grad_norm": 0.9084299802780151, |
| "learning_rate": 6.920299348505365e-07, |
| "loss": 0.0343, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.7819075712881023, |
| "grad_norm": 1.2148305177688599, |
| "learning_rate": 6.896598172470356e-07, |
| "loss": 0.07, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.7823008849557522, |
| "grad_norm": 1.0693104267120361, |
| "learning_rate": 6.872931157196519e-07, |
| "loss": 0.0509, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.7826941986234022, |
| "grad_norm": 0.5483916997909546, |
| "learning_rate": 6.849298347343044e-07, |
| "loss": 0.04, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7830875122910521, |
| "grad_norm": 0.9246038794517517, |
| "learning_rate": 6.825699787504586e-07, |
| "loss": 0.0602, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.783480825958702, |
| "grad_norm": 0.7501392960548401, |
| "learning_rate": 6.802135522211142e-07, |
| "loss": 0.0331, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.783874139626352, |
| "grad_norm": 0.8467764854431152, |
| "learning_rate": 6.778605595928025e-07, |
| "loss": 0.0325, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.7842674532940019, |
| "grad_norm": 0.5727487206459045, |
| "learning_rate": 6.755110053055738e-07, |
| "loss": 0.0264, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.7846607669616519, |
| "grad_norm": 1.1488757133483887, |
| "learning_rate": 6.731648937929911e-07, |
| "loss": 0.0548, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.7850540806293018, |
| "grad_norm": 0.7147387862205505, |
| "learning_rate": 6.708222294821196e-07, |
| "loss": 0.0548, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.7854473942969519, |
| "grad_norm": 1.0995930433273315, |
| "learning_rate": 6.684830167935207e-07, |
| "loss": 0.0476, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.7858407079646018, |
| "grad_norm": 1.1355059146881104, |
| "learning_rate": 6.66147260141243e-07, |
| "loss": 0.0501, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.7862340216322518, |
| "grad_norm": 0.7553796768188477, |
| "learning_rate": 6.638149639328134e-07, |
| "loss": 0.0686, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.7866273352999017, |
| "grad_norm": 0.8902336359024048, |
| "learning_rate": 6.614861325692277e-07, |
| "loss": 0.0349, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7870206489675516, |
| "grad_norm": 1.090766429901123, |
| "learning_rate": 6.591607704449446e-07, |
| "loss": 0.0527, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.7874139626352016, |
| "grad_norm": 1.142582654953003, |
| "learning_rate": 6.568388819478769e-07, |
| "loss": 0.0537, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.7878072763028515, |
| "grad_norm": 1.449288010597229, |
| "learning_rate": 6.545204714593825e-07, |
| "loss": 0.0587, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.7882005899705015, |
| "grad_norm": 1.7187999486923218, |
| "learning_rate": 6.522055433542557e-07, |
| "loss": 0.0624, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.7885939036381514, |
| "grad_norm": 1.5539288520812988, |
| "learning_rate": 6.49894102000721e-07, |
| "loss": 0.0553, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.7889872173058013, |
| "grad_norm": 1.4520833492279053, |
| "learning_rate": 6.47586151760421e-07, |
| "loss": 0.0297, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.7893805309734513, |
| "grad_norm": 1.2936962842941284, |
| "learning_rate": 6.452816969884127e-07, |
| "loss": 0.0335, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.7897738446411012, |
| "grad_norm": 1.2932931184768677, |
| "learning_rate": 6.429807420331568e-07, |
| "loss": 0.0622, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.7901671583087513, |
| "grad_norm": 0.9521369934082031, |
| "learning_rate": 6.406832912365101e-07, |
| "loss": 0.0669, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.7905604719764012, |
| "grad_norm": 0.9570633172988892, |
| "learning_rate": 6.383893489337172e-07, |
| "loss": 0.054, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.7909537856440512, |
| "grad_norm": 0.7929260730743408, |
| "learning_rate": 6.360989194534004e-07, |
| "loss": 0.028, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.7913470993117011, |
| "grad_norm": 1.2527369260787964, |
| "learning_rate": 6.338120071175558e-07, |
| "loss": 0.0631, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.791740412979351, |
| "grad_norm": 0.9790352582931519, |
| "learning_rate": 6.315286162415412e-07, |
| "loss": 0.0485, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.792133726647001, |
| "grad_norm": 1.417540431022644, |
| "learning_rate": 6.292487511340709e-07, |
| "loss": 0.0575, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.7925270403146509, |
| "grad_norm": 1.3456201553344727, |
| "learning_rate": 6.269724160972043e-07, |
| "loss": 0.0709, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.7929203539823009, |
| "grad_norm": 1.3013477325439453, |
| "learning_rate": 6.246996154263421e-07, |
| "loss": 0.0571, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.7933136676499508, |
| "grad_norm": 1.0679081678390503, |
| "learning_rate": 6.224303534102125e-07, |
| "loss": 0.0395, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.7937069813176008, |
| "grad_norm": 1.3359334468841553, |
| "learning_rate": 6.201646343308685e-07, |
| "loss": 0.0439, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.7941002949852507, |
| "grad_norm": 1.4549192190170288, |
| "learning_rate": 6.179024624636772e-07, |
| "loss": 0.057, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.7944936086529006, |
| "grad_norm": 0.8267070055007935, |
| "learning_rate": 6.156438420773125e-07, |
| "loss": 0.0207, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7948869223205507, |
| "grad_norm": 1.1873496770858765, |
| "learning_rate": 6.133887774337471e-07, |
| "loss": 0.0449, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.7952802359882006, |
| "grad_norm": 1.971118450164795, |
| "learning_rate": 6.111372727882417e-07, |
| "loss": 0.0444, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.7956735496558506, |
| "grad_norm": 0.5039023160934448, |
| "learning_rate": 6.088893323893419e-07, |
| "loss": 0.0165, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.7960668633235005, |
| "grad_norm": 1.2124491930007935, |
| "learning_rate": 6.066449604788666e-07, |
| "loss": 0.0384, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.7964601769911505, |
| "grad_norm": 1.4836233854293823, |
| "learning_rate": 6.044041612919016e-07, |
| "loss": 0.0711, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.7968534906588004, |
| "grad_norm": 1.4890559911727905, |
| "learning_rate": 6.021669390567902e-07, |
| "loss": 0.048, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.7972468043264503, |
| "grad_norm": 0.5430221557617188, |
| "learning_rate": 5.999332979951272e-07, |
| "loss": 0.049, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.7976401179941003, |
| "grad_norm": 0.9645549654960632, |
| "learning_rate": 5.977032423217482e-07, |
| "loss": 0.0201, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.7980334316617502, |
| "grad_norm": 1.7599254846572876, |
| "learning_rate": 5.954767762447244e-07, |
| "loss": 0.0524, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.7984267453294002, |
| "grad_norm": 0.6832358241081238, |
| "learning_rate": 5.932539039653535e-07, |
| "loss": 0.0451, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.7988200589970501, |
| "grad_norm": 0.5469837188720703, |
| "learning_rate": 5.910346296781511e-07, |
| "loss": 0.0342, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.7992133726647, |
| "grad_norm": 1.466138482093811, |
| "learning_rate": 5.888189575708453e-07, |
| "loss": 0.0619, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.7996066863323501, |
| "grad_norm": 1.1846930980682373, |
| "learning_rate": 5.866068918243634e-07, |
| "loss": 0.0527, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.8236525058746338, |
| "learning_rate": 5.843984366128308e-07, |
| "loss": 0.0427, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.80039331366765, |
| "grad_norm": 0.8086917996406555, |
| "learning_rate": 5.821935961035589e-07, |
| "loss": 0.0743, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.8007866273352999, |
| "grad_norm": 1.3642960786819458, |
| "learning_rate": 5.799923744570376e-07, |
| "loss": 0.0609, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.8011799410029499, |
| "grad_norm": 1.4578794240951538, |
| "learning_rate": 5.777947758269295e-07, |
| "loss": 0.0828, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.8015732546705998, |
| "grad_norm": 0.5745184421539307, |
| "learning_rate": 5.756008043600594e-07, |
| "loss": 0.0444, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.8019665683382498, |
| "grad_norm": 2.3881709575653076, |
| "learning_rate": 5.734104641964075e-07, |
| "loss": 0.074, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.8023598820058997, |
| "grad_norm": 1.0504474639892578, |
| "learning_rate": 5.712237594691028e-07, |
| "loss": 0.0573, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.8027531956735496, |
| "grad_norm": 1.7040578126907349, |
| "learning_rate": 5.690406943044138e-07, |
| "loss": 0.0472, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.8031465093411996, |
| "grad_norm": 0.9709568619728088, |
| "learning_rate": 5.668612728217412e-07, |
| "loss": 0.0305, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.8035398230088495, |
| "grad_norm": 2.0475189685821533, |
| "learning_rate": 5.646854991336112e-07, |
| "loss": 0.0661, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.8039331366764995, |
| "grad_norm": 1.4109443426132202, |
| "learning_rate": 5.625133773456639e-07, |
| "loss": 0.0698, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.8043264503441495, |
| "grad_norm": 0.8161342740058899, |
| "learning_rate": 5.603449115566511e-07, |
| "loss": 0.0417, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.8047197640117995, |
| "grad_norm": 1.1740028858184814, |
| "learning_rate": 5.581801058584252e-07, |
| "loss": 0.0444, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.8051130776794494, |
| "grad_norm": 2.580334424972534, |
| "learning_rate": 5.560189643359312e-07, |
| "loss": 0.0988, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.8055063913470993, |
| "grad_norm": 0.8429194092750549, |
| "learning_rate": 5.538614910672005e-07, |
| "loss": 0.0312, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.8058997050147493, |
| "grad_norm": 0.8115060925483704, |
| "learning_rate": 5.517076901233434e-07, |
| "loss": 0.0561, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.8062930186823992, |
| "grad_norm": 0.5982792377471924, |
| "learning_rate": 5.495575655685382e-07, |
| "loss": 0.0369, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.8066863323500492, |
| "grad_norm": 1.5597193241119385, |
| "learning_rate": 5.474111214600278e-07, |
| "loss": 0.0701, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.8070796460176991, |
| "grad_norm": 1.3873978853225708, |
| "learning_rate": 5.452683618481103e-07, |
| "loss": 0.0372, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.807472959685349, |
| "grad_norm": 0.9317770004272461, |
| "learning_rate": 5.431292907761305e-07, |
| "loss": 0.0433, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.807866273352999, |
| "grad_norm": 1.736678957939148, |
| "learning_rate": 5.409939122804736e-07, |
| "loss": 0.0562, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.8082595870206489, |
| "grad_norm": 1.1516214609146118, |
| "learning_rate": 5.388622303905558e-07, |
| "loss": 0.0438, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.8086529006882989, |
| "grad_norm": 0.855049192905426, |
| "learning_rate": 5.367342491288186e-07, |
| "loss": 0.0389, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.8090462143559489, |
| "grad_norm": 0.8584917187690735, |
| "learning_rate": 5.346099725107213e-07, |
| "loss": 0.0686, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.8094395280235989, |
| "grad_norm": 1.1630586385726929, |
| "learning_rate": 5.324894045447312e-07, |
| "loss": 0.0361, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.8098328416912488, |
| "grad_norm": 1.2655314207077026, |
| "learning_rate": 5.303725492323194e-07, |
| "loss": 0.0284, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.8102261553588987, |
| "grad_norm": 1.1947369575500488, |
| "learning_rate": 5.282594105679481e-07, |
| "loss": 0.0562, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.8106194690265487, |
| "grad_norm": 0.7869384288787842, |
| "learning_rate": 5.261499925390692e-07, |
| "loss": 0.0407, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.8110127826941986, |
| "grad_norm": 1.6076072454452515, |
| "learning_rate": 5.240442991261127e-07, |
| "loss": 0.0384, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.8114060963618486, |
| "grad_norm": 2.237993001937866, |
| "learning_rate": 5.219423343024804e-07, |
| "loss": 0.0539, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.8117994100294985, |
| "grad_norm": 0.8259546756744385, |
| "learning_rate": 5.198441020345382e-07, |
| "loss": 0.0436, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.8121927236971485, |
| "grad_norm": 1.2509441375732422, |
| "learning_rate": 5.177496062816101e-07, |
| "loss": 0.0462, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.8125860373647984, |
| "grad_norm": 1.06137216091156, |
| "learning_rate": 5.156588509959659e-07, |
| "loss": 0.0339, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.8129793510324483, |
| "grad_norm": 0.7373847365379333, |
| "learning_rate": 5.13571840122821e-07, |
| "loss": 0.0301, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.8133726647000983, |
| "grad_norm": 1.1653954982757568, |
| "learning_rate": 5.114885776003234e-07, |
| "loss": 0.0427, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.8137659783677483, |
| "grad_norm": 1.518700122833252, |
| "learning_rate": 5.094090673595478e-07, |
| "loss": 0.0568, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.8141592920353983, |
| "grad_norm": 0.9491556286811829, |
| "learning_rate": 5.073333133244896e-07, |
| "loss": 0.0296, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.8145526057030482, |
| "grad_norm": 1.12187922000885, |
| "learning_rate": 5.052613194120554e-07, |
| "loss": 0.0625, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.8149459193706982, |
| "grad_norm": 0.9381184577941895, |
| "learning_rate": 5.031930895320569e-07, |
| "loss": 0.0318, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.8153392330383481, |
| "grad_norm": 0.8680362701416016, |
| "learning_rate": 5.011286275872021e-07, |
| "loss": 0.0631, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.815732546705998, |
| "grad_norm": 1.5543493032455444, |
| "learning_rate": 4.990679374730905e-07, |
| "loss": 0.0754, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.816125860373648, |
| "grad_norm": 1.3975200653076172, |
| "learning_rate": 4.970110230782035e-07, |
| "loss": 0.072, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.8165191740412979, |
| "grad_norm": 0.8037746548652649, |
| "learning_rate": 4.949578882838982e-07, |
| "loss": 0.0385, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.8169124877089479, |
| "grad_norm": 0.7833993434906006, |
| "learning_rate": 4.929085369643988e-07, |
| "loss": 0.0418, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.8173058013765978, |
| "grad_norm": 0.8177001476287842, |
| "learning_rate": 4.908629729867908e-07, |
| "loss": 0.0485, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.8176991150442477, |
| "grad_norm": 0.7933450937271118, |
| "learning_rate": 4.88821200211014e-07, |
| "loss": 0.0466, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.8180924287118977, |
| "grad_norm": 0.5968790054321289, |
| "learning_rate": 4.867832224898517e-07, |
| "loss": 0.0253, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.8184857423795477, |
| "grad_norm": 1.4022417068481445, |
| "learning_rate": 4.847490436689281e-07, |
| "loss": 0.0431, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.8188790560471977, |
| "grad_norm": 2.319401264190674, |
| "learning_rate": 4.827186675866985e-07, |
| "loss": 0.0493, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.8192723697148476, |
| "grad_norm": 1.0119627714157104, |
| "learning_rate": 4.806920980744426e-07, |
| "loss": 0.0606, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.8196656833824976, |
| "grad_norm": 1.2110787630081177, |
| "learning_rate": 4.786693389562566e-07, |
| "loss": 0.0582, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.8200589970501475, |
| "grad_norm": 0.7724167704582214, |
| "learning_rate": 4.7665039404904747e-07, |
| "loss": 0.0457, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.8204523107177975, |
| "grad_norm": 1.5843499898910522, |
| "learning_rate": 4.746352671625237e-07, |
| "loss": 0.0482, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.8208456243854474, |
| "grad_norm": 1.3220843076705933, |
| "learning_rate": 4.72623962099191e-07, |
| "loss": 0.0505, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.8212389380530973, |
| "grad_norm": 1.6696242094039917, |
| "learning_rate": 4.7061648265434053e-07, |
| "loss": 0.0587, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.8216322517207473, |
| "grad_norm": 1.341960072517395, |
| "learning_rate": 4.6861283261604745e-07, |
| "loss": 0.0781, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.8220255653883972, |
| "grad_norm": 1.6525554656982422, |
| "learning_rate": 4.666130157651594e-07, |
| "loss": 0.052, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.8224188790560472, |
| "grad_norm": 1.0084091424942017, |
| "learning_rate": 4.6461703587529106e-07, |
| "loss": 0.0354, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.8228121927236971, |
| "grad_norm": 0.8987352848052979, |
| "learning_rate": 4.62624896712818e-07, |
| "loss": 0.0351, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.8232055063913472, |
| "grad_norm": 1.0085314512252808, |
| "learning_rate": 4.6063660203686635e-07, |
| "loss": 0.0459, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.8235988200589971, |
| "grad_norm": 1.4987783432006836, |
| "learning_rate": 4.586521555993087e-07, |
| "loss": 0.0771, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.823992133726647, |
| "grad_norm": 1.5976486206054688, |
| "learning_rate": 4.5667156114475695e-07, |
| "loss": 0.0766, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.824385447394297, |
| "grad_norm": 0.9721060395240784, |
| "learning_rate": 4.5469482241055324e-07, |
| "loss": 0.0514, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.8247787610619469, |
| "grad_norm": 0.835397481918335, |
| "learning_rate": 4.527219431267646e-07, |
| "loss": 0.0352, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.8251720747295969, |
| "grad_norm": 1.1280697584152222, |
| "learning_rate": 4.507529270161759e-07, |
| "loss": 0.0712, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.8255653883972468, |
| "grad_norm": 1.8154939413070679, |
| "learning_rate": 4.4878777779428034e-07, |
| "loss": 0.0918, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.8259587020648967, |
| "grad_norm": 1.067765474319458, |
| "learning_rate": 4.4682649916927614e-07, |
| "loss": 0.0357, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8263520157325467, |
| "grad_norm": 1.0095484256744385, |
| "learning_rate": 4.4486909484205725e-07, |
| "loss": 0.0315, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.8267453294001966, |
| "grad_norm": 1.7903807163238525, |
| "learning_rate": 4.429155685062073e-07, |
| "loss": 0.0598, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.8271386430678466, |
| "grad_norm": 1.5948070287704468, |
| "learning_rate": 4.409659238479919e-07, |
| "loss": 0.0408, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.8275319567354965, |
| "grad_norm": 0.805156946182251, |
| "learning_rate": 4.39020164546351e-07, |
| "loss": 0.0448, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.8279252704031466, |
| "grad_norm": 0.4440039098262787, |
| "learning_rate": 4.370782942728946e-07, |
| "loss": 0.0279, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.8283185840707965, |
| "grad_norm": 0.9887676239013672, |
| "learning_rate": 4.3514031669189325e-07, |
| "loss": 0.0706, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.8287118977384464, |
| "grad_norm": 1.1825933456420898, |
| "learning_rate": 4.3320623546027283e-07, |
| "loss": 0.0608, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.8291052114060964, |
| "grad_norm": 1.8713337182998657, |
| "learning_rate": 4.312760542276059e-07, |
| "loss": 0.049, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.8294985250737463, |
| "grad_norm": 0.9182631969451904, |
| "learning_rate": 4.293497766361068e-07, |
| "loss": 0.0436, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.8298918387413963, |
| "grad_norm": 1.1083096265792847, |
| "learning_rate": 4.2742740632062243e-07, |
| "loss": 0.0483, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.8302851524090462, |
| "grad_norm": 2.0837628841400146, |
| "learning_rate": 4.255089469086279e-07, |
| "loss": 0.0663, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.8306784660766962, |
| "grad_norm": 1.2065215110778809, |
| "learning_rate": 4.235944020202182e-07, |
| "loss": 0.0673, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.8310717797443461, |
| "grad_norm": 1.3495663404464722, |
| "learning_rate": 4.216837752681019e-07, |
| "loss": 0.0589, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.831465093411996, |
| "grad_norm": 0.8407555818557739, |
| "learning_rate": 4.19777070257594e-07, |
| "loss": 0.0309, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.831858407079646, |
| "grad_norm": 0.9763451814651489, |
| "learning_rate": 4.1787429058660845e-07, |
| "loss": 0.0231, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.8322517207472959, |
| "grad_norm": 1.1487807035446167, |
| "learning_rate": 4.159754398456531e-07, |
| "loss": 0.0582, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.832645034414946, |
| "grad_norm": 0.9778567552566528, |
| "learning_rate": 4.14080521617822e-07, |
| "loss": 0.0349, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.8330383480825959, |
| "grad_norm": 1.1251294612884521, |
| "learning_rate": 4.121895394787881e-07, |
| "loss": 0.0608, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.8334316617502459, |
| "grad_norm": 0.8375036716461182, |
| "learning_rate": 4.103024969967981e-07, |
| "loss": 0.0406, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.8338249754178958, |
| "grad_norm": 1.1409391164779663, |
| "learning_rate": 4.084193977326625e-07, |
| "loss": 0.0545, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.8342182890855457, |
| "grad_norm": 1.0144537687301636, |
| "learning_rate": 4.0654024523975323e-07, |
| "loss": 0.076, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.8346116027531957, |
| "grad_norm": 1.7752301692962646, |
| "learning_rate": 4.0466504306399366e-07, |
| "loss": 0.0647, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.8350049164208456, |
| "grad_norm": 1.1848422288894653, |
| "learning_rate": 4.027937947438532e-07, |
| "loss": 0.0642, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.8353982300884956, |
| "grad_norm": 0.8530738353729248, |
| "learning_rate": 4.009265038103402e-07, |
| "loss": 0.0407, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.8357915437561455, |
| "grad_norm": 0.9213998317718506, |
| "learning_rate": 3.9906317378699684e-07, |
| "loss": 0.0306, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.8361848574237954, |
| "grad_norm": 0.8134070038795471, |
| "learning_rate": 3.972038081898885e-07, |
| "loss": 0.0378, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.8365781710914454, |
| "grad_norm": 1.0904289484024048, |
| "learning_rate": 3.9534841052760174e-07, |
| "loss": 0.032, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.8369714847590953, |
| "grad_norm": 2.0691423416137695, |
| "learning_rate": 3.9349698430123566e-07, |
| "loss": 0.0737, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.8373647984267454, |
| "grad_norm": 1.1641324758529663, |
| "learning_rate": 3.9164953300439456e-07, |
| "loss": 0.0546, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.8377581120943953, |
| "grad_norm": 0.9116164445877075, |
| "learning_rate": 3.898060601231832e-07, |
| "loss": 0.0533, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.8381514257620453, |
| "grad_norm": 1.0761325359344482, |
| "learning_rate": 3.879665691361975e-07, |
| "loss": 0.0465, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.8385447394296952, |
| "grad_norm": 1.2517597675323486, |
| "learning_rate": 3.861310635145207e-07, |
| "loss": 0.0509, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.8389380530973451, |
| "grad_norm": 0.7470773458480835, |
| "learning_rate": 3.8429954672171613e-07, |
| "loss": 0.0452, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.8393313667649951, |
| "grad_norm": 1.572190284729004, |
| "learning_rate": 3.824720222138192e-07, |
| "loss": 0.0388, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.839724680432645, |
| "grad_norm": 1.1324615478515625, |
| "learning_rate": 3.806484934393331e-07, |
| "loss": 0.0696, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.840117994100295, |
| "grad_norm": 1.03518807888031, |
| "learning_rate": 3.788289638392206e-07, |
| "loss": 0.0333, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.8405113077679449, |
| "grad_norm": 1.2855054140090942, |
| "learning_rate": 3.7701343684689725e-07, |
| "loss": 0.0573, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.8409046214355949, |
| "grad_norm": 1.5672320127487183, |
| "learning_rate": 3.7520191588822695e-07, |
| "loss": 0.0618, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.8412979351032448, |
| "grad_norm": 1.3046908378601074, |
| "learning_rate": 3.7339440438151383e-07, |
| "loss": 0.0633, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.8416912487708947, |
| "grad_norm": 0.9728895425796509, |
| "learning_rate": 3.7159090573749693e-07, |
| "loss": 0.0287, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.8420845624385448, |
| "grad_norm": 1.4470866918563843, |
| "learning_rate": 3.6979142335934246e-07, |
| "loss": 0.0439, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.8424778761061947, |
| "grad_norm": 0.802937924861908, |
| "learning_rate": 3.67995960642637e-07, |
| "loss": 0.0316, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.8428711897738447, |
| "grad_norm": 0.8089593052864075, |
| "learning_rate": 3.6620452097538424e-07, |
| "loss": 0.0506, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.8432645034414946, |
| "grad_norm": 0.9571702480316162, |
| "learning_rate": 3.644171077379949e-07, |
| "loss": 0.0273, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.8436578171091446, |
| "grad_norm": 1.022767186164856, |
| "learning_rate": 3.6263372430328266e-07, |
| "loss": 0.0497, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.8440511307767945, |
| "grad_norm": 1.133183479309082, |
| "learning_rate": 3.6085437403645645e-07, |
| "loss": 0.0375, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.8444444444444444, |
| "grad_norm": 1.603365421295166, |
| "learning_rate": 3.5907906029511606e-07, |
| "loss": 0.0535, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.8448377581120944, |
| "grad_norm": 1.052833080291748, |
| "learning_rate": 3.573077864292421e-07, |
| "loss": 0.0419, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.8452310717797443, |
| "grad_norm": 0.8957949280738831, |
| "learning_rate": 3.555405557811936e-07, |
| "loss": 0.054, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.8456243854473943, |
| "grad_norm": 1.3401049375534058, |
| "learning_rate": 3.537773716857004e-07, |
| "loss": 0.0558, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.8460176991150442, |
| "grad_norm": 1.3811299800872803, |
| "learning_rate": 3.5201823746985554e-07, |
| "loss": 0.0436, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.8464110127826941, |
| "grad_norm": 1.3221920728683472, |
| "learning_rate": 3.5026315645311114e-07, |
| "loss": 0.0679, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.8468043264503442, |
| "grad_norm": 0.608182966709137, |
| "learning_rate": 3.485121319472695e-07, |
| "loss": 0.0624, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.8471976401179941, |
| "grad_norm": 0.8964172601699829, |
| "learning_rate": 3.4676516725647953e-07, |
| "loss": 0.0394, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.8475909537856441, |
| "grad_norm": 0.7584964632987976, |
| "learning_rate": 3.450222656772292e-07, |
| "loss": 0.0484, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.847984267453294, |
| "grad_norm": 0.3789440095424652, |
| "learning_rate": 3.43283430498339e-07, |
| "loss": 0.0277, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.848377581120944, |
| "grad_norm": 0.7871941924095154, |
| "learning_rate": 3.4154866500095695e-07, |
| "loss": 0.0493, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.8487708947885939, |
| "grad_norm": 1.302708625793457, |
| "learning_rate": 3.3981797245855096e-07, |
| "loss": 0.0799, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.8491642084562439, |
| "grad_norm": 0.7635212540626526, |
| "learning_rate": 3.380913561369037e-07, |
| "loss": 0.0427, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.8495575221238938, |
| "grad_norm": 0.8605564832687378, |
| "learning_rate": 3.363688192941067e-07, |
| "loss": 0.0462, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.8499508357915437, |
| "grad_norm": 0.9630613923072815, |
| "learning_rate": 3.346503651805513e-07, |
| "loss": 0.0637, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.8503441494591937, |
| "grad_norm": 1.0170080661773682, |
| "learning_rate": 3.329359970389279e-07, |
| "loss": 0.061, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.8507374631268436, |
| "grad_norm": 0.8377442359924316, |
| "learning_rate": 3.312257181042142e-07, |
| "loss": 0.0449, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.8511307767944936, |
| "grad_norm": 0.9564546346664429, |
| "learning_rate": 3.2951953160367365e-07, |
| "loss": 0.0496, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.8515240904621436, |
| "grad_norm": 0.5969823002815247, |
| "learning_rate": 3.2781744075684576e-07, |
| "loss": 0.0404, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.8519174041297936, |
| "grad_norm": 1.0183027982711792, |
| "learning_rate": 3.261194487755426e-07, |
| "loss": 0.0563, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.8523107177974435, |
| "grad_norm": 1.3610613346099854, |
| "learning_rate": 3.2442555886384145e-07, |
| "loss": 0.0791, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.8527040314650934, |
| "grad_norm": 0.7566685080528259, |
| "learning_rate": 3.2273577421807976e-07, |
| "loss": 0.0415, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.8530973451327434, |
| "grad_norm": 1.1211597919464111, |
| "learning_rate": 3.2105009802684636e-07, |
| "loss": 0.0874, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.8534906588003933, |
| "grad_norm": 1.6669408082962036, |
| "learning_rate": 3.1936853347097923e-07, |
| "loss": 0.0521, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.8538839724680433, |
| "grad_norm": 0.9726613163948059, |
| "learning_rate": 3.1769108372355804e-07, |
| "loss": 0.0457, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.8542772861356932, |
| "grad_norm": 1.5157469511032104, |
| "learning_rate": 3.1601775194989693e-07, |
| "loss": 0.0574, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.8546705998033431, |
| "grad_norm": 2.319978713989258, |
| "learning_rate": 3.143485413075398e-07, |
| "loss": 0.0604, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.8550639134709931, |
| "grad_norm": 1.160510778427124, |
| "learning_rate": 3.1268345494625486e-07, |
| "loss": 0.0454, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.855457227138643, |
| "grad_norm": 1.0284311771392822, |
| "learning_rate": 3.1102249600802573e-07, |
| "loss": 0.0375, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.855850540806293, |
| "grad_norm": 0.7068095207214355, |
| "learning_rate": 3.093656676270501e-07, |
| "loss": 0.0409, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.856243854473943, |
| "grad_norm": 0.8698954582214355, |
| "learning_rate": 3.0771297292972986e-07, |
| "loss": 0.0547, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.856637168141593, |
| "grad_norm": 0.7371048331260681, |
| "learning_rate": 3.0606441503466753e-07, |
| "loss": 0.0661, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.8570304818092429, |
| "grad_norm": 0.6116827726364136, |
| "learning_rate": 3.044199970526593e-07, |
| "loss": 0.0199, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.8574237954768928, |
| "grad_norm": 0.9910300374031067, |
| "learning_rate": 3.027797220866896e-07, |
| "loss": 0.0454, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8578171091445428, |
| "grad_norm": 0.9253597855567932, |
| "learning_rate": 3.01143593231924e-07, |
| "loss": 0.0465, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.8582104228121927, |
| "grad_norm": 0.6476548314094543, |
| "learning_rate": 2.995116135757059e-07, |
| "loss": 0.0385, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.8586037364798427, |
| "grad_norm": 0.8749169707298279, |
| "learning_rate": 2.978837861975484e-07, |
| "loss": 0.0474, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.8589970501474926, |
| "grad_norm": 1.4006898403167725, |
| "learning_rate": 2.962601141691296e-07, |
| "loss": 0.0511, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.8593903638151426, |
| "grad_norm": 0.8508985638618469, |
| "learning_rate": 2.9464060055428703e-07, |
| "loss": 0.0549, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.8597836774827925, |
| "grad_norm": 1.1002285480499268, |
| "learning_rate": 2.930252484090101e-07, |
| "loss": 0.0283, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.8601769911504424, |
| "grad_norm": 0.8702027201652527, |
| "learning_rate": 2.9141406078143644e-07, |
| "loss": 0.0605, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.8605703048180924, |
| "grad_norm": 0.79606693983078, |
| "learning_rate": 2.8980704071184557e-07, |
| "loss": 0.0598, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.8609636184857424, |
| "grad_norm": 1.1964335441589355, |
| "learning_rate": 2.882041912326525e-07, |
| "loss": 0.046, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.8613569321533924, |
| "grad_norm": 1.1686105728149414, |
| "learning_rate": 2.8660551536840277e-07, |
| "loss": 0.0329, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.8617502458210423, |
| "grad_norm": 0.858632504940033, |
| "learning_rate": 2.8501101613576526e-07, |
| "loss": 0.0661, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.8621435594886923, |
| "grad_norm": 0.984893262386322, |
| "learning_rate": 2.834206965435293e-07, |
| "loss": 0.0351, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.8625368731563422, |
| "grad_norm": 1.3127596378326416, |
| "learning_rate": 2.818345595925959e-07, |
| "loss": 0.0387, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.8629301868239921, |
| "grad_norm": 1.4564718008041382, |
| "learning_rate": 2.8025260827597463e-07, |
| "loss": 0.0424, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.8633235004916421, |
| "grad_norm": 0.5872806310653687, |
| "learning_rate": 2.7867484557877607e-07, |
| "loss": 0.0414, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.863716814159292, |
| "grad_norm": 1.0555849075317383, |
| "learning_rate": 2.7710127447820783e-07, |
| "loss": 0.0519, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.864110127826942, |
| "grad_norm": 1.0422883033752441, |
| "learning_rate": 2.7553189794356615e-07, |
| "loss": 0.0562, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.8645034414945919, |
| "grad_norm": 1.2551977634429932, |
| "learning_rate": 2.739667189362347e-07, |
| "loss": 0.0344, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.8648967551622418, |
| "grad_norm": 1.0713584423065186, |
| "learning_rate": 2.724057404096744e-07, |
| "loss": 0.0385, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.8652900688298918, |
| "grad_norm": 0.6667132377624512, |
| "learning_rate": 2.708489653094218e-07, |
| "loss": 0.0525, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8656833824975418, |
| "grad_norm": 0.9178755283355713, |
| "learning_rate": 2.692963965730805e-07, |
| "loss": 0.0722, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.8660766961651918, |
| "grad_norm": 1.2695622444152832, |
| "learning_rate": 2.677480371303162e-07, |
| "loss": 0.0759, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.8664700098328417, |
| "grad_norm": 1.1370331048965454, |
| "learning_rate": 2.662038899028532e-07, |
| "loss": 0.0396, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.8668633235004917, |
| "grad_norm": 0.6956948041915894, |
| "learning_rate": 2.6466395780446657e-07, |
| "loss": 0.062, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.8672566371681416, |
| "grad_norm": 0.5956060886383057, |
| "learning_rate": 2.6312824374097794e-07, |
| "loss": 0.049, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.8676499508357916, |
| "grad_norm": 3.8347904682159424, |
| "learning_rate": 2.6159675061024905e-07, |
| "loss": 0.0654, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.8680432645034415, |
| "grad_norm": 1.0327752828598022, |
| "learning_rate": 2.6006948130217815e-07, |
| "loss": 0.024, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.8684365781710914, |
| "grad_norm": 1.1763917207717896, |
| "learning_rate": 2.585464386986908e-07, |
| "loss": 0.0487, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.8688298918387414, |
| "grad_norm": 1.6335638761520386, |
| "learning_rate": 2.570276256737386e-07, |
| "loss": 0.0451, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.8692232055063913, |
| "grad_norm": 1.1163750886917114, |
| "learning_rate": 2.555130450932922e-07, |
| "loss": 0.072, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.8696165191740413, |
| "grad_norm": 1.2412861585617065, |
| "learning_rate": 2.54002699815335e-07, |
| "loss": 0.0541, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.8700098328416912, |
| "grad_norm": 0.9547197222709656, |
| "learning_rate": 2.52496592689859e-07, |
| "loss": 0.04, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.8704031465093413, |
| "grad_norm": 1.4851540327072144, |
| "learning_rate": 2.5099472655885777e-07, |
| "loss": 0.0602, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.8707964601769912, |
| "grad_norm": 0.9040324687957764, |
| "learning_rate": 2.4949710425632353e-07, |
| "loss": 0.0395, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.8711897738446411, |
| "grad_norm": 1.1058231592178345, |
| "learning_rate": 2.4800372860823956e-07, |
| "loss": 0.0472, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.8715830875122911, |
| "grad_norm": 0.814282238483429, |
| "learning_rate": 2.465146024325765e-07, |
| "loss": 0.0541, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.871976401179941, |
| "grad_norm": 0.9722008109092712, |
| "learning_rate": 2.4502972853928606e-07, |
| "loss": 0.0581, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.872369714847591, |
| "grad_norm": 0.9943141341209412, |
| "learning_rate": 2.435491097302961e-07, |
| "loss": 0.0435, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.8727630285152409, |
| "grad_norm": 1.2543455362319946, |
| "learning_rate": 2.420727487995045e-07, |
| "loss": 0.0613, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.8731563421828908, |
| "grad_norm": 0.8473043441772461, |
| "learning_rate": 2.40600648532775e-07, |
| "loss": 0.0391, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8735496558505408, |
| "grad_norm": 1.0976766347885132, |
| "learning_rate": 2.3913281170793196e-07, |
| "loss": 0.0341, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.8739429695181907, |
| "grad_norm": 0.765153169631958, |
| "learning_rate": 2.376692410947548e-07, |
| "loss": 0.0335, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.8743362831858407, |
| "grad_norm": 1.2966009378433228, |
| "learning_rate": 2.3620993945497217e-07, |
| "loss": 0.0571, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.8747295968534906, |
| "grad_norm": 1.0903987884521484, |
| "learning_rate": 2.347549095422569e-07, |
| "loss": 0.0602, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.8751229105211407, |
| "grad_norm": 0.9129044413566589, |
| "learning_rate": 2.3330415410222212e-07, |
| "loss": 0.0508, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.8755162241887906, |
| "grad_norm": 1.3771973848342896, |
| "learning_rate": 2.3185767587241447e-07, |
| "loss": 0.0282, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.8759095378564405, |
| "grad_norm": 1.1595170497894287, |
| "learning_rate": 2.3041547758230977e-07, |
| "loss": 0.0768, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.8763028515240905, |
| "grad_norm": 0.7576168775558472, |
| "learning_rate": 2.2897756195330773e-07, |
| "loss": 0.0296, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.8766961651917404, |
| "grad_norm": 1.2020797729492188, |
| "learning_rate": 2.2754393169872685e-07, |
| "loss": 0.0392, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.8770894788593904, |
| "grad_norm": 1.2221319675445557, |
| "learning_rate": 2.2611458952379872e-07, |
| "loss": 0.0319, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.8774827925270403, |
| "grad_norm": 1.1023682355880737, |
| "learning_rate": 2.246895381256639e-07, |
| "loss": 0.0523, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.8778761061946903, |
| "grad_norm": 1.0071845054626465, |
| "learning_rate": 2.232687801933664e-07, |
| "loss": 0.034, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.8782694198623402, |
| "grad_norm": 0.8645428419113159, |
| "learning_rate": 2.2185231840784778e-07, |
| "loss": 0.0628, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.8786627335299901, |
| "grad_norm": 0.6460661292076111, |
| "learning_rate": 2.204401554419444e-07, |
| "loss": 0.045, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.8790560471976401, |
| "grad_norm": 1.7761812210083008, |
| "learning_rate": 2.1903229396037896e-07, |
| "loss": 0.0739, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.87944936086529, |
| "grad_norm": 1.3595634698867798, |
| "learning_rate": 2.1762873661975825e-07, |
| "loss": 0.041, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.8798426745329401, |
| "grad_norm": 0.8807711601257324, |
| "learning_rate": 2.1622948606856765e-07, |
| "loss": 0.0623, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.88023598820059, |
| "grad_norm": 1.0638388395309448, |
| "learning_rate": 2.1483454494716504e-07, |
| "loss": 0.0337, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.88062930186824, |
| "grad_norm": 0.9859362244606018, |
| "learning_rate": 2.1344391588777658e-07, |
| "loss": 0.0389, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.8810226155358899, |
| "grad_norm": 1.0022567510604858, |
| "learning_rate": 2.1205760151449206e-07, |
| "loss": 0.0358, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.8814159292035398, |
| "grad_norm": 0.8748469948768616, |
| "learning_rate": 2.106756044432598e-07, |
| "loss": 0.0367, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.8818092428711898, |
| "grad_norm": 1.0613561868667603, |
| "learning_rate": 2.0929792728187986e-07, |
| "loss": 0.0608, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.8822025565388397, |
| "grad_norm": 1.8184490203857422, |
| "learning_rate": 2.079245726300022e-07, |
| "loss": 0.0586, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.8825958702064897, |
| "grad_norm": 1.0881813764572144, |
| "learning_rate": 2.0655554307911997e-07, |
| "loss": 0.0603, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.8829891838741396, |
| "grad_norm": 1.0074139833450317, |
| "learning_rate": 2.05190841212565e-07, |
| "loss": 0.0666, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.8833824975417895, |
| "grad_norm": 1.1435564756393433, |
| "learning_rate": 2.038304696055024e-07, |
| "loss": 0.0312, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.8837758112094395, |
| "grad_norm": 0.6284701228141785, |
| "learning_rate": 2.0247443082492686e-07, |
| "loss": 0.0235, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.8841691248770894, |
| "grad_norm": 1.6139885187149048, |
| "learning_rate": 2.0112272742965678e-07, |
| "loss": 0.0262, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.8845624385447395, |
| "grad_norm": 0.8762457966804504, |
| "learning_rate": 1.997753619703291e-07, |
| "loss": 0.0431, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.8849557522123894, |
| "grad_norm": 1.287406086921692, |
| "learning_rate": 1.9843233698939617e-07, |
| "loss": 0.0457, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.8853490658800394, |
| "grad_norm": 1.3118491172790527, |
| "learning_rate": 1.9709365502111944e-07, |
| "loss": 0.0487, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.8857423795476893, |
| "grad_norm": 0.8101546764373779, |
| "learning_rate": 1.957593185915657e-07, |
| "loss": 0.0458, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.8861356932153392, |
| "grad_norm": 1.5364015102386475, |
| "learning_rate": 1.9442933021860095e-07, |
| "loss": 0.0407, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.8865290068829892, |
| "grad_norm": 0.9168291091918945, |
| "learning_rate": 1.9310369241188732e-07, |
| "loss": 0.0474, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.8869223205506391, |
| "grad_norm": 1.0423481464385986, |
| "learning_rate": 1.9178240767287666e-07, |
| "loss": 0.035, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.8873156342182891, |
| "grad_norm": 0.995087742805481, |
| "learning_rate": 1.904654784948079e-07, |
| "loss": 0.0596, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.887708947885939, |
| "grad_norm": 1.1472982168197632, |
| "learning_rate": 1.8915290736269965e-07, |
| "loss": 0.069, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.888102261553589, |
| "grad_norm": 0.7572572231292725, |
| "learning_rate": 1.878446967533476e-07, |
| "loss": 0.061, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.8884955752212389, |
| "grad_norm": 0.5118011832237244, |
| "learning_rate": 1.865408491353199e-07, |
| "loss": 0.0313, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.8399426937103271, |
| "learning_rate": 1.8524136696895068e-07, |
| "loss": 0.0444, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8892822025565389, |
| "grad_norm": 0.8290569186210632, |
| "learning_rate": 1.8394625270633793e-07, |
| "loss": 0.0384, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.8896755162241888, |
| "grad_norm": 1.0309621095657349, |
| "learning_rate": 1.8265550879133538e-07, |
| "loss": 0.0522, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.8900688298918388, |
| "grad_norm": 2.102466583251953, |
| "learning_rate": 1.8136913765955195e-07, |
| "loss": 0.0684, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.8904621435594887, |
| "grad_norm": 0.9560519456863403, |
| "learning_rate": 1.8008714173834456e-07, |
| "loss": 0.0411, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.8908554572271387, |
| "grad_norm": 0.7714261412620544, |
| "learning_rate": 1.7880952344681402e-07, |
| "loss": 0.0393, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.8912487708947886, |
| "grad_norm": 2.210777521133423, |
| "learning_rate": 1.7753628519580097e-07, |
| "loss": 0.0531, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.8916420845624385, |
| "grad_norm": 1.3124444484710693, |
| "learning_rate": 1.7626742938788105e-07, |
| "loss": 0.0808, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.8920353982300885, |
| "grad_norm": 0.8876106142997742, |
| "learning_rate": 1.7500295841735905e-07, |
| "loss": 0.0299, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.8924287118977384, |
| "grad_norm": 0.9470813870429993, |
| "learning_rate": 1.7374287467026767e-07, |
| "loss": 0.0289, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.8928220255653884, |
| "grad_norm": 1.1278401613235474, |
| "learning_rate": 1.7248718052435942e-07, |
| "loss": 0.0557, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.8932153392330383, |
| "grad_norm": 1.0883233547210693, |
| "learning_rate": 1.712358783491047e-07, |
| "loss": 0.0493, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.8936086529006882, |
| "grad_norm": 1.8595354557037354, |
| "learning_rate": 1.6998897050568618e-07, |
| "loss": 0.0583, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.8940019665683383, |
| "grad_norm": 1.1858155727386475, |
| "learning_rate": 1.6874645934699342e-07, |
| "loss": 0.0406, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.8943952802359882, |
| "grad_norm": 0.8429166674613953, |
| "learning_rate": 1.6750834721762117e-07, |
| "loss": 0.0575, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.8947885939036382, |
| "grad_norm": 1.4577648639678955, |
| "learning_rate": 1.6627463645386199e-07, |
| "loss": 0.0412, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.8951819075712881, |
| "grad_norm": 0.6947933435440063, |
| "learning_rate": 1.6504532938370427e-07, |
| "loss": 0.0465, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.8955752212389381, |
| "grad_norm": 0.8350834846496582, |
| "learning_rate": 1.6382042832682577e-07, |
| "loss": 0.0438, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.895968534906588, |
| "grad_norm": 1.2530003786087036, |
| "learning_rate": 1.6259993559459091e-07, |
| "loss": 0.0415, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.896361848574238, |
| "grad_norm": 1.0597574710845947, |
| "learning_rate": 1.613838534900447e-07, |
| "loss": 0.0399, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.8967551622418879, |
| "grad_norm": 0.8264654278755188, |
| "learning_rate": 1.601721843079107e-07, |
| "loss": 0.0348, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.8971484759095378, |
| "grad_norm": 0.8567057251930237, |
| "learning_rate": 1.5896493033458416e-07, |
| "loss": 0.029, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.8975417895771878, |
| "grad_norm": 1.390363335609436, |
| "learning_rate": 1.5776209384812946e-07, |
| "loss": 0.0815, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.8979351032448377, |
| "grad_norm": 0.9575844407081604, |
| "learning_rate": 1.5656367711827602e-07, |
| "loss": 0.0526, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.8983284169124877, |
| "grad_norm": 0.7833372950553894, |
| "learning_rate": 1.553696824064116e-07, |
| "loss": 0.0329, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.8987217305801377, |
| "grad_norm": 0.8829760551452637, |
| "learning_rate": 1.5418011196558085e-07, |
| "loss": 0.0395, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.8991150442477877, |
| "grad_norm": 1.0580815076828003, |
| "learning_rate": 1.529949680404799e-07, |
| "loss": 0.0648, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.8995083579154376, |
| "grad_norm": 1.051527738571167, |
| "learning_rate": 1.5181425286745155e-07, |
| "loss": 0.0618, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.8999016715830875, |
| "grad_norm": 1.5211282968521118, |
| "learning_rate": 1.5063796867448243e-07, |
| "loss": 0.047, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.9002949852507375, |
| "grad_norm": 0.3931565582752228, |
| "learning_rate": 1.4946611768119763e-07, |
| "loss": 0.0371, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.9006882989183874, |
| "grad_norm": 0.40819835662841797, |
| "learning_rate": 1.4829870209885605e-07, |
| "loss": 0.0399, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.9010816125860374, |
| "grad_norm": 1.5606259107589722, |
| "learning_rate": 1.471357241303481e-07, |
| "loss": 0.0537, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.9014749262536873, |
| "grad_norm": 0.4650862514972687, |
| "learning_rate": 1.4597718597019055e-07, |
| "loss": 0.0169, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.9018682399213372, |
| "grad_norm": 0.8470922112464905, |
| "learning_rate": 1.4482308980452164e-07, |
| "loss": 0.0308, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.9022615535889872, |
| "grad_norm": 1.1515922546386719, |
| "learning_rate": 1.436734378110985e-07, |
| "loss": 0.0459, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.9026548672566371, |
| "grad_norm": 1.0158207416534424, |
| "learning_rate": 1.425282321592908e-07, |
| "loss": 0.0667, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.9030481809242871, |
| "grad_norm": 0.6387980580329895, |
| "learning_rate": 1.4138747501007966e-07, |
| "loss": 0.0419, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.9034414945919371, |
| "grad_norm": 1.8949992656707764, |
| "learning_rate": 1.4025116851605125e-07, |
| "loss": 0.0556, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.9038348082595871, |
| "grad_norm": 0.8390710949897766, |
| "learning_rate": 1.3911931482139317e-07, |
| "loss": 0.0322, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.904228121927237, |
| "grad_norm": 0.6234549880027771, |
| "learning_rate": 1.379919160618909e-07, |
| "loss": 0.0334, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.904621435594887, |
| "grad_norm": 1.1114718914031982, |
| "learning_rate": 1.368689743649243e-07, |
| "loss": 0.0536, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.9050147492625369, |
| "grad_norm": 0.7461351752281189, |
| "learning_rate": 1.3575049184946122e-07, |
| "loss": 0.0371, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.9054080629301868, |
| "grad_norm": 0.9355785250663757, |
| "learning_rate": 1.346364706260564e-07, |
| "loss": 0.0296, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.9058013765978368, |
| "grad_norm": 0.5872256755828857, |
| "learning_rate": 1.3352691279684582e-07, |
| "loss": 0.0281, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.9061946902654867, |
| "grad_norm": 1.7544050216674805, |
| "learning_rate": 1.324218204555433e-07, |
| "loss": 0.056, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.9065880039331367, |
| "grad_norm": 0.6219866871833801, |
| "learning_rate": 1.3132119568743662e-07, |
| "loss": 0.0288, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.9069813176007866, |
| "grad_norm": 1.4340651035308838, |
| "learning_rate": 1.3022504056938196e-07, |
| "loss": 0.0504, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.9073746312684365, |
| "grad_norm": 0.5100427269935608, |
| "learning_rate": 1.2913335716980307e-07, |
| "loss": 0.0473, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.9077679449360865, |
| "grad_norm": 0.650513768196106, |
| "learning_rate": 1.2804614754868466e-07, |
| "loss": 0.0537, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.9081612586037365, |
| "grad_norm": 1.4720587730407715, |
| "learning_rate": 1.2696341375756982e-07, |
| "loss": 0.043, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.9085545722713865, |
| "grad_norm": 1.7473880052566528, |
| "learning_rate": 1.2588515783955564e-07, |
| "loss": 0.0551, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.9089478859390364, |
| "grad_norm": 0.7824367880821228, |
| "learning_rate": 1.2481138182929065e-07, |
| "loss": 0.0299, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.9093411996066864, |
| "grad_norm": 1.2818101644515991, |
| "learning_rate": 1.2374208775296742e-07, |
| "loss": 0.0664, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.9097345132743363, |
| "grad_norm": 1.6559642553329468, |
| "learning_rate": 1.2267727762832388e-07, |
| "loss": 0.0667, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.9101278269419862, |
| "grad_norm": 0.8255678415298462, |
| "learning_rate": 1.2161695346463498e-07, |
| "loss": 0.042, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.9105211406096362, |
| "grad_norm": 0.7617945075035095, |
| "learning_rate": 1.2056111726271192e-07, |
| "loss": 0.0464, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.9109144542772861, |
| "grad_norm": 1.3965145349502563, |
| "learning_rate": 1.195097710148968e-07, |
| "loss": 0.039, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.9113077679449361, |
| "grad_norm": 1.3296297788619995, |
| "learning_rate": 1.1846291670505855e-07, |
| "loss": 0.0552, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.911701081612586, |
| "grad_norm": 0.7849988341331482, |
| "learning_rate": 1.1742055630859117e-07, |
| "loss": 0.0338, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.912094395280236, |
| "grad_norm": 2.0398993492126465, |
| "learning_rate": 1.1638269179240796e-07, |
| "loss": 0.0542, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.9124877089478859, |
| "grad_norm": 0.7769688367843628, |
| "learning_rate": 1.1534932511493846e-07, |
| "loss": 0.0343, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.9128810226155359, |
| "grad_norm": 0.6311588287353516, |
| "learning_rate": 1.1432045822612564e-07, |
| "loss": 0.0483, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.9132743362831859, |
| "grad_norm": 0.9618848562240601, |
| "learning_rate": 1.132960930674204e-07, |
| "loss": 0.0498, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.9136676499508358, |
| "grad_norm": 0.8956164121627808, |
| "learning_rate": 1.1227623157177986e-07, |
| "loss": 0.0316, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.9140609636184858, |
| "grad_norm": 1.1387652158737183, |
| "learning_rate": 1.1126087566366266e-07, |
| "loss": 0.0669, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.9144542772861357, |
| "grad_norm": 0.7763038277626038, |
| "learning_rate": 1.1025002725902484e-07, |
| "loss": 0.0512, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.9148475909537856, |
| "grad_norm": 1.52693510055542, |
| "learning_rate": 1.0924368826531751e-07, |
| "loss": 0.0745, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.9152409046214356, |
| "grad_norm": 1.1928157806396484, |
| "learning_rate": 1.0824186058148278e-07, |
| "loss": 0.047, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.9156342182890855, |
| "grad_norm": 0.6993405818939209, |
| "learning_rate": 1.0724454609794931e-07, |
| "loss": 0.0258, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.9160275319567355, |
| "grad_norm": 0.8654144406318665, |
| "learning_rate": 1.0625174669663036e-07, |
| "loss": 0.0493, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.9164208456243854, |
| "grad_norm": 1.6443697214126587, |
| "learning_rate": 1.0526346425091815e-07, |
| "loss": 0.0641, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.9168141592920354, |
| "grad_norm": 2.2090344429016113, |
| "learning_rate": 1.042797006256821e-07, |
| "loss": 0.0916, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.9172074729596853, |
| "grad_norm": 1.2032400369644165, |
| "learning_rate": 1.0330045767726504e-07, |
| "loss": 0.043, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.9176007866273354, |
| "grad_norm": 1.0382981300354004, |
| "learning_rate": 1.023257372534786e-07, |
| "loss": 0.0478, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.9179941002949853, |
| "grad_norm": 1.3554562330245972, |
| "learning_rate": 1.0135554119360153e-07, |
| "loss": 0.076, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.9183874139626352, |
| "grad_norm": 0.7670255899429321, |
| "learning_rate": 1.0038987132837435e-07, |
| "loss": 0.0666, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.9187807276302852, |
| "grad_norm": 1.3432739973068237, |
| "learning_rate": 9.942872947999672e-08, |
| "loss": 0.0472, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.9191740412979351, |
| "grad_norm": 0.7896971702575684, |
| "learning_rate": 9.847211746212504e-08, |
| "loss": 0.0636, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.9195673549655851, |
| "grad_norm": 0.7464331388473511, |
| "learning_rate": 9.752003707986652e-08, |
| "loss": 0.036, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.919960668633235, |
| "grad_norm": 1.4482289552688599, |
| "learning_rate": 9.657249012977821e-08, |
| "loss": 0.047, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.9203539823008849, |
| "grad_norm": 0.7451487183570862, |
| "learning_rate": 9.562947839986264e-08, |
| "loss": 0.0516, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.9207472959685349, |
| "grad_norm": 1.0219905376434326, |
| "learning_rate": 9.469100366956391e-08, |
| "loss": 0.0515, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.9211406096361848, |
| "grad_norm": 0.776695966720581, |
| "learning_rate": 9.375706770976573e-08, |
| "loss": 0.0289, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.9215339233038348, |
| "grad_norm": 0.9781972169876099, |
| "learning_rate": 9.282767228278672e-08, |
| "loss": 0.0767, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.9219272369714847, |
| "grad_norm": 1.0278164148330688, |
| "learning_rate": 9.190281914237736e-08, |
| "loss": 0.0333, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.9223205506391348, |
| "grad_norm": 1.5040227174758911, |
| "learning_rate": 9.09825100337175e-08, |
| "loss": 0.0788, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.9227138643067847, |
| "grad_norm": 1.5312731266021729, |
| "learning_rate": 9.006674669341214e-08, |
| "loss": 0.0744, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.9231071779744346, |
| "grad_norm": 1.6249146461486816, |
| "learning_rate": 8.915553084948847e-08, |
| "loss": 0.0442, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.9235004916420846, |
| "grad_norm": 1.0247668027877808, |
| "learning_rate": 8.824886422139273e-08, |
| "loss": 0.0621, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.9238938053097345, |
| "grad_norm": 1.506390929222107, |
| "learning_rate": 8.734674851998748e-08, |
| "loss": 0.0755, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.9242871189773845, |
| "grad_norm": 0.8823897838592529, |
| "learning_rate": 8.64491854475466e-08, |
| "loss": 0.0637, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.9246804326450344, |
| "grad_norm": 0.7110940217971802, |
| "learning_rate": 8.55561766977539e-08, |
| "loss": 0.0326, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.9250737463126844, |
| "grad_norm": 0.5734057426452637, |
| "learning_rate": 8.46677239556995e-08, |
| "loss": 0.0305, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.9254670599803343, |
| "grad_norm": 0.8686132431030273, |
| "learning_rate": 8.378382889787596e-08, |
| "loss": 0.0405, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.9258603736479842, |
| "grad_norm": 1.6284774541854858, |
| "learning_rate": 8.290449319217603e-08, |
| "loss": 0.0583, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.9262536873156342, |
| "grad_norm": 1.2678624391555786, |
| "learning_rate": 8.202971849788854e-08, |
| "loss": 0.0474, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.9266470009832841, |
| "grad_norm": 1.2101284265518188, |
| "learning_rate": 8.115950646569587e-08, |
| "loss": 0.0391, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.9270403146509342, |
| "grad_norm": 0.6382131576538086, |
| "learning_rate": 8.029385873767115e-08, |
| "loss": 0.0512, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.9274336283185841, |
| "grad_norm": 1.0339092016220093, |
| "learning_rate": 7.943277694727469e-08, |
| "loss": 0.0528, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.927826941986234, |
| "grad_norm": 0.7545960545539856, |
| "learning_rate": 7.857626271935037e-08, |
| "loss": 0.0418, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.928220255653884, |
| "grad_norm": 0.9588167071342468, |
| "learning_rate": 7.772431767012423e-08, |
| "loss": 0.0552, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.9286135693215339, |
| "grad_norm": 0.7952490448951721, |
| "learning_rate": 7.68769434071992e-08, |
| "loss": 0.0431, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.9290068829891839, |
| "grad_norm": 1.0601327419281006, |
| "learning_rate": 7.603414152955374e-08, |
| "loss": 0.0262, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.9294001966568338, |
| "grad_norm": 0.8356077075004578, |
| "learning_rate": 7.519591362753848e-08, |
| "loss": 0.0309, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.9297935103244838, |
| "grad_norm": 1.068089246749878, |
| "learning_rate": 7.436226128287288e-08, |
| "loss": 0.0374, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.9301868239921337, |
| "grad_norm": 1.1383631229400635, |
| "learning_rate": 7.35331860686428e-08, |
| "loss": 0.0515, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.9305801376597836, |
| "grad_norm": 0.9927535653114319, |
| "learning_rate": 7.270868954929595e-08, |
| "loss": 0.056, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.9309734513274336, |
| "grad_norm": 0.6153873801231384, |
| "learning_rate": 7.188877328064142e-08, |
| "loss": 0.0437, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.9313667649950835, |
| "grad_norm": 0.8163816928863525, |
| "learning_rate": 7.107343880984496e-08, |
| "loss": 0.0541, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.9317600786627336, |
| "grad_norm": 1.144721269607544, |
| "learning_rate": 7.026268767542671e-08, |
| "loss": 0.055, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.9321533923303835, |
| "grad_norm": 0.9538362622261047, |
| "learning_rate": 6.94565214072579e-08, |
| "loss": 0.0845, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.9325467059980335, |
| "grad_norm": 1.0417604446411133, |
| "learning_rate": 6.86549415265586e-08, |
| "loss": 0.054, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.9329400196656834, |
| "grad_norm": 0.8085368275642395, |
| "learning_rate": 6.785794954589365e-08, |
| "loss": 0.0338, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 0.6007797718048096, |
| "learning_rate": 6.706554696917139e-08, |
| "loss": 0.0314, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.9337266470009833, |
| "grad_norm": 0.8648099303245544, |
| "learning_rate": 6.627773529163994e-08, |
| "loss": 0.0302, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.9341199606686332, |
| "grad_norm": 0.5465229749679565, |
| "learning_rate": 6.549451599988432e-08, |
| "loss": 0.0359, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.9345132743362832, |
| "grad_norm": 0.6655777096748352, |
| "learning_rate": 6.471589057182398e-08, |
| "loss": 0.0435, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.9349065880039331, |
| "grad_norm": 1.1010547876358032, |
| "learning_rate": 6.394186047670947e-08, |
| "loss": 0.0377, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.9352999016715831, |
| "grad_norm": 0.7519053816795349, |
| "learning_rate": 6.317242717511995e-08, |
| "loss": 0.033, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.935693215339233, |
| "grad_norm": 0.8617828488349915, |
| "learning_rate": 6.240759211896153e-08, |
| "loss": 0.0434, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.9360865290068829, |
| "grad_norm": 1.5556560754776, |
| "learning_rate": 6.16473567514625e-08, |
| "loss": 0.0893, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.936479842674533, |
| "grad_norm": 1.6594090461730957, |
| "learning_rate": 6.089172250717201e-08, |
| "loss": 0.0667, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.9368731563421829, |
| "grad_norm": 0.7117483019828796, |
| "learning_rate": 6.014069081195673e-08, |
| "loss": 0.0256, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.9372664700098329, |
| "grad_norm": 0.8783112168312073, |
| "learning_rate": 5.9394263082998836e-08, |
| "loss": 0.0439, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.9376597836774828, |
| "grad_norm": 0.73135906457901, |
| "learning_rate": 5.8652440728792504e-08, |
| "loss": 0.0514, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.9380530973451328, |
| "grad_norm": 0.5708735585212708, |
| "learning_rate": 5.791522514914216e-08, |
| "loss": 0.0332, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.9384464110127827, |
| "grad_norm": 1.1698683500289917, |
| "learning_rate": 5.718261773515865e-08, |
| "loss": 0.026, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.9388397246804326, |
| "grad_norm": 0.8288942575454712, |
| "learning_rate": 5.64546198692581e-08, |
| "loss": 0.0401, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.9392330383480826, |
| "grad_norm": 1.1005017757415771, |
| "learning_rate": 5.573123292515775e-08, |
| "loss": 0.0625, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.9396263520157325, |
| "grad_norm": 1.4169667959213257, |
| "learning_rate": 5.50124582678746e-08, |
| "loss": 0.0561, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.9400196656833825, |
| "grad_norm": 1.8534727096557617, |
| "learning_rate": 5.429829725372204e-08, |
| "loss": 0.0563, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.9404129793510324, |
| "grad_norm": 0.49012327194213867, |
| "learning_rate": 5.3588751230307935e-08, |
| "loss": 0.0371, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.9408062930186823, |
| "grad_norm": 1.5290131568908691, |
| "learning_rate": 5.2883821536531545e-08, |
| "loss": 0.0471, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.9411996066863324, |
| "grad_norm": 0.37540706992149353, |
| "learning_rate": 5.218350950258133e-08, |
| "loss": 0.0224, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.9415929203539823, |
| "grad_norm": 1.6441450119018555, |
| "learning_rate": 5.1487816449932174e-08, |
| "loss": 0.0545, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.9419862340216323, |
| "grad_norm": 0.8181889057159424, |
| "learning_rate": 5.079674369134313e-08, |
| "loss": 0.0528, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.9423795476892822, |
| "grad_norm": 1.6283776760101318, |
| "learning_rate": 5.0110292530854696e-08, |
| "loss": 0.0528, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.9427728613569322, |
| "grad_norm": 4.418090343475342, |
| "learning_rate": 4.942846426378683e-08, |
| "loss": 0.052, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.9431661750245821, |
| "grad_norm": 0.9668748378753662, |
| "learning_rate": 4.875126017673593e-08, |
| "loss": 0.0441, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.943559488692232, |
| "grad_norm": 1.2723820209503174, |
| "learning_rate": 4.807868154757284e-08, |
| "loss": 0.0504, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.943952802359882, |
| "grad_norm": 1.2000619173049927, |
| "learning_rate": 4.741072964543958e-08, |
| "loss": 0.0669, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9443461160275319, |
| "grad_norm": 1.4198737144470215, |
| "learning_rate": 4.6747405730748765e-08, |
| "loss": 0.0768, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.9447394296951819, |
| "grad_norm": 0.5707858800888062, |
| "learning_rate": 4.6088711055179426e-08, |
| "loss": 0.0363, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.9451327433628318, |
| "grad_norm": 0.9884591698646545, |
| "learning_rate": 4.543464686167537e-08, |
| "loss": 0.0617, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.9455260570304818, |
| "grad_norm": 1.1140447854995728, |
| "learning_rate": 4.478521438444267e-08, |
| "loss": 0.0307, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.9459193706981318, |
| "grad_norm": 1.7241696119308472, |
| "learning_rate": 4.414041484894743e-08, |
| "loss": 0.0468, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.9463126843657818, |
| "grad_norm": 1.4963939189910889, |
| "learning_rate": 4.3500249471913616e-08, |
| "loss": 0.0424, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.9467059980334317, |
| "grad_norm": 1.4940134286880493, |
| "learning_rate": 4.2864719461321036e-08, |
| "loss": 0.062, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.9470993117010816, |
| "grad_norm": 1.2279117107391357, |
| "learning_rate": 4.223382601640208e-08, |
| "loss": 0.0557, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.9474926253687316, |
| "grad_norm": 0.5514369606971741, |
| "learning_rate": 4.160757032764001e-08, |
| "loss": 0.0211, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.9478859390363815, |
| "grad_norm": 1.1696200370788574, |
| "learning_rate": 4.098595357676732e-08, |
| "loss": 0.0525, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.9482792527040315, |
| "grad_norm": 1.4047200679779053, |
| "learning_rate": 4.036897693676184e-08, |
| "loss": 0.0582, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.9486725663716814, |
| "grad_norm": 0.9069812893867493, |
| "learning_rate": 3.9756641571847e-08, |
| "loss": 0.0451, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.9490658800393313, |
| "grad_norm": 0.7696250677108765, |
| "learning_rate": 3.914894863748714e-08, |
| "loss": 0.0596, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.9494591937069813, |
| "grad_norm": 1.0009849071502686, |
| "learning_rate": 3.854589928038666e-08, |
| "loss": 0.0531, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.9498525073746312, |
| "grad_norm": 0.6316270232200623, |
| "learning_rate": 3.794749463848835e-08, |
| "loss": 0.0261, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.9502458210422812, |
| "grad_norm": 1.1284974813461304, |
| "learning_rate": 3.735373584096924e-08, |
| "loss": 0.0485, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.9506391347099312, |
| "grad_norm": 0.744842529296875, |
| "learning_rate": 3.676462400824088e-08, |
| "loss": 0.0437, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.9510324483775812, |
| "grad_norm": 1.1578047275543213, |
| "learning_rate": 3.618016025194598e-08, |
| "loss": 0.0458, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.9514257620452311, |
| "grad_norm": 1.029968023300171, |
| "learning_rate": 3.560034567495513e-08, |
| "loss": 0.063, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.951819075712881, |
| "grad_norm": 0.8940306305885315, |
| "learning_rate": 3.5025181371367844e-08, |
| "loss": 0.0583, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.952212389380531, |
| "grad_norm": 1.1246992349624634, |
| "learning_rate": 3.4454668426507076e-08, |
| "loss": 0.0446, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.9526057030481809, |
| "grad_norm": 1.069629192352295, |
| "learning_rate": 3.388880791692001e-08, |
| "loss": 0.0422, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.9529990167158309, |
| "grad_norm": 1.080478549003601, |
| "learning_rate": 3.33276009103739e-08, |
| "loss": 0.0547, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.9533923303834808, |
| "grad_norm": 1.105726718902588, |
| "learning_rate": 3.2771048465855546e-08, |
| "loss": 0.0478, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.9537856440511308, |
| "grad_norm": 0.9557194709777832, |
| "learning_rate": 3.221915163356848e-08, |
| "loss": 0.0454, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.9541789577187807, |
| "grad_norm": 0.7306869626045227, |
| "learning_rate": 3.167191145493076e-08, |
| "loss": 0.0306, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.9545722713864306, |
| "grad_norm": 0.9311756491661072, |
| "learning_rate": 3.1129328962573865e-08, |
| "loss": 0.0378, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.9549655850540806, |
| "grad_norm": 1.6339657306671143, |
| "learning_rate": 3.05914051803402e-08, |
| "loss": 0.053, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.9553588987217306, |
| "grad_norm": 1.5211260318756104, |
| "learning_rate": 3.005814112328143e-08, |
| "loss": 0.0408, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.9557522123893806, |
| "grad_norm": 1.1606007814407349, |
| "learning_rate": 2.9529537797656215e-08, |
| "loss": 0.0531, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.9561455260570305, |
| "grad_norm": 0.5916828513145447, |
| "learning_rate": 2.900559620092891e-08, |
| "loss": 0.0625, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.9565388397246805, |
| "grad_norm": 0.49938130378723145, |
| "learning_rate": 2.8486317321766432e-08, |
| "loss": 0.0395, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.9569321533923304, |
| "grad_norm": 1.587057113647461, |
| "learning_rate": 2.797170214003775e-08, |
| "loss": 0.1053, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.9573254670599803, |
| "grad_norm": 1.176936149597168, |
| "learning_rate": 2.7461751626811916e-08, |
| "loss": 0.0462, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.9577187807276303, |
| "grad_norm": 0.5434470176696777, |
| "learning_rate": 2.6956466744355315e-08, |
| "loss": 0.0268, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.9581120943952802, |
| "grad_norm": 0.6117231845855713, |
| "learning_rate": 2.6455848446130526e-08, |
| "loss": 0.0572, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.9585054080629302, |
| "grad_norm": 1.2302024364471436, |
| "learning_rate": 2.5959897676794134e-08, |
| "loss": 0.0613, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.9588987217305801, |
| "grad_norm": 1.686108946800232, |
| "learning_rate": 2.546861537219586e-08, |
| "loss": 0.0726, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.95929203539823, |
| "grad_norm": 0.9010059833526611, |
| "learning_rate": 2.4982002459375265e-08, |
| "loss": 0.0356, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.95968534906588, |
| "grad_norm": 0.7760159373283386, |
| "learning_rate": 2.450005985656173e-08, |
| "loss": 0.0376, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.96007866273353, |
| "grad_norm": 0.788345456123352, |
| "learning_rate": 2.4022788473170853e-08, |
| "loss": 0.0657, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.96047197640118, |
| "grad_norm": 0.8711709976196289, |
| "learning_rate": 2.355018920980501e-08, |
| "loss": 0.0444, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.9608652900688299, |
| "grad_norm": 0.6124730110168457, |
| "learning_rate": 2.308226295824917e-08, |
| "loss": 0.0542, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.9612586037364799, |
| "grad_norm": 1.0837171077728271, |
| "learning_rate": 2.2619010601470925e-08, |
| "loss": 0.0577, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.9616519174041298, |
| "grad_norm": 1.9453260898590088, |
| "learning_rate": 2.2160433013618533e-08, |
| "loss": 0.058, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.9620452310717797, |
| "grad_norm": 0.8556208610534668, |
| "learning_rate": 2.170653106001841e-08, |
| "loss": 0.0281, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.9624385447394297, |
| "grad_norm": 0.9196289777755737, |
| "learning_rate": 2.1257305597175428e-08, |
| "loss": 0.0414, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.9628318584070796, |
| "grad_norm": 1.5880217552185059, |
| "learning_rate": 2.0812757472768175e-08, |
| "loss": 0.0496, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.9632251720747296, |
| "grad_norm": 1.4076353311538696, |
| "learning_rate": 2.037288752565064e-08, |
| "loss": 0.049, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.9636184857423795, |
| "grad_norm": 0.8668321967124939, |
| "learning_rate": 1.99376965858486e-08, |
| "loss": 0.0606, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9640117994100295, |
| "grad_norm": 0.7461321353912354, |
| "learning_rate": 1.9507185474558765e-08, |
| "loss": 0.0343, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.9644051130776794, |
| "grad_norm": 0.6470179557800293, |
| "learning_rate": 1.908135500414743e-08, |
| "loss": 0.0334, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.9647984267453295, |
| "grad_norm": 1.0918750762939453, |
| "learning_rate": 1.866020597814766e-08, |
| "loss": 0.0451, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.9651917404129794, |
| "grad_norm": 0.6877756118774414, |
| "learning_rate": 1.8243739191259603e-08, |
| "loss": 0.0397, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.9655850540806293, |
| "grad_norm": 0.9845160245895386, |
| "learning_rate": 1.7831955429348235e-08, |
| "loss": 0.0227, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.9659783677482793, |
| "grad_norm": 1.178027629852295, |
| "learning_rate": 1.7424855469440617e-08, |
| "loss": 0.0941, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.9663716814159292, |
| "grad_norm": 1.0678149461746216, |
| "learning_rate": 1.7022440079726976e-08, |
| "loss": 0.0519, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.9667649950835792, |
| "grad_norm": 0.7598469257354736, |
| "learning_rate": 1.6624710019556844e-08, |
| "loss": 0.0303, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.9671583087512291, |
| "grad_norm": 1.8913023471832275, |
| "learning_rate": 1.623166603943932e-08, |
| "loss": 0.0573, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.967551622418879, |
| "grad_norm": 0.8094140887260437, |
| "learning_rate": 1.584330888104002e-08, |
| "loss": 0.0454, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.967944936086529, |
| "grad_norm": 1.0645431280136108, |
| "learning_rate": 1.5459639277181637e-08, |
| "loss": 0.0482, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.9683382497541789, |
| "grad_norm": 1.1675747632980347, |
| "learning_rate": 1.508065795184116e-08, |
| "loss": 0.0587, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.9687315634218289, |
| "grad_norm": 1.6579506397247314, |
| "learning_rate": 1.4706365620149043e-08, |
| "loss": 0.0389, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.9691248770894788, |
| "grad_norm": 1.4258586168289185, |
| "learning_rate": 1.433676298838671e-08, |
| "loss": 0.0571, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.9695181907571289, |
| "grad_norm": 1.555445671081543, |
| "learning_rate": 1.3971850753987936e-08, |
| "loss": 0.0561, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.9699115044247788, |
| "grad_norm": 1.851238489151001, |
| "learning_rate": 1.3611629605534139e-08, |
| "loss": 0.0614, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.9703048180924287, |
| "grad_norm": 1.4167311191558838, |
| "learning_rate": 1.325610022275603e-08, |
| "loss": 0.0541, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.9706981317600787, |
| "grad_norm": 1.103963017463684, |
| "learning_rate": 1.29052632765303e-08, |
| "loss": 0.0515, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.9710914454277286, |
| "grad_norm": 0.8383644819259644, |
| "learning_rate": 1.2559119428879607e-08, |
| "loss": 0.0439, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.9714847590953786, |
| "grad_norm": 1.5626074075698853, |
| "learning_rate": 1.2217669332970084e-08, |
| "loss": 0.0358, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.9718780727630285, |
| "grad_norm": 0.965404748916626, |
| "learning_rate": 1.1880913633111335e-08, |
| "loss": 0.0588, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.9722713864306785, |
| "grad_norm": 1.2146902084350586, |
| "learning_rate": 1.1548852964755053e-08, |
| "loss": 0.0473, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.9726647000983284, |
| "grad_norm": 1.4855893850326538, |
| "learning_rate": 1.122148795449307e-08, |
| "loss": 0.0543, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.9730580137659783, |
| "grad_norm": 1.1908034086227417, |
| "learning_rate": 1.0898819220056811e-08, |
| "loss": 0.0486, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.9734513274336283, |
| "grad_norm": 1.0501704216003418, |
| "learning_rate": 1.058084737031534e-08, |
| "loss": 0.0475, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.9738446411012782, |
| "grad_norm": 0.6650611162185669, |
| "learning_rate": 1.0267573005275645e-08, |
| "loss": 0.0297, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.9742379547689283, |
| "grad_norm": 0.6201514601707458, |
| "learning_rate": 9.95899671607986e-09, |
| "loss": 0.047, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.9746312684365782, |
| "grad_norm": 1.1360257863998413, |
| "learning_rate": 9.655119085005827e-09, |
| "loss": 0.0363, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.9750245821042282, |
| "grad_norm": 0.8666075468063354, |
| "learning_rate": 9.355940685464305e-09, |
| "loss": 0.0458, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.9754178957718781, |
| "grad_norm": 1.1366305351257324, |
| "learning_rate": 9.061462081999262e-09, |
| "loss": 0.0471, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.975811209439528, |
| "grad_norm": 0.6694433689117432, |
| "learning_rate": 8.771683830285649e-09, |
| "loss": 0.0387, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.976204523107178, |
| "grad_norm": 2.0710513591766357, |
| "learning_rate": 8.486606477129677e-09, |
| "loss": 0.075, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.9765978367748279, |
| "grad_norm": 0.9630718231201172, |
| "learning_rate": 8.206230560466322e-09, |
| "loss": 0.0431, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.9769911504424779, |
| "grad_norm": 0.9957706332206726, |
| "learning_rate": 7.930556609359596e-09, |
| "loss": 0.0398, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.9773844641101278, |
| "grad_norm": 0.8392490148544312, |
| "learning_rate": 7.659585144000892e-09, |
| "loss": 0.1203, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "grad_norm": 0.763048529624939, |
| "learning_rate": 7.393316675707584e-09, |
| "loss": 0.048, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.9781710914454277, |
| "grad_norm": 0.591249942779541, |
| "learning_rate": 7.131751706923595e-09, |
| "loss": 0.0276, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.9785644051130776, |
| "grad_norm": 0.7118191719055176, |
| "learning_rate": 6.8748907312163325e-09, |
| "loss": 0.0459, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.9789577187807277, |
| "grad_norm": 1.2333048582077026, |
| "learning_rate": 6.622734233277528e-09, |
| "loss": 0.0547, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.9793510324483776, |
| "grad_norm": 1.8401693105697632, |
| "learning_rate": 6.375282688921569e-09, |
| "loss": 0.0499, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.9797443461160276, |
| "grad_norm": 0.8339464068412781, |
| "learning_rate": 6.132536565084945e-09, |
| "loss": 0.0343, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.9801376597836775, |
| "grad_norm": 0.7225338220596313, |
| "learning_rate": 5.894496319824306e-09, |
| "loss": 0.0373, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.9805309734513274, |
| "grad_norm": 0.7467345595359802, |
| "learning_rate": 5.661162402316733e-09, |
| "loss": 0.0294, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.9809242871189774, |
| "grad_norm": 0.7157261967658997, |
| "learning_rate": 5.432535252859472e-09, |
| "loss": 0.0388, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.9813176007866273, |
| "grad_norm": 1.0490740537643433, |
| "learning_rate": 5.208615302866593e-09, |
| "loss": 0.0552, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.9817109144542773, |
| "grad_norm": 0.9684942364692688, |
| "learning_rate": 4.989402974871216e-09, |
| "loss": 0.0482, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.9821042281219272, |
| "grad_norm": 0.7083243727684021, |
| "learning_rate": 4.774898682522455e-09, |
| "loss": 0.0354, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.9824975417895772, |
| "grad_norm": 0.6887216567993164, |
| "learning_rate": 4.565102830585699e-09, |
| "loss": 0.0555, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.9828908554572271, |
| "grad_norm": 0.9905696511268616, |
| "learning_rate": 4.360015814941498e-09, |
| "loss": 0.044, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.983284169124877, |
| "grad_norm": 1.4582995176315308, |
| "learning_rate": 4.159638022585011e-09, |
| "loss": 0.0555, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9836774827925271, |
| "grad_norm": 0.8839958906173706, |
| "learning_rate": 3.96396983162517e-09, |
| "loss": 0.0322, |
| "step": 2501 |
| }, |
| { |
| "epoch": 0.984070796460177, |
| "grad_norm": 0.9634173512458801, |
| "learning_rate": 3.773011611284128e-09, |
| "loss": 0.0305, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.984464110127827, |
| "grad_norm": 0.9942337870597839, |
| "learning_rate": 3.586763721896147e-09, |
| "loss": 0.0725, |
| "step": 2503 |
| }, |
| { |
| "epoch": 0.9848574237954769, |
| "grad_norm": 0.8074241876602173, |
| "learning_rate": 3.4052265149070453e-09, |
| "loss": 0.048, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.9852507374631269, |
| "grad_norm": 1.1746639013290405, |
| "learning_rate": 3.2284003328744706e-09, |
| "loss": 0.0565, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.9856440511307768, |
| "grad_norm": 1.454350233078003, |
| "learning_rate": 3.056285509465684e-09, |
| "loss": 0.0462, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.9860373647984267, |
| "grad_norm": 1.0500266551971436, |
| "learning_rate": 2.888882369457835e-09, |
| "loss": 0.0229, |
| "step": 2507 |
| }, |
| { |
| "epoch": 0.9864306784660767, |
| "grad_norm": 0.5939337611198425, |
| "learning_rate": 2.726191228737407e-09, |
| "loss": 0.0441, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.9868239921337266, |
| "grad_norm": 0.7773805856704712, |
| "learning_rate": 2.5682123942993852e-09, |
| "loss": 0.0388, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.9872173058013766, |
| "grad_norm": 0.9417904019355774, |
| "learning_rate": 2.414946164246701e-09, |
| "loss": 0.0448, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.9876106194690265, |
| "grad_norm": 0.8849769830703735, |
| "learning_rate": 2.2663928277896763e-09, |
| "loss": 0.0482, |
| "step": 2511 |
| }, |
| { |
| "epoch": 0.9880039331366764, |
| "grad_norm": 1.0469379425048828, |
| "learning_rate": 2.122552665245747e-09, |
| "loss": 0.0479, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.9883972468043265, |
| "grad_norm": 0.4294953942298889, |
| "learning_rate": 1.9834259480380756e-09, |
| "loss": 0.017, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.9887905604719764, |
| "grad_norm": 1.0931810140609741, |
| "learning_rate": 1.8490129386963818e-09, |
| "loss": 0.0376, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.9891838741396264, |
| "grad_norm": 0.5045303702354431, |
| "learning_rate": 1.719313890855001e-09, |
| "loss": 0.0203, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.9895771878072763, |
| "grad_norm": 1.2506543397903442, |
| "learning_rate": 1.5943290492539953e-09, |
| "loss": 0.0415, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.9899705014749263, |
| "grad_norm": 0.6282764673233032, |
| "learning_rate": 1.4740586497366538e-09, |
| "loss": 0.043, |
| "step": 2517 |
| }, |
| { |
| "epoch": 0.9903638151425762, |
| "grad_norm": 1.0732625722885132, |
| "learning_rate": 1.358502919251159e-09, |
| "loss": 0.049, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.9907571288102262, |
| "grad_norm": 0.8076870441436768, |
| "learning_rate": 1.247662075848921e-09, |
| "loss": 0.0367, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.9911504424778761, |
| "grad_norm": 1.1323729753494263, |
| "learning_rate": 1.1415363286843007e-09, |
| "loss": 0.0549, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.991543756145526, |
| "grad_norm": 1.2635443210601807, |
| "learning_rate": 1.0401258780146084e-09, |
| "loss": 0.0375, |
| "step": 2521 |
| }, |
| { |
| "epoch": 0.991937069813176, |
| "grad_norm": 1.430897831916809, |
| "learning_rate": 9.434309151992727e-10, |
| "loss": 0.075, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.9923303834808259, |
| "grad_norm": 1.1660479307174683, |
| "learning_rate": 8.514516226998393e-10, |
| "loss": 0.0562, |
| "step": 2523 |
| }, |
| { |
| "epoch": 0.9927236971484759, |
| "grad_norm": 2.029007911682129, |
| "learning_rate": 7.641881740794166e-10, |
| "loss": 0.0481, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.9931170108161259, |
| "grad_norm": 0.7072765827178955, |
| "learning_rate": 6.816407340023978e-10, |
| "loss": 0.0188, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.9935103244837759, |
| "grad_norm": 0.8789957165718079, |
| "learning_rate": 6.03809458233906e-10, |
| "loss": 0.0573, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.9939036381514258, |
| "grad_norm": 0.7415314316749573, |
| "learning_rate": 5.306944936406266e-10, |
| "loss": 0.0458, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.9942969518190757, |
| "grad_norm": 0.6154326796531677, |
| "learning_rate": 4.622959781883096e-10, |
| "loss": 0.0236, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.9946902654867257, |
| "grad_norm": 0.810153067111969, |
| "learning_rate": 3.9861404094426734e-10, |
| "loss": 0.0443, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.9950835791543756, |
| "grad_norm": 0.743605375289917, |
| "learning_rate": 3.3964880207459916e-10, |
| "loss": 0.052, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.9954768928220256, |
| "grad_norm": 1.1516720056533813, |
| "learning_rate": 2.8540037284557897e-10, |
| "loss": 0.0729, |
| "step": 2531 |
| }, |
| { |
| "epoch": 0.9958702064896755, |
| "grad_norm": 1.1776301860809326, |
| "learning_rate": 2.358688556233779e-10, |
| "loss": 0.0401, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.9962635201573254, |
| "grad_norm": 1.0834025144577026, |
| "learning_rate": 1.9105434387239886e-10, |
| "loss": 0.0593, |
| "step": 2533 |
| }, |
| { |
| "epoch": 0.9966568338249754, |
| "grad_norm": 1.4529463052749634, |
| "learning_rate": 1.509569221569418e-10, |
| "loss": 0.0423, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.9970501474926253, |
| "grad_norm": 1.1381511688232422, |
| "learning_rate": 1.1557666614037122e-10, |
| "loss": 0.0411, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.9974434611602753, |
| "grad_norm": 1.113553762435913, |
| "learning_rate": 8.49136425840058e-11, |
| "loss": 0.0611, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.9978367748279253, |
| "grad_norm": 1.071913719177246, |
| "learning_rate": 5.896790934878383e-11, |
| "loss": 0.0609, |
| "step": 2537 |
| }, |
| { |
| "epoch": 0.9982300884955753, |
| "grad_norm": 1.7356159687042236, |
| "learning_rate": 3.7739515393320215e-11, |
| "loss": 0.0524, |
| "step": 2538 |
| }, |
| { |
| "epoch": 0.9986234021632252, |
| "grad_norm": 1.0763658285140991, |
| "learning_rate": 2.122850077584948e-11, |
| "loss": 0.0527, |
| "step": 2539 |
| }, |
| { |
| "epoch": 0.9990167158308751, |
| "grad_norm": 0.6793241500854492, |
| "learning_rate": 9.434896651727699e-12, |
| "loss": 0.0462, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.9994100294985251, |
| "grad_norm": 0.9101441502571106, |
| "learning_rate": 2.358725275652951e-12, |
| "loss": 0.0453, |
| "step": 2541 |
| }, |
| { |
| "epoch": 0.999803343166175, |
| "grad_norm": 1.0394845008850098, |
| "learning_rate": 0.0, |
| "loss": 0.0578, |
| "step": 2542 |
| }, |
| { |
| "epoch": 0.999803343166175, |
| "step": 2542, |
| "total_flos": 5.5848341785175654e+17, |
| "train_loss": 0.05740805761998535, |
| "train_runtime": 78224.1342, |
| "train_samples_per_second": 1.04, |
| "train_steps_per_second": 0.032 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2542, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.5848341785175654e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|