| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.942630185348632, |
| "eval_steps": 500, |
| "global_step": 350, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01412180052956752, |
| "grad_norm": 5.979931009348192, |
| "learning_rate": 2.285714285714286e-06, |
| "loss": 0.8154, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.02824360105913504, |
| "grad_norm": 6.047301062181586, |
| "learning_rate": 4.571428571428572e-06, |
| "loss": 0.8243, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.04236540158870256, |
| "grad_norm": 5.589397434568418, |
| "learning_rate": 6.857142857142858e-06, |
| "loss": 0.8057, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.05648720211827008, |
| "grad_norm": 4.017126644109988, |
| "learning_rate": 9.142857142857144e-06, |
| "loss": 0.7602, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0706090026478376, |
| "grad_norm": 2.1627060531795967, |
| "learning_rate": 1.1428571428571429e-05, |
| "loss": 0.7197, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.08473080317740513, |
| "grad_norm": 4.667143281538081, |
| "learning_rate": 1.3714285714285716e-05, |
| "loss": 0.7374, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.09885260370697264, |
| "grad_norm": 6.545492179001986, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.7325, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.11297440423654016, |
| "grad_norm": 7.281711055430636, |
| "learning_rate": 1.8285714285714288e-05, |
| "loss": 0.7502, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.12709620476610767, |
| "grad_norm": 4.392674048119666, |
| "learning_rate": 2.057142857142857e-05, |
| "loss": 0.7051, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.1412180052956752, |
| "grad_norm": 2.9074563359884973, |
| "learning_rate": 2.2857142857142858e-05, |
| "loss": 0.6593, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1553398058252427, |
| "grad_norm": 2.205323794009288, |
| "learning_rate": 2.5142857142857143e-05, |
| "loss": 0.6276, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.16946160635481025, |
| "grad_norm": 1.4150033586173336, |
| "learning_rate": 2.742857142857143e-05, |
| "loss": 0.6013, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.18358340688437777, |
| "grad_norm": 1.3251831618492345, |
| "learning_rate": 2.9714285714285717e-05, |
| "loss": 0.5859, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.1977052074139453, |
| "grad_norm": 3.1805546649918544, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.5824, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.2118270079435128, |
| "grad_norm": 1.4989971887780031, |
| "learning_rate": 3.4285714285714284e-05, |
| "loss": 0.5708, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.22594880847308033, |
| "grad_norm": 1.203788524759605, |
| "learning_rate": 3.6571428571428576e-05, |
| "loss": 0.5563, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.24007060900264784, |
| "grad_norm": 1.3578810581727971, |
| "learning_rate": 3.885714285714286e-05, |
| "loss": 0.5566, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.25419240953221534, |
| "grad_norm": 0.8074275719021523, |
| "learning_rate": 4.114285714285714e-05, |
| "loss": 0.5507, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.26831421006178285, |
| "grad_norm": 1.1227844753364196, |
| "learning_rate": 4.342857142857143e-05, |
| "loss": 0.5418, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.2824360105913504, |
| "grad_norm": 0.8760389990351023, |
| "learning_rate": 4.5714285714285716e-05, |
| "loss": 0.5333, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2965578111209179, |
| "grad_norm": 1.1881496989284341, |
| "learning_rate": 4.8e-05, |
| "loss": 0.5305, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.3106796116504854, |
| "grad_norm": 0.9681459485298871, |
| "learning_rate": 5.0285714285714286e-05, |
| "loss": 0.5189, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.324801412180053, |
| "grad_norm": 1.4344211614758422, |
| "learning_rate": 5.257142857142858e-05, |
| "loss": 0.5198, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.3389232127096205, |
| "grad_norm": 0.8492140987790524, |
| "learning_rate": 5.485714285714286e-05, |
| "loss": 0.5102, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.353045013239188, |
| "grad_norm": 1.2351656998878342, |
| "learning_rate": 5.714285714285715e-05, |
| "loss": 0.5098, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.36716681376875554, |
| "grad_norm": 0.898578091846962, |
| "learning_rate": 5.9428571428571434e-05, |
| "loss": 0.5015, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.38128861429832306, |
| "grad_norm": 1.5089247050740433, |
| "learning_rate": 6.171428571428573e-05, |
| "loss": 0.507, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.3954104148278906, |
| "grad_norm": 0.9864208925736987, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.5034, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.4095322153574581, |
| "grad_norm": 1.0206985743120736, |
| "learning_rate": 6.62857142857143e-05, |
| "loss": 0.4983, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.4236540158870256, |
| "grad_norm": 1.4229934179471342, |
| "learning_rate": 6.857142857142857e-05, |
| "loss": 0.508, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.43777581641659313, |
| "grad_norm": 0.9625460430091453, |
| "learning_rate": 7.085714285714287e-05, |
| "loss": 0.5016, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.45189761694616065, |
| "grad_norm": 1.1144628190568628, |
| "learning_rate": 7.314285714285715e-05, |
| "loss": 0.4939, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.46601941747572817, |
| "grad_norm": 0.9463549200929555, |
| "learning_rate": 7.542857142857144e-05, |
| "loss": 0.4924, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.4801412180052957, |
| "grad_norm": 0.8892598203382347, |
| "learning_rate": 7.771428571428572e-05, |
| "loss": 0.4902, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.4942630185348632, |
| "grad_norm": 0.9413854750510515, |
| "learning_rate": 8e-05, |
| "loss": 0.4852, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.5083848190644307, |
| "grad_norm": 0.9034630826375731, |
| "learning_rate": 7.999801067823773e-05, |
| "loss": 0.4853, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.5225066195939982, |
| "grad_norm": 1.2269858722315412, |
| "learning_rate": 7.999204291082095e-05, |
| "loss": 0.4764, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.5366284201235657, |
| "grad_norm": 0.9045227868888749, |
| "learning_rate": 7.998209729134014e-05, |
| "loss": 0.4809, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.5507502206531333, |
| "grad_norm": 1.2176978127037603, |
| "learning_rate": 7.996817480904718e-05, |
| "loss": 0.4719, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.5648720211827007, |
| "grad_norm": 1.2333419409501036, |
| "learning_rate": 7.99502768487569e-05, |
| "loss": 0.477, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5789938217122683, |
| "grad_norm": 0.7483281237491166, |
| "learning_rate": 7.99284051907094e-05, |
| "loss": 0.4724, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.5931156222418358, |
| "grad_norm": 0.6151558817864059, |
| "learning_rate": 7.990256201039297e-05, |
| "loss": 0.4662, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.6072374227714034, |
| "grad_norm": 0.6814568571856022, |
| "learning_rate": 7.987274987832764e-05, |
| "loss": 0.4621, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.6213592233009708, |
| "grad_norm": 0.9240497143419791, |
| "learning_rate": 7.983897175980957e-05, |
| "loss": 0.4665, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.6354810238305384, |
| "grad_norm": 1.2034986971304784, |
| "learning_rate": 7.980123101461606e-05, |
| "loss": 0.4761, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.649602824360106, |
| "grad_norm": 0.8192788227089312, |
| "learning_rate": 7.975953139667141e-05, |
| "loss": 0.4652, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.6637246248896734, |
| "grad_norm": 0.7683306980890072, |
| "learning_rate": 7.97138770536735e-05, |
| "loss": 0.4619, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.677846425419241, |
| "grad_norm": 0.7567015766907312, |
| "learning_rate": 7.966427252668121e-05, |
| "loss": 0.4638, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.6919682259488085, |
| "grad_norm": 0.6846820764750615, |
| "learning_rate": 7.961072274966282e-05, |
| "loss": 0.4527, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.706090026478376, |
| "grad_norm": 0.7395598100512276, |
| "learning_rate": 7.955323304900514e-05, |
| "loss": 0.4571, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7202118270079435, |
| "grad_norm": 0.5351232158771764, |
| "learning_rate": 7.949180914298383e-05, |
| "loss": 0.4496, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.7343336275375111, |
| "grad_norm": 0.6324424558337066, |
| "learning_rate": 7.942645714119452e-05, |
| "loss": 0.4593, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.7484554280670785, |
| "grad_norm": 0.547964739600884, |
| "learning_rate": 7.93571835439452e-05, |
| "loss": 0.4502, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.7625772285966461, |
| "grad_norm": 0.7115536296101671, |
| "learning_rate": 7.928399524160956e-05, |
| "loss": 0.447, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 0.742782814289987, |
| "learning_rate": 7.920689951394175e-05, |
| "loss": 0.4461, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.7908208296557812, |
| "grad_norm": 0.6862659469941464, |
| "learning_rate": 7.912590402935223e-05, |
| "loss": 0.4473, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.8049426301853486, |
| "grad_norm": 0.6235041641613883, |
| "learning_rate": 7.904101684414498e-05, |
| "loss": 0.4472, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.8190644307149162, |
| "grad_norm": 0.44600795869954046, |
| "learning_rate": 7.895224640171625e-05, |
| "loss": 0.4442, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.8331862312444837, |
| "grad_norm": 0.48251979778530707, |
| "learning_rate": 7.88596015317147e-05, |
| "loss": 0.4449, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.8473080317740512, |
| "grad_norm": 0.4787558150068957, |
| "learning_rate": 7.876309144916312e-05, |
| "loss": 0.4433, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8614298323036187, |
| "grad_norm": 0.41827598666685606, |
| "learning_rate": 7.86627257535419e-05, |
| "loss": 0.4401, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.8755516328331863, |
| "grad_norm": 0.4724100749619687, |
| "learning_rate": 7.855851442783414e-05, |
| "loss": 0.4374, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.8896734333627537, |
| "grad_norm": 0.6571994588226032, |
| "learning_rate": 7.845046783753276e-05, |
| "loss": 0.4409, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.9037952338923213, |
| "grad_norm": 0.9369294338435781, |
| "learning_rate": 7.833859672960943e-05, |
| "loss": 0.4407, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.9179170344218888, |
| "grad_norm": 1.1601560447987704, |
| "learning_rate": 7.822291223144564e-05, |
| "loss": 0.4602, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.9320388349514563, |
| "grad_norm": 0.6934703654331164, |
| "learning_rate": 7.810342584972585e-05, |
| "loss": 0.4369, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.9461606354810238, |
| "grad_norm": 0.5791439547503463, |
| "learning_rate": 7.798014946929306e-05, |
| "loss": 0.4356, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.9602824360105914, |
| "grad_norm": 0.8373041828808443, |
| "learning_rate": 7.785309535196657e-05, |
| "loss": 0.4504, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.9744042365401588, |
| "grad_norm": 0.6796500376958069, |
| "learning_rate": 7.772227613532242e-05, |
| "loss": 0.4392, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.9885260370697264, |
| "grad_norm": 0.6686880597044009, |
| "learning_rate": 7.758770483143634e-05, |
| "loss": 0.4474, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.002647837599294, |
| "grad_norm": 0.6901488338737102, |
| "learning_rate": 7.74493948255895e-05, |
| "loss": 0.5108, |
| "step": 71 |
| }, |
| { |
| "epoch": 1.0167696381288613, |
| "grad_norm": 0.7139924415191212, |
| "learning_rate": 7.730735987493711e-05, |
| "loss": 0.4227, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.030891438658429, |
| "grad_norm": 0.7623382444431029, |
| "learning_rate": 7.71616141071401e-05, |
| "loss": 0.419, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.0450132391879965, |
| "grad_norm": 0.8179708530719029, |
| "learning_rate": 7.701217201895987e-05, |
| "loss": 0.4182, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.059135039717564, |
| "grad_norm": 0.6036364923611257, |
| "learning_rate": 7.685904847481631e-05, |
| "loss": 0.4147, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.0732568402471314, |
| "grad_norm": 0.5415944966587694, |
| "learning_rate": 7.670225870530936e-05, |
| "loss": 0.4192, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.087378640776699, |
| "grad_norm": 0.548496642769106, |
| "learning_rate": 7.654181830570404e-05, |
| "loss": 0.4193, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.1015004413062666, |
| "grad_norm": 0.4357435844414465, |
| "learning_rate": 7.637774323437929e-05, |
| "loss": 0.4126, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.1156222418358341, |
| "grad_norm": 0.5890851003105865, |
| "learning_rate": 7.62100498112406e-05, |
| "loss": 0.4193, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.1297440423654015, |
| "grad_norm": 0.5417176133106055, |
| "learning_rate": 7.603875471609677e-05, |
| "loss": 0.4069, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.143865842894969, |
| "grad_norm": 0.5234067170715418, |
| "learning_rate": 7.586387498700084e-05, |
| "loss": 0.4187, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.1579876434245366, |
| "grad_norm": 0.4795761329002007, |
| "learning_rate": 7.568542801855535e-05, |
| "loss": 0.4101, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.1721094439541042, |
| "grad_norm": 0.510485207368403, |
| "learning_rate": 7.550343156018217e-05, |
| "loss": 0.4074, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.1862312444836718, |
| "grad_norm": 0.5160993194955293, |
| "learning_rate": 7.531790371435709e-05, |
| "loss": 0.4105, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.2003530450132391, |
| "grad_norm": 0.6272135654421417, |
| "learning_rate": 7.512886293480914e-05, |
| "loss": 0.4131, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.2144748455428067, |
| "grad_norm": 0.7144516241332823, |
| "learning_rate": 7.49363280246852e-05, |
| "loss": 0.4123, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.2285966460723743, |
| "grad_norm": 1.0197175196301183, |
| "learning_rate": 7.474031813467956e-05, |
| "loss": 0.4199, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.2427184466019416, |
| "grad_norm": 0.9885970877399597, |
| "learning_rate": 7.454085276112925e-05, |
| "loss": 0.4152, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.2568402471315092, |
| "grad_norm": 0.5518795345815659, |
| "learning_rate": 7.433795174407465e-05, |
| "loss": 0.4064, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.2709620476610768, |
| "grad_norm": 0.42697954065556326, |
| "learning_rate": 7.413163526528623e-05, |
| "loss": 0.409, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.2850838481906444, |
| "grad_norm": 0.698380780251885, |
| "learning_rate": 7.392192384625704e-05, |
| "loss": 0.4054, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.299205648720212, |
| "grad_norm": 0.6703174317830842, |
| "learning_rate": 7.370883834616157e-05, |
| "loss": 0.4099, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.3133274492497793, |
| "grad_norm": 0.3951173073488556, |
| "learning_rate": 7.349239995978095e-05, |
| "loss": 0.4084, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.3274492497793469, |
| "grad_norm": 0.43174109319559356, |
| "learning_rate": 7.327263021539478e-05, |
| "loss": 0.4048, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.3415710503089144, |
| "grad_norm": 0.5360712514545947, |
| "learning_rate": 7.30495509726398e-05, |
| "loss": 0.4068, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.3556928508384818, |
| "grad_norm": 0.42774436448586106, |
| "learning_rate": 7.282318442033567e-05, |
| "loss": 0.4034, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.3698146513680494, |
| "grad_norm": 0.5210499488927217, |
| "learning_rate": 7.259355307427781e-05, |
| "loss": 0.4078, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.383936451897617, |
| "grad_norm": 0.7093148406292331, |
| "learning_rate": 7.236067977499791e-05, |
| "loss": 0.4084, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.3980582524271845, |
| "grad_norm": 0.8196300420238753, |
| "learning_rate": 7.212458768549208e-05, |
| "loss": 0.4069, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.412180052956752, |
| "grad_norm": 0.9973540383790642, |
| "learning_rate": 7.188530028891691e-05, |
| "loss": 0.4047, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.4263018534863194, |
| "grad_norm": 1.0704815886198962, |
| "learning_rate": 7.164284138625367e-05, |
| "loss": 0.4075, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.440423654015887, |
| "grad_norm": 0.5848553932345868, |
| "learning_rate": 7.13972350939409e-05, |
| "loss": 0.4036, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.4545454545454546, |
| "grad_norm": 0.37289550464762866, |
| "learning_rate": 7.114850584147577e-05, |
| "loss": 0.4068, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.468667255075022, |
| "grad_norm": 0.6651429035225815, |
| "learning_rate": 7.089667836898399e-05, |
| "loss": 0.4053, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.4827890556045895, |
| "grad_norm": 0.6931193008736451, |
| "learning_rate": 7.064177772475912e-05, |
| "loss": 0.4002, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.496910856134157, |
| "grad_norm": 0.3938085941153356, |
| "learning_rate": 7.038382926277113e-05, |
| "loss": 0.4013, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.5110326566637247, |
| "grad_norm": 0.410899316731272, |
| "learning_rate": 7.012285864014445e-05, |
| "loss": 0.404, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.5251544571932922, |
| "grad_norm": 0.5933306150673846, |
| "learning_rate": 6.985889181460602e-05, |
| "loss": 0.3992, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.5392762577228596, |
| "grad_norm": 0.47465582200581674, |
| "learning_rate": 6.959195504190337e-05, |
| "loss": 0.4022, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.5533980582524272, |
| "grad_norm": 0.29047076547162964, |
| "learning_rate": 6.932207487319305e-05, |
| "loss": 0.3933, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.5675198587819947, |
| "grad_norm": 0.3955673661524972, |
| "learning_rate": 6.904927815239972e-05, |
| "loss": 0.4014, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.581641659311562, |
| "grad_norm": 0.4729958849916794, |
| "learning_rate": 6.877359201354606e-05, |
| "loss": 0.4014, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.5957634598411299, |
| "grad_norm": 0.3117935062342313, |
| "learning_rate": 6.84950438780538e-05, |
| "loss": 0.4021, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.6098852603706972, |
| "grad_norm": 0.29707807435124145, |
| "learning_rate": 6.821366145201636e-05, |
| "loss": 0.4003, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.6240070609002648, |
| "grad_norm": 0.43753418225532925, |
| "learning_rate": 6.792947272344292e-05, |
| "loss": 0.3992, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.6381288614298324, |
| "grad_norm": 0.27791625901461003, |
| "learning_rate": 6.76425059594746e-05, |
| "loss": 0.3982, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.6522506619593997, |
| "grad_norm": 0.2525501356536547, |
| "learning_rate": 6.73527897035728e-05, |
| "loss": 0.4053, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.6663724624889673, |
| "grad_norm": 0.3669470139311434, |
| "learning_rate": 6.706035277268022e-05, |
| "loss": 0.4024, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.6804942630185349, |
| "grad_norm": 0.30825865476024705, |
| "learning_rate": 6.676522425435433e-05, |
| "loss": 0.3945, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.6946160635481022, |
| "grad_norm": 0.28018108144253323, |
| "learning_rate": 6.646743350387438e-05, |
| "loss": 0.3984, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.70873786407767, |
| "grad_norm": 0.30987982118204843, |
| "learning_rate": 6.616701014132138e-05, |
| "loss": 0.4021, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.7228596646072374, |
| "grad_norm": 0.3633571089136772, |
| "learning_rate": 6.586398404863198e-05, |
| "loss": 0.4026, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.736981465136805, |
| "grad_norm": 0.36013913213670684, |
| "learning_rate": 6.555838536662624e-05, |
| "loss": 0.3925, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.7511032656663725, |
| "grad_norm": 0.36709693358558493, |
| "learning_rate": 6.525024449200956e-05, |
| "loss": 0.3976, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.7652250661959399, |
| "grad_norm": 0.44695442666677676, |
| "learning_rate": 6.493959207434934e-05, |
| "loss": 0.3982, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.7793468667255075, |
| "grad_norm": 0.4500722428050271, |
| "learning_rate": 6.462645901302633e-05, |
| "loss": 0.3947, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.793468667255075, |
| "grad_norm": 0.39821702121821073, |
| "learning_rate": 6.431087645416121e-05, |
| "loss": 0.4015, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.8075904677846424, |
| "grad_norm": 0.42798393839154475, |
| "learning_rate": 6.399287578751656e-05, |
| "loss": 0.3959, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.8217122683142102, |
| "grad_norm": 0.4978207058435827, |
| "learning_rate": 6.367248864337471e-05, |
| "loss": 0.3975, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.8358340688437775, |
| "grad_norm": 0.4727933665511357, |
| "learning_rate": 6.334974688939161e-05, |
| "loss": 0.3961, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.849955869373345, |
| "grad_norm": 0.30157997491072186, |
| "learning_rate": 6.302468262742695e-05, |
| "loss": 0.3923, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.8640776699029127, |
| "grad_norm": 0.30111696128126747, |
| "learning_rate": 6.269732819035128e-05, |
| "loss": 0.3895, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.87819947043248, |
| "grad_norm": 0.33866239621320493, |
| "learning_rate": 6.236771613882987e-05, |
| "loss": 0.3933, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.8923212709620476, |
| "grad_norm": 0.2963866045397337, |
| "learning_rate": 6.20358792580841e-05, |
| "loss": 0.3865, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.9064430714916152, |
| "grad_norm": 0.2821832464959724, |
| "learning_rate": 6.170185055463039e-05, |
| "loss": 0.3985, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.9205648720211828, |
| "grad_norm": 0.26513081199542754, |
| "learning_rate": 6.136566325299715e-05, |
| "loss": 0.3972, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.9346866725507503, |
| "grad_norm": 0.25040847849987535, |
| "learning_rate": 6.102735079242019e-05, |
| "loss": 0.398, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.9488084730803177, |
| "grad_norm": 0.303971008854815, |
| "learning_rate": 6.068694682351651e-05, |
| "loss": 0.3957, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.9629302736098853, |
| "grad_norm": 0.2610849344447032, |
| "learning_rate": 6.0344485204937274e-05, |
| "loss": 0.3953, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.9770520741394528, |
| "grad_norm": 0.24540141466965165, |
| "learning_rate": 6.000000000000001e-05, |
| "loss": 0.3955, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.9911738746690202, |
| "grad_norm": 0.2807585102662493, |
| "learning_rate": 5.965352547330046e-05, |
| "loss": 0.4096, |
| "step": 141 |
| }, |
| { |
| "epoch": 2.005295675198588, |
| "grad_norm": 0.4269953277008037, |
| "learning_rate": 5.930509608730444e-05, |
| "loss": 0.4441, |
| "step": 142 |
| }, |
| { |
| "epoch": 2.0194174757281553, |
| "grad_norm": 0.5672907609303462, |
| "learning_rate": 5.895474649891995e-05, |
| "loss": 0.3728, |
| "step": 143 |
| }, |
| { |
| "epoch": 2.0335392762577227, |
| "grad_norm": 0.7266748405757633, |
| "learning_rate": 5.860251155605003e-05, |
| "loss": 0.3745, |
| "step": 144 |
| }, |
| { |
| "epoch": 2.0476610767872905, |
| "grad_norm": 1.0069160934332146, |
| "learning_rate": 5.824842629412653e-05, |
| "loss": 0.3832, |
| "step": 145 |
| }, |
| { |
| "epoch": 2.061782877316858, |
| "grad_norm": 0.9424187541004289, |
| "learning_rate": 5.7892525932625305e-05, |
| "loss": 0.3779, |
| "step": 146 |
| }, |
| { |
| "epoch": 2.0759046778464256, |
| "grad_norm": 0.42574191446629944, |
| "learning_rate": 5.75348458715631e-05, |
| "loss": 0.3718, |
| "step": 147 |
| }, |
| { |
| "epoch": 2.090026478375993, |
| "grad_norm": 0.7408316783846461, |
| "learning_rate": 5.7175421687976374e-05, |
| "loss": 0.3699, |
| "step": 148 |
| }, |
| { |
| "epoch": 2.1041482789055603, |
| "grad_norm": 0.6750908749341442, |
| "learning_rate": 5.681428913238263e-05, |
| "loss": 0.367, |
| "step": 149 |
| }, |
| { |
| "epoch": 2.118270079435128, |
| "grad_norm": 0.486610272879909, |
| "learning_rate": 5.645148412522447e-05, |
| "loss": 0.3752, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.1323918799646955, |
| "grad_norm": 0.5306866815139071, |
| "learning_rate": 5.60870427532967e-05, |
| "loss": 0.3657, |
| "step": 151 |
| }, |
| { |
| "epoch": 2.146513680494263, |
| "grad_norm": 0.4884339447717486, |
| "learning_rate": 5.572100126615695e-05, |
| "loss": 0.3701, |
| "step": 152 |
| }, |
| { |
| "epoch": 2.1606354810238306, |
| "grad_norm": 0.39396923901380754, |
| "learning_rate": 5.535339607252003e-05, |
| "loss": 0.364, |
| "step": 153 |
| }, |
| { |
| "epoch": 2.174757281553398, |
| "grad_norm": 0.3784748162116266, |
| "learning_rate": 5.4984263736636494e-05, |
| "loss": 0.3641, |
| "step": 154 |
| }, |
| { |
| "epoch": 2.1888790820829658, |
| "grad_norm": 0.38537106208995364, |
| "learning_rate": 5.461364097465581e-05, |
| "loss": 0.3634, |
| "step": 155 |
| }, |
| { |
| "epoch": 2.203000882612533, |
| "grad_norm": 0.33639666599879814, |
| "learning_rate": 5.424156465097428e-05, |
| "loss": 0.3676, |
| "step": 156 |
| }, |
| { |
| "epoch": 2.2171226831421005, |
| "grad_norm": 0.3286791724075738, |
| "learning_rate": 5.38680717745683e-05, |
| "loss": 0.3649, |
| "step": 157 |
| }, |
| { |
| "epoch": 2.2312444836716683, |
| "grad_norm": 0.3129994921836922, |
| "learning_rate": 5.349319949531321e-05, |
| "loss": 0.3646, |
| "step": 158 |
| }, |
| { |
| "epoch": 2.2453662842012356, |
| "grad_norm": 0.3031016329231297, |
| "learning_rate": 5.3116985100288185e-05, |
| "loss": 0.3682, |
| "step": 159 |
| }, |
| { |
| "epoch": 2.259488084730803, |
| "grad_norm": 0.27541410223019297, |
| "learning_rate": 5.2739466010067385e-05, |
| "loss": 0.3606, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.2736098852603708, |
| "grad_norm": 0.36257443661095795, |
| "learning_rate": 5.23606797749979e-05, |
| "loss": 0.3638, |
| "step": 161 |
| }, |
| { |
| "epoch": 2.287731685789938, |
| "grad_norm": 0.2567579985831816, |
| "learning_rate": 5.1980664071464776e-05, |
| "loss": 0.3667, |
| "step": 162 |
| }, |
| { |
| "epoch": 2.301853486319506, |
| "grad_norm": 0.31190867351244567, |
| "learning_rate": 5.159945669814345e-05, |
| "loss": 0.3696, |
| "step": 163 |
| }, |
| { |
| "epoch": 2.3159752868490733, |
| "grad_norm": 0.2937382011800516, |
| "learning_rate": 5.121709557224011e-05, |
| "loss": 0.3606, |
| "step": 164 |
| }, |
| { |
| "epoch": 2.3300970873786406, |
| "grad_norm": 0.2256249267158452, |
| "learning_rate": 5.0833618725720214e-05, |
| "loss": 0.365, |
| "step": 165 |
| }, |
| { |
| "epoch": 2.3442188879082084, |
| "grad_norm": 0.298331814145165, |
| "learning_rate": 5.044906430152554e-05, |
| "loss": 0.3667, |
| "step": 166 |
| }, |
| { |
| "epoch": 2.358340688437776, |
| "grad_norm": 0.18895739371171252, |
| "learning_rate": 5.006347054978035e-05, |
| "loss": 0.3699, |
| "step": 167 |
| }, |
| { |
| "epoch": 2.3724624889673436, |
| "grad_norm": 0.25034317840687215, |
| "learning_rate": 4.967687582398671e-05, |
| "loss": 0.3587, |
| "step": 168 |
| }, |
| { |
| "epoch": 2.386584289496911, |
| "grad_norm": 0.17907966208059622, |
| "learning_rate": 4.9289318577209706e-05, |
| "loss": 0.3636, |
| "step": 169 |
| }, |
| { |
| "epoch": 2.4007060900264783, |
| "grad_norm": 0.21210095036882018, |
| "learning_rate": 4.890083735825258e-05, |
| "loss": 0.3605, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.414827890556046, |
| "grad_norm": 0.16489305774518265, |
| "learning_rate": 4.851147080782249e-05, |
| "loss": 0.3648, |
| "step": 171 |
| }, |
| { |
| "epoch": 2.4289496910856134, |
| "grad_norm": 0.19143993377462817, |
| "learning_rate": 4.812125765468705e-05, |
| "loss": 0.3606, |
| "step": 172 |
| }, |
| { |
| "epoch": 2.443071491615181, |
| "grad_norm": 0.17804983590295367, |
| "learning_rate": 4.773023671182213e-05, |
| "loss": 0.3637, |
| "step": 173 |
| }, |
| { |
| "epoch": 2.4571932921447486, |
| "grad_norm": 0.16707259472270428, |
| "learning_rate": 4.73384468725513e-05, |
| "loss": 0.3636, |
| "step": 174 |
| }, |
| { |
| "epoch": 2.471315092674316, |
| "grad_norm": 0.17481885632199456, |
| "learning_rate": 4.694592710667723e-05, |
| "loss": 0.3645, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.4854368932038833, |
| "grad_norm": 0.1681053608116463, |
| "learning_rate": 4.6552716456605514e-05, |
| "loss": 0.3605, |
| "step": 176 |
| }, |
| { |
| "epoch": 2.499558693733451, |
| "grad_norm": 0.14964611415536702, |
| "learning_rate": 4.615885403346134e-05, |
| "loss": 0.3562, |
| "step": 177 |
| }, |
| { |
| "epoch": 2.5136804942630184, |
| "grad_norm": 0.14164675176141614, |
| "learning_rate": 4.576437901319921e-05, |
| "loss": 0.3636, |
| "step": 178 |
| }, |
| { |
| "epoch": 2.5278022947925862, |
| "grad_norm": 0.16548274190466053, |
| "learning_rate": 4.5369330632706223e-05, |
| "loss": 0.3648, |
| "step": 179 |
| }, |
| { |
| "epoch": 2.5419240953221536, |
| "grad_norm": 0.15269683467677936, |
| "learning_rate": 4.4973748185899416e-05, |
| "loss": 0.3612, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.556045895851721, |
| "grad_norm": 0.16869434151649507, |
| "learning_rate": 4.457767101981728e-05, |
| "loss": 0.3677, |
| "step": 181 |
| }, |
| { |
| "epoch": 2.5701676963812887, |
| "grad_norm": 0.13337265767063033, |
| "learning_rate": 4.418113853070614e-05, |
| "loss": 0.3626, |
| "step": 182 |
| }, |
| { |
| "epoch": 2.584289496910856, |
| "grad_norm": 0.14682144236789746, |
| "learning_rate": 4.378419016010149e-05, |
| "loss": 0.364, |
| "step": 183 |
| }, |
| { |
| "epoch": 2.598411297440424, |
| "grad_norm": 0.150937900490833, |
| "learning_rate": 4.338686539090493e-05, |
| "loss": 0.3615, |
| "step": 184 |
| }, |
| { |
| "epoch": 2.6125330979699912, |
| "grad_norm": 0.1341377364551312, |
| "learning_rate": 4.298920374345698e-05, |
| "loss": 0.3596, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.6266548984995586, |
| "grad_norm": 0.15572962430762588, |
| "learning_rate": 4.259124477160607e-05, |
| "loss": 0.3625, |
| "step": 186 |
| }, |
| { |
| "epoch": 2.6407766990291264, |
| "grad_norm": 0.1475404012486826, |
| "learning_rate": 4.219302805877441e-05, |
| "loss": 0.3617, |
| "step": 187 |
| }, |
| { |
| "epoch": 2.6548984995586937, |
| "grad_norm": 0.1781262720167099, |
| "learning_rate": 4.17945932140206e-05, |
| "loss": 0.3666, |
| "step": 188 |
| }, |
| { |
| "epoch": 2.6690203000882615, |
| "grad_norm": 0.13824587532461255, |
| "learning_rate": 4.139597986810005e-05, |
| "loss": 0.3629, |
| "step": 189 |
| }, |
| { |
| "epoch": 2.683142100617829, |
| "grad_norm": 0.15963593698467365, |
| "learning_rate": 4.0997227669522924e-05, |
| "loss": 0.3628, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.6972639011473962, |
| "grad_norm": 0.12511715922220792, |
| "learning_rate": 4.059837628061055e-05, |
| "loss": 0.3638, |
| "step": 191 |
| }, |
| { |
| "epoch": 2.7113857016769636, |
| "grad_norm": 0.15752313446706914, |
| "learning_rate": 4.019946537355033e-05, |
| "loss": 0.3614, |
| "step": 192 |
| }, |
| { |
| "epoch": 2.7255075022065314, |
| "grad_norm": 0.13647413322377422, |
| "learning_rate": 3.9800534626449683e-05, |
| "loss": 0.3634, |
| "step": 193 |
| }, |
| { |
| "epoch": 2.7396293027360987, |
| "grad_norm": 0.13525074863232164, |
| "learning_rate": 3.940162371938947e-05, |
| "loss": 0.3587, |
| "step": 194 |
| }, |
| { |
| "epoch": 2.7537511032656665, |
| "grad_norm": 0.13297285710552217, |
| "learning_rate": 3.9002772330477096e-05, |
| "loss": 0.3599, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.767872903795234, |
| "grad_norm": 0.14225004712058384, |
| "learning_rate": 3.860402013189998e-05, |
| "loss": 0.3575, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.7819947043248012, |
| "grad_norm": 0.13373630438071715, |
| "learning_rate": 3.820540678597942e-05, |
| "loss": 0.3648, |
| "step": 197 |
| }, |
| { |
| "epoch": 2.796116504854369, |
| "grad_norm": 0.12615478953418785, |
| "learning_rate": 3.78069719412256e-05, |
| "loss": 0.3609, |
| "step": 198 |
| }, |
| { |
| "epoch": 2.8102383053839364, |
| "grad_norm": 0.12669967225071216, |
| "learning_rate": 3.740875522839393e-05, |
| "loss": 0.3608, |
| "step": 199 |
| }, |
| { |
| "epoch": 2.824360105913504, |
| "grad_norm": 0.13635382545910668, |
| "learning_rate": 3.7010796256543034e-05, |
| "loss": 0.3549, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.8384819064430715, |
| "grad_norm": 0.11546629160995592, |
| "learning_rate": 3.661313460909507e-05, |
| "loss": 0.3593, |
| "step": 201 |
| }, |
| { |
| "epoch": 2.852603706972639, |
| "grad_norm": 0.12139128794186867, |
| "learning_rate": 3.621580983989852e-05, |
| "loss": 0.3608, |
| "step": 202 |
| }, |
| { |
| "epoch": 2.8667255075022067, |
| "grad_norm": 0.12319344865206981, |
| "learning_rate": 3.581886146929387e-05, |
| "loss": 0.3605, |
| "step": 203 |
| }, |
| { |
| "epoch": 2.880847308031774, |
| "grad_norm": 0.14742473593815408, |
| "learning_rate": 3.542232898018273e-05, |
| "loss": 0.3582, |
| "step": 204 |
| }, |
| { |
| "epoch": 2.894969108561342, |
| "grad_norm": 0.11086460953888361, |
| "learning_rate": 3.5026251814100604e-05, |
| "loss": 0.359, |
| "step": 205 |
| }, |
| { |
| "epoch": 2.909090909090909, |
| "grad_norm": 0.13533789741325936, |
| "learning_rate": 3.4630669367293797e-05, |
| "loss": 0.3562, |
| "step": 206 |
| }, |
| { |
| "epoch": 2.9232127096204765, |
| "grad_norm": 0.11573276006772669, |
| "learning_rate": 3.4235620986800806e-05, |
| "loss": 0.3641, |
| "step": 207 |
| }, |
| { |
| "epoch": 2.937334510150044, |
| "grad_norm": 0.12838446326005826, |
| "learning_rate": 3.384114596653866e-05, |
| "loss": 0.361, |
| "step": 208 |
| }, |
| { |
| "epoch": 2.9514563106796117, |
| "grad_norm": 0.12304575149956651, |
| "learning_rate": 3.344728354339449e-05, |
| "loss": 0.3586, |
| "step": 209 |
| }, |
| { |
| "epoch": 2.965578111209179, |
| "grad_norm": 0.12773291501034634, |
| "learning_rate": 3.305407289332279e-05, |
| "loss": 0.3559, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.979699911738747, |
| "grad_norm": 0.16335068209235123, |
| "learning_rate": 3.266155312744871e-05, |
| "loss": 0.3631, |
| "step": 211 |
| }, |
| { |
| "epoch": 2.993821712268314, |
| "grad_norm": 0.1186978138033666, |
| "learning_rate": 3.226976328817788e-05, |
| "loss": 0.3927, |
| "step": 212 |
| }, |
| { |
| "epoch": 3.0079435127978815, |
| "grad_norm": 0.16211984652497452, |
| "learning_rate": 3.187874234531296e-05, |
| "loss": 0.3822, |
| "step": 213 |
| }, |
| { |
| "epoch": 3.0220653133274493, |
| "grad_norm": 0.14214772364476422, |
| "learning_rate": 3.1488529192177526e-05, |
| "loss": 0.3393, |
| "step": 214 |
| }, |
| { |
| "epoch": 3.0361871138570167, |
| "grad_norm": 0.13255124874063956, |
| "learning_rate": 3.109916264174743e-05, |
| "loss": 0.3373, |
| "step": 215 |
| }, |
| { |
| "epoch": 3.0503089143865845, |
| "grad_norm": 0.16606000923059963, |
| "learning_rate": 3.071068142279031e-05, |
| "loss": 0.3371, |
| "step": 216 |
| }, |
| { |
| "epoch": 3.064430714916152, |
| "grad_norm": 0.14657630327267304, |
| "learning_rate": 3.0323124176013297e-05, |
| "loss": 0.3355, |
| "step": 217 |
| }, |
| { |
| "epoch": 3.078552515445719, |
| "grad_norm": 0.1341605905929287, |
| "learning_rate": 2.993652945021966e-05, |
| "loss": 0.3377, |
| "step": 218 |
| }, |
| { |
| "epoch": 3.092674315975287, |
| "grad_norm": 0.14490108611743277, |
| "learning_rate": 2.955093569847447e-05, |
| "loss": 0.3366, |
| "step": 219 |
| }, |
| { |
| "epoch": 3.1067961165048543, |
| "grad_norm": 0.13919821523407064, |
| "learning_rate": 2.9166381274279803e-05, |
| "loss": 0.3312, |
| "step": 220 |
| }, |
| { |
| "epoch": 3.120917917034422, |
| "grad_norm": 0.16300975058477254, |
| "learning_rate": 2.8782904427759898e-05, |
| "loss": 0.3311, |
| "step": 221 |
| }, |
| { |
| "epoch": 3.1350397175639895, |
| "grad_norm": 0.1183225077661534, |
| "learning_rate": 2.8400543301856553e-05, |
| "loss": 0.3282, |
| "step": 222 |
| }, |
| { |
| "epoch": 3.149161518093557, |
| "grad_norm": 0.14092204872317698, |
| "learning_rate": 2.8019335928535234e-05, |
| "loss": 0.3297, |
| "step": 223 |
| }, |
| { |
| "epoch": 3.1632833186231246, |
| "grad_norm": 0.1282390396455681, |
| "learning_rate": 2.7639320225002108e-05, |
| "loss": 0.327, |
| "step": 224 |
| }, |
| { |
| "epoch": 3.177405119152692, |
| "grad_norm": 0.12936573725572997, |
| "learning_rate": 2.7260533989932628e-05, |
| "loss": 0.3346, |
| "step": 225 |
| }, |
| { |
| "epoch": 3.1915269196822593, |
| "grad_norm": 0.11727309920196596, |
| "learning_rate": 2.688301489971183e-05, |
| "loss": 0.3271, |
| "step": 226 |
| }, |
| { |
| "epoch": 3.205648720211827, |
| "grad_norm": 0.12274146196879084, |
| "learning_rate": 2.6506800504686806e-05, |
| "loss": 0.328, |
| "step": 227 |
| }, |
| { |
| "epoch": 3.2197705207413945, |
| "grad_norm": 0.11029811005681434, |
| "learning_rate": 2.6131928225431713e-05, |
| "loss": 0.33, |
| "step": 228 |
| }, |
| { |
| "epoch": 3.233892321270962, |
| "grad_norm": 0.12463320131443856, |
| "learning_rate": 2.575843534902573e-05, |
| "loss": 0.3358, |
| "step": 229 |
| }, |
| { |
| "epoch": 3.2480141218005296, |
| "grad_norm": 0.11256203223325899, |
| "learning_rate": 2.53863590253442e-05, |
| "loss": 0.3364, |
| "step": 230 |
| }, |
| { |
| "epoch": 3.262135922330097, |
| "grad_norm": 0.10841743259905046, |
| "learning_rate": 2.501573626336352e-05, |
| "loss": 0.3337, |
| "step": 231 |
| }, |
| { |
| "epoch": 3.2762577228596648, |
| "grad_norm": 0.11593566286716334, |
| "learning_rate": 2.464660392747999e-05, |
| "loss": 0.3301, |
| "step": 232 |
| }, |
| { |
| "epoch": 3.290379523389232, |
| "grad_norm": 0.10969283000201786, |
| "learning_rate": 2.427899873384306e-05, |
| "loss": 0.332, |
| "step": 233 |
| }, |
| { |
| "epoch": 3.3045013239187995, |
| "grad_norm": 0.12033857141829916, |
| "learning_rate": 2.3912957246703305e-05, |
| "loss": 0.3377, |
| "step": 234 |
| }, |
| { |
| "epoch": 3.3186231244483673, |
| "grad_norm": 0.10210001952439796, |
| "learning_rate": 2.3548515874775547e-05, |
| "loss": 0.3297, |
| "step": 235 |
| }, |
| { |
| "epoch": 3.3327449249779346, |
| "grad_norm": 0.12241287674636975, |
| "learning_rate": 2.3185710867617387e-05, |
| "loss": 0.3361, |
| "step": 236 |
| }, |
| { |
| "epoch": 3.3468667255075024, |
| "grad_norm": 0.10969299118083352, |
| "learning_rate": 2.2824578312023632e-05, |
| "loss": 0.3322, |
| "step": 237 |
| }, |
| { |
| "epoch": 3.3609885260370698, |
| "grad_norm": 0.12151530040465547, |
| "learning_rate": 2.24651541284369e-05, |
| "loss": 0.3361, |
| "step": 238 |
| }, |
| { |
| "epoch": 3.375110326566637, |
| "grad_norm": 0.10631863902215113, |
| "learning_rate": 2.210747406737469e-05, |
| "loss": 0.3344, |
| "step": 239 |
| }, |
| { |
| "epoch": 3.389232127096205, |
| "grad_norm": 0.11983276963310185, |
| "learning_rate": 2.175157370587348e-05, |
| "loss": 0.3324, |
| "step": 240 |
| }, |
| { |
| "epoch": 3.4033539276257723, |
| "grad_norm": 0.10203118790788067, |
| "learning_rate": 2.1397488443949985e-05, |
| "loss": 0.3366, |
| "step": 241 |
| }, |
| { |
| "epoch": 3.4174757281553396, |
| "grad_norm": 0.11460733945580791, |
| "learning_rate": 2.1045253501080058e-05, |
| "loss": 0.3335, |
| "step": 242 |
| }, |
| { |
| "epoch": 3.4315975286849074, |
| "grad_norm": 0.10361959122829918, |
| "learning_rate": 2.0694903912695574e-05, |
| "loss": 0.3342, |
| "step": 243 |
| }, |
| { |
| "epoch": 3.4457193292144748, |
| "grad_norm": 0.10602009006473866, |
| "learning_rate": 2.0346474526699552e-05, |
| "loss": 0.3343, |
| "step": 244 |
| }, |
| { |
| "epoch": 3.459841129744042, |
| "grad_norm": 0.0981614565374733, |
| "learning_rate": 2.0000000000000012e-05, |
| "loss": 0.3342, |
| "step": 245 |
| }, |
| { |
| "epoch": 3.47396293027361, |
| "grad_norm": 0.10563881070295801, |
| "learning_rate": 1.9655514795062746e-05, |
| "loss": 0.3317, |
| "step": 246 |
| }, |
| { |
| "epoch": 3.4880847308031773, |
| "grad_norm": 0.0982393867459211, |
| "learning_rate": 1.931305317648349e-05, |
| "loss": 0.336, |
| "step": 247 |
| }, |
| { |
| "epoch": 3.502206531332745, |
| "grad_norm": 0.10341107342114168, |
| "learning_rate": 1.897264920757981e-05, |
| "loss": 0.3329, |
| "step": 248 |
| }, |
| { |
| "epoch": 3.5163283318623124, |
| "grad_norm": 0.1009205150822494, |
| "learning_rate": 1.8634336747002853e-05, |
| "loss": 0.3363, |
| "step": 249 |
| }, |
| { |
| "epoch": 3.5304501323918798, |
| "grad_norm": 0.09562831286129422, |
| "learning_rate": 1.829814944536963e-05, |
| "loss": 0.3366, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.5445719329214476, |
| "grad_norm": 0.10055162803558056, |
| "learning_rate": 1.7964120741915905e-05, |
| "loss": 0.3359, |
| "step": 251 |
| }, |
| { |
| "epoch": 3.558693733451015, |
| "grad_norm": 0.10362087580690618, |
| "learning_rate": 1.7632283861170135e-05, |
| "loss": 0.33, |
| "step": 252 |
| }, |
| { |
| "epoch": 3.5728155339805827, |
| "grad_norm": 0.09578324331311534, |
| "learning_rate": 1.7302671809648735e-05, |
| "loss": 0.3336, |
| "step": 253 |
| }, |
| { |
| "epoch": 3.58693733451015, |
| "grad_norm": 0.1021943484963981, |
| "learning_rate": 1.6975317372573066e-05, |
| "loss": 0.334, |
| "step": 254 |
| }, |
| { |
| "epoch": 3.6010591350397174, |
| "grad_norm": 0.10104477227737499, |
| "learning_rate": 1.6650253110608415e-05, |
| "loss": 0.3352, |
| "step": 255 |
| }, |
| { |
| "epoch": 3.615180935569285, |
| "grad_norm": 0.09719144111824624, |
| "learning_rate": 1.6327511356625302e-05, |
| "loss": 0.3339, |
| "step": 256 |
| }, |
| { |
| "epoch": 3.6293027360988526, |
| "grad_norm": 0.10082549447043057, |
| "learning_rate": 1.6007124212483453e-05, |
| "loss": 0.3303, |
| "step": 257 |
| }, |
| { |
| "epoch": 3.6434245366284204, |
| "grad_norm": 0.09855344501708733, |
| "learning_rate": 1.5689123545838804e-05, |
| "loss": 0.3319, |
| "step": 258 |
| }, |
| { |
| "epoch": 3.6575463371579877, |
| "grad_norm": 0.10038693196972406, |
| "learning_rate": 1.537354098697367e-05, |
| "loss": 0.3285, |
| "step": 259 |
| }, |
| { |
| "epoch": 3.671668137687555, |
| "grad_norm": 0.10993218050906065, |
| "learning_rate": 1.5060407925650662e-05, |
| "loss": 0.3346, |
| "step": 260 |
| }, |
| { |
| "epoch": 3.6857899382171224, |
| "grad_norm": 0.09881058692426582, |
| "learning_rate": 1.4749755507990449e-05, |
| "loss": 0.3265, |
| "step": 261 |
| }, |
| { |
| "epoch": 3.69991173874669, |
| "grad_norm": 0.11110424733317653, |
| "learning_rate": 1.4441614633373773e-05, |
| "loss": 0.3367, |
| "step": 262 |
| }, |
| { |
| "epoch": 3.7140335392762576, |
| "grad_norm": 0.09507466207790345, |
| "learning_rate": 1.413601595136802e-05, |
| "loss": 0.335, |
| "step": 263 |
| }, |
| { |
| "epoch": 3.7281553398058254, |
| "grad_norm": 0.10341229060389236, |
| "learning_rate": 1.383298985867863e-05, |
| "loss": 0.3324, |
| "step": 264 |
| }, |
| { |
| "epoch": 3.7422771403353927, |
| "grad_norm": 0.09734360531860331, |
| "learning_rate": 1.3532566496125634e-05, |
| "loss": 0.3313, |
| "step": 265 |
| }, |
| { |
| "epoch": 3.75639894086496, |
| "grad_norm": 0.09174570798780135, |
| "learning_rate": 1.3234775745645684e-05, |
| "loss": 0.3351, |
| "step": 266 |
| }, |
| { |
| "epoch": 3.770520741394528, |
| "grad_norm": 0.10147835781586892, |
| "learning_rate": 1.2939647227319791e-05, |
| "loss": 0.3353, |
| "step": 267 |
| }, |
| { |
| "epoch": 3.784642541924095, |
| "grad_norm": 0.09808246222031777, |
| "learning_rate": 1.2647210296427197e-05, |
| "loss": 0.3323, |
| "step": 268 |
| }, |
| { |
| "epoch": 3.798764342453663, |
| "grad_norm": 0.09735163985861015, |
| "learning_rate": 1.2357494040525416e-05, |
| "loss": 0.3391, |
| "step": 269 |
| }, |
| { |
| "epoch": 3.8128861429832304, |
| "grad_norm": 0.08930562493255255, |
| "learning_rate": 1.2070527276557092e-05, |
| "loss": 0.3327, |
| "step": 270 |
| }, |
| { |
| "epoch": 3.8270079435127977, |
| "grad_norm": 0.09744814905553326, |
| "learning_rate": 1.178633854798365e-05, |
| "loss": 0.33, |
| "step": 271 |
| }, |
| { |
| "epoch": 3.8411297440423655, |
| "grad_norm": 0.09183836496663382, |
| "learning_rate": 1.1504956121946216e-05, |
| "loss": 0.3317, |
| "step": 272 |
| }, |
| { |
| "epoch": 3.855251544571933, |
| "grad_norm": 0.08801876422756064, |
| "learning_rate": 1.1226407986453963e-05, |
| "loss": 0.3294, |
| "step": 273 |
| }, |
| { |
| "epoch": 3.8693733451015007, |
| "grad_norm": 0.08798928229950856, |
| "learning_rate": 1.0950721847600282e-05, |
| "loss": 0.3282, |
| "step": 274 |
| }, |
| { |
| "epoch": 3.883495145631068, |
| "grad_norm": 0.09000845113363774, |
| "learning_rate": 1.0677925126806956e-05, |
| "loss": 0.335, |
| "step": 275 |
| }, |
| { |
| "epoch": 3.8976169461606354, |
| "grad_norm": 0.09609952332604478, |
| "learning_rate": 1.040804495809665e-05, |
| "loss": 0.3352, |
| "step": 276 |
| }, |
| { |
| "epoch": 3.911738746690203, |
| "grad_norm": 0.09426777621829556, |
| "learning_rate": 1.0141108185393995e-05, |
| "loss": 0.3307, |
| "step": 277 |
| }, |
| { |
| "epoch": 3.9258605472197705, |
| "grad_norm": 0.08749576305220681, |
| "learning_rate": 9.877141359855567e-06, |
| "loss": 0.3316, |
| "step": 278 |
| }, |
| { |
| "epoch": 3.9399823477493383, |
| "grad_norm": 0.08573388419725536, |
| "learning_rate": 9.616170737228882e-06, |
| "loss": 0.3301, |
| "step": 279 |
| }, |
| { |
| "epoch": 3.9541041482789057, |
| "grad_norm": 0.08677743094561904, |
| "learning_rate": 9.358222275240884e-06, |
| "loss": 0.3309, |
| "step": 280 |
| }, |
| { |
| "epoch": 3.968225948808473, |
| "grad_norm": 0.08456912932018501, |
| "learning_rate": 9.103321631016024e-06, |
| "loss": 0.3294, |
| "step": 281 |
| }, |
| { |
| "epoch": 3.9823477493380404, |
| "grad_norm": 0.0892840459688823, |
| "learning_rate": 8.851494158524242e-06, |
| "loss": 0.3299, |
| "step": 282 |
| }, |
| { |
| "epoch": 3.996469549867608, |
| "grad_norm": 0.09785834932292316, |
| "learning_rate": 8.602764906059109e-06, |
| "loss": 0.3734, |
| "step": 283 |
| }, |
| { |
| "epoch": 4.010591350397176, |
| "grad_norm": 0.1159182382828669, |
| "learning_rate": 8.35715861374636e-06, |
| "loss": 0.3432, |
| "step": 284 |
| }, |
| { |
| "epoch": 4.024713150926743, |
| "grad_norm": 0.11348869033645836, |
| "learning_rate": 8.114699711083113e-06, |
| "loss": 0.3187, |
| "step": 285 |
| }, |
| { |
| "epoch": 4.038834951456311, |
| "grad_norm": 0.09626843456466473, |
| "learning_rate": 7.875412314507942e-06, |
| "loss": 0.3213, |
| "step": 286 |
| }, |
| { |
| "epoch": 4.052956751985878, |
| "grad_norm": 0.0918806636447836, |
| "learning_rate": 7.639320225002106e-06, |
| "loss": 0.3169, |
| "step": 287 |
| }, |
| { |
| "epoch": 4.067078552515445, |
| "grad_norm": 0.09514043448978982, |
| "learning_rate": 7.406446925722211e-06, |
| "loss": 0.3148, |
| "step": 288 |
| }, |
| { |
| "epoch": 4.081200353045014, |
| "grad_norm": 0.10508295602012874, |
| "learning_rate": 7.176815579664343e-06, |
| "loss": 0.3132, |
| "step": 289 |
| }, |
| { |
| "epoch": 4.095322153574581, |
| "grad_norm": 0.10091079365331981, |
| "learning_rate": 6.950449027360213e-06, |
| "loss": 0.3175, |
| "step": 290 |
| }, |
| { |
| "epoch": 4.109443954104148, |
| "grad_norm": 0.0973346460822993, |
| "learning_rate": 6.7273697846052515e-06, |
| "loss": 0.3184, |
| "step": 291 |
| }, |
| { |
| "epoch": 4.123565754633716, |
| "grad_norm": 0.09115379235697503, |
| "learning_rate": 6.507600040219073e-06, |
| "loss": 0.3164, |
| "step": 292 |
| }, |
| { |
| "epoch": 4.137687555163283, |
| "grad_norm": 0.08901902718597547, |
| "learning_rate": 6.291161653838434e-06, |
| "loss": 0.3177, |
| "step": 293 |
| }, |
| { |
| "epoch": 4.151809355692851, |
| "grad_norm": 0.09132299423316595, |
| "learning_rate": 6.078076153742962e-06, |
| "loss": 0.3131, |
| "step": 294 |
| }, |
| { |
| "epoch": 4.165931156222419, |
| "grad_norm": 0.09543903005749907, |
| "learning_rate": 5.868364734713776e-06, |
| "loss": 0.3142, |
| "step": 295 |
| }, |
| { |
| "epoch": 4.180052956751986, |
| "grad_norm": 0.09061531269851537, |
| "learning_rate": 5.662048255925357e-06, |
| "loss": 0.3204, |
| "step": 296 |
| }, |
| { |
| "epoch": 4.194174757281553, |
| "grad_norm": 0.08551951038992002, |
| "learning_rate": 5.459147238870768e-06, |
| "loss": 0.3158, |
| "step": 297 |
| }, |
| { |
| "epoch": 4.208296557811121, |
| "grad_norm": 0.08387425510980595, |
| "learning_rate": 5.259681865320447e-06, |
| "loss": 0.3194, |
| "step": 298 |
| }, |
| { |
| "epoch": 4.222418358340688, |
| "grad_norm": 0.0901228464398898, |
| "learning_rate": 5.063671975314814e-06, |
| "loss": 0.3163, |
| "step": 299 |
| }, |
| { |
| "epoch": 4.236540158870256, |
| "grad_norm": 0.08691256583540367, |
| "learning_rate": 4.871137065190854e-06, |
| "loss": 0.315, |
| "step": 300 |
| }, |
| { |
| "epoch": 4.250661959399824, |
| "grad_norm": 0.0878527835574059, |
| "learning_rate": 4.6820962856429205e-06, |
| "loss": 0.3176, |
| "step": 301 |
| }, |
| { |
| "epoch": 4.264783759929391, |
| "grad_norm": 0.0840437037057203, |
| "learning_rate": 4.496568439817836e-06, |
| "loss": 0.322, |
| "step": 302 |
| }, |
| { |
| "epoch": 4.278905560458958, |
| "grad_norm": 0.08904988122589128, |
| "learning_rate": 4.314571981444666e-06, |
| "loss": 0.311, |
| "step": 303 |
| }, |
| { |
| "epoch": 4.293027360988526, |
| "grad_norm": 0.08120215219780037, |
| "learning_rate": 4.136125012999168e-06, |
| "loss": 0.3203, |
| "step": 304 |
| }, |
| { |
| "epoch": 4.307149161518094, |
| "grad_norm": 0.08522052695009742, |
| "learning_rate": 3.961245283903239e-06, |
| "loss": 0.3161, |
| "step": 305 |
| }, |
| { |
| "epoch": 4.321270962047661, |
| "grad_norm": 0.08319753808748938, |
| "learning_rate": 3.7899501887594102e-06, |
| "loss": 0.315, |
| "step": 306 |
| }, |
| { |
| "epoch": 4.335392762577229, |
| "grad_norm": 0.08198211403858394, |
| "learning_rate": 3.622256765620713e-06, |
| "loss": 0.3165, |
| "step": 307 |
| }, |
| { |
| "epoch": 4.349514563106796, |
| "grad_norm": 0.07827444542073485, |
| "learning_rate": 3.458181694295961e-06, |
| "loss": 0.3114, |
| "step": 308 |
| }, |
| { |
| "epoch": 4.363636363636363, |
| "grad_norm": 0.07827005931051699, |
| "learning_rate": 3.297741294690644e-06, |
| "loss": 0.3125, |
| "step": 309 |
| }, |
| { |
| "epoch": 4.3777581641659316, |
| "grad_norm": 0.07833274350751808, |
| "learning_rate": 3.140951525183691e-06, |
| "loss": 0.3156, |
| "step": 310 |
| }, |
| { |
| "epoch": 4.391879964695499, |
| "grad_norm": 0.08055700180528477, |
| "learning_rate": 2.987827981040132e-06, |
| "loss": 0.3144, |
| "step": 311 |
| }, |
| { |
| "epoch": 4.406001765225066, |
| "grad_norm": 0.0799614180245514, |
| "learning_rate": 2.8383858928598963e-06, |
| "loss": 0.3157, |
| "step": 312 |
| }, |
| { |
| "epoch": 4.420123565754634, |
| "grad_norm": 0.0722165779006397, |
| "learning_rate": 2.692640125062895e-06, |
| "loss": 0.3116, |
| "step": 313 |
| }, |
| { |
| "epoch": 4.434245366284201, |
| "grad_norm": 0.07776220076295337, |
| "learning_rate": 2.550605174410512e-06, |
| "loss": 0.3206, |
| "step": 314 |
| }, |
| { |
| "epoch": 4.448367166813769, |
| "grad_norm": 0.07577160557474086, |
| "learning_rate": 2.4122951685636674e-06, |
| "loss": 0.3119, |
| "step": 315 |
| }, |
| { |
| "epoch": 4.4624889673433366, |
| "grad_norm": 0.07292199486310709, |
| "learning_rate": 2.2777238646775768e-06, |
| "loss": 0.314, |
| "step": 316 |
| }, |
| { |
| "epoch": 4.476610767872904, |
| "grad_norm": 0.07321270589774292, |
| "learning_rate": 2.14690464803343e-06, |
| "loss": 0.3116, |
| "step": 317 |
| }, |
| { |
| "epoch": 4.490732568402471, |
| "grad_norm": 0.07971761444372055, |
| "learning_rate": 2.0198505307069462e-06, |
| "loss": 0.3162, |
| "step": 318 |
| }, |
| { |
| "epoch": 4.504854368932039, |
| "grad_norm": 0.0823725656624792, |
| "learning_rate": 1.896574150274151e-06, |
| "loss": 0.318, |
| "step": 319 |
| }, |
| { |
| "epoch": 4.518976169461606, |
| "grad_norm": 0.07311612247681858, |
| "learning_rate": 1.7770877685543687e-06, |
| "loss": 0.3146, |
| "step": 320 |
| }, |
| { |
| "epoch": 4.533097969991174, |
| "grad_norm": 0.0754285797360244, |
| "learning_rate": 1.6614032703905714e-06, |
| "loss": 0.3188, |
| "step": 321 |
| }, |
| { |
| "epoch": 4.5472197705207416, |
| "grad_norm": 0.07192329712907819, |
| "learning_rate": 1.5495321624672443e-06, |
| "loss": 0.3117, |
| "step": 322 |
| }, |
| { |
| "epoch": 4.561341571050309, |
| "grad_norm": 0.07683729191513318, |
| "learning_rate": 1.4414855721658705e-06, |
| "loss": 0.3179, |
| "step": 323 |
| }, |
| { |
| "epoch": 4.575463371579876, |
| "grad_norm": 0.07466087193345237, |
| "learning_rate": 1.3372742464581134e-06, |
| "loss": 0.3169, |
| "step": 324 |
| }, |
| { |
| "epoch": 4.589585172109444, |
| "grad_norm": 0.07472750780066512, |
| "learning_rate": 1.2369085508368862e-06, |
| "loss": 0.313, |
| "step": 325 |
| }, |
| { |
| "epoch": 4.603706972639012, |
| "grad_norm": 0.07567268942020543, |
| "learning_rate": 1.1403984682852998e-06, |
| "loss": 0.3162, |
| "step": 326 |
| }, |
| { |
| "epoch": 4.617828773168579, |
| "grad_norm": 0.07193466653913613, |
| "learning_rate": 1.0477535982837473e-06, |
| "loss": 0.3169, |
| "step": 327 |
| }, |
| { |
| "epoch": 4.631950573698147, |
| "grad_norm": 0.07310364397796111, |
| "learning_rate": 9.589831558550222e-07, |
| "loss": 0.3147, |
| "step": 328 |
| }, |
| { |
| "epoch": 4.646072374227714, |
| "grad_norm": 0.07226831665121733, |
| "learning_rate": 8.740959706477725e-07, |
| "loss": 0.3155, |
| "step": 329 |
| }, |
| { |
| "epoch": 4.660194174757281, |
| "grad_norm": 0.07380784680617208, |
| "learning_rate": 7.93100486058247e-07, |
| "loss": 0.3172, |
| "step": 330 |
| }, |
| { |
| "epoch": 4.674315975286849, |
| "grad_norm": 0.07265097137199653, |
| "learning_rate": 7.160047583904473e-07, |
| "loss": 0.3123, |
| "step": 331 |
| }, |
| { |
| "epoch": 4.688437775816417, |
| "grad_norm": 0.07526606061681983, |
| "learning_rate": 6.428164560548134e-07, |
| "loss": 0.3126, |
| "step": 332 |
| }, |
| { |
| "epoch": 4.702559576345984, |
| "grad_norm": 0.07096951660387449, |
| "learning_rate": 5.735428588054825e-07, |
| "loss": 0.3091, |
| "step": 333 |
| }, |
| { |
| "epoch": 4.716681376875552, |
| "grad_norm": 0.07491929428893927, |
| "learning_rate": 5.081908570161753e-07, |
| "loss": 0.3168, |
| "step": 334 |
| }, |
| { |
| "epoch": 4.730803177405119, |
| "grad_norm": 0.07068035565889964, |
| "learning_rate": 4.467669509948591e-07, |
| "loss": 0.3168, |
| "step": 335 |
| }, |
| { |
| "epoch": 4.744924977934687, |
| "grad_norm": 0.07006153238881019, |
| "learning_rate": 3.8927725033718553e-07, |
| "loss": 0.3096, |
| "step": 336 |
| }, |
| { |
| "epoch": 4.7590467784642545, |
| "grad_norm": 0.07031296479074185, |
| "learning_rate": 3.3572747331878984e-07, |
| "loss": 0.3127, |
| "step": 337 |
| }, |
| { |
| "epoch": 4.773168578993822, |
| "grad_norm": 0.07086156685048181, |
| "learning_rate": 2.8612294632650586e-07, |
| "loss": 0.3165, |
| "step": 338 |
| }, |
| { |
| "epoch": 4.787290379523389, |
| "grad_norm": 0.07041702874195928, |
| "learning_rate": 2.404686033285897e-07, |
| "loss": 0.3211, |
| "step": 339 |
| }, |
| { |
| "epoch": 4.801412180052957, |
| "grad_norm": 0.07111545002538634, |
| "learning_rate": 1.9876898538394362e-07, |
| "loss": 0.3139, |
| "step": 340 |
| }, |
| { |
| "epoch": 4.815533980582524, |
| "grad_norm": 0.06964445264833816, |
| "learning_rate": 1.6102824019043728e-07, |
| "loss": 0.3119, |
| "step": 341 |
| }, |
| { |
| "epoch": 4.829655781112092, |
| "grad_norm": 0.07185826317569316, |
| "learning_rate": 1.2725012167236207e-07, |
| "loss": 0.3189, |
| "step": 342 |
| }, |
| { |
| "epoch": 4.8437775816416595, |
| "grad_norm": 0.07175971991165786, |
| "learning_rate": 9.74379896070321e-08, |
| "loss": 0.3144, |
| "step": 343 |
| }, |
| { |
| "epoch": 4.857899382171227, |
| "grad_norm": 0.07027377563502572, |
| "learning_rate": 7.159480929059381e-08, |
| "loss": 0.3208, |
| "step": 344 |
| }, |
| { |
| "epoch": 4.872021182700794, |
| "grad_norm": 0.07130198834034268, |
| "learning_rate": 4.9723151243106225e-08, |
| "loss": 0.3164, |
| "step": 345 |
| }, |
| { |
| "epoch": 4.886142983230362, |
| "grad_norm": 0.07512577557190175, |
| "learning_rate": 3.1825190952829986e-08, |
| "loss": 0.3183, |
| "step": 346 |
| }, |
| { |
| "epoch": 4.90026478375993, |
| "grad_norm": 0.0718819094759202, |
| "learning_rate": 1.7902708659867096e-08, |
| "loss": 0.3185, |
| "step": 347 |
| }, |
| { |
| "epoch": 4.914386584289497, |
| "grad_norm": 0.0706893833001464, |
| "learning_rate": 7.957089179058131e-09, |
| "loss": 0.3142, |
| "step": 348 |
| }, |
| { |
| "epoch": 4.9285083848190645, |
| "grad_norm": 0.07170028442056126, |
| "learning_rate": 1.9893217622790616e-09, |
| "loss": 0.3181, |
| "step": 349 |
| }, |
| { |
| "epoch": 4.942630185348632, |
| "grad_norm": 0.07142066838497432, |
| "learning_rate": 0.0, |
| "loss": 0.313, |
| "step": 350 |
| }, |
| { |
| "epoch": 4.942630185348632, |
| "step": 350, |
| "total_flos": 9.306564393200255e+18, |
| "train_loss": 0.0, |
| "train_runtime": 1.9909, |
| "train_samples_per_second": 91040.986, |
| "train_steps_per_second": 175.798 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 350, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.306564393200255e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|